In [1]:

# Cell 2: Imports
from google.oauth2 import service_account
from googleapiclient.discovery import build
import json
import os

# Cell 3: Configuration
# Replace these with your values, or override them without editing the
# notebook by setting DRIVE_FOLDER_ID / DRIVE_CREDENTIALS_FILE /
# DRIVE_OUTPUT_FILE in the environment before the kernel starts.
# When a variable is not set, the value written below is used.
FOLDER_ID = os.environ.get(
    "DRIVE_FOLDER_ID",
    "1uGuGKORFUD50U_wLwGEoV6nTBBblHsV7",
)  # Your Google Drive folder ID
# Path to the service-account JSON key file (machine-specific default).
CREDENTIALS_FILE = os.environ.get(
    "DRIVE_CREDENTIALS_FILE",
    r"C:\Users\Shailendra\Desktop\hindi conversion\Gorakphur LPG BP - Google Drive\credentials.json",
)
# Name of the JSON file the extraction writes.
OUTPUT_FILE = os.environ.get("DRIVE_OUTPUT_FILE", "jrm_hindi_drive.json")
LOCAL_VERIFY_PATH = None  # or "reports/JRM_HINDI"

# Echo the effective settings so the cell output records what was used.
print("üìã Configuration:")
print(f"   Folder ID: {FOLDER_ID}")
print(f"   Credentials: {CREDENTIALS_FILE}")
print(f"   Output: {OUTPUT_FILE}")

# Cell 4: Define Extractor Class
class DriveLinksExtractor:
    """Extract preview and download links from Google Drive folder.

    Lists the PDF files directly inside one Drive folder through a
    service-account client and writes a JSON mapping of file name to the
    file's Drive id and the URLs derived from that id.
    """

    def __init__(self, credentials_file):
        """Create a Drive v3 client from a service-account key file.

        Args:
            credentials_file (str): Path to the service-account JSON key.
        """
        SCOPES = ['https://www.googleapis.com/auth/drive.readonly']
        creds = service_account.Credentials.from_service_account_file(
            credentials_file, scopes=SCOPES)
        self.service = build('drive', 'v3', credentials=creds)

    def get_folder_files(self, folder_id):
        """Return every non-trashed PDF whose parent is ``folder_id``.

        The listing is paginated: each response may carry a
        ``nextPageToken`` and the call is repeated with that token until the
        API stops returning one, so folders larger than one page are read
        completely.

        Args:
            folder_id (str): Google Drive folder ID.

        Returns:
            list: Dicts with ``id`` and ``name`` for each file found.
        """
        query = f"'{folder_id}' in parents and mimeType='application/pdf' and trashed=false"
        files = []
        page_token = None  # None requests the first page
        while True:
            response = self.service.files().list(
                q=query,
                # nextPageToken must be requested explicitly, otherwise the
                # partial-response filter strips it and paging is impossible.
                fields="nextPageToken, files(id, name)",
                pageSize=1000,
                pageToken=page_token
            ).execute()
            files.extend(response.get('files', []))
            page_token = response.get('nextPageToken')
            if not page_token:
                break
        return files

    def generate_links(self, file_id):
        """Build the preview and download URLs for one file id.

        Args:
            file_id (str): Google Drive file ID.

        Returns:
            dict: ``{'preview': ..., 'download': ...}`` URL strings.
        """
        return {
            'preview': f'https://drive.google.com/file/d/{file_id}/preview',
            'download': f'https://drive.google.com/uc?export=download&id={file_id}'
        }

    def extract_to_json(self, folder_id, output_file):
        """List the folder, build link records and save them as JSON.

        Args:
            folder_id (str): Google Drive folder ID.
            output_file (str): Path of the JSON file to write.

        Returns:
            dict: Mapping of file name to its record (``file_id``,
            ``preview_url``, ``download_url``, ``exists``). Empty, and
            nothing is written, when the folder listing is empty.
        """
        print("üìÇ Fetching files from Google Drive...")
        files = self.get_folder_files(folder_id)

        if not files:
            print("‚ö†Ô∏è  No files found!")
            return {}

        print(f"‚úÖ Found {len(files)} PDF files\n")

        drive_data = {}
        duplicate_names = []

        print("üîó Generating links...")
        for i, file in enumerate(files, 1):
            filename = file['name']
            file_id = file['id']
            links = self.generate_links(file_id)

            # The mapping is keyed by name, so a repeated name replaces the
            # earlier record; remember it so the loss is reported below.
            if filename in drive_data:
                duplicate_names.append(filename)

            drive_data[filename] = {
                'file_id': file_id,
                'preview_url': links['preview'],
                'download_url': links['download'],
                'exists': True
            }

            # Progress indicator: every tenth file and the final one
            if i % 10 == 0 or i == len(files):
                print(f"   Processed: {i}/{len(files)}")

        if duplicate_names:
            print(f"Warning: {len(duplicate_names)} file(s) share a name with "
                  f"another file in the folder; only the last one listed is kept.")

        # Save to JSON (ensure_ascii=False keeps non-ASCII names readable)
        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(drive_data, f, indent=2, ensure_ascii=False)

        print(f"\n‚úÖ Saved to: {output_file}")
        print(f"üìä Total files: {len(drive_data)}")

        return drive_data

# Cell 5: Run Extraction
# Builds the client, exports the folder listing to OUTPUT_FILE and previews
# the first few records. Any exception is reduced to a one-line message.
try:
    extractor = DriveLinksExtractor(CREDENTIALS_FILE)
    data = extractor.extract_to_json(FOLDER_ID, OUTPUT_FILE)

    print("\nüéâ Extraction complete!")
    print("\nüìÑ Sample output (first 3 files):")

    # Preview at most three entries, numbered from 1.
    preview_names = list(data)[:3]
    for position, filename in enumerate(preview_names, start=1):
        info = data[filename]
        print(f"\n{position}. {filename}")
        print(f"   Preview: {info['preview_url']}")
        print(f"   Download: {info['download_url']}")

except Exception as exc:
    print(f"‚ùå Error: {exc}")

# Cell 6: (Optional) View the JSON
# Guarded so the cell reports what is wrong instead of raising when the
# extraction wrote no file (it returns early when no files are found) or
# when the saved mapping is empty.
if not os.path.exists(OUTPUT_FILE):
    print(f"Output file not found: {OUTPUT_FILE} (run the extraction cell first)")
else:
    with open(OUTPUT_FILE, 'r', encoding='utf-8') as f:
        json_data = json.load(f)

    print(f"Total files in JSON: {len(json_data)}")
    if json_data:
        print("\nFirst entry:")
        # next(iter(...)) yields the first key without copying all keys
        first_key = next(iter(json_data))
        print(json.dumps({first_key: json_data[first_key]}, indent=2))
    else:
        print("\nNo entries to show.")

An error occurred: module 'importlib.metadata' has no attribute 'packages_distributions'
üìã Configuration:
   Folder ID: 1uGuGKORFUD50U_wLwGEoV6nTBBblHsV7
   Credentials: C:\Users\Shailendra\Desktop\hindi conversion\Gorakphur LPG BP - Google Drive\credentials.json
   Output: jrm_hindi_drive.json
üìÇ Fetching files from Google Drive...




‚úÖ Found 181 PDF files

üîó Generating links...
   Processed: 10/181
   Processed: 20/181
   Processed: 30/181
   Processed: 40/181
   Processed: 50/181
   Processed: 60/181
   Processed: 70/181
   Processed: 80/181
   Processed: 90/181
   Processed: 100/181
   Processed: 110/181
   Processed: 120/181
   Processed: 130/181
   Processed: 140/181
   Processed: 150/181
   Processed: 160/181
   Processed: 170/181
   Processed: 180/181
   Processed: 181/181

‚úÖ Saved to: jrm_hindi_drive.json
üìä Total files: 181

üéâ Extraction complete!

üìÑ Sample output (first 3 files):

1. IPPL Bottling Plant Haldia to IOCL LPG BP Gorakhpur Bulk.pdf
   Preview: https://drive.google.com/file/d/1fAA4jsN93zxLaM3bkedfgUJwrIBdG6nE/preview
   Download: https://drive.google.com/uc?export=download&id=1fAA4jsN93zxLaM3bkedfgUJwrIBdG6nE

2. IOCL LPG BP Patna to IOCL LPG BP Gorakhpur Bulk.pdf
   Preview: https://drive.google.com/file/d/1sSSTxiCPiIGTeFFOhY1EMJppKcmNJi3V/preview
   Download: https://drive.googl

In [2]:
# extract_drive_links.py
"""
Complete script to extract Google Drive links and make files public
Author: Your Name
Date: 2026-01-01
"""

from google.oauth2 import service_account
from googleapiclient.discovery import build
import json
import os


class DriveLinksExtractor:
    """Extract preview and download links from Google Drive folder.

    Lists the PDF files directly inside one Drive folder, optionally grants
    "anyone / reader" permission on each file, and writes a JSON mapping of
    file name to the file's Drive id and derived URLs.
    """

    def __init__(self, credentials_file):
        """
        Initialize with service account credentials
        Note: Using full drive scope to enable making files public

        Args:
            credentials_file (str): Path to the service-account JSON key
        """
        SCOPES = ['https://www.googleapis.com/auth/drive']  # Full access (not readonly)

        print("üîë Loading credentials...")
        creds = service_account.Credentials.from_service_account_file(
            credentials_file, scopes=SCOPES)
        self.service = build('drive', 'v3', credentials=creds)
        print("‚úÖ Credentials loaded successfully\n")

    def get_folder_files(self, folder_id):
        """
        Get all PDF files from a Google Drive folder

        The listing is paginated: the request is repeated with each
        ``nextPageToken`` until the API stops returning one, so folders
        larger than a single page are read completely.

        Args:
            folder_id (str): Google Drive folder ID

        Returns:
            list: List of files with id and name. Empty when the folder has
            no matching files or when any request fails (the error is
            printed); a partial listing is never returned.
        """
        query = f"'{folder_id}' in parents and mimeType='application/pdf' and trashed=false"

        files = []
        page_token = None  # None requests the first page
        try:
            while True:
                response = self.service.files().list(
                    q=query,
                    # nextPageToken must be requested explicitly, otherwise
                    # the partial-response filter strips it.
                    fields="nextPageToken, files(id, name)",
                    pageSize=1000,
                    pageToken=page_token
                ).execute()
                files.extend(response.get('files', []))
                page_token = response.get('nextPageToken')
                if not page_token:
                    return files
        except Exception as e:
            print(f"‚ùå Error fetching files: {e}")
            return []

    def make_file_public(self, file_id):
        """
        Make a file publicly accessible (anyone with link can view)

        Failures are best-effort: the reason is printed and False is
        returned so the caller can continue with the remaining files.

        Args:
            file_id (str): Google Drive file ID

        Returns:
            bool: True if successful, False otherwise
        """
        try:
            self.service.permissions().create(
                fileId=file_id,
                body={
                    'type': 'anyone',
                    'role': 'reader'
                },
                fields='id'
            ).execute()
            return True
        except Exception as e:
            # Do not abort the batch, but say why this file failed so the
            # failure count in the summary can be diagnosed.
            print(f"   Could not make {file_id} public: {e}")
            return False

    def generate_links(self, file_id):
        """
        Generate preview and download links for a file

        Args:
            file_id (str): Google Drive file ID

        Returns:
            dict: Dictionary with preview and download URLs
        """
        return {
            'preview': f'https://drive.google.com/file/d/{file_id}/preview',
            'download': f'https://drive.google.com/uc?export=download&id={file_id}'
        }

    def extract_to_json(self, folder_id, output_file='jrm_hindi_drive.json',
                        make_public=True, verify_path=None):
        """
        Extract all files, make them public, and save to JSON

        Args:
            folder_id (str): Google Drive folder ID
            output_file (str): Output JSON filename
            make_public (bool): Automatically make files public
            verify_path (str): Optional local path to verify against

        Returns:
            dict: Extracted data keyed by file name; empty (and nothing is
            written) when the folder listing is empty
        """
        print("üìÇ Fetching files from Google Drive...")
        files = self.get_folder_files(folder_id)

        if not files:
            print("‚ö†Ô∏è  No files found! Check:")
            print("   1. Folder ID is correct")
            print("   2. Service account has Editor access to the folder")
            print("   3. Folder contains PDF files")
            return {}

        print(f"‚úÖ Found {len(files)} PDF files\n")

        # Build the data structure
        drive_data = {}
        success_count = 0
        fail_count = 0
        duplicate_names = []

        if make_public:
            print("üîì Making files publicly accessible and generating links...")
        else:
            print("üîó Generating links (without making public)...")
        print("-" * 60)

        for i, file in enumerate(files, 1):
            filename = file['name']
            file_id = file['id']

            # Make file public if requested
            if make_public:
                success = self.make_file_public(file_id)
                if success:
                    status = "‚úì"
                    success_count += 1
                else:
                    status = "‚úó"
                    fail_count += 1
                print(f"   {status} [{i:3d}/{len(files)}] {filename}")
            else:
                print(f"   ‚úì [{i:3d}/{len(files)}] {filename}")

            # The mapping is keyed by name, so a repeated name replaces the
            # earlier record; remember it so the loss is reported below.
            if filename in drive_data:
                duplicate_names.append(filename)

            # Generate links
            links = self.generate_links(file_id)

            drive_data[filename] = {
                'file_id': file_id,
                'preview_url': links['preview'],
                'download_url': links['download'],
                'exists': True
            }

        print("-" * 60)

        if duplicate_names:
            print(f"Warning: {len(duplicate_names)} file(s) share a name with "
                  f"another file in the folder; only the last one listed is kept.")

        # Summary
        if make_public:
            print(f"\nüìä Summary:")
            print(f"   Total files: {len(files)}")
            print(f"   Made public: {success_count} ‚úì")
            if fail_count > 0:
                print(f"   Failed: {fail_count} ‚úó")
                print(f"\n‚ö†Ô∏è  Some files couldn't be made public.")
                print(f"   This might happen if:")
                print(f"   - Service account doesn't have Editor role")
                print(f"   - Files are in a shared drive with restrictions")

        # Optional: Verify against local files
        if verify_path:
            self._verify_local(drive_data, verify_path)

        # Save to JSON (ensure_ascii=False keeps non-ASCII names readable)
        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(drive_data, f, indent=2, ensure_ascii=False)

        abs_path = os.path.abspath(output_file)
        print(f"\n‚úÖ Successfully saved to: {output_file}")
        print(f"üìÅ Full path: {abs_path}")
        print(f"üìä Total entries: {len(drive_data)}")

        return drive_data

    def _verify_local(self, drive_data, local_path):
        """Verify Drive files against local directory

        Compares the names in ``drive_data`` with the ``.pdf`` file names in
        ``local_path`` and prints a report; at most five names per
        difference are listed, in sorted order.

        Args:
            drive_data (dict): Mapping keyed by Drive file name
            local_path (str): Local directory to compare against
        """
        # isdir (not exists): os.listdir below needs a directory
        if not os.path.isdir(local_path):
            print(f"\n‚ö†Ô∏è  Local path not found: {local_path}")
            print("   Skipping verification...")
            return

        local_files = {f for f in os.listdir(local_path) if f.endswith('.pdf')}
        drive_files = set(drive_data)

        missing_in_drive = local_files - drive_files
        extra_in_drive = drive_files - local_files

        print("\n" + "="*60)
        print("üìä VERIFICATION REPORT")
        print("="*60)
        print(f"Files in Google Drive: {len(drive_files)}")
        print(f"Files in local folder: {len(local_files)}")

        if missing_in_drive:
            print(f"\n‚ö†Ô∏è  Files in local but NOT in Drive ({len(missing_in_drive)}):")
            # Sort first, then slice, so the sample is deterministic
            for f in sorted(missing_in_drive)[:5]:
                print(f"   - {f}")
            if len(missing_in_drive) > 5:
                print(f"   ... and {len(missing_in_drive) - 5} more")

        if extra_in_drive:
            print(f"\n‚ö†Ô∏è  Files in Drive but NOT in local ({len(extra_in_drive)}):")
            for f in sorted(extra_in_drive)[:5]:
                print(f"   - {f}")
            if len(extra_in_drive) > 5:
                print(f"   ... and {len(extra_in_drive) - 5} more")

        if not missing_in_drive and not extra_in_drive:
            print("\n‚úÖ Perfect sync! All files match.")

        print("="*60)


def main(folder_id="1uGuGKORFUD50U_wLwGEoV6nTBBblHsV7",
         credentials_file=r"C:\Users\Shailendra\Desktop\hindi conversion\Gorakphur LPG BP - Google Drive\credentials.json",
         output_file="jrm_hindi_drive.json",
         verify_path=r"C:\Users\Shailendra\Desktop\hindi conversion\Gorakphur LPG BP - Google Drive\reports\JRM_HINDI",
         make_public=True,
         confirm=True):
    """
    Main function to run the extraction

    Prints the configuration, validates it, optionally waits for the user to
    confirm, then runs DriveLinksExtractor.extract_to_json and prints a
    short sample of the result. Calling ``main()`` with no arguments uses
    the values given as defaults in the signature.

    Args:
        folder_id (str): Google Drive folder ID, taken from the folder URL
            https://drive.google.com/drive/folders/FOLDER_ID
        credentials_file (str): Path to the Service Account credentials
            JSON file
        output_file (str): Output JSON filename
        verify_path (str): Optional local path to verify against; a falsy
            value skips verification
        make_public (bool): Make files publicly accessible
        confirm (bool): Wait for Enter before contacting Drive; pass False
            for non-interactive runs (e.g. "Run All" or scheduled jobs)

    Returns:
        None. Validation failures and extraction errors are reported by
        printing, not by raising.
    """
    print("="*60)
    print("  Google Drive Links Extractor for JRM Hindi PDFs")
    print("="*60)
    print()

    # ============================================
    # VALIDATION
    # ============================================

    print("üìã Configuration:")
    print(f"   Folder ID: {folder_id}")
    print(f"   Credentials: {credentials_file}")
    print(f"   Output file: {output_file}")
    print(f"   Make public: {make_public}")
    if verify_path:
        print(f"   Verify against: {verify_path}")
    print()

    # Check if credentials file exists
    if not os.path.exists(credentials_file):
        print(f"‚ùå ERROR: Credentials file not found!")
        print(f"   Looking for: {credentials_file}")
        print(f"   Please download your Service Account JSON and save it as '{credentials_file}'")
        print()
        print("Steps to get credentials:")
        print("1. Go to Google Cloud Console")
        print("2. Enable Google Drive API")
        print("3. Create Service Account")
        print("4. Download JSON key")
        return

    # Check folder ID (placeholder text or empty value)
    if folder_id == "YOUR_FOLDER_ID_HERE" or not folder_id:
        print("‚ùå ERROR: Please set your FOLDER_ID in the script")
        print()
        print("How to get Folder ID:")
        print("1. Open your Drive folder in browser")
        print("2. Copy the ID from URL:")
        print("   https://drive.google.com/drive/folders/1a2b3c4d5e6f7g8h9")
        print("                                         ^^^^^^^^^^^^^^^^^^^")
        print("3. Paste it in the FOLDER_ID variable")
        return

    # Check service account permissions
    print("‚ö†Ô∏è  IMPORTANT: Make sure you've shared the folder with your service account!")
    print("   1. Right-click on Drive folder ‚Üí Share")
    print("   2. Add your service account email (from credentials.json)")
    print("   3. Give it 'Editor' access (not just 'Viewer')")
    print("   4. Click 'Share'")
    print()

    # The prompt is skippable so the function can run unattended.
    if confirm:
        input("Press Enter to continue...")
    print()

    # ============================================
    # RUN THE EXTRACTION
    # ============================================

    try:
        # Create extractor instance
        extractor = DriveLinksExtractor(credentials_file)

        # Extract and save
        data = extractor.extract_to_json(
            folder_id=folder_id,
            output_file=output_file,
            make_public=make_public,
            verify_path=verify_path
        )

        if data:
            print("\n" + "="*60)
            print("üéâ SUCCESS! Extraction complete.")
            print("="*60)

            # Show sample
            print("\nüìÑ Sample output (first 2 files):")
            for i, (filename, info) in enumerate(list(data.items())[:2]):
                print(f"\n{i+1}. {filename}")
                print(f"   File ID: {info['file_id']}")
                print(f"   Preview: {info['preview_url']}")
                print(f"   Download: {info['download_url']}")

            print(f"\nüí° Next steps:")
            print(f"   1. Test a preview link in incognito browser")
            print(f"   2. Use {output_file} in your Flask app")
            print(f"   3. Deploy your app to Render")

        else:
            # extract_to_json returns an empty dict when nothing was listed
            print("\n‚ùå Extraction failed. Please check the errors above.")

    except Exception as e:
        print(f"\n‚ùå FATAL ERROR: {e}")
        print("\nTroubleshooting:")
        print("1. Verify credentials.json is valid")
        print("2. Check that service account has Editor access to folder")
        print("3. Confirm Google Drive API is enabled in your project")
        print("4. Make sure folder ID is correct")


# Entry point: call main() only when this code runs as the top-level module
# (its __name__ is "__main__"), not when it is imported from elsewhere.
if __name__ == "__main__":
    main()

  Google Drive Links Extractor for JRM Hindi PDFs

üìã Configuration:
   Folder ID: 1uGuGKORFUD50U_wLwGEoV6nTBBblHsV7
   Credentials: C:\Users\Shailendra\Desktop\hindi conversion\Gorakphur LPG BP - Google Drive\credentials.json
   Output file: jrm_hindi_drive.json
   Make public: True
   Verify against: C:\Users\Shailendra\Desktop\hindi conversion\Gorakphur LPG BP - Google Drive\reports\JRM_HINDI

‚ö†Ô∏è  IMPORTANT: Make sure you've shared the folder with your service account!
   1. Right-click on Drive folder ‚Üí Share
   2. Add your service account email (from credentials.json)
   3. Give it 'Editor' access (not just 'Viewer')
   4. Click 'Share'


üîë Loading credentials...
‚úÖ Credentials loaded successfully

üìÇ Fetching files from Google Drive...
‚úÖ Found 181 PDF files

üîì Making files publicly accessible and generating links...
------------------------------------------------------------
   ‚úì [  1/181] IPPL Bottling Plant Haldia to IOCL LPG BP Gorakhpur Bulk.pdf
   ‚úì [