<a href="https://colab.research.google.com/github/KaifAhmad1/code-test/blob/main/Deepfake_data.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install -qU kaggle

In [7]:
import os
import json
import sys
import pandas as pd
from pathlib import Path

In [8]:
def ensure_directory(directory):
    """Make sure ``directory`` exists and hand the same value back.

    Missing parent directories are created as well; a directory that is
    already present is left untouched.

    Args:
        directory: Path-like location of the directory to create.

    Returns:
        The ``directory`` argument, unchanged, so calls can be chained.
    """
    target = Path(directory)
    target.mkdir(parents=True, exist_ok=True)
    return directory

In [9]:
def manual_init_kaggle():
    """Prompt for Kaggle credentials and persist them for the Kaggle API.

    The username and key are exported as ``KAGGLE_USERNAME`` / ``KAGGLE_KEY``
    for the current process and written as ``kaggle.json`` into
    ``~/.kaggle`` and ``~/.config/kaggle``. Saving is best-effort per
    location: a failure in one location is reported and the next is tried.

    Returns:
        tuple[str, str]: The ``(username, key)`` pair that was entered,
        with surrounding whitespace stripped.

    Raises:
        ValueError: If the username or the key is empty.
        RuntimeError: If the credentials could not be written to any location.
    """
    # Function-scope import keeps this cell runnable on its own.
    import getpass

    print("\n=== Kaggle Manual Initialization ===")
    print("Please enter your Kaggle credentials:")
    username = input("Username: ").strip()
    # getpass does not echo what is typed, so the secret never ends up in
    # the saved notebook output the way an input() prompt does.
    key = getpass.getpass("API Key: ").strip()

    if not username or not key:
        raise ValueError("Kaggle username and API key must both be non-empty")

    # Set environment variables first
    os.environ['KAGGLE_USERNAME'] = username
    os.environ['KAGGLE_KEY'] = key

    # Both locations are derived from the current user's home rather than a
    # hard-coded /root, so the second one is also writable for non-root users.
    # When home is /root (as in Colab) the resulting paths are unchanged.
    home = Path.home()
    config_paths = [
        home / '.kaggle',
        home / '.config' / 'kaggle',
    ]

    credentials = {"username": username, "key": key}

    success = False
    for config_path in config_paths:
        cred_path = config_path / 'kaggle.json'
        try:
            ensure_directory(config_path)

            # Create the file with owner-only permissions from the start so
            # the key is never readable by others, not even briefly.
            fd = os.open(cred_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
            with os.fdopen(fd, 'w', encoding='utf-8') as f:
                json.dump(credentials, f)

            # A pre-existing file keeps its old mode on open(); tighten it too.
            os.chmod(cred_path, 0o600)
        except Exception as e:
            print(f"Warning: Could not save to {config_path}: {str(e)}")
        else:
            print(f"\n✅ Credentials saved to: {cred_path}")
            success = True

    if not success:
        raise RuntimeError("Failed to save credentials to any location")

    return username, key

In [10]:
def verify_kaggle_auth():
    """Build a Kaggle API client and authenticate it.

    The Kaggle package is imported inside the function, so an import
    failure is reported the same way as an authentication failure.

    Returns:
        The authenticated ``KaggleApi`` instance, or ``None`` when the
        import, construction or authentication raised an exception (the
        error is printed in that case).
    """
    try:
        from kaggle.api.kaggle_api_extended import KaggleApi
        client = KaggleApi()
        client.authenticate()
    except Exception as err:
        print(f"\n❌ Authentication Error: {err!s}")
        return None
    else:
        return client

In [11]:
def download_dfdc(api, output_dir='dfdc_dataset',
                  dataset='selimsef/dfdc-preview-face-forensics'):
    """Download a Kaggle dataset and summarise its ``metadata.json`` labels.

    Args:
        api: Authenticated client exposing
            ``dataset_download_files(dataset, path=..., unzip=...)``.
        output_dir: Directory the dataset is downloaded and unzipped into;
            created if missing.
        dataset: Kaggle dataset slug (``owner/name``) to download.

    Returns:
        bool: ``True`` when the download (and, if ``metadata.json`` is
        present, the summary) completed; ``False`` when any step raised,
        in which case the error is printed.

    Side effects:
        Writes ``videos_summary.csv`` (columns ``video``, ``label``) into
        ``output_dir`` when ``metadata.json`` exists there after download.
    """
    try:
        # Ensure output directory exists
        output_path = Path(output_dir)
        ensure_directory(output_path)

        print("\n📥 Downloading DFDC dataset...")
        print(f"Save location: {output_path.absolute()}")

        # Download dataset
        api.dataset_download_files(
            dataset,
            path=str(output_path),
            unzip=True
        )

        # Process metadata
        metadata_file = output_path / 'metadata.json'
        if not metadata_file.exists():
            print("\n⚠️ metadata.json not found; videos_summary.csv was not created")
            return True

        with open(metadata_file, encoding='utf-8') as f:
            metadata = json.load(f)

        # Each value may be a plain label string or a per-video dict that
        # carries the label under a 'label' key; flatten both to one label
        # so the CSV and the counts below are meaningful.
        rows = [
            (video, info.get('label') if isinstance(info, dict) else info)
            for video, info in metadata.items()
        ]

        # Create summary DataFrame
        df = pd.DataFrame(rows, columns=['video', 'label'])
        df.to_csv(output_path / 'videos_summary.csv', index=False)

        # Count case-insensitively so 'real'/'REAL' are treated alike.
        labels = df['label'].astype(str).str.upper()

        print("\n📊 Dataset Summary:")
        print(f"Total Videos: {len(df)}")
        print(f"Real Videos: {int((labels == 'REAL').sum())}")
        print(f"Fake Videos: {int((labels == 'FAKE').sum())}")

        return True

    except Exception as e:
        print(f"\n❌ Download Error: {str(e)}")
        return False

In [12]:
def main():
    """Interactive entry point: collect credentials, authenticate, download.

    Steps, in order: install the required packages, prompt for and store
    Kaggle credentials, verify authentication, ask for an output directory
    (empty input selects ``dfdc_dataset``), then download the dataset.

    Exits the process with status 1 (``SystemExit``) when authentication
    fails, when the download fails, or when any step raises an exception;
    in the last case troubleshooting tips are printed first.
    """
    # Function-scope import: only the dependency bootstrap below needs it.
    import subprocess

    print("\n🎬 DFDC Dataset Downloader (Manual Init)")
    print("=====================================")

    try:
        # Ensure required packages. Running pip through the current
        # interpreter installs into the environment this code runs in,
        # rather than whichever `pip` happens to be first on PATH, and
        # avoids going through a shell. A failed install is not fatal here;
        # later steps will surface a missing package.
        subprocess.run(
            [sys.executable, '-m', 'pip', 'install', '--quiet', 'kaggle', 'pandas'],
            check=False,
        )

        # Manual initialization (the returned credentials are not needed here)
        manual_init_kaggle()

        # Verify authentication
        print("\nVerifying Kaggle authentication...")
        api = verify_kaggle_auth()

        if api is None:
            print("\n❌ Authentication failed. Please check your credentials.")
            sys.exit(1)

        print("✅ Authentication successful!")

        # Get output location
        default_dir = "dfdc_dataset"
        user_dir = input(f"\nEnter output directory (press Enter for '{default_dir}'): ").strip()
        output_dir = user_dir or default_dir

        # Download dataset
        if download_dfdc(api, output_dir):
            print("\n✅ Download completed successfully!")
            print(f"\n📁 Dataset location: {Path(output_dir).absolute()}")
            print("📝 Check videos_summary.csv for video labels")
        else:
            print("\n❌ Download failed.")
            sys.exit(1)

    # SystemExit is not an Exception subclass, so the sys.exit calls above
    # pass through this handler untouched.
    except Exception as e:
        print(f"\n❌ Error: {str(e)}")
        print("\n💡 Troubleshooting Tips:")
        print("1. Make sure you're using valid Kaggle credentials")
        print("2. Check your internet connection")
        print("3. Verify you have enough disk space")
        print("4. Try running the script with sudo if permission errors occur")
        sys.exit(1)

# Entry point: start the interactive downloader when this code is executed
# directly (as a script or by running this notebook cell).
if __name__ == "__main__":
    main()


🎬 DFDC Dataset Downloader (Manual Init)

=== Kaggle Manual Initialization ===
Please enter your Kaggle credentials:
Username: kaif8755
API Key: [REDACTED - this key was exposed in saved notebook output and should be revoked]

✅ Credentials saved to: /root/.kaggle/kaggle.json

✅ Credentials saved to: /root/.config/kaggle/kaggle.json

Verifying Kaggle authentication...
✅ Authentication successful!

Enter output directory (press Enter for 'dfdc_dataset'): dfdc_dataset

📥 Downloading DFDC dataset...
Save location: /content/dfdc_dataset
Dataset URL: https://www.kaggle.com/datasets/selimsef/dfdc-preview-face-forensics

❌ Download Error: (403)
Reason: Forbidden
HTTP response headers: HTTPHeaderDict({'Content-Type': 'application/json', 'Date': 'Wed, 29 Jan 2025 08:25:00 GMT', 'Access-Control-Allow-Credentials': 'true', 'Access-Control-Allow-Origin': '*', 'Set-Cookie': 'ka_sessionid=661064b5f1bc53f5d867b17d95e0e1c5; max-age=2626560; path=/, GCLB=CKDt-ISB4K-MXRAD; path=/; HttpOnly', 'Vary': 'Accept-Encoding', 'X-Kaggle-MillisecondsElapsed': 

SystemExit: 1

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
