<a href="https://colab.research.google.com/github/KaifAhmad1/code-test/blob/main/Deepfake_data.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install -qU kaggle

In [None]:
import getpass
import json
import os
import sys
from pathlib import Path

import pandas as pd

In [3]:
def init_kaggle_api(username=None, key=None):
    """
    Create and authenticate a Kaggle API client.

    Parameters:
    username (str, optional): Kaggle username.
    key (str, optional): Kaggle API key.

    When both ``username`` and ``key`` are truthy they are exported as the
    ``KAGGLE_USERNAME`` / ``KAGGLE_KEY`` environment variables before the
    client authenticates. If either one is missing the environment is left
    untouched and ``authenticate()`` uses whatever credentials it can find
    on its own.

    Returns:
    KaggleApi: the client, after ``authenticate()`` has been called on it.

    Raises:
    Whatever importing the kaggle package or ``authenticate()`` raises
    (e.g. when the package is not installed or no credentials are found).
    """
    if username and key:
        os.environ['KAGGLE_USERNAME'] = username
        os.environ['KAGGLE_KEY'] = key

    # Imported locally, and only after the environment variables are set:
    # the name is not imported by the notebook's import cell, and
    # NOTE(review): some kaggle releases authenticate at import time, so a
    # top-level import could fail before credentials exist - confirm.
    from kaggle.api.kaggle_api_extended import KaggleApi

    api = KaggleApi()
    api.authenticate()
    return api

In [4]:
def setup_kaggle_credentials(username, key):
    """
    Persist Kaggle API credentials to ``~/.kaggle/kaggle.json``.

    Parameters:
    username (str): Kaggle username stored under the "username" key.
    key (str): Kaggle API key stored under the "key" key.

    Returns:
    bool: True when the file was written, False when any step failed
    (the error is printed rather than raised).
    """
    try:
        kaggle_dir = Path.home() / '.kaggle'
        kaggle_dir.mkdir(parents=True, exist_ok=True)

        credentials = {
            "username": username,
            "key": key
        }

        credentials_path = kaggle_dir / 'kaggle.json'

        # Create the file owner-only from the start, so the API key is never
        # on disk with default (umask-derived) permissions, not even briefly.
        fd = os.open(credentials_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
        with os.fdopen(fd, 'w') as f:
            json.dump(credentials, f)

        # The mode passed to os.open only applies to newly created files;
        # a pre-existing kaggle.json keeps its old mode, so tighten it too.
        os.chmod(credentials_path, 0o600)
        print(f"Credentials saved to: {credentials_path}")
        return True
    except Exception as e:
        print(f"Error setting up credentials: {str(e)}")
        return False

In [5]:
def _metadata_to_frame(metadata):
    """Flatten a {filename: entry} mapping into a (filename, label) DataFrame.

    An entry may be the label string itself or a dict carrying a 'label' key.
    """
    rows = [
        (filename, entry.get('label') if isinstance(entry, dict) else entry)
        for filename, entry in metadata.items()
    ]
    return pd.DataFrame(rows, columns=['filename', 'label'])


def download_dfdc_dataset(api, subset_size=None):
    """
    Download the DFDC Preview dataset and write a label summary next to it.

    The dataset is downloaded and unzipped into ``~/Downloads/dfdc_dataset``.
    If a ``metadata.json`` is present there afterwards, a ``summary.csv`` with
    ``filename`` and ``label`` columns is written to the same directory and
    the REAL/FAKE counts are printed.

    Parameters:
    api (KaggleApi): Initialized Kaggle API instance
    subset_size (int): Currently unused - the whole dataset is always
        downloaded. Kept so existing callers keep working.

    Returns:
    bool: True on success, False if any step raised (the error is printed).
    """
    try:
        # DFDC Preview dataset path
        dataset_path = 'selimsef/dfdc-preview-face-forensics'

        # Create download directory
        base_dir = os.path.join(os.path.expanduser('~'), 'Downloads', 'dfdc_dataset')
        os.makedirs(base_dir, exist_ok=True)

        print("Downloading DFDC Preview dataset...")

        # Download the entire dataset
        api.dataset_download_files(dataset_path, path=base_dir, unzip=True)

        # Load metadata
        metadata_path = os.path.join(base_dir, 'metadata.json')
        if os.path.exists(metadata_path):
            with open(metadata_path, 'r') as f:
                metadata = json.load(f)

            # Normalise entries first: comparing a dict-valued entry against
            # 'REAL'/'FAKE' would never match and every count would be 0.
            df = _metadata_to_frame(metadata)
            print("\nDataset Summary:")
            print(f"Total videos: {len(df)}")
            print(f"Real videos: {len(df[df['label'] == 'REAL'])}")
            print(f"Fake videos: {len(df[df['label'] == 'FAKE'])}")

            # Save summary to CSV
            summary_path = os.path.join(base_dir, 'summary.csv')
            df.to_csv(summary_path, index=False)
            print(f"\nSummary saved to: {summary_path}")
        else:
            # Make the skipped summary visible instead of silently moving on.
            print(f"\nNo metadata.json found in {base_dir}; skipping summary.")

        print(f"\nDownload complete! Files saved to: {base_dir}")
        print("\nFolder structure:")
        print(f"{base_dir}/")
        print("├── metadata.json")
        print("├── summary.csv")
        print("└── videos/")
        return True

    except Exception as e:
        print(f"Error downloading dataset: {str(e)}")
        return False

In [None]:
def main():
    """
    Interactive entry point: prompt for Kaggle credentials, then download.

    Prompts for a username and API key, initializes the Kaggle API with
    them, optionally saves them via setup_kaggle_credentials, and runs
    download_dfdc_dataset. Any exception raised along the way is printed
    and the process exits with status 1.
    """
    print("Welcome to the DFDC Dataset Downloader!")
    print("This script will download the Deepfake Detection Challenge Preview Dataset.")

    # Get credentials. The key is read with getpass so the secret is not
    # echoed into the cell output (which is stored with the notebook).
    # Surrounding whitespace from copy/paste is dropped from both values.
    username = input("Enter Kaggle username: ").strip()
    key = getpass.getpass("Enter Kaggle API key: ").strip()

    try:
        # Initialize API
        api = init_kaggle_api(username, key)
        print("API initialized successfully!")

        # Offer to save credentials
        save = input("Would you like to save these credentials for future use? (y/n): ")
        if save.lower() == 'y':
            setup_kaggle_credentials(username, key)

        # Download dataset
        if download_dfdc_dataset(api):
            print("\nScript completed successfully!")
            print("\nNote: The dataset includes both real and manipulated videos.")
            print("You can find the labels in the metadata.json file.")
        else:
            print("Script failed to complete.")

    except Exception as e:
        print(f"Error: {str(e)}")
        sys.exit(1)

# Start the interactive downloader when this code is executed directly
# (not when it is imported as a module).
if __name__ == "__main__":
    main()

Welcome to the DFDC Dataset Downloader!
This script will download the Deepfake Detection Challenge Preview Dataset.
