# Download the videos dataset from [Google Drive](https://drive.google.com/file/d/1LU0gdV3p1ObZ7x7hKgsF-ppyt4-nJbeK/view?usp=sharing)

In [1]:
!pip install gdown



In [2]:
import gdown

file_id = '1LU0gdV3p1ObZ7x7hKgsF-ppyt4-nJbeK'
destination = '/content/dataset.zip'  # Path where the file will be saved
gdown.download(f'https://drive.google.com/uc?id={file_id}', destination, quiet=False)

Downloading...
From (original): https://drive.google.com/uc?id=1LU0gdV3p1ObZ7x7hKgsF-ppyt4-nJbeK
From (redirected): https://drive.google.com/uc?id=1LU0gdV3p1ObZ7x7hKgsF-ppyt4-nJbeK&confirm=t&uuid=dec7b0e0-3430-45cd-aa17-2949bc69989f
To: /content/dataset.zip
100%|██████████| 229M/229M [00:01<00:00, 122MB/s]


'/content/dataset.zip'

In [3]:
import zipfile

with zipfile.ZipFile(destination, 'r') as zip_ref:
    zip_ref.extractall('/content/dataset')

# Dividing the Videos into frames

In [4]:
import cv2
import os
from glob import glob

In [6]:
# Paths to the directories containing the fake and real videos
fake_videos_path = '/content/dataset/fake'
real_videos_path = '/content/dataset/real'

# Directories to save the frames
fake_frames_dir = '/content/images/fake'
real_frames_dir = '/content/images/real'
os.makedirs(fake_frames_dir, exist_ok=True)
os.makedirs(real_frames_dir, exist_ok=True)

In [7]:
def extract_frames_from_videos(videos_path, frames_dir, label):
    # Get all .mp4 files in the directory
    video_files = glob(os.path.join(videos_path, '*.mp4'))

    for video_file in video_files:
        # Open the video file
        cap = cv2.VideoCapture(video_file)
        video_name = os.path.splitext(os.path.basename(video_file))[0]

        frame_count = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # Save the frame as an image file
            frame_path = os.path.join(frames_dir, f'{label}_{video_name}_frame_{frame_count:04d}.jpg')
            cv2.imwrite(frame_path, frame)

            frame_count += 1

        # Release the video capture object
        cap.release()
        print(f'Total frames extracted from {video_file}: {frame_count}')


In [8]:
# Extract frames from fake videos
extract_frames_from_videos(fake_videos_path, fake_frames_dir, 'fake')

# Extract frames from real videos
extract_frames_from_videos(real_videos_path, real_frames_dir, 'real')

print('Frame extraction completed.')

Total frames extracted from /content/dataset/fake/fake_18.mp4: 272
Total frames extracted from /content/dataset/fake/fake_17.mp4: 785
Total frames extracted from /content/dataset/fake/fake_3.mp4: 108
Total frames extracted from /content/dataset/fake/fake_21.mp4: 342
Total frames extracted from /content/dataset/fake/fake_16.mp4: 151
Total frames extracted from /content/dataset/fake/fake_25.mp4: 123
Total frames extracted from /content/dataset/fake/fake_6.mp4: 97
Total frames extracted from /content/dataset/fake/fake_10.mp4: 62
Total frames extracted from /content/dataset/fake/fake_23.mp4: 191
Total frames extracted from /content/dataset/fake/fake_5.mp4: 86
Total frames extracted from /content/dataset/fake/fake_8.mp4: 55
Total frames extracted from /content/dataset/fake/fake_28.mp4: 404
Total frames extracted from /content/dataset/fake/fake_22.mp4: 201
Total frames extracted from /content/dataset/fake/fake_24.mp4: 304
Total frames extracted from /content/dataset/fake/fake_20.mp4: 461
Tot

In [10]:
import shutil

# Define the directory to zip and the output zip file path
directory_to_zip = '/content/images'
output_zip_path = '/content/images'

# Create a zip file of the directory
shutil.make_archive(output_zip_path, 'zip', directory_to_zip)

print(f'Zip file created: {output_zip_path}')


Zip file created: /content/images


# Put Dataset in Hugging Face

In [5]:
!pip install --upgrade datasets --q

In [1]:
from datasets import Dataset, DatasetDict
from datasets.filesystems import S3FileSystem

ImportError: cannot import name 'S3FileSystem' from 'datasets.filesystems' (/usr/local/lib/python3.10/dist-packages/datasets/filesystems/__init__.py)