In [None]:
!pip install opencv-python numpy dlib librosa torch torchvision torchaudio tqdm ffmpeg-python gdown




In [None]:
import kagglehub

# Download the latest version of the "reubensuju/celeb-df-v2" dataset through
# kagglehub; `path` is the local folder returned by dataset_download().
# NOTE(review): later cells read videos from /content/data/real and
# /content/data/fake, but nothing visible in this notebook copies the
# downloaded files there — confirm how those folders get populated.
path = kagglehub.dataset_download("reubensuju/celeb-df-v2")

print("Path to dataset files:", path)

Resuming download from 3679453184 bytes (6290610273 bytes left)...
Resuming download from https://www.kaggle.com/api/v1/datasets/download/reubensuju/celeb-df-v2?dataset_version_number=1 (3679453184/9970063457) bytes left.


100%|██████████| 9.29G/9.29G [04:41<00:00, 22.4MB/s]

Extracting files...





Path to dataset files: /root/.cache/kagglehub/datasets/reubensuju/celeb-df-v2/versions/1


In [None]:
# Fetch the dlib 68-point landmark model. The cell is safe to re-run:
#   -nc  skips the download when the archive is already present,
#   -k   keeps the archive so -nc has something to find next time,
#   -f   lets bzip2 overwrite a previously extracted .dat file.
# The https URL is used directly because the http one only redirects to it.
!wget -nc https://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2
!bzip2 -dkf shape_predictor_68_face_landmarks.dat.bz2

--2025-03-20 10:06:18--  http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2
Resolving dlib.net (dlib.net)... 107.180.26.78
Connecting to dlib.net (dlib.net)|107.180.26.78|:80... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: https://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2 [following]
--2025-03-20 10:06:18--  https://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2
Connecting to dlib.net (dlib.net)|107.180.26.78|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 64040097 (61M)
Saving to: ‘shape_predictor_68_face_landmarks.dat.bz2’


2025-03-20 10:06:24 (11.3 MB/s) - ‘shape_predictor_68_face_landmarks.dat.bz2’ saved [64040097/64040097]



In [None]:
# Phase 1: Data Collection & Preprocessing
# Project Setup for Deepfake Detection System

# Step 1: Install Dependencies
# Install required libraries
# Run: pip install opencv-python numpy pandas dlib librosa torch torchvision torchaudio tqdm

import os
import cv2
import dlib
import librosa
import numpy as np
import torch
import torchaudio
from torchvision import transforms
from tqdm import tqdm

# Project layout: every data folder lives under DATA_DIR, so the sub-paths are
# derived from it instead of being spelled out one by one.
PROJECT_ROOT = "/content/deepfake_detection"
DATA_DIR = "/content/data"
REAL_VIDEOS = f"{DATA_DIR}/real"
FAKE_VIDEOS = f"{DATA_DIR}/fake"
FRAMES_DIR = f"{DATA_DIR}/frames"
AUDIO_DIR = f"{DATA_DIR}/audio"

# Make sure the whole tree exists before any cell tries to read or write it.
for folder in (PROJECT_ROOT, DATA_DIR, REAL_VIDEOS, FAKE_VIDEOS, FRAMES_DIR, AUDIO_DIR):
    os.makedirs(folder, exist_ok=True)

# Step 2: Extract Frames from Videos

def extract_frames(video_path, output_folder, frame_interval=5):
    """Save every ``frame_interval``-th frame of a video as a JPEG file.

    Frames are written to ``output_folder`` as ``frame_<index>.jpg`` where
    ``<index>`` is the frame's position in the video (0, 5, 10, ... with the
    default interval).

    Args:
        video_path: Path of the video file to read.
        output_folder: Folder receiving the JPEG files; created if missing.
        frame_interval: Keep one frame out of this many; must be positive.

    Raises:
        ValueError: If ``frame_interval`` is not positive.
    """
    import warnings  # local import: the notebook's import cell does not provide it

    if frame_interval <= 0:
        raise ValueError(f"frame_interval must be positive, got {frame_interval}")

    # cv2.imwrite does not create missing folders, so make sure the target exists.
    os.makedirs(output_folder, exist_ok=True)

    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            # Warn instead of raising so a loop over many videos keeps going.
            warnings.warn(f"Could not open video: {video_path}", RuntimeWarning)
            return

        frame_count = 0
        success, image = cap.read()
        while success:
            if frame_count % frame_interval == 0:
                frame_filename = os.path.join(output_folder, f"frame_{frame_count}.jpg")
                cv2.imwrite(frame_filename, image)
            success, image = cap.read()
            frame_count += 1
    finally:
        # Release the capture even if reading or writing raised.
        cap.release()

# Step 3: Extract Facial Landmarks

# Cache of (detector, predictor) pairs keyed by predictor file path, so the
# model file is read from disk only once per kernel session.
_LANDMARK_MODEL_CACHE = {}


def _get_landmark_models(predictor_path="shape_predictor_68_face_landmarks.dat"):
    """Return the dlib (detector, predictor) pair, loading it on first use only."""
    if predictor_path not in _LANDMARK_MODEL_CACHE:
        _LANDMARK_MODEL_CACHE[predictor_path] = (
            dlib.get_frontal_face_detector(),
            dlib.shape_predictor(predictor_path),
        )
    return _LANDMARK_MODEL_CACHE[predictor_path]


def extract_facial_landmarks(image_path):
    """Detect the facial landmarks of the first face found in an image.

    Args:
        image_path: Path of the image file to analyse.

    Returns:
        A NumPy array with one ``(x, y)`` row per landmark for the first
        detected face, or ``None`` when no face is detected.

    Raises:
        ValueError: If the image cannot be read.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals failure by returning None; without this check the
        # failure would surface later as an opaque error inside cvtColor.
        raise ValueError(f"Could not read image: {image_path}")

    detector, predictor = _get_landmark_models()
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Only the first detected face is used, as before.
    face = next(iter(detector(gray)), None)
    if face is None:
        return None

    landmarks = predictor(gray, face)
    return np.array([(p.x, p.y) for p in landmarks.parts()])

# Step 4: Extract Audio Features (MFCCs)

def extract_mfcc(audio_path, sr=16000, n_mfcc=13):
    """Compute time-averaged MFCC features for an audio file.

    Args:
        audio_path: Path of the audio file to load.
        sr: Sample rate passed to ``librosa.load`` (default 16000).
        n_mfcc: Number of MFCC coefficients to compute (default 13).

    Returns:
        The result of averaging the MFCC matrix along axis 1, i.e. one value
        per coefficient.
    """
    y, sr = librosa.load(audio_path, sr=sr)
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
    # Collapse the time axis so each file yields a fixed-length vector.
    return mfccs.mean(axis=1)

# Process Videos in Dataset

def process_dataset(video_folder, output_frame_folder, output_audio_folder):
    """Extract frames and MFCC audio features for every video in a folder.

    For each regular file in ``video_folder`` this writes sampled frames to
    ``<output_frame_folder>/<video name without extension>/``, extracts the
    audio track to ``<output_audio_folder>/<video name without extension>.wav``
    with ffmpeg, and prints the MFCC features of that audio. Videos whose audio
    cannot be extracted (for example because they have no audio track) are
    reported and skipped for the MFCC step.

    Args:
        video_folder: Folder containing the input videos.
        output_frame_folder: Parent folder for the per-video frame folders.
        output_audio_folder: Folder receiving the extracted ``.wav`` files;
            created if missing.

    Raises:
        FileNotFoundError: If the ``ffmpeg`` executable is not installed.
    """
    import subprocess  # local import: the notebook's import cell does not provide it

    os.makedirs(output_audio_folder, exist_ok=True)

    for video_file in tqdm(sorted(os.listdir(video_folder))):
        video_path = os.path.join(video_folder, video_file)
        if not os.path.isfile(video_path):
            # Skip sub-folders such as notebook checkpoint directories.
            continue

        # splitext keeps dotted names intact ("a.b.mp4" -> "a.b"), whereas
        # split('.')[0] would map several such videos onto the same output.
        stem = os.path.splitext(video_file)[0]
        frame_output = os.path.join(output_frame_folder, stem)
        audio_output = os.path.join(output_audio_folder, stem + ".wav")

        os.makedirs(frame_output, exist_ok=True)
        extract_frames(video_path, frame_output)

        # Extract and save audio. An argument list (no shell) keeps paths with
        # spaces or shell metacharacters intact; -y overwrites output left by a
        # previous run instead of waiting for a confirmation prompt.
        result = subprocess.run(
            ["ffmpeg", "-y", "-i", video_path, "-q:a", "0", "-map", "a", audio_output],
            stdin=subprocess.DEVNULL,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
        if result.returncode != 0 or not os.path.exists(audio_output):
            print(f"Skipping MFCCs for {video_file}: audio extraction failed")
            continue

        mfcc_features = extract_mfcc(audio_output)
        print(f"Extracted MFCCs for {video_file}: {mfcc_features}")



In [None]:
import os

# List the top-level entries of the data directory.
# NOTE(review): this only shows which sub-folders exist; it does not verify
# that the "real" and "fake" folders actually contain any videos.
dataset_path = "/content/data/"
print("Files in dataset directory:", os.listdir(dataset_path))


Files in dataset directory: ['fake', 'real', 'frames', 'audio']


In [None]:
import cv2

def extract_frames(video_path, output_folder, frame_rate=2, prefix="frame"):
    """Save every ``frame_rate``-th frame of a video as a JPEG file.

    Saved frames are numbered consecutively and written to ``output_folder``
    as ``<prefix>_<n>.jpg``.

    Args:
        video_path: Path of the video file to read.
        output_folder: Folder receiving the JPEG files; created if missing.
        frame_rate: Keep one frame out of this many; must be positive.
        prefix: File-name prefix. The default reproduces the historical
            ``frame_<n>.jpg`` names; pass a per-video value when several
            videos share one output folder so they do not overwrite each other.

    Raises:
        ValueError: If ``frame_rate`` is not positive.
    """
    import warnings  # local import: this cell only imports cv2

    if frame_rate <= 0:
        raise ValueError(f"frame_rate must be positive, got {frame_rate}")

    os.makedirs(output_folder, exist_ok=True)
    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            # Warn instead of raising so the loops below keep going.
            warnings.warn(f"Could not open video: {video_path}", RuntimeWarning)
            return

        count = 0        # index of the frame being read
        frame_count = 0  # number of frames saved so far
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            if count % frame_rate == 0:
                frame_path = os.path.join(output_folder, f"{prefix}_{frame_count}.jpg")
                cv2.imwrite(frame_path, frame)
                frame_count += 1

            count += 1
    finally:
        # Release the capture even if reading or writing raised.
        cap.release()

# Set paths
real_videos = "/content/data/real"
fake_videos = "/content/data/fake"
output_frames_real = "/content/data/frames/real"
output_frames_fake = "/content/data/frames/fake"

# Process videos. All videos of a class share one flat output folder, so each
# video gets its own file-name prefix; with the plain "frame" prefix every
# video would overwrite the frames of the one processed before it.
for video in sorted(os.listdir(real_videos)):
    extract_frames(
        os.path.join(real_videos, video),
        output_frames_real,
        prefix=f"{os.path.splitext(video)[0]}_frame",
    )

for video in sorted(os.listdir(fake_videos)):
    extract_frames(
        os.path.join(fake_videos, video),
        output_frames_fake,
        prefix=f"{os.path.splitext(video)[0]}_frame",
    )

print("✅ Frames extracted successfully!")


✅ Frames extracted successfully!


In [None]:
!pip install librosa numpy matplotlib




In [None]:
import librosa
import librosa.display
import numpy as np
import matplotlib.pyplot as plt

def extract_spectrogram(audio_path, output_folder):
    """Render the log-mel spectrogram of an audio file to a PNG image.

    The image is written to ``output_folder`` under the audio file's base name
    with its extension replaced by ``.png``.

    Args:
        audio_path: Path of the audio (or video) file to load with librosa.
        output_folder: Folder receiving the PNG file; created if missing.
    """
    os.makedirs(output_folder, exist_ok=True)
    y, sr = librosa.load(audio_path, sr=16000)
    spectrogram = librosa.feature.melspectrogram(y=y, sr=sr)
    log_spectrogram = librosa.power_to_db(spectrogram, ref=np.max)

    # Swap whatever extension the input has for ".png". Replacing only ".mp4"
    # left ".wav" inputs with a ".wav" output name, which savefig cannot write.
    stem = os.path.splitext(os.path.basename(audio_path))[0]
    output_path = os.path.join(output_folder, stem + ".png")

    fig = plt.figure(figsize=(10, 4))
    try:
        librosa.display.specshow(log_spectrogram, sr=sr, x_axis='time', y_axis='mel')
        plt.axis('off')
        plt.savefig(output_path, bbox_inches='tight', pad_inches=0)
    finally:
        # Close the figure even if drawing or saving failed, so figures do not
        # pile up when this runs in a loop.
        plt.close(fig)

# NOTE(review): this cell only defines extract_spectrogram and never calls it,
# so this message is printed although no spectrogram has been written yet.
print("✅ Spectrograms extracted successfully!")


✅ Spectrograms extracted successfully!


In [None]:
!pip install dlib
!pip install face_recognition


Collecting face_recognition
  Downloading face_recognition-1.3.0-py2.py3-none-any.whl.metadata (21 kB)
Collecting face-recognition-models>=0.3.0 (from face_recognition)
  Downloading face_recognition_models-0.3.0.tar.gz (100.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m100.1/100.1 MB[0m [31m7.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Downloading face_recognition-1.3.0-py2.py3-none-any.whl (15 kB)
Building wheels for collected packages: face-recognition-models
  Building wheel for face-recognition-models (setup.py) ... [?25l[?25hdone
  Created wheel for face-recognition-models: filename=face_recognition_models-0.3.0-py2.py3-none-any.whl size=100566162 sha256=bd3ce61a2fb1fa4fc8c03e2671c607042237ec5460137bedce4f9764ae2ac8f6
  Stored in directory: /root/.cache/pip/wheels/04/52/ec/9355da79c29f160b038a20c784db2803c2f9fa2c8a462c176a
Successfully built face-recognition-models
Installing collected packages: face-recog

In [None]:
import dlib
import cv2

# Load the face detector and landmark predictor once at module level;
# extract_landmarks() below reads these two globals on every call.
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor("shape_predictor_68_face_landmarks.dat")  # path is relative to the working directory; the file must already exist there

def extract_landmarks(image_path):
    """Return the facial landmarks of the first face found in an image.

    Uses the module-level ``detector`` and ``predictor`` objects.

    Args:
        image_path: Path of the image file to analyse.

    Returns:
        A list of ``(x, y)`` tuples, one per landmark, for the first detected
        face, or ``None`` when no face is detected.

    Raises:
        ValueError: If the image cannot be read.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals failure by returning None; without this check the
        # failure would surface later as an opaque error inside cvtColor.
        raise ValueError(f"Could not read image: {image_path}")

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Only the first detected face is used, as before.
    face = next(iter(detector(gray)), None)
    if face is None:
        return None

    landmarks = predictor(gray, face)
    return [(p.x, p.y) for p in landmarks.parts()]

# Reached only when the detector and predictor above were created without raising.
print("✅ Model loaded successfully!")


✅ Model loaded successfully!


In [None]:
# Phase 2: Model Training (ViT frame classifier)

In [None]:
!pip install torch torchvision transformers timm opencv-python numpy matplotlib




In [None]:
# Folders holding the extracted frames of each class; later cells read both.
real_frames_dir = "/content/data/frames/real"  # frames taken from real videos
fake_frames_dir = "/content/data/frames/fake"  # frames taken from fake videos


In [None]:
import cv2
import os

def extract_frames(video_path, output_folder, frame_rate=10):
    """Save every ``frame_rate``-th frame of a video as a JPEG file.

    Saved frames are numbered consecutively and written to ``output_folder``
    as ``frame_<n>.jpg``. A one-line status message is printed at the end.

    Args:
        video_path: Path of the video file to read.
        output_folder: Folder receiving the JPEG files; created if missing.
        frame_rate: Keep one frame out of this many; must be positive.

    Raises:
        ValueError: If ``frame_rate`` is not positive.
    """
    if frame_rate <= 0:
        raise ValueError(f"frame_rate must be positive, got {frame_rate}")

    os.makedirs(output_folder, exist_ok=True)
    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            # A missing or unreadable file used to be reported as a successful
            # extraction of 0 frames; report the real problem instead.
            print(f"❌ Could not open video: {video_path}")
            return

        count = 0     # index of the frame being read
        frame_id = 0  # number of frames saved so far
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            if count % frame_rate == 0:
                frame_path = os.path.join(output_folder, f"frame_{frame_id}.jpg")
                cv2.imwrite(frame_path, frame)
                frame_id += 1
            count += 1
    finally:
        # Release the capture even if reading or writing raised.
        cap.release()

    print(f"✅ Extracted {frame_id} frames from {video_path}")

# Example usage: sample frames from one video into the "fake" class folder.
# NOTE(review): frames are named frame_<n>.jpg, so extracting a second video
# into the same folder overwrites these files — confirm this is intended.
extract_frames("/content/download.mp4", "/content/data/frames/fake")  # change paths as needed


✅ Extracted 30 frames from /content/download.mp4


In [None]:
import torch
import torchvision.transforms as transforms
from transformers import ViTForImageClassification
from PIL import Image
import os

# ViTFeatureExtractor is deprecated in favour of ViTImageProcessor. Fall back
# to the old class only on transformers releases that predate the new one.
try:
    from transformers import ViTImageProcessor
except ImportError:
    from transformers import ViTFeatureExtractor as ViTImageProcessor
ViTFeatureExtractor = ViTImageProcessor  # keeps the old name usable by other cells

# Load the ViT model. The classification head is created for 2 labels
# (Real, Fake) and is newly initialised, so it must be trained before use.
model_name = "google/vit-base-patch16-224-in21k"
model = ViTForImageClassification.from_pretrained(model_name, num_labels=2)  # 2 labels: Real, Fake
feature_extractor = ViTImageProcessor.from_pretrained(model_name)

# Define image transformations: resize to 224x224 and normalise with the
# mean/std published alongside the checkpoint.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=feature_extractor.image_mean, std=feature_extractor.image_std)
])

# Check if GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
print(f"✅ Model loaded on {device}!")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/502 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/346M [00:00<?, ?B/s]

Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


preprocessor_config.json:   0%|          | 0.00/160 [00:00<?, ?B/s]



✅ Model loaded on cuda!


In [None]:
import os

# Report whether the real-frames folder is present and create it when it is
# missing, so later cells that list it do not fail.
# NOTE(review): a folder created here is empty, which means the dataset built
# afterwards contains no "real" samples until frames are extracted into it.
if os.path.exists(real_frames_dir):
    print(f"✅ Directory exists: {real_frames_dir}")
else:
    print(f"❌ Directory not found: {real_frames_dir}")
    os.makedirs(real_frames_dir, exist_ok=True)
    print(f"✅ Created directory: {real_frames_dir}")

❌ Directory not found: /content/data/frames/real
✅ Created directory: /content/data/frames/real


In [None]:
from torch.utils.data import Dataset, DataLoader

class DeepfakeDataset(Dataset):
    """Image dataset labelling frames as real (0) or fake (1).

    Args:
        real_dir: Folder containing frames of real videos.
        fake_dir: Folder containing frames of fake videos.
        transform: Callable applied to each RGB PIL image in ``__getitem__``.

    Only regular files with a known image extension are used, in sorted order,
    so stray entries (sub-folders, notebook checkpoints, text files) are
    ignored and the sample order is reproducible.
    """

    # File extensions (lower-case) accepted as images.
    IMAGE_EXTENSIONS = (".jpg", ".jpeg", ".png", ".bmp")

    def __init__(self, real_dir, fake_dir, transform):
        self.real_images = self._list_images(real_dir)
        self.fake_images = self._list_images(fake_dir)
        self.transform = transform
        self.labels = [0] * len(self.real_images) + [1] * len(self.fake_images)  # 0 = Real, 1 = Fake
        self.images = self.real_images + self.fake_images

        if not self.real_images or not self.fake_images:
            import warnings  # local import: only needed on this rare path

            # A classifier trained on a single class learns nothing useful.
            warnings.warn(
                f"DeepfakeDataset has {len(self.real_images)} real and "
                f"{len(self.fake_images)} fake images; both classes are needed for training.",
                RuntimeWarning,
            )

    @classmethod
    def _list_images(cls, folder):
        """Return the sorted paths of the image files directly inside ``folder``."""
        return sorted(
            path
            for path in (os.path.join(folder, name) for name in os.listdir(folder))
            if path.lower().endswith(cls.IMAGE_EXTENSIONS) and os.path.isfile(path)
        )

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        """Return ``(transformed image, label tensor)`` for sample ``idx``."""
        image = Image.open(self.images[idx]).convert("RGB")
        image = self.transform(image)
        label = self.labels[idx]
        return image, torch.tensor(label)

# Frame folders for the two classes.
real_frames_dir = "/content/data/frames/real"
fake_frames_dir = "/content/data/frames/fake"

# Build the dataset from both folders and wrap it in a shuffling loader that
# yields batches of 16 samples.
dataset = DeepfakeDataset(
    real_dir=real_frames_dir,
    fake_dir=fake_frames_dir,
    transform=transform,
)
dataloader = DataLoader(dataset=dataset, batch_size=16, shuffle=True)

print(f"✅ Loaded {len(dataset)} images (Real: {len(os.listdir(real_frames_dir))}, Fake: {len(os.listdir(fake_frames_dir))})")


✅ Loaded 30 images (Real: 0, Fake: 30)


In [None]:
import torch.optim as optim
import torch.nn as nn

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=2e-5)

# Fail early with a clear message: with no batches the loop below would do
# nothing and the average-loss report would divide by zero.
if len(dataloader) == 0:
    raise ValueError("dataloader is empty: extract frames before training")

# Training loop
num_epochs = 3  # Adjust based on dataset size
for epoch in range(num_epochs):
    model.train()
    total_loss = 0
    for images, labels in dataloader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images).logits
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    # Report the mean loss per batch for this epoch.
    print(f"🔥 Epoch {epoch+1}/{num_epochs} - Loss: {total_loss / len(dataloader):.4f}")

print("✅ Training Completed!")


🔥 Epoch 1/3 - Loss: 0.6207
🔥 Epoch 2/3 - Loss: 0.4985
🔥 Epoch 3/3 - Loss: 0.3825
✅ Training Completed!


In [None]:
import os

# Print the file names currently present in the fake-frames folder
# (os.listdir returns them in arbitrary order).
fake_frames_dir = "/content/data/frames/fake"  # update this if the frames live elsewhere
print("🔍 Checking extracted frames...")
print(os.listdir(fake_frames_dir))


🔍 Checking extracted frames...
['frame_13.jpg', 'frame_6.jpg', 'frame_0.jpg', 'frame_12.jpg', 'frame_2.jpg', 'frame_9.jpg', 'frame_18.jpg', 'frame_7.jpg', 'frame_27.jpg', 'frame_1.jpg', 'frame_16.jpg', 'frame_21.jpg', 'frame_10.jpg', 'frame_15.jpg', 'frame_28.jpg', 'frame_5.jpg', 'frame_22.jpg', 'frame_23.jpg', 'frame_24.jpg', 'frame_8.jpg', 'frame_3.jpg', 'frame_11.jpg', 'frame_4.jpg', 'frame_29.jpg', 'frame_25.jpg', 'frame_26.jpg', 'frame_20.jpg', 'frame_19.jpg', 'frame_17.jpg', 'frame_14.jpg']


In [None]:
import cv2
import os

def extract_frames(video_path, output_folder, frame_rate=10):
    """Save every ``frame_rate``-th frame of a video as a JPEG file.

    Saved frames are numbered consecutively and written to ``output_folder``
    as ``frame_<n>.jpg``. A one-line status message is printed at the end.

    Args:
        video_path: Path of the video file to read.
        output_folder: Folder receiving the JPEG files; created if missing.
        frame_rate: Keep one frame out of this many; must be positive.

    Raises:
        ValueError: If ``frame_rate`` is not positive.
    """
    if frame_rate <= 0:
        raise ValueError(f"frame_rate must be positive, got {frame_rate}")

    os.makedirs(output_folder, exist_ok=True)
    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            # A missing or unreadable file used to be reported as a successful
            # extraction of 0 frames; report the real problem instead.
            print(f"❌ Could not open video: {video_path}")
            return

        count = 0     # index of the frame being read
        frame_id = 0  # number of frames saved so far
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            if count % frame_rate == 0:
                frame_path = os.path.join(output_folder, f"frame_{frame_id}.jpg")
                cv2.imwrite(frame_path, frame)
                frame_id += 1
            count += 1
    finally:
        # Release the capture even if reading or writing raised.
        cap.release()

    print(f"✅ Extracted {frame_id} frames from {video_path}")

# Extract frames from a deepfake video into the "fake" class folder.
# NOTE(review): frames are named frame_<n>.jpg, so this overwrites same-named
# frames already in that folder — confirm this is intended.
extract_frames("/content/deepfake_video.mp4", "/content/data/frames/fake")


✅ Extracted 0 frames from /content/deepfake_video.mp4


In [None]:
# List first frame in the fake folder
import os

# Pick the first entry returned for the fake-frames folder as the test image,
# or report that the folder is empty.
fake_frames_dir = "/content/data/frames/fake"
fake_frames = os.listdir(fake_frames_dir)
if not fake_frames:
    print("❌ No frames found! Extract them first.")
else:
    test_image = os.path.join(fake_frames_dir, fake_frames[0])
    print(f"✅ Using {test_image} for testing.")


✅ Using /content/data/frames/fake/frame_13.jpg for testing.


In [None]:
def predict(image_path):
    """Classify a single image file as "Real" or "Fake".

    Uses the module-level ``model``, ``transform`` and ``device``; the model is
    switched to evaluation mode as a side effect.

    Args:
        image_path: Path of the image file to classify.

    Returns:
        "Real" when the highest-scoring class index is 0, otherwise "Fake".
    """
    model.eval()

    rgb_image = Image.open(image_path).convert("RGB")
    # Add a leading batch dimension and move the tensor to the model's device.
    batch = transform(rgb_image).unsqueeze(0).to(device)

    with torch.no_grad():
        logits = model(batch).logits

    predicted_class = torch.argmax(logits, dim=1).item()
    if predicted_class == 0:
        return "Real"
    return "Fake"

# Run the classifier on one image and print the predicted label.
# NOTE(review): this frame sits in the folder the training dataset was built
# from, so the result is not a held-out evaluation — confirm with unseen data.
test_image = "/content/data/frames/fake/frame_19.jpg"  # change to an image that exists
print(f"🧐 Prediction: {predict(test_image)}")


🧐 Prediction: Fake
