In [None]:
# Deepfake Detector — Trimmed Demo Notebook
# Paste cells into Colab or run this file as a notebook. This demo *loads a saved model* and
# provides a Gradio UI for uploading a short video and receiving a video-level prediction.
# It also includes a small evaluation cell to run over videos already in Drive.

# 0) Install (run once in Colab)
# %pip (rather than !pip) installs into the environment of the running kernel,
# so the packages are importable from the cells below.
%pip install -q facenet-pytorch gradio torchvision


In [None]:
# 1) Mount Google Drive and set paths
import os

from google.colab import drive

# Expose the user's Drive under /content/drive; every path below lives there.
drive.mount('/content/drive')

# BASE is the project folder on Drive; MODEL_PATH is the checkpoint that the
# model-loading cell reads from <BASE>/models/.
BASE = '/content/drive/MyDrive/deepfake_project'
MODEL_PATH = os.path.join(BASE, 'models', 'best_resnet18_aug_balanced.pth')

# Echo both paths so a wrong Drive layout is obvious before the model is loaded.
print(f'BASE: {BASE}')
print(f'MODEL_PATH: {MODEL_PATH}')


In [None]:
# 2) Imports, device, transforms, MTCNN
import torch, torch.nn.functional as F
from torchvision import models, transforms
from facenet_pytorch import MTCNN
from PIL import Image
import numpy as np

# Use the GPU when the runtime has one, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print('Device:', device)

# Side length (in pixels) of the square face crop requested from MTCNN and of
# the image the classifier receives after resizing.
IMG_SIZE = 224

# Deterministic preprocessing applied to each face crop before the classifier:
# resize -> tensor -> per-channel normalisation. The mean/std values are the
# ones torchvision documents for its ImageNet-pretrained models.
val_transforms = transforms.Compose([
    transforms.Resize(size=(IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Face detector / cropper. keep_all=False asks for a single face per image;
# margin=30 and image_size=IMG_SIZE are passed through to facenet-pytorch.
mtcnn = MTCNN(device=device, keep_all=False, margin=30, image_size=IMG_SIZE)


In [None]:
# 3) Load model (ResNet18 architecture matching training)
from torchvision import models
import torch.nn as nn

# Build the bare ResNet18 architecture only. Every parameter and buffer is
# overwritten by the checkpoint below (load_state_dict is strict by default),
# so fetching the ImageNet weights first would just cost a download.
model = models.resnet18(weights=None)

# Replace the classification head with a 2-logit layer to match the checkpoint.
model.fc = nn.Linear(model.fc.in_features, 2)

# map_location makes a checkpoint that was saved on a GPU loadable on a
# CPU-only runtime (and vice versa).
# NOTE: torch.load unpickles the file -- only load checkpoints you trust.
state_dict = torch.load(MODEL_PATH, map_location=device)
model.load_state_dict(state_dict)

model = model.to(device)
model.eval()  # inference mode: freezes batch-norm statistics / disables dropout
print('Model loaded')


In [None]:
# 4) Utility: predict per-face probabilities and aggregate to video-level
import cv2

def predict_frame_prob_facecrop(img_pil):
    """Detect a face in one image and return the classifier's softmax output for it.

    Args:
        img_pil: PIL image of a full frame; passed directly to ``mtcnn``.

    Returns:
        A 1-D numpy array with the softmax probabilities of the two classes for
        the detected face crop, or ``None`` when ``mtcnn`` finds no face.
    """
    detected = mtcnn(img_pil)
    if detected is None:
        return None

    # MTCNN yields a channels-first tensor; move channels last and turn it into
    # an 8-bit image so the PIL-based val_transforms pipeline can consume it.
    # NOTE(review): facenet-pytorch standardises its output to roughly [-1, 1]
    # when post_process=True (the library default), in which case "* 255" plus a
    # uint8 cast does not reproduce the original pixels. Left unchanged because
    # the checkpoint may have been trained on crops produced the same way --
    # confirm against the training pipeline before altering this conversion.
    channels_last = detected.permute(1, 2, 0) * 255
    pixels = channels_last.to(torch.uint8).numpy().astype(np.uint8)
    crop = Image.fromarray(pixels)

    # Add the batch dimension and run the classifier without tracking gradients.
    batch = val_transforms(crop).unsqueeze(0).to(device)
    with torch.no_grad():
        logits = model(batch)
        probabilities = F.softmax(logits, dim=1)
    return probabilities.cpu().numpy()[0]

def video_to_prediction_facecrop(video_path, every_n=15, max_frames=120):
    """Average the per-face class probabilities over sampled frames of a video.

    Every ``every_n``-th frame is converted from BGR to RGB and scored with
    ``predict_frame_prob_facecrop``. Frames in which no face is detected are
    skipped and do not count towards ``max_frames``.

    Args:
        video_path: Path of a video file to open with ``cv2.VideoCapture``.
        every_n: Frame stride. Coerced to an integer >= 1, because UI sliders
            may deliver floats and a fractional stride would make
            ``frame_idx % every_n == 0`` match (almost) only frame 0, while a
            stride of 0 would raise ``ZeroDivisionError``.
        max_frames: Stop after this many frames have been scored; a falsy value
            (``0`` / ``None``) disables the limit.

    Returns:
        The element-wise mean of the per-frame probability vectors
        (``[p_fake, p_real]``), or ``None`` when no frame produced a face
        (including when the video yields no frames at all).
    """
    stride = max(1, int(every_n))
    probs = []
    cap = cv2.VideoCapture(video_path)
    try:
        frame_idx = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            if frame_idx % stride == 0:
                # OpenCV decodes to BGR; the detector/classifier expect RGB.
                pil = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
                prob = predict_frame_prob_facecrop(pil)
                if prob is not None:
                    probs.append(prob)
                    if max_frames and len(probs) >= max_frames:
                        break
            frame_idx += 1
    finally:
        # Release the capture even if detection or inference raises mid-video.
        cap.release()
    if not probs:
        return None
    return np.mean(probs, axis=0)  # returns [p_fake, p_real]


In [None]:
# 5) Gradio demo: upload a video -> returns video-level prediction and an example face crop
import gradio as gr

def _face_tensor_to_image(face_tensor):
    """Convert a channels-first MTCNN face tensor into a viewable PIL image."""
    pixels = face_tensor.detach().cpu().permute(1, 2, 0)
    # facenet-pytorch returns 0..255 floats, standardised as (x - 127.5) / 128
    # when post_process is enabled (the library default). Undo that so the crop
    # displays with its real colours instead of wrapped-around uint8 values.
    if getattr(mtcnn, 'post_process', True):
        pixels = pixels * 128.0 + 127.5
    return Image.fromarray(pixels.clamp(0, 255).round().byte().numpy())


def _first_face_crop(path, every_n):
    """Return the first face crop found on the sampled frames of `path`, or None."""
    cap = cv2.VideoCapture(path)
    try:
        frame_idx = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                return None
            if frame_idx % every_n == 0:
                pil = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
                face_tensor = mtcnn(pil)
                if face_tensor is not None:
                    return _face_tensor_to_image(face_tensor)
            frame_idx += 1
    finally:
        # Always free the capture, including on early return or an exception.
        cap.release()


def video_predict_and_example(file, every_n=15, max_frames=120, thresh=0.7):
    """Gradio callback: classify an uploaded video and return an example face crop.

    Args:
        file: Path string, or an object with a ``.name`` path attribute
            (tempfile-like). ``None`` (nothing uploaded) yields a message
            instead of raising.
        every_n: Frame stride; coerced to an integer >= 1 because sliders can
            deliver floats, which would break the modulo-based frame sampling.
        max_frames: Maximum number of face-bearing frames to score; coerced to
            an integer when truthy.
        thresh: The video is labelled 'fake' when the averaged fake probability
            is >= this value, otherwise 'real'.

    Returns:
        A ``(message, image)`` tuple: the textual result and the first detected
        face crop as a PIL image. ``image`` is ``None`` when no video was
        supplied or no face was detected.
    """
    if file is None:
        return ("No video uploaded", None)
    # file is a tempfile object or path
    path = file if isinstance(file, str) else file.name

    every_n = max(1, int(every_n))
    if max_frames:
        max_frames = int(max_frames)

    avg = video_to_prediction_facecrop(path, every_n=every_n, max_frames=max_frames)
    if avg is None:
        return ("No faces detected", None)
    pf, pr = float(avg[0]), float(avg[1])
    pred = 'fake' if pf >= thresh else 'real'

    # also return one face crop as example (first detected)
    example_img = _first_face_crop(path, every_n)
    return (f"pred: {pred}  prob_fake={pf:.3f} prob_real={pr:.3f}", example_img)

# The slider values are passed positionally to video_predict_and_example as
# (every_n, max_frames, thresh). The two count-like sliders get step=1: without
# an explicit step Gradio derives one from the range, which can be fractional
# (e.g. for 5..30) and would hand a non-integer frame stride to the callback.
iface = gr.Interface(
    fn=video_predict_and_example,
    inputs=[
        gr.Video(label='Upload MP4'),
        gr.Slider(5, 30, value=15, step=1, label='every_n (frame stride)'),
        gr.Slider(10, 300, value=120, step=1, label='max_frames'),
        gr.Slider(0.5, 0.9, value=0.7, step=0.05, label='threshold for fake'),
    ],
    outputs=[
        gr.Textbox(label='Result'),
        gr.Image(label='Example face crop'),
    ],
    title='Deepfake Detector — Demo (face-crop aggregation)',
    description='Uploads a short MP4, runs face detection + ResNet model on face crops, and returns a video-level prediction.'
)

# Run with share=True in Colab to get a shareable link
iface.launch(share=True)


In [None]:
# 6) Quick evaluation cell (run locally on Drive videos) — saves a CSV
import glob, pandas as pd
# Collect the labelled clips from <BASE>/raw_videos/{real,fake}/ -- real clips
# first, then fake ones, each group in sorted filename order.
real_videos = sorted(glob.glob(os.path.join(BASE, 'raw_videos', 'real', '*.mp4')))
fake_videos = sorted(glob.glob(os.path.join(BASE, 'raw_videos', 'fake', '*.mp4')))
all_videos = [(p, 'real') for p in real_videos]
all_videos.extend((p, 'fake') for p in fake_videos)

rows = []
for video_path, label in all_videos:
    scores = video_to_prediction_facecrop(video_path, every_n=15, max_frames=120)
    # Start from the "no face found" record and fill in the scores if there are any.
    record = {
        'video_path': video_path,
        'true_label': label,
        'pred_class': None,
        'prob_fake': None,
        'prob_real': None,
    }
    if scores is not None:
        p_fake, p_real = float(scores[0]), float(scores[1])
        # A video is called 'fake' when the averaged fake probability is >= 0.7.
        record['pred_class'] = 'fake' if p_fake >= 0.7 else 'real'
        record['prob_fake'] = p_fake
        record['prob_real'] = p_real
    rows.append(record)

# One row per video; written next to the project data on Drive.
df = pd.DataFrame(rows)
out_csv = os.path.join(BASE, 'video_predictions_demo.csv')
df.to_csv(out_csv, index=False)
print('Saved:', out_csv)
