In [1]:
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, WeightedRandomSampler
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import f1_score, precision_score, recall_score
import torch.optim as optim
from torch.utils.data import Dataset
from torch.amp import autocast, GradScaler
from torchvision import transforms, datasets
from PIL import Image
from tqdm import tqdm

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!ls /content/drive/MyDrive/Colab_Notebooks/GAIDI/Deepfake/


data				  manipulated	   model_fold4.pth
downloaded_files_log.txt	  model_fold1.pth  model_fold5.pth
faceforensic_downloader_colab.py  model_fold2.pth  originals
Frame_Extraction.ipynb		  model_fold3.pth  Pytorch_Training.ipynb


In [3]:
# zip the files in colab on CPU
# !cd /content/drive/MyDrive/Colab_Notebooks/GAIDI/Deepfake/data && zip -r /content/drive/MyDrive/zipped_frames.zip ./*

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  adding: real/010.mp4_0008.jpg (deflated 0%)
  adding: real/010.mp4_0009.jpg (deflated 0%)
  adding: real/010.mp4_0010.jpg (deflated 0%)
  adding: real/010.mp4_0011.jpg (deflated 0%)
  adding: real/010.mp4_0012.jpg (deflated 0%)
  adding: real/010.mp4_0013.jpg (deflated 0%)
  adding: real/010.mp4_0014.jpg (deflated 0%)
  adding: real/010.mp4_0015.jpg (deflated 0%)
  adding: real/010.mp4_0016.jpg (deflated 0%)
  adding: real/010.mp4_0017.jpg (deflated 0%)
  adding: real/010.mp4_0018.jpg (deflated 0%)
  adding: real/010.mp4_0019.jpg (deflated 0%)
  adding: real/010.mp4_0020.jpg (deflated 0%)
  adding: real/010.mp4_0021.jpg (deflated 0%)
  adding: real/010.mp4_0022.jpg (deflated 0%)
  adding: real/010.mp4_0023.jpg (deflated 0%)
  adding: real/010.mp4_0024.jpg (deflated 0%)
  adding: real/010.mp4_0025.jpg (deflated 0%)
  adding: real/010.mp4_0026.jpg (deflated 0%)
  adding: real/010.mp4_0027.jpg (deflated 0%)
  adding: real/

In [4]:
# Play an audio beep. Any audio URL will do.
from google.colab import output
output.eval_js('new Audio("https://upload.wikimedia.org/wikipedia/commons/5/57/BP_Just_D.ogg").play()')

In [3]:
#  unzip directly into local directory (on A100)
!unzip /content/drive/MyDrive/zipped_frames.zip -d /content/data

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: /content/data/real/010.mp4_0007.jpg  
  inflating: /content/data/real/010.mp4_0008.jpg  
  inflating: /content/data/real/010.mp4_0009.jpg  
  inflating: /content/data/real/010.mp4_0010.jpg  
  inflating: /content/data/real/010.mp4_0011.jpg  
  inflating: /content/data/real/010.mp4_0012.jpg  
  inflating: /content/data/real/010.mp4_0013.jpg  
  inflating: /content/data/real/010.mp4_0014.jpg  
  inflating: /content/data/real/010.mp4_0015.jpg  
  inflating: /content/data/real/010.mp4_0016.jpg  
  inflating: /content/data/real/010.mp4_0017.jpg  
  inflating: /content/data/real/010.mp4_0018.jpg  
  inflating: /content/data/real/010.mp4_0019.jpg  
  inflating: /content/data/real/010.mp4_0020.jpg  
  inflating: /content/data/real/010.mp4_0021.jpg  
  inflating: /content/data/real/010.mp4_0022.jpg  
  inflating: /content/data/real/010.mp4_0023.jpg  
  inflating: /content/data/real/010.mp4_0024.jpg  
  inflating: /con

# Torch Model

## Training modules

In [4]:
## After splitting, we need a way to load images + labels from our lists of X and y — that’s where FrameDataset comes in.

class FrameDataset(Dataset):
    def __init__(self, image_paths, labels, transform = None):
        self.image_paths = image_paths
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self,idx):
        image = Image.open(self.image_paths[idx]).convert('RGB')
        label = self.labels[idx]

        if self.transform:
            image = self.transform(image)
        return image, label

In [5]:
# Model
class DeepF_CNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.net = nn.Sequential(
            # Block 1
            nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1),  # padding='same' → padding=1 when kernel=3
            nn.ReLU(),
            nn.BatchNorm2d(num_features=32),  # because output channels = 32
            nn.MaxPool2d(kernel_size=2),
            nn.Dropout(p=0.2),

            # Block 2
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(num_features=64),
            nn.MaxPool2d(kernel_size=2),
            nn.Dropout(p=0.3),

            # Block 3
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(128),
            nn.AdaptiveAvgPool2d((1, 1)),  # → output shape [batch, 128, 1, 1]
            nn.Flatten(),                  # → [batch, 128]

            nn.Linear(128, 1)
            )
    def forward(self, x):
        return self.net(x)

In [6]:
#Create ES function
class EarlyStopping:
    def __init__(self, patience=4):
        self.patience = patience
        self.counter = 0
        self.best_loss = float('inf')
        self.best_model = None
        self.early_stop = False

    def __call__(self, val_loss, model):
        if val_loss < self.best_loss:
            self.best_loss = val_loss
            self.best_model = model.state_dict() # saves best weight
            self.counter = 0
        else:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True

# Prepare data

In [7]:
from collections import Counter

transform = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5]*3, std=[0.5]*3)
])

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)
# skf = StratifiedKFold(n_splits=3, shuffle=True, random_state=42) # 3 fold TOO SLOW ON MY LAPTOP

# data = datasets.ImageFolder(root='/content/drive/MyDrive/Colab_Notebooks/GAIDI/Deepfake/data', transform=transform)
data = datasets.ImageFolder(root='/content/data', transform=transform) #  <== after copying the dataset to colab local disk with unzip
class_counts = Counter(data.targets)

print(f'data has two classes: {data.classes}, there are {len(data)} images(frames) in data, {class_counts[1]} real video frames, {class_counts[0]} fake video frames')

if ((class_counts[0] * 100) / class_counts[1]) < 45 or ((class_counts[0] * 100) / class_counts[1]) > 55:
    print('classes weights are imbalanced, WeightedRandomSampler is required')
else:
    print('classes weights are balanced, no WeightedRandomSampler required.')

Using device: cuda
data has two classes: ['fake', 'real'], there are 16324 images(frames) in data, 6933 real video frames, 9391 fake video frames
classes weights are imbalanced, WeightedRandomSampler is required


In [8]:
X = np.array([s[0] for s in data.samples])
y = np.array([s[1] for s in data.samples])

In [9]:
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, stratify=y)

In [10]:
print(f'{X_train.shape}\n{ X_val.shape}\n{y_train.shape}\n{y_val.shape}')

(13059,)
(3265,)
(13059,)
(3265,)


## 1st run

In [11]:
def train_one_fold(X_train, y_train, X_val, y_val, transform, device, run):
    scaler = GradScaler()

    # Datasets
    train_dataset = FrameDataset(X_train, y_train, transform)
    val_dataset   = FrameDataset(X_val, y_val, transform)


    # Weighted sampler
    class_counts = np.bincount(y_train)
    weights = 1. / class_counts
    sample_weights = weights[y_train]
    sampler = WeightedRandomSampler(sample_weights, len(sample_weights), replacement=True)

    # DataLoaders
    train_loader = DataLoader(train_dataset, batch_size=128, sampler=sampler,
                              num_workers=2, pin_memory=True)
    val_loader = DataLoader(val_dataset, batch_size=128, shuffle=False,
                            num_workers=2, pin_memory=True)

    # Model, loss, optimizer
    model = DeepF_CNN().to(device)
    criterion = nn.BCEWithLogitsLoss()
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=3)
    early_stopper = EarlyStopping(patience=5)

    for epoch in range(20):
        model.train()
        running_loss, correct, total = 0.0, 0, 0

        for images, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}"):
            images = images.to(device, non_blocking=True)
            labels = labels.float().unsqueeze(1).to(device, non_blocking=True)

            optimizer.zero_grad()
            with autocast(device_type='cuda'):
                outputs = model(images)
                loss = criterion(outputs, labels)

            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            running_loss += loss.item() * labels.size(0)
            preds = torch.sigmoid(outputs) >= 0.5
            correct += (preds == labels).sum().item()
            total += labels.size(0)

        train_loss = running_loss / total
        train_acc = correct / total

        # Validation
        model.eval()
        val_loss, val_correct, val_total = 0.0, 0, 0
        all_preds, all_labels = [], []

        with torch.no_grad():
            for images, labels in val_loader:
                images = images.to(device, non_blocking=True)
                labels = labels.float().unsqueeze(1).to(device, non_blocking=True)

                with autocast(device_type='cuda'):
                    outputs = model(images)
                    loss = criterion(outputs, labels)

                val_loss += loss.item() * labels.size(0)
                preds = torch.sigmoid(outputs) >= 0.5
                val_correct += (preds == labels).sum().item()
                val_total += labels.size(0)

                all_preds.extend(preds.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())

        val_loss /= val_total
        val_acc = val_correct / val_total
        f1 = f1_score(all_labels, all_preds)
        precision = precision_score(all_labels, all_preds)
        recall = recall_score(all_labels, all_preds)

        print(f"Epoch {epoch+1} | Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f} | "
              f"Val Acc: {val_acc:.4f} | F1: {f1:.4f} | Precision: {precision:.4f} | Recall: {recall:.4f}")

        scheduler.step(val_loss)
        early_stopper(val_loss, model)

        if early_stopper.early_stop:
            print("Early stopping triggered.")
            break

    model.load_state_dict(early_stopper.best_model)
    torch.save({
    'epoch': epoch,
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
    'scheduler_state_dict': scheduler.state_dict(),
    'scaler_state_dict': scaler.state_dict(),  # AMP scaler
}, f'model_fold{run}.pth')

    return model, all_preds, all_labels


## Load model

In [12]:
!ls -lh /content/model_fold2.pth

ls: cannot access '/content/model_fold2.pth': No such file or directory


In [13]:
# Load saved checkpoint
checkpoint = torch.load('/content/drive/MyDrive/Colab_Notebooks/GAIDI/Deepfake/model_fold6.pth', map_location=torch.device('cuda' if torch.cuda.is_available() else 'cpu'))


# Rebuild model, optimizer, scheduler, scaler (must match original setup)
model = DeepF_CNN().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=3)
scaler = GradScaler()

# Load states
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
scheduler.load_state_dict(checkpoint['scheduler_state_dict'])
scaler.load_state_dict(checkpoint['scaler_state_dict'])

# Determine where to continue
start_epoch = checkpoint['epoch'] + 1
n_more_epochs = 15

## Next runs

In [14]:
def train_one_fold(X_train, y_train, X_val, y_val, transform, device, run):
    scaler = GradScaler()

    # Datasets
    train_dataset = FrameDataset(X_train, y_train, transform)
    val_dataset   = FrameDataset(X_val, y_val, transform)


    # Weighted sampler
    class_counts = np.bincount(y_train)
    weights = 1. / class_counts
    sample_weights = weights[y_train]
    sampler = WeightedRandomSampler(sample_weights, len(sample_weights), replacement=True)

    # DataLoaders
    train_loader = DataLoader(train_dataset, batch_size=128, sampler=sampler,
                              num_workers=2, pin_memory=True)
    val_loader = DataLoader(val_dataset, batch_size=128, shuffle=False,
                            num_workers=2, pin_memory=True)

    # Model, loss, optimizer
    model = DeepF_CNN().to(device)
    criterion = nn.BCEWithLogitsLoss()
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=3)
    early_stopper = EarlyStopping(patience=4)

    for epoch in range(start_epoch, start_epoch + n_more_epochs):
        model.train()
        running_loss, correct, total = 0.0, 0, 0

        for images, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}"):
            images = images.to(device, non_blocking=True)
            labels = labels.float().unsqueeze(1).to(device, non_blocking=True)

            optimizer.zero_grad()
            with autocast(device_type='cuda'):
                outputs = model(images)
                loss = criterion(outputs, labels)

            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            running_loss += loss.item() * labels.size(0)
            preds = torch.sigmoid(outputs) >= 0.5
            correct += (preds == labels).sum().item()
            total += labels.size(0)

        train_loss = running_loss / total
        train_acc = correct / total

        # Validation
        model.eval()
        val_loss, val_correct, val_total = 0.0, 0, 0
        all_preds, all_labels = [], []

        with torch.no_grad():
            for images, labels in val_loader:
                images = images.to(device, non_blocking=True)
                labels = labels.float().unsqueeze(1).to(device, non_blocking=True)

                with autocast(device_type='cuda'):
                    outputs = model(images)
                    loss = criterion(outputs, labels)

                val_loss += loss.item() * labels.size(0)
                preds = torch.sigmoid(outputs) >= 0.5
                val_correct += (preds == labels).sum().item()
                val_total += labels.size(0)

                all_preds.extend(preds.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())

        val_loss /= val_total
        val_acc = val_correct / val_total
        f1 = f1_score(all_labels, all_preds)
        precision = precision_score(all_labels, all_preds)
        recall = recall_score(all_labels, all_preds)

        print(f"Epoch {epoch+1} | Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f} | "
              f"Val Acc: {val_acc:.4f} | F1: {f1:.4f} | Precision: {precision:.4f} | Recall: {recall:.4f}")

        scheduler.step(val_loss)
        early_stopper(val_loss, model)

        if early_stopper.early_stop:
            print("Early stopping triggered.")
            break

    model.load_state_dict(early_stopper.best_model)
    torch.save({
    'epoch': epoch,
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
    'scheduler_state_dict': scheduler.state_dict(),
    'scaler_state_dict': scaler.state_dict(),  # AMP scaler
}, f'model_fold{run}.pth')

    return model, all_preds, all_labels


# Train

In [15]:
model = train_one_fold(X_train, y_train, X_val, y_val, transform, device, run = 7)

Epoch 78: 100%|██████████| 103/103 [01:11<00:00,  1.43it/s]


Epoch 78 | Train Loss: 0.6048 | Val Loss: 0.6445 | Val Acc: 0.6395 | F1: 0.4705 | Precision: 0.6256 | Recall: 0.3771


Epoch 79: 100%|██████████| 103/103 [01:11<00:00,  1.45it/s]


Epoch 79 | Train Loss: 0.4961 | Val Loss: 0.4732 | Val Acc: 0.7877 | F1: 0.7519 | Precision: 0.7468 | Recall: 0.7570


Epoch 80: 100%|██████████| 103/103 [01:08<00:00,  1.50it/s]


Epoch 80 | Train Loss: 0.3893 | Val Loss: 0.3199 | Val Acc: 0.8423 | F1: 0.8268 | Precision: 0.7749 | Recall: 0.8861


Epoch 81: 100%|██████████| 103/103 [01:09<00:00,  1.49it/s]


Epoch 81 | Train Loss: 0.2782 | Val Loss: 0.7587 | Val Acc: 0.8098 | F1: 0.7940 | Precision: 0.7353 | Recall: 0.8630


Epoch 82: 100%|██████████| 103/103 [01:10<00:00,  1.46it/s]


Epoch 82 | Train Loss: 0.2269 | Val Loss: 0.2071 | Val Acc: 0.9038 | F1: 0.8860 | Precision: 0.8925 | Recall: 0.8796


Epoch 83: 100%|██████████| 103/103 [01:09<00:00,  1.48it/s]


Epoch 83 | Train Loss: 0.2077 | Val Loss: 0.3828 | Val Acc: 0.8637 | F1: 0.8588 | Precision: 0.7670 | Recall: 0.9755


Epoch 84: 100%|██████████| 103/103 [01:10<00:00,  1.45it/s]


Epoch 84 | Train Loss: 0.1607 | Val Loss: 0.2745 | Val Acc: 0.9093 | F1: 0.9017 | Precision: 0.8357 | Recall: 0.9791


Epoch 85: 100%|██████████| 103/103 [01:09<00:00,  1.49it/s]


Epoch 85 | Train Loss: 0.1258 | Val Loss: 0.6613 | Val Acc: 0.8101 | F1: 0.8149 | Precision: 0.6954 | Recall: 0.9841


Epoch 86: 100%|██████████| 103/103 [01:09<00:00,  1.49it/s]


Epoch 86 | Train Loss: 0.1361 | Val Loss: 0.1093 | Val Acc: 0.9538 | F1: 0.9454 | Precision: 0.9478 | Recall: 0.9430


Epoch 87: 100%|██████████| 103/103 [01:12<00:00,  1.43it/s]


Epoch 87 | Train Loss: 0.1025 | Val Loss: 0.1447 | Val Acc: 0.9507 | F1: 0.9422 | Precision: 0.9385 | Recall: 0.9459


Epoch 88: 100%|██████████| 103/103 [01:09<00:00,  1.48it/s]


Epoch 88 | Train Loss: 0.1031 | Val Loss: 0.0793 | Val Acc: 0.9669 | F1: 0.9621 | Precision: 0.9377 | Recall: 0.9877


Epoch 89: 100%|██████████| 103/103 [01:09<00:00,  1.48it/s]


Epoch 89 | Train Loss: 0.0802 | Val Loss: 0.0882 | Val Acc: 0.9721 | F1: 0.9669 | Precision: 0.9765 | Recall: 0.9575


Epoch 90: 100%|██████████| 103/103 [01:10<00:00,  1.46it/s]


Epoch 90 | Train Loss: 0.0818 | Val Loss: 0.1193 | Val Acc: 0.9510 | F1: 0.9431 | Precision: 0.9311 | Recall: 0.9553


Epoch 91: 100%|██████████| 103/103 [01:09<00:00,  1.49it/s]


Epoch 91 | Train Loss: 0.1089 | Val Loss: 0.0594 | Val Acc: 0.9697 | F1: 0.9645 | Precision: 0.9607 | Recall: 0.9683


Epoch 92: 100%|██████████| 103/103 [01:09<00:00,  1.48it/s]


Epoch 92 | Train Loss: 0.0764 | Val Loss: 0.1549 | Val Acc: 0.9387 | F1: 0.9319 | Precision: 0.8827 | Recall: 0.9870


In [16]:
# Play an audio beep. Any audio URL will do.
from google.colab import output
output.eval_js('new Audio("https://upload.wikimedia.org/wikipedia/commons/5/57/BP_Just_D.ogg").play()')
