In [1]:
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, WeightedRandomSampler
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold
import torch.optim as optim
from torchvision import transforms, datasets
from PIL import Image
from tqdm import tqdm

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Torch Model

## Training modules

In [3]:
## After splitting, we need a way to load images + labels from our lists of X and y — that’s where FrameDataset comes in.
from torch.utils.data import Dataset
from PIL import Image

class FrameDataset(Dataset):
    def __init__(self, image_paths, labels, transform = None):
        self.image_paths = image_paths
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self,idx):
        image = Image.open(self.image_paths[idx]).convert('RGB')
        label = self.labels[idx]

        if self.transform:
            image = self.transform(image)
        return image, label

In [4]:
# Model
class DeepF_CNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.net = nn.Sequential(
            # Block 1
            nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1),  # padding='same' → padding=1 when kernel=3
            nn.ReLU(),
            nn.BatchNorm2d(num_features=32),  # because output channels = 32
            nn.MaxPool2d(kernel_size=2),
            nn.Dropout(p=0.2),

            # Block 2
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(num_features=64),
            nn.MaxPool2d(kernel_size=2),
            nn.Dropout(p=0.3),

            # Block 3
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(128),
            nn.AdaptiveAvgPool2d((1, 1)),  # → output shape [batch, 128, 1, 1]
            nn.Flatten(),                  # → [batch, 128]

            nn.Linear(128, 1),
            nn.Sigmoid()
            )
    def forward(self, x):
        return self.net(x)

In [5]:
#Create ES function
class EarlyStopping:
    def __init__(self, patience=5):
        self.patience = patience
        self.counter = 0
        self.best_loss = float('inf')
        self.best_model = None
        self.early_stop = False

    def __call__(self, val_loss, model):
        if val_loss < self.best_loss:
            self.best_loss = val_loss
            self.best_model = model.state_dict() # saves best weight
            self.counter = 0
        else:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True

In [21]:
# Define a training session ( will be x5)
def train_one_fold(X, y, fold_idx, train_idx, val_idx, transform, device):
    print(f"Training Fold {fold_idx + 1}...")

    # Datasets
    train_dataset = FrameDataset(X[train_idx], y[train_idx], transform)
    val_dataset   = FrameDataset(X[val_idx], y[val_idx], transform)

    # Weighted sampling
    class_counts = np.bincount(y[train_idx])
    weights = 1. / class_counts
    sample_weights = weights[y[train_idx]]
    sampler = WeightedRandomSampler(sample_weights, len(sample_weights), replacement=True)

    # Dataloaders
    train_loader = DataLoader(train_dataset, batch_size=32, sampler=sampler)
    val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

    # Model, loss, optimizer
    model = DeepF_CNN().to(device)
    criterion = nn.BCEWithLogitsLoss()
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=3)
    early_stopper = EarlyStopping(patience=5)

    for epoch in range(30):
        model.train()
        running_loss, correct, total = 0.0, 0, 0
        for images, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}"):
            images, labels = images.to(device), labels.float().unsqueeze(1).to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item() * labels.size(0)
            preds = torch.sigmoid(outputs) >= 0.5
            correct += (preds == labels).sum().item()
            total += labels.size(0)

        train_acc = correct / total
        train_loss = running_loss / total

        # Validation
        model.eval()
        val_loss, val_correct, val_total = 0.0, 0, 0
        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.float().unsqueeze(1).to(device)
                outputs = model(images)
                loss = criterion(outputs, labels)
                val_loss += loss.item() * labels.size(0)
                preds = torch.sigmoid(outputs) >= 0.5
                val_correct += (preds == labels).sum().item()
                val_total += labels.size(0)

        val_loss /= val_total
        val_acc = val_correct / val_total

        print(f"Epoch {epoch+1} | Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}")
        scheduler.step(val_loss)
        early_stopper(val_loss, model)

        if early_stopper.early_stop:
            print("Early stopping triggered.")
            break

    # Return best model for this fold
    model.load_state_dict(early_stopper.best_model)
    return model


# Prepare data

In [7]:
import shutil

# # Only copy once
# !rm -rf /content/local_data  # Clean up if re-running
shutil.copytree('/content/drive/MyDrive/Colab_Notebooks/GAIDI/Deepfake/data', '/content/local_data')

# data = datasets.ImageFolder(root='/content/local_data', transform=transform)  <== below


'/content/local_data'

In [19]:
from collections import Counter

transform = transforms.Compose([
    transforms.Resize((32,32)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5]*3, std=[0.5]*3)
])

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42) # 5 fold

# data = datasets.ImageFolder(root='/content/drive/MyDrive/Colab_Notebooks/GAIDI/Deepfake/data', transform=transform)
data = datasets.ImageFolder(root='/content/local_data', transform=transform) #  <== after copying the dataset to colab local disk with shutil
class_counts = Counter(data.targets)

print(f'data has two classes: {data.classes}, there are {len(data)} images(frames) in data, {class_counts[1]} real video frames, {class_counts[0]} fake video frames')

if ((class_counts[0] * 100) / class_counts[1]) < 45 or ((class_counts[0] * 100) / class_counts[1]) > 55:
    print('classes weights are imbalanced, WeightedRandomSampler is required')
else:
    print('classes weights are balanced, no WeightedRandomSampler required.')

Using device: cuda
data has two classes: ['fake', 'real'], there are 19061 images(frames) in data, 8332 real video frames, 10729 fake video frames
classes weights are imbalanced, WeightedRandomSampler is required


In [10]:
X = np.array([s[0] for s in data.samples])
y = np.array([s[1] for s in data.samples])

In [11]:
from sklearn.model_selection import train_test_split
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, stratify=y)

In [12]:
print(f'{X_train.shape}\n{ X_val.shape}\n{y_train.shape}\n{y_val.shape}')

(15248,)
(3813,)
(15248,)
(3813,)


# Train

In [None]:
!nvidia-smi


In [22]:
for fold_idx, (train_idx, val_idx) in enumerate(skf.split(X, y)):
    model = train_one_fold(X, y, fold_idx, train_idx, val_idx, transform, device)


Training Fold 1...


Epoch 1: 100%|██████████| 477/477 [02:37<00:00,  3.03it/s]


📊 Epoch 1 | Train Loss: 0.6264, Val Loss: 0.5895, Val Acc: 0.4372


Epoch 2:  14%|█▍        | 68/477 [00:23<02:22,  2.88it/s]


KeyboardInterrupt: 

In [None]:
labels = labels.float().unsqueeze(1)
outputs = model(images)
preds = torch.sigmoid(outputs) >= 0.5  # ← Make sure this matches label format
print(labels, outputs, preds,labels.shape, preds.shape)

NameError: name 'labels' is not defined

In [None]:
print("Full data:", np.bincount(y))        # Should be [10729, 8332]
print("Train:", np.bincount(y[train_idx])) # Should be ~85% of that
print("Val:", np.bincount(y[val_idx]))

Full data: [10729  8332]
Train: [8583 6665]
Val: [2146 1667]


## save model

In [None]:
torch.save({
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
    'scheduler_state_dict': scheduler.state_dict(),
}, 'model_fold1.pth')


## Load model

In [None]:
checkpoint = torch.load('model_fold1.pth', map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
scheduler.load_state_dict(checkpoint['scheduler_state_dict'])

In [None]:
for fold_idx, (train_idx, val_idx) in enumerate(skf.split(X, y)):

    # Load or initialize model
    model = DeepF_CNN().to(device)
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, ...)

    # Load from checkpoint if continuing
    checkpoint_path = f"model_fold{fold_idx+1}.pth"
    if os.path.exists(checkpoint_path):
        checkpoint = torch.load(checkpoint_path, map_location=device)
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        scheduler.load_state_dict(checkpoint['scheduler_state_dict'])
        print(f"Resumed model for fold {fold_idx+1}")

    # Train model as usual
    trained_model = train_one_fold(...)

    # Save everything after training
    torch.save({
        'model_state_dict': trained_model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'scheduler_state_dict': scheduler.state_dict(),
    }, checkpoint_path)