In [1]:
import random
import pandas as pd 
import numpy as np 

import torch
import torch.nn as nn
import torch.optim as optim 

from torchmetrics.classification import BinaryF1Score
from torchvision import transforms as T
import torchvision.models as models
from torch.utils.data import Dataset, DataLoader, Subset
from sklearn.metrics import f1_score
from sklearn.model_selection import GroupKFold
from PIL import Image
from tqdm import tqdm

DEBUGGING = False

In [2]:
train_df = pd.read_csv("/kaggle/input/ai-vs-human-generated-dataset/train.csv")
test_df = pd.read_csv("/kaggle/input/ai-vs-human-generated-dataset/test.csv")

train_df["group"] = np.arange(len(train_df)) // 2 # Create groups: Assign the same group number to each pair

In [3]:
class DeepfakeDataset(Dataset): 
    def __init__(self, df, root_dir, transform, train=bool): 
        self.data = df
        self.root = root_dir 
        self.transform = transform
        self.train = train

    def __len__(self):
        return len(self.data) 

    def __getitem__(self, idx): 
        if self.train: 
            img_path = self.root + self.data.iloc[idx, 1]
            
            image = Image.open(img_path).convert("RGB") 
            image = self.transform(image) 
            
            label = self.data.iloc[idx, 2]

            return image, torch.tensor(label)
        else:
            img_path = self.root + self.data.iloc[idx, 0]
            image = Image.open(img_path).convert("RGB") 
            image = self.transform(image) 

            return image

In [4]:
class DeepfakeTrainer:
    def __init__(self, train_dataset, test_dataset, model_class=None, 
                 k_folds=3, batch_size=64, lr=0.001, 
                 criterion=nn.CrossEntropyLoss(), random_state=42):
        
        self.model_class = model_class
        self.train_dataset = train_dataset
        self.test_dataset = test_dataset
  
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.criterion = criterion
        
        self.k_folds = k_folds
        self.batch_size = batch_size
        self.lr = lr
        self.random_state = random_state
        
        # Metrics
        self.train_scores = []
        self.val_scores = []
    
    def train_epoch(self, model, train_loader, optimizer, f1_metric):
        """Train for one epoch."""
        model.train()
        f1_metric.reset()
        
        for img, label in tqdm(train_loader, desc="TRAINING"):
            img, label = img.to(self.device), label.to(self.device)
            
            # Forward pass
            optimizer.zero_grad()
            outputs = model(img).squeeze()
            loss = self.criterion(outputs, label.float())
            
            # Backward pass
            loss.backward()
            optimizer.step()
            
            # Calculate metrics
            preds = (outputs > 0.5).int()
            f1_metric.update(preds, label.int())
            
        return f1_metric.compute().item()
    
    def evaluate(self, model, val_loader, f1_metric):
        """Evaluate the model."""
        
        model.eval()
        f1_metric.reset()
        
        with torch.no_grad():
            for img, label in tqdm(val_loader, desc="VALIDATION"):
                img, label = img.to(self.device), label.to(self.device)
                
                # Forward pass
                outputs = model(img).squeeze()
                
                # Calculate metrics
                preds = (outputs > 0.5).int()
                f1_metric.update(preds, label.int())
                
        return f1_metric.compute().item()
    
    def process_fold(self, train_loader, val_loader, epochs):
        """Process a single fold."""

        if self.model_class: 
            model = self.model_class()
        else: 
            model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)
            n_features = model.fc.in_features
            model.fc = nn.Linear(n_features, 1)

        model = model.to(self.device)
        optimizer = optim.Adam(model.parameters(), lr=self.lr)
        train_f1_metric = BinaryF1Score().to(self.device)
        val_f1_metric = BinaryF1Score().to(self.device)
        
        train_fold_scores = []
        val_fold_scores = []
        
        for epoch in range(epochs):
            # Train
            train_epoch_f1 = self.train_epoch(model, train_loader, optimizer, train_f1_metric)
            train_fold_scores.append(train_epoch_f1)
            
            # Validate
            val_epoch_f1 = self.evaluate(model, val_loader, val_f1_metric)
            val_fold_scores.append(val_epoch_f1)
            
            print(f"Epoch {epoch+1}/{epochs} - Train F1 Score: {train_epoch_f1:.3f}, Val F1 Score: {val_epoch_f1:.3f}")
        
        train_avg_f1 = sum(train_fold_scores) / len(train_fold_scores)
        val_avg_f1 = sum(val_fold_scores) / len(val_fold_scores)
        
        return model, train_avg_f1, val_avg_f1
    
    def train(self, epochs, df):
        """Train with k-fold cross validation."""

        best_model = None
        best_val_score = 0
        
        gkf = GroupKFold(n_splits=self.k_folds)
        splits = list(gkf.split(df, groups=df["group"]))
        
        for fold, (train_idx, val_idx) in enumerate(splits):
            print(f"\n{'-'*50}")
            print(f"FOLD {fold + 1}/{self.k_folds}")
            print(f"{'-'*50}\n")
            
            # Create data loaders for this fold
            train_data = Subset(self.train_dataset, train_idx)
            val_data = Subset(self.train_dataset, val_idx)
            
            train_loader = DataLoader(
                train_data, 
                batch_size=self.batch_size, 
                shuffle=True
            )
            val_loader = DataLoader(
                val_data, 
                batch_size=self.batch_size, 
                shuffle=False
            )
            
            # Process this fold
            model, train_fold_f1, val_fold_f1 = self.process_fold(
                train_loader, val_loader, epochs
            )
            
            print(f"\nFOLD {fold + 1} RESULTS:")
            print(f"TRAIN F1_SCORE: {train_fold_f1:.3f}, VAL F1_SCORE: {val_fold_f1:.3f}\n")
            
            # Store scores
            self.train_scores.append(train_fold_f1)
            self.val_scores.append(val_fold_f1)
            
            # Keep track of best model
            if val_fold_f1 > best_val_score:
                best_val_score = val_fold_f1
                best_model = model
        
        # Print final results
        train_avg_f1 = sum(self.train_scores) / len(self.train_scores)
        val_avg_f1 = sum(self.val_scores) / len(self.val_scores)
        
        print(f"{'-'*50}")
        print(f"CROSS-VALIDATION RESULTS:")
        print(f"AVG TRAIN F1_SCORE: {train_avg_f1:.3f}, AVG VAL F1_SCORE: {val_avg_f1:.3f}")
        print(f"{'-'*50}\n")
        
        return best_model
    
    def predict(self, model):
        """Make predictions with the model."""
        model.eval()
        all_preds = []

        test_loader = DataLoader(
            self.test_dataset, 
            batch_size=64, 
            shuffle=False
        )
        
        with torch.no_grad():
            for img in tqdm(test_loader, desc="PREDICTING"):
                img = img.to(self.device)
                outputs = model(img).squeeze()
                preds = (outputs > 0.5).int()
                all_preds.extend(preds.cpu().numpy())
                
        return np.array(all_preds)

In [5]:
train_transform = T.Compose([
    T.Resize((128, 128)), 
    
    # Human Augments
    T.RandomPerspective(
        distortion_scale=0.5, 
        p=0.5
    ),
    T.RandomRotation(30), 
    T.GaussianBlur(
        kernel_size=5, 
        sigma=(0.1, 2.0)
    ),

    # AI Augments 
    T.ColorJitter(
        brightness=0.5, 
        contrast=0.5, 
        saturation=0.5,
        hue=0.3
    ),           
    T.ToTensor(), 
    T.Normalize(
        mean=[0.5, 0.5, 0.5], 
        std=[0.5, 0.5, 0.5]
    )  
])

test_transform = T.Compose([
    T.Resize((128, 128)),
    T.ToTensor(), 
    T.Normalize(
        mean=[0.5, 0.5, 0.5], 
        std=[0.5, 0.5, 0.5]
    )  
])

In [6]:
train_data = DeepfakeDataset(
    df=train_df, 
    root_dir="/kaggle/input/ai-vs-human-generated-dataset/",
    transform=train_transform,
    train=True
)

test_data = DeepfakeDataset(
    df=test_df,
    root_dir="/kaggle/input/ai-vs-human-generated-dataset/", 
    transform=test_transform,
    train=False
)

if DEBUGGING: 
    indices = list(range(300))
    
    train_data = Subset(train_data, indices)
    train_df = train_df.iloc[indices]

In [7]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

cv_trainer = DeepfakeTrainer(
    train_dataset=train_data,  
    test_dataset=test_data,
    model_class=None,  
    k_folds=3,            
    batch_size=64,         
    lr=0.001,
)

model = cv_trainer.train(epochs=5, df=train_df)


--------------------------------------------------
FOLD 1/3
--------------------------------------------------



Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 203MB/s]
TRAINING: 100%|██████████| 833/833 [17:11<00:00,  1.24s/it]
VALIDATION: 100%|██████████| 417/417 [07:47<00:00,  1.12s/it]


Epoch 1/5 - Train F1 Score: 0.390, Val F1 Score: 0.313


TRAINING: 100%|██████████| 833/833 [12:18<00:00,  1.13it/s]
VALIDATION: 100%|██████████| 417/417 [05:01<00:00,  1.39it/s]


Epoch 2/5 - Train F1 Score: 0.520, Val F1 Score: 0.745


TRAINING: 100%|██████████| 833/833 [12:01<00:00,  1.15it/s]
VALIDATION: 100%|██████████| 417/417 [05:05<00:00,  1.37it/s]


Epoch 3/5 - Train F1 Score: 0.715, Val F1 Score: 0.719


TRAINING: 100%|██████████| 833/833 [12:08<00:00,  1.14it/s]
VALIDATION: 100%|██████████| 417/417 [05:07<00:00,  1.36it/s]


Epoch 4/5 - Train F1 Score: 0.805, Val F1 Score: 0.806


TRAINING: 100%|██████████| 833/833 [12:01<00:00,  1.15it/s]
VALIDATION: 100%|██████████| 417/417 [05:24<00:00,  1.29it/s]


Epoch 5/5 - Train F1 Score: 0.812, Val F1 Score: 0.831

FOLD 1 RESULTS:
TRAIN F1_SCORE: 0.648, VAL F1_SCORE: 0.683


--------------------------------------------------
FOLD 2/3
--------------------------------------------------



TRAINING: 100%|██████████| 833/833 [12:18<00:00,  1.13it/s]
VALIDATION: 100%|██████████| 417/417 [05:28<00:00,  1.27it/s]


Epoch 1/5 - Train F1 Score: 0.203, Val F1 Score: 0.634


TRAINING: 100%|██████████| 833/833 [12:32<00:00,  1.11it/s]
VALIDATION: 100%|██████████| 417/417 [05:25<00:00,  1.28it/s]


Epoch 2/5 - Train F1 Score: 0.231, Val F1 Score: 0.000


TRAINING: 100%|██████████| 833/833 [12:32<00:00,  1.11it/s]
VALIDATION: 100%|██████████| 417/417 [05:03<00:00,  1.37it/s]


Epoch 3/5 - Train F1 Score: 0.333, Val F1 Score: 0.645


TRAINING: 100%|██████████| 833/833 [11:45<00:00,  1.18it/s]
VALIDATION: 100%|██████████| 417/417 [05:01<00:00,  1.38it/s]


Epoch 4/5 - Train F1 Score: 0.661, Val F1 Score: 0.617


TRAINING: 100%|██████████| 833/833 [12:11<00:00,  1.14it/s]
VALIDATION: 100%|██████████| 417/417 [05:26<00:00,  1.28it/s]


Epoch 5/5 - Train F1 Score: 0.782, Val F1 Score: 0.839

FOLD 2 RESULTS:
TRAIN F1_SCORE: 0.442, VAL F1_SCORE: 0.547


--------------------------------------------------
FOLD 3/3
--------------------------------------------------



TRAINING: 100%|██████████| 833/833 [12:26<00:00,  1.12it/s]
VALIDATION: 100%|██████████| 417/417 [05:29<00:00,  1.26it/s]


Epoch 1/5 - Train F1 Score: 0.489, Val F1 Score: 0.719


TRAINING: 100%|██████████| 833/833 [12:22<00:00,  1.12it/s]
VALIDATION: 100%|██████████| 417/417 [05:21<00:00,  1.30it/s]


Epoch 2/5 - Train F1 Score: 0.750, Val F1 Score: 0.685


TRAINING: 100%|██████████| 833/833 [12:27<00:00,  1.11it/s]
VALIDATION: 100%|██████████| 417/417 [05:28<00:00,  1.27it/s]


Epoch 3/5 - Train F1 Score: 0.820, Val F1 Score: 0.834


TRAINING: 100%|██████████| 833/833 [12:15<00:00,  1.13it/s]
VALIDATION: 100%|██████████| 417/417 [05:14<00:00,  1.33it/s]


Epoch 4/5 - Train F1 Score: 0.847, Val F1 Score: 0.827


TRAINING: 100%|██████████| 833/833 [12:09<00:00,  1.14it/s]
VALIDATION: 100%|██████████| 417/417 [05:18<00:00,  1.31it/s]

Epoch 5/5 - Train F1 Score: 0.849, Val F1 Score: 0.834

FOLD 3 RESULTS:
TRAIN F1_SCORE: 0.751, VAL F1_SCORE: 0.780

--------------------------------------------------
CROSS-VALIDATION RESULTS:
AVG TRAIN F1_SCORE: 0.614, AVG VAL F1_SCORE: 0.670
--------------------------------------------------






In [8]:
ids = test_df["id"]
preds = cv_trainer.predict(model)

submission = pd.DataFrame({
    "id": ids,
    "label": preds
})

submission.to_csv("/kaggle/working/V5.csv", index=False)

PREDICTING: 100%|██████████| 87/87 [04:19<00:00,  2.98s/it]
