In [1]:
import random
import pandas as pd 
import numpy as np 

import torch
import torch.nn as nn
import torch.optim as optim 

from torchmetrics.classification import BinaryF1Score
from torchvision import transforms as T
from torch.utils.data import Dataset, DataLoader, Subset
from sklearn.metrics import f1_score
from sklearn.model_selection import GroupKFold
from PIL import Image
from tqdm import tqdm

# When True, a later cell restricts the training dataset and dataframe to their
# first 1000 rows so the whole pipeline can be smoke-tested quickly.
DEBUGGING = False

In [2]:
# Metadata tables shipped with the competition data.
train_df = pd.read_csv("/kaggle/input/ai-vs-human-generated-dataset/train.csv")
test_df = pd.read_csv("/kaggle/input/ai-vs-human-generated-dataset/test.csv")

# Rows 0-1 get group 0, rows 2-3 get group 1, and so on, so that a grouped
# split never separates the two rows of a pair.
# NOTE(review): this presumes paired rows are adjacent in train.csv - confirm.
train_df = train_df.assign(group=lambda frame: np.arange(len(frame)) // 2)

In [3]:
class DeepfakeDataset(Dataset):
    """Image dataset backed by a metadata dataframe.

    In training mode each item is ``(image, label)``; otherwise each item is
    just ``image``. Columns are addressed by position:

    * training mode: column 1 holds the image path, column 2 the label
    * test mode:     column 0 holds the image path

    Args:
        df: Dataframe with one row per image.
        root_dir: String prefix prepended (by plain concatenation) to the
            path stored in the dataframe.
        transform: Callable applied to the RGB ``PIL.Image`` of every item.
        train: Whether the dataframe carries labels. Defaults to ``True``.
            (The previous default was the ``bool`` type object itself, which
            only behaved like ``True`` because it is truthy.)
    """

    def __init__(self, df, root_dir, transform, train: bool = True):
        self.data = df
        self.root = root_dir
        self.transform = transform
        self.train = train

    def get_indices_and_labels(self):
        """Return ``(indices, labels)`` for a training dataset.

        Returns:
            A tuple of ``list(range(len(self)))`` and the ``"label"`` column
            as a list when ``self.train`` is truthy, otherwise ``None``.
        """
        if not self.train:
            # Unlabelled data has nothing to report; made explicit here.
            return None

        indices = list(range(len(self)))
        labels = self.data["label"].to_list()
        return indices, labels

    def __len__(self):
        """Number of rows in the backing dataframe."""
        return len(self.data)

    def _load_image(self, relative_path):
        """Open ``root + relative_path``, convert to RGB and apply the transform."""
        # ``convert`` returns a fully loaded copy, so the file handle can be
        # released straight away instead of waiting for garbage collection.
        with Image.open(self.root + relative_path) as raw:
            image = raw.convert("RGB")
        return self.transform(image)

    def __getitem__(self, idx):
        """Return the transformed image at ``idx`` (plus its label when training)."""
        if self.train:
            image = self._load_image(self.data.iloc[idx, 1])
            label = self.data.iloc[idx, 2]
            return image, torch.tensor(label)

        return self._load_image(self.data.iloc[idx, 0])

In [4]:
# Spatial size of the feature map after the two stride-2 pooling layers for a
# 128x128 input. Kept at module level for backward compatibility; AIDetector
# falls back to these values when no explicit input size is given.
img_width = (128) // (2*2)
img_height = (128) // (2*2)

class AIDetector(nn.Module):
    """Small CNN producing one sigmoid probability per input image.

    Architecture: two conv -> batch-norm -> ReLU -> 2x2 pool stages (3 -> 16 ->
    32 channels), followed by a three-layer MLP head with dropout and a final
    sigmoid.

    Args:
        input_size: Optional spatial size of the input images, either a single
            int (square images) or a ``(height, width)`` pair. When omitted,
            the head is sized from the module-level ``img_width`` /
            ``img_height`` values (i.e. 128x128 inputs), exactly as before.

    Raises:
        ValueError: If ``input_size`` is too small to survive both pooling
            stages (either side below 4 pixels).
    """

    def __init__(self, input_size=None):
        super().__init__()

        if input_size is None:
            feat_height, feat_width = img_height, img_width
        else:
            if isinstance(input_size, int):
                input_size = (input_size, input_size)
            in_height, in_width = input_size
            # Each 2x2/stride-2 pool floors the side length in half.
            feat_height = in_height // (2 * 2)
            feat_width = in_width // (2 * 2)
            if feat_height < 1 or feat_width < 1:
                raise ValueError(
                    f"input_size {tuple(input_size)} is too small for two 2x2 pooling stages"
                )

        self.conv_block = nn.Sequential(
            nn.Conv2d(3, 16, 3, padding=1),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),

            nn.Conv2d(16, 32, 3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.AvgPool2d(2, 2),
        )

        self.fc = nn.Sequential(
            nn.Flatten(),

            # 32 channels times the pooled spatial extent.
            nn.Linear(32 * feat_width * feat_height, 512),
            nn.ReLU(),
            nn.Dropout(0.5),

            nn.Linear(512, 64),
            nn.ReLU(),
            nn.Dropout(0.5),

            nn.Linear(64, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        """Map a batch of images ``(N, 3, H, W)`` to probabilities ``(N, 1)``."""
        x = self.conv_block(x)
        x = self.fc(x)

        return x

In [5]:
class DeepfakeTrainer:
    """Grouped k-fold cross-validation trainer for a binary classifier.

    A fresh model is built and trained for every fold; F1 is tracked with
    ``BinaryF1Score`` at a 0.5 decision threshold on the model outputs.
    """

    def __init__(self, model_class, train_dataset, test_dataset, 
                 k_folds=3, batch_size=64, lr=0.001, 
                 criterion=nn.BCELoss(), random_state=42):
        """
        Initialize the DeepfakeTrainer.
        
        Args:
            model_class: Zero-argument callable instantiated once per fold
            train_dataset: Dataset yielding (image, label); split into folds
            test_dataset: Dataset yielding images only; used by predict()
            k_folds: Number of folds for cross-validation
            batch_size: Batch size for training, validation and prediction
            lr: Learning rate for the Adam optimizer
            criterion: Loss function applied to (outputs, float labels)
            random_state: Seed passed to torch.manual_seed at the start of
                train(); None disables seeding
        """
        
        self.model_class = model_class
        self.train_dataset = train_dataset
        self.test_dataset = test_dataset
  
        # CUDA is used whenever it is available, otherwise the CPU.
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.criterion = criterion
        
        self.k_folds = k_folds
        self.batch_size = batch_size
        self.lr = lr
        self.random_state = random_state
        
        # Per-fold metrics, appended to by train()
        self.train_scores = []
        self.val_scores = []

    @staticmethod
    def _flatten_outputs(outputs):
        """Reshape model outputs to one value per sample, shape ``(batch,)``.

        A bare ``squeeze()`` would also drop the batch dimension when a batch
        holds a single sample, which breaks the loss (shape mismatch with the
        labels) and the prediction collection (0-d array is not iterable).
        """
        return outputs.reshape(outputs.shape[0])
    
    def train_epoch(self, model, train_loader, optimizer, f1_metric):
        """Run one optimisation pass over train_loader.

        Returns:
            The F1 score accumulated over the epoch, as a Python float.
        """
        model.train()
        f1_metric.reset()
        
        for img, label in tqdm(train_loader, desc="TRAINING"):
            img, label = img.to(self.device), label.to(self.device)
            
            # Forward pass
            optimizer.zero_grad()
            outputs = self._flatten_outputs(model(img))
            loss = self.criterion(outputs, label.float())
            
            # Backward pass
            loss.backward()
            optimizer.step()
            
            # Threshold the outputs at 0.5 to get hard predictions
            preds = (outputs > 0.5).int()
            f1_metric.update(preds, label.int())
            
        return f1_metric.compute().item()
    
    def evaluate(self, model, val_loader, f1_metric):
        """Score the model on val_loader without updating its weights.

        Returns:
            The F1 score accumulated over the loader, as a Python float.
        """
        
        model.eval()
        f1_metric.reset()
        
        with torch.no_grad():
            for img, label in tqdm(val_loader, desc="VALIDATION"):
                img, label = img.to(self.device), label.to(self.device)
                
                # Forward pass
                outputs = self._flatten_outputs(model(img))
                
                # Threshold the outputs at 0.5 to get hard predictions
                preds = (outputs > 0.5).int()
                f1_metric.update(preds, label.int())
                
        return f1_metric.compute().item()
    
    def process_fold(self, train_loader, val_loader, epochs):
        """Train a fresh model on one fold.

        Returns:
            (model, train_avg_f1, val_avg_f1) where the model holds the
            weights after the last epoch and both scores are means over all
            epochs of the fold.
        """
        model = self.model_class().to(self.device)
        optimizer = optim.Adam(model.parameters(), lr=self.lr)
        
        train_f1_metric = BinaryF1Score().to(self.device)
        val_f1_metric = BinaryF1Score().to(self.device)
        
        train_fold_scores = []
        val_fold_scores = []
        
        for epoch in range(epochs):
            # Train
            train_epoch_f1 = self.train_epoch(model, train_loader, optimizer, train_f1_metric)
            train_fold_scores.append(train_epoch_f1)
            
            # Validate
            val_epoch_f1 = self.evaluate(model, val_loader, val_f1_metric)
            val_fold_scores.append(val_epoch_f1)
            
            print(f"Epoch {epoch+1}/{epochs} - Train F1 Score: {train_epoch_f1:.3f}, Val F1 Score: {val_epoch_f1:.3f}")
        
        train_avg_f1 = sum(train_fold_scores) / len(train_fold_scores)
        val_avg_f1 = sum(val_fold_scores) / len(val_fold_scores)
        
        return model, train_avg_f1, val_avg_f1
    
    def train(self, epochs, df):
        """Train with grouped k-fold cross validation.

        Args:
            epochs: Number of epochs per fold.
            df: Dataframe aligned row-for-row with train_dataset; its
                "group" column keeps grouped rows inside the same fold.

        Returns:
            (best_model, train_avg_f1, val_avg_f1): the model of the fold
            with the highest validation score and the scores averaged over
            all folds.
        """

        # Honour random_state: seeding the global torch RNG makes weight
        # initialisation and DataLoader shuffling repeatable between runs.
        if self.random_state is not None:
            torch.manual_seed(self.random_state)

        best_model = None
        # -inf so that a model is always selected, even if every fold scores 0.
        best_val_score = float("-inf")
        
        gkf = GroupKFold(n_splits=self.k_folds)
        splits = list(gkf.split(df, groups=df["group"]))
        
        for fold, (train_idx, val_idx) in enumerate(splits):
            print(f"\n{'-'*50}")
            print(f"FOLD {fold + 1}/{self.k_folds}")
            print(f"{'-'*50}\n")
            
            # Both subsets wrap the same dataset object, so validation
            # samples pass through the same transform as training samples.
            train_data = Subset(self.train_dataset, train_idx)
            val_data = Subset(self.train_dataset, val_idx)
            
            train_loader = DataLoader(
                train_data, 
                batch_size=self.batch_size, 
                shuffle=True
            )
            val_loader = DataLoader(
                val_data, 
                batch_size=self.batch_size, 
                shuffle=False
            )
            
            # Process this fold
            model, train_fold_f1, val_fold_f1 = self.process_fold(
                train_loader, val_loader, epochs
            )
            
            print(f"\nFOLD {fold + 1} RESULTS:")
            print(f"TRAIN F1_SCORE: {train_fold_f1:.3f}, VAL F1_SCORE: {val_fold_f1:.3f}\n")
            
            # Store scores
            self.train_scores.append(train_fold_f1)
            self.val_scores.append(val_fold_f1)
            
            # NOTE(review): folds are ranked by their epoch-averaged
            # validation F1, while the kept model carries last-epoch weights.
            if val_fold_f1 > best_val_score:
                best_val_score = val_fold_f1
                best_model = model
        
        # Print final results
        train_avg_f1 = sum(self.train_scores) / len(self.train_scores)
        val_avg_f1 = sum(self.val_scores) / len(self.val_scores)
        
        print(f"{'-'*50}")
        print("CROSS-VALIDATION RESULTS:")
        print(f"AVG TRAIN F1_SCORE: {train_avg_f1:.3f}, AVG VAL F1_SCORE: {val_avg_f1:.3f}")
        print(f"{'-'*50}\n")
        
        return best_model, train_avg_f1, val_avg_f1
    
    def predict(self, model):
        """Predict hard 0/1 labels for every item of test_dataset.

        Returns:
            A 1-D numpy array with one prediction per test item, in dataset
            order.
        """
        # Make sure the model lives where the batches are sent.
        model = model.to(self.device)
        model.eval()
        all_preds = []

        test_loader = DataLoader(
            self.test_dataset, 
            batch_size=self.batch_size, 
            shuffle=False
        )
        
        with torch.no_grad():
            for img in tqdm(test_loader, desc="PREDICTING"):
                img = img.to(self.device)
                outputs = self._flatten_outputs(model(img))
                preds = (outputs > 0.5).int()
                all_preds.extend(preds.cpu().numpy())
                
        return np.array(all_preds)

In [6]:
# Training pipeline: resize, random geometric distortion and blur, random
# colour jitter, then tensor conversion and per-channel normalisation.
train_transform = T.Compose([
    T.Resize((128, 128)),

    # Geometry / sharpness perturbations
    T.RandomPerspective(distortion_scale=0.5, p=0.5),
    T.RandomRotation(30),
    T.GaussianBlur(kernel_size=5, sigma=(0.1, 2.0)),

    # Colour perturbations
    T.ColorJitter(brightness=0.5, contrast=0.5, saturation=0.5, hue=0.3),

    T.ToTensor(),
    T.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
])

# Inference pipeline: same resize and normalisation, no random augmentation.
test_transform = T.Compose([
    T.Resize((128, 128)),
    T.ToTensor(),
    T.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
])

In [7]:
# Labelled dataset, read through the augmenting transform.
train_data = DeepfakeDataset(
    train_df,
    "/kaggle/input/ai-vs-human-generated-dataset/",
    train_transform,
    train=True,
)

# Unlabelled dataset, read through the deterministic transform.
test_data = DeepfakeDataset(
    test_df,
    "/kaggle/input/ai-vs-human-generated-dataset/",
    test_transform,
    train=False,
)

if DEBUGGING:
    # Smoke-test mode: keep only the first 1000 rows. The dataframe is cut
    # the same way so that positional fold indices still match the dataset.
    indices = list(range(1000))

    train_df = train_df.iloc[indices]
    train_data = Subset(train_data, indices)

In [8]:
# Prefer the GPU when one is visible to PyTorch.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# 3-fold cross-validation, batches of 64, learning rate 1e-3.
cv_trainer = DeepfakeTrainer(
    AIDetector,
    train_data,
    test_data,
    k_folds=3,
    batch_size=64,
    lr=0.001,
)

# Five epochs per fold; returns the selected model plus fold-averaged F1 scores.
model, train_f1, val_f1 = cv_trainer.train(5, train_df)


--------------------------------------------------
FOLD 1/3
--------------------------------------------------



TRAINING: 100%|██████████| 833/833 [14:16<00:00,  1.03s/it]
VALIDATION: 100%|██████████| 417/417 [06:46<00:00,  1.03it/s]


Epoch 1/5 - Train F1 Score: 0.640, Val F1 Score: 0.722


TRAINING: 100%|██████████| 833/833 [09:35<00:00,  1.45it/s]
VALIDATION: 100%|██████████| 417/417 [04:39<00:00,  1.49it/s]


Epoch 2/5 - Train F1 Score: 0.708, Val F1 Score: 0.738


TRAINING: 100%|██████████| 833/833 [09:31<00:00,  1.46it/s]
VALIDATION: 100%|██████████| 417/417 [04:31<00:00,  1.53it/s]


Epoch 3/5 - Train F1 Score: 0.730, Val F1 Score: 0.756


TRAINING: 100%|██████████| 833/833 [09:23<00:00,  1.48it/s]
VALIDATION: 100%|██████████| 417/417 [04:28<00:00,  1.55it/s]


Epoch 4/5 - Train F1 Score: 0.744, Val F1 Score: 0.755


TRAINING: 100%|██████████| 833/833 [09:16<00:00,  1.50it/s]
VALIDATION: 100%|██████████| 417/417 [04:30<00:00,  1.54it/s]


Epoch 5/5 - Train F1 Score: 0.755, Val F1 Score: 0.703

FOLD 1 RESULTS:
TRAIN F1_SCORE: 0.715, VAL F1_SCORE: 0.735


--------------------------------------------------
FOLD 2/3
--------------------------------------------------



TRAINING: 100%|██████████| 833/833 [09:14<00:00,  1.50it/s]
VALIDATION: 100%|██████████| 417/417 [04:28<00:00,  1.56it/s]


Epoch 1/5 - Train F1 Score: 0.642, Val F1 Score: 0.727


TRAINING: 100%|██████████| 833/833 [09:17<00:00,  1.49it/s]
VALIDATION: 100%|██████████| 417/417 [04:23<00:00,  1.58it/s]


Epoch 2/5 - Train F1 Score: 0.716, Val F1 Score: 0.758


TRAINING: 100%|██████████| 833/833 [09:07<00:00,  1.52it/s]
VALIDATION: 100%|██████████| 417/417 [04:27<00:00,  1.56it/s]


Epoch 3/5 - Train F1 Score: 0.741, Val F1 Score: 0.748


TRAINING: 100%|██████████| 833/833 [09:08<00:00,  1.52it/s]
VALIDATION: 100%|██████████| 417/417 [04:22<00:00,  1.59it/s]


Epoch 4/5 - Train F1 Score: 0.750, Val F1 Score: 0.772


TRAINING: 100%|██████████| 833/833 [09:11<00:00,  1.51it/s]
VALIDATION: 100%|██████████| 417/417 [04:23<00:00,  1.58it/s]


Epoch 5/5 - Train F1 Score: 0.762, Val F1 Score: 0.724

FOLD 2 RESULTS:
TRAIN F1_SCORE: 0.722, VAL F1_SCORE: 0.746


--------------------------------------------------
FOLD 3/3
--------------------------------------------------



TRAINING: 100%|██████████| 833/833 [09:12<00:00,  1.51it/s]
VALIDATION: 100%|██████████| 417/417 [04:27<00:00,  1.56it/s]


Epoch 1/5 - Train F1 Score: 0.671, Val F1 Score: 0.713


TRAINING: 100%|██████████| 833/833 [09:15<00:00,  1.50it/s]
VALIDATION: 100%|██████████| 417/417 [04:24<00:00,  1.58it/s]


Epoch 2/5 - Train F1 Score: 0.719, Val F1 Score: 0.738


TRAINING: 100%|██████████| 833/833 [09:14<00:00,  1.50it/s]
VALIDATION: 100%|██████████| 417/417 [04:26<00:00,  1.57it/s]


Epoch 3/5 - Train F1 Score: 0.736, Val F1 Score: 0.751


TRAINING: 100%|██████████| 833/833 [09:15<00:00,  1.50it/s]
VALIDATION: 100%|██████████| 417/417 [04:26<00:00,  1.56it/s]


Epoch 4/5 - Train F1 Score: 0.748, Val F1 Score: 0.689


TRAINING: 100%|██████████| 833/833 [09:12<00:00,  1.51it/s]
VALIDATION: 100%|██████████| 417/417 [04:24<00:00,  1.57it/s]

Epoch 5/5 - Train F1 Score: 0.756, Val F1 Score: 0.781

FOLD 3 RESULTS:
TRAIN F1_SCORE: 0.726, VAL F1_SCORE: 0.734

--------------------------------------------------
CROSS-VALIDATION RESULTS:
AVG TRAIN F1_SCORE: 0.721, AVG VAL F1_SCORE: 0.738
--------------------------------------------------






In [9]:
# Predict a hard label for every test image with the selected model.
preds = cv_trainer.predict(model)
ids = test_df["id"]

# Two-column submission table: image id and predicted label.
submission = pd.DataFrame({"id": ids}).assign(label=preds)

submission.to_csv("/kaggle/working/V4.csv", index=False)

PREDICTING: 100%|██████████| 87/87 [03:28<00:00,  2.40s/it]
