**Goal of the Assignment**

We are trying to build a binary classifier neural network to detect:

  - Real cats have label = 0

  - Fake cats have label = 1

Images are:

- Size: 32 × 32

- Format: RGB

- Tensor shape: [N, 3, 32, 32]

In [None]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset, Subset

import torchvision.utils as vutils
import torchvision.datasets as dsets
import torchvision.transforms as transforms
import torchvision.models as models
from torch.optim import SGD, Adam # optimizer


from PIL import Image
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import KFold


# Load the Data:

In [None]:

# Training data lives in a single .pt file on the mounted Drive; it unpacks into (images, labels).
DATA_PATH = "/content/drive/MyDrive/Colab Notebooks/deepfake detector/hw2_data.pt"
print("Loading dataset")
X_raw, Y_raw = torch.load(DATA_PATH)

Loading dataset


In [None]:
# Sanity check: print the shapes of the loaded image tensor and label tensor.
print(f"Data shapes--{X_raw.shape}, {Y_raw.shape}")

Data shapes--torch.Size([2000, 3, 32, 32]), torch.Size([2000])


# Baseline Model:

In [None]:
# A. Feature Extraction via ResNet() backbone:

# 1. Load the pretrained model.
# `pretrained=True` is deprecated in torchvision (>= 0.13); the `weights` enum
# requests the same ImageNet checkpoint through the supported API.
resnet = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
resnet.fc = nn.Identity()  # drop the classification head: output embeddings instead of logits
resnet.eval()  # evaluation mode: BatchNorm layers use their stored running statistics

# 2. Function to extract features:
def extract_features(images, batch_size=100, model=None):
    """Run images through a feature extractor in mini-batches and stack the outputs.

    Args:
        images: Tensor whose first dimension indexes the samples.
        batch_size: Number of samples forwarded per call (bounds peak memory).
        model: Module used as the feature extractor. Defaults to the
            notebook-level ``resnet`` (ResNet18 with its head replaced by
            ``nn.Identity``, giving 512-dim embeddings).

    Returns:
        Tensor with one row of features per input sample, in input order.
        No gradients are tracked.
    """
    backbone = resnet if model is None else model

    # Force eval mode for the duration of the call so the result never depends on
    # whatever mode the module was left in (train mode would make BatchNorm/Dropout
    # outputs batch-dependent and update BatchNorm running statistics).
    was_training = backbone.training
    backbone.eval()
    try:
        with torch.no_grad():
            if len(images) == 0:
                # torch.cat() rejects an empty list; forwarding the empty batch
                # yields an empty result with the proper feature dimension.
                return backbone(images)
            chunks = [
                backbone(images[start:start + batch_size])
                for start in range(0, len(images), batch_size)
            ]
            return torch.cat(chunks)
    finally:
        # Leave the module in the mode the caller had it in.
        backbone.train(was_training)

# 2.2 Extract baseline features: embed every training image once up front so the
# classifier experiments below train on fixed embeddings rather than raw pixels.
print("Extracting baseline features...")
X_emb = extract_features(X_raw)  # embeddings used as the classifier's training inputs
print(f"Features extracted. Baseline features shape: {X_emb.shape}")



Extracting baseline features...
Features extracted. Baseline features shape: torch.Size([2000, 512])


In [None]:
# B. MLP architecture:

class MLPClassifier(nn.Module):
    """Fully connected classifier head for precomputed embeddings.

    Architecture: Linear -> ReLU -> Linear -> ReLU -> Linear. The output is raw
    logits (no softmax), which is what ``nn.CrossEntropyLoss`` expects.

    Args:
        input_dim: Size of each input feature vector.
        n_hidden: Width of both hidden layers.
        n_classes: Number of output logits (2 for the real/fake task).
    """

    def __init__(self, input_dim=512, n_hidden=128, n_classes=2):
        super().__init__()
        self.network = nn.Sequential(
            nn.Linear(input_dim, n_hidden),   # First hidden layer
            nn.ReLU(),                        # ReLU nonlinearity
            nn.Linear(n_hidden, n_hidden),    # Second hidden layer
            nn.ReLU(),                        # ReLU nonlinearity
            nn.Linear(n_hidden, n_classes),   # Output logits, one per class
        )

    def forward(self, x):
        """Return logits of shape [batch, n_classes] for input of shape [batch, input_dim]."""
        return self.network(x)

In [None]:
# Check: instantiate the classifier with its default sizes and print its layer structure.
model = MLPClassifier()
print(model)

MLPClassifier(
  (network): Sequential(
    (0): Linear(in_features=512, out_features=128, bias=True)
    (1): ReLU()
    (2): Linear(in_features=128, out_features=128, bias=True)
    (3): ReLU()
    (4): Linear(in_features=128, out_features=2, bias=True)
  )
)


In [None]:
# Check: forward a batch of 10 random 512-dim vectors through the untrained model
# to confirm it produces one pair of logits per row.
x = torch.randn((10, 512))
with torch.no_grad():  # inspection only, no gradients needed
    logits = model(x)
logits  # last expression of the cell, so the notebook displays it

tensor([[-0.0910,  0.0088],
        [-0.0766,  0.1015],
        [-0.0369,  0.0287],
        [-0.1319, -0.0204],
        [-0.1197, -0.0330],
        [-0.1069,  0.0661],
        [-0.0445,  0.0245],
        [-0.0911, -0.0019],
        [-0.0609, -0.0302],
        [-0.1336,  0.1210]])

In [None]:
# C. Baseline training setup: cross-entropy loss, hand-picked hyperparameters (no tuning)

# Hyperparameters for plain SGD
lr, wd = 0.01, 0.0001

# Model, loss criterion and optimizer
model = MLPClassifier()
loss_fn = nn.CrossEntropyLoss()
optimizer = SGD(model.parameters(), lr=lr, weight_decay=wd)

# Hold-out split: the first 1500 embeddings train the model, the rest validate it
train_set = TensorDataset(X_emb[:1500], Y_raw[:1500])
val_set = TensorDataset(X_emb[1500:], Y_raw[1500:])
train_dataloader = DataLoader(train_set, batch_size=50, shuffle=True)
val_dataloader = DataLoader(val_set, batch_size=50, shuffle=True)


In [None]:
# Train for a fixed number of epochs; after each epoch, score the model on the hold-out split.
n_epochs = 50
n_train_batches = len(train_dataloader)
n_val_batches = len(val_dataloader)

for i in range(n_epochs):
    # ---- training pass ----
    model.train()
    running_loss = 0.0
    for x_batch, y_batch in train_dataloader:
        optimizer.zero_grad()                    # clear gradients left over from the previous step
        logits = model(x_batch)                  # forward
        loss = loss_fn(logits, y_batch)          # compare predictions with ground truth
        loss.backward()                          # gradients of the loss w.r.t. the parameters
        optimizer.step()                         # weight update
        running_loss += loss.item() / n_train_batches   # running mean of the per-batch losses

    # ---- validation pass: eval mode, no gradient tracking ----
    model.eval()
    val_loss = 0.
    correct_predictions = 0
    total_predictions = 0
    with torch.no_grad():
        for x_batch, y_batch in val_dataloader:
            logits = model(x_batch)
            loss = loss_fn(logits, y_batch)
            val_loss += loss.item() / n_val_batches

            # Predicted class = index of the larger logit
            predictions = logits.argmax(dim=1)
            correct_predictions += int((predictions == y_batch).sum())
            total_predictions += len(y_batch)

    # Fraction of validation samples classified correctly
    val_accuracy = correct_predictions / total_predictions
    print(f"epoch {i} \t Training loss={running_loss:.3f} || Validation loss={val_loss:.3f}|| Validation Accuracy ={val_accuracy:0.3f}")




epoch 0 	 Training loss=0.006 || Validation loss=0.406|| Validation Accuracy =0.882
epoch 1 	 Training loss=0.006 || Validation loss=0.407|| Validation Accuracy =0.882
epoch 2 	 Training loss=0.006 || Validation loss=0.409|| Validation Accuracy =0.880
epoch 3 	 Training loss=0.006 || Validation loss=0.413|| Validation Accuracy =0.884
epoch 4 	 Training loss=0.006 || Validation loss=0.416|| Validation Accuracy =0.884
epoch 5 	 Training loss=0.006 || Validation loss=0.411|| Validation Accuracy =0.882
epoch 6 	 Training loss=0.006 || Validation loss=0.412|| Validation Accuracy =0.882
epoch 7 	 Training loss=0.006 || Validation loss=0.412|| Validation Accuracy =0.882
epoch 8 	 Training loss=0.006 || Validation loss=0.420|| Validation Accuracy =0.884
epoch 9 	 Training loss=0.006 || Validation loss=0.415|| Validation Accuracy =0.880
epoch 10 	 Training loss=0.006 || Validation loss=0.412|| Validation Accuracy =0.884
epoch 11 	 Training loss=0.006 || Validation loss=0.414|| Validation Accura

# 2. Model_2 =  Baseline Model + K fold Hyperparameter tuning:

- Test data Accuracy 0.922

In [None]:
# D. Model with Hyper parameter tuning with K-fold:
# Grid search over (learning rate, weight decay). Each pair is scored by its mean
# validation accuracy over 5 folds, and the best-scoring pair is kept in best_params.

# Hyperparameter Grid:
learning_rates = [0.001, 0.01, 0.05, 0.1]
weight_decays = [0, 1e-4, 1e-3, 1e-2]
print("Hyper parameter Grid")
print(f"Learning Rates: {learning_rates}")
print(f"Weight Decay: {weight_decays}")

n_epochs = 50
cv_seed = 42  # one seed for both the fold split and the per-fold torch RNG

# Prepare K-Fold:
kf = KFold(n_splits=5, shuffle=True, random_state=cv_seed)  # 5-fold CV with shuffling
full_dataset = TensorDataset(X_emb, Y_raw)

# counters for best parameter update:
best_acc = 0
best_params = (None, None)

print("--------------------")
print("Starting Grid Search CV...")
for lr in learning_rates:
    for wd in weight_decays:
        fold_accuracies = []   # validation accuracy of each fold
        train_loss = []        # last-epoch training loss of each fold
        validation_loss = []   # validation loss of each fold

        for fold, (train_idx, val_idx) in enumerate(kf.split(full_dataset)):
            # KFold fixes the split through random_state, but weight initialisation
            # and batch shuffling draw from torch's global RNG. Re-seeding per fold
            # gives every (lr, wd) pair the same initial weights and batch order on
            # a given fold, so score differences reflect the hyperparameters rather
            # than random-init noise, and the whole search is reproducible.
            torch.manual_seed(cv_seed + fold)

            # Split data
            train_sub = Subset(full_dataset, train_idx)
            val_sub = Subset(full_dataset, val_idx)

            train_loader = DataLoader(train_sub, batch_size=50, shuffle=True)
            # Evaluation metrics do not depend on sample order, so no shuffling here.
            val_loader = DataLoader(val_sub, batch_size=50, shuffle=False)

            # Fresh model, loss and optimizer for every fold
            model = MLPClassifier()
            loss_fn = nn.CrossEntropyLoss()
            optimizer = SGD(model.parameters(), lr=lr, weight_decay=wd)

            # Train for n_epochs
            model.train()
            for epoch in range(n_epochs):
                running_loss = 0.0
                for batch_X, batch_y in train_loader:
                    optimizer.zero_grad()
                    logits = model(batch_X)
                    loss = loss_fn(logits, batch_y)
                    loss.backward()
                    optimizer.step()
                    running_loss += loss.item() / len(train_loader)
            # running_loss now holds the mean batch loss of the final epoch only
            train_loss.append(running_loss)

            # Validate
            model.eval()
            correct_predictions = 0
            total_predictions = 0
            val_loss = 0.0
            with torch.no_grad():
                for batch_X, batch_y in val_loader:
                    logits = model(batch_X)
                    loss = loss_fn(logits, batch_y)
                    val_loss += loss.item() / len(val_loader)
                    predictions = torch.argmax(logits, dim=1)
                    correct_predictions += (predictions == batch_y).sum().item()
                    total_predictions += batch_y.size(0)
            val_acc = correct_predictions / total_predictions

            validation_loss.append(val_loss)
            fold_accuracies.append(val_acc)

        # Choose hyperparameters by mean validation accuracy across folds
        mean_val_acc = np.mean(fold_accuracies)
        mean_train_loss = np.mean(train_loss)
        mean_val_loss = np.mean(validation_loss)

        print("-----------")
        print(f"Learning Rate: {lr}, Weight Decay: {wd}")
        print(f"Train_Loss ={mean_train_loss:0.4f}|| Validation_loss ={mean_val_loss:0.4f}|| Validation_accurancy ={mean_val_acc:.4f},")

        # Strict '>' keeps the first pair encountered when two pairs tie
        if mean_val_acc > best_acc:
            best_acc = mean_val_acc
            best_params = (lr, wd)
print("\n------------------------------")
print(f"Best Parameters: Learning Rate={best_params[0]}, Weight Decay={best_params[1]} with Validation Accuracy={best_acc:.4f}")

Hyper parameter Grid
Learning Rates: [0.001, 0.01, 0.05, 0.1]
Weight Decay: [0, 0.0001, 0.001, 0.01]
--------------------
Starting Grid Search CV...
-----------
Learning Rate: 0.001, Weight Decay: 0
Train_Loss =0.5145|| Validation_loss =0.5179|| Validation_accurancy =0.8110,
-----------
Learning Rate: 0.001, Weight Decay: 0.0001
Train_Loss =0.5171|| Validation_loss =0.5174|| Validation_accurancy =0.8110,
-----------
Learning Rate: 0.001, Weight Decay: 0.001
Train_Loss =0.5226|| Validation_loss =0.5276|| Validation_accurancy =0.8145,
-----------
Learning Rate: 0.001, Weight Decay: 0.01
Train_Loss =0.5349|| Validation_loss =0.5377|| Validation_accurancy =0.8105,
-----------
Learning Rate: 0.01, Weight Decay: 0
Train_Loss =0.1297|| Validation_loss =0.2254|| Validation_accurancy =0.9115,
-----------
Learning Rate: 0.01, Weight Decay: 0.0001
Train_Loss =0.1355|| Validation_loss =0.2273|| Validation_accurancy =0.9135,
-----------
Learning Rate: 0.01, Weight Decay: 0.001
Train_Loss =0.1367|| 

## Final Model after Hyperparameter Tuning

- I chose the best parameters:

   **Learning Rate=0.1, Weight Decay=0.0001**
   with Validation Accuracy=0.9180 OR 91.8%

- I retrained this model on the full dataset and predicted the labels for the test data set.

In [None]:
# Final fit: retrain a fresh model on every labelled sample using the hyperparameters
# selected by the grid search. No validation split is held out here.
full_dataset = TensorDataset(X_emb, Y_raw)
full_loader = DataLoader(full_dataset, batch_size=50, shuffle=True)
n_batches = len(full_loader)

# Fresh model, loss and SGD optimizer; best_params is (learning rate, weight decay)
best_lr, best_wd = best_params
final_model = MLPClassifier()
loss_fn = nn.CrossEntropyLoss()
final_optimizer = SGD(final_model.parameters(), lr=best_lr, weight_decay=best_wd)

n_epochs = 50
print(f"Training final model with LR={best_params[0]} and WD={best_params[1]} for {n_epochs} epochs")
print("------------")

# Nothing inside the loop switches the model to eval mode, so train mode is set once.
final_model.train()
for epoch in range(n_epochs):
    running_loss = 0.0
    for x_batch, y_batch in full_loader:
        final_optimizer.zero_grad()
        logits = final_model(x_batch)
        loss = loss_fn(logits, y_batch)
        loss.backward()
        final_optimizer.step()
        running_loss += loss.item() / n_batches  # running mean of the per-batch losses
    print(f"Epoch {epoch} \t Training Loss: {running_loss:.4f}")

print("Final model training complete.")

Training final model with LR=0.1 and WD=0.0001 for 50 epochs
------------
Epoch 0 	 Training Loss: 0.5710
Epoch 1 	 Training Loss: 0.3809
Epoch 2 	 Training Loss: 0.3064
Epoch 3 	 Training Loss: 0.2759
Epoch 4 	 Training Loss: 0.2415
Epoch 5 	 Training Loss: 0.2225
Epoch 6 	 Training Loss: 0.2371
Epoch 7 	 Training Loss: 0.1970
Epoch 8 	 Training Loss: 0.2406
Epoch 9 	 Training Loss: 0.1730
Epoch 10 	 Training Loss: 0.1564
Epoch 11 	 Training Loss: 0.1610
Epoch 12 	 Training Loss: 0.1502
Epoch 13 	 Training Loss: 0.1632
Epoch 14 	 Training Loss: 0.1234
Epoch 15 	 Training Loss: 0.1649
Epoch 16 	 Training Loss: 0.1380
Epoch 17 	 Training Loss: 0.0961
Epoch 18 	 Training Loss: 0.1080
Epoch 19 	 Training Loss: 0.0857
Epoch 20 	 Training Loss: 0.1138
Epoch 21 	 Training Loss: 0.0797
Epoch 22 	 Training Loss: 0.0484
Epoch 23 	 Training Loss: 0.0585
Epoch 24 	 Training Loss: 0.0986
Epoch 25 	 Training Loss: 0.0802
Epoch 26 	 Training Loss: 0.0365
Epoch 27 	 Training Loss: 0.0275
Epoch 28 	 T

In [None]:

# Load the test file and inspect it: index 0 holds the sample ids, index 1 the images.
TEST_PATH = "/content/drive/MyDrive/Colab Notebooks/deepfake detector/hw2_test-1.pt"
data_test = torch.load(TEST_PATH)
print(type(data_test))
for entry in (data_test[0], data_test[1]):  # ids first, then images
    print(entry.shape)



<class 'list'>
torch.Size([500])
torch.Size([500, 3, 32, 32])


In [None]:
# Predict labels for the test images and write them to predictions.csv
X_test_raw = data_test[1]   # test images
Y_test_ids = data_test[0]   # sample ids, written back out next to the predictions

X_test_emb = extract_features(X_test_raw)
final_model.eval()
with torch.no_grad():
    test_outputs = final_model(X_test_emb)
    predictions = torch.argmax(test_outputs, dim=1).cpu().numpy()

# Save to CSV file.
# Convert the ids to a NumPy array explicitly instead of handing a torch tensor to
# pandas: how pandas coerces a tensor is version-dependent (older releases keep
# 0-d tensor objects, which would be written as "tensor(3)" rather than 3).
if torch.is_tensor(Y_test_ids):
    test_ids = Y_test_ids.cpu().numpy()
else:
    test_ids = np.asarray(Y_test_ids)
df = pd.DataFrame({'id': test_ids, 'label': predictions})
df.to_csv("predictions.csv", index=False)

# 3. Model_3 = Model_2+ Dropout

- Hyperparameter: Learning Rate=0.1, Weight Decay=0.0001
  - dropout_prob = 0.5  ---- Test Set accuracy 0.902

  - dropout_prob =0.3 --- Test Set Accuracy 0.916



In [None]:
# A. MLP architecture:

class MLPClassifier_dropout(nn.Module):
    """MLP classifier head with dropout after each hidden activation.

    Layout: Linear -> ReLU -> Dropout -> Linear -> ReLU -> Dropout -> Linear.
    Both hidden layers have ``n_hidden`` units and the last layer emits 2 raw logits.
    """

    def __init__(self, input_dim=512, n_hidden=128, dropout_prob=0.3):
        super().__init__()
        layers = [
            nn.Linear(input_dim, n_hidden),   # hidden layer 1
            nn.ReLU(),
            nn.Dropout(p=dropout_prob),
            nn.Linear(n_hidden, n_hidden),    # hidden layer 2
            nn.ReLU(),
            nn.Dropout(p=dropout_prob),
            nn.Linear(n_hidden, 2),           # logits for the 2 classes
        ]
        self.network = nn.Sequential(*layers)

    def forward(self, x):
        """Apply the layer stack to ``x`` and return the logits."""
        return self.network(x)

In [None]:
 #C. Re-training the model with cross-entropy loss, using the best hyperparameters from the grid search

# Hyperparameters are taken from the earlier grid search: best_params = (learning rate, weight decay)
lr, wd = best_params

# Dropout model, loss criterion and optimizer
model = MLPClassifier_dropout()
loss_fn = nn.CrossEntropyLoss()
optimizer = SGD(model.parameters(), lr=lr, weight_decay=wd)

# Hold-out split: the first 1500 embeddings train the model, the rest validate it
train_set = TensorDataset(X_emb[:1500], Y_raw[:1500])
val_set = TensorDataset(X_emb[1500:], Y_raw[1500:])
train_dataloader = DataLoader(train_set, batch_size=50, shuffle=True)
val_dataloader = DataLoader(val_set, batch_size=50, shuffle=True)
n_train_batches = len(train_dataloader)
n_val_batches = len(val_dataloader)

# Train for a fixed number of epochs; after each epoch, score the model on the hold-out split.
n_epochs = 50
for i in range(n_epochs):
    # ---- training pass (dropout active) ----
    model.train()
    running_loss = 0.0
    for x_batch, y_batch in train_dataloader:
        optimizer.zero_grad()                    # clear gradients left over from the previous step
        logits = model(x_batch)                  # forward
        loss = loss_fn(logits, y_batch)          # compare predictions with ground truth
        loss.backward()                          # gradients of the loss w.r.t. the parameters
        optimizer.step()                         # weight update
        running_loss += loss.item() / n_train_batches   # running mean of the per-batch losses

    # ---- validation pass: eval mode disables dropout, no gradient tracking ----
    model.eval()
    val_loss = 0.
    correct_predictions = 0
    total_predictions = 0
    with torch.no_grad():
        for x_batch, y_batch in val_dataloader:
            logits = model(x_batch)
            loss = loss_fn(logits, y_batch)
            val_loss += loss.item() / n_val_batches

            # Predicted class = index of the larger logit
            predictions = logits.argmax(dim=1)
            correct_predictions += int((predictions == y_batch).sum())
            total_predictions += len(y_batch)

    # Fraction of validation samples classified correctly
    val_accuracy = correct_predictions / total_predictions
    print(f"epoch {i} \t Training loss={running_loss:.3f} || Validation loss={val_loss:.3f}|| Validation Accuracy ={val_accuracy:0.3f}")



epoch 0 	 Training loss=0.608 || Validation loss=0.471|| Validation Accuracy =0.788
epoch 1 	 Training loss=0.450 || Validation loss=0.386|| Validation Accuracy =0.814
epoch 2 	 Training loss=0.381 || Validation loss=0.362|| Validation Accuracy =0.840
epoch 3 	 Training loss=0.321 || Validation loss=0.275|| Validation Accuracy =0.888
epoch 4 	 Training loss=0.324 || Validation loss=0.255|| Validation Accuracy =0.890
epoch 5 	 Training loss=0.286 || Validation loss=0.266|| Validation Accuracy =0.900
epoch 6 	 Training loss=0.262 || Validation loss=0.305|| Validation Accuracy =0.854
epoch 7 	 Training loss=0.234 || Validation loss=0.285|| Validation Accuracy =0.870
epoch 8 	 Training loss=0.240 || Validation loss=0.294|| Validation Accuracy =0.886
epoch 9 	 Training loss=0.230 || Validation loss=0.242|| Validation Accuracy =0.892
epoch 10 	 Training loss=0.234 || Validation loss=0.229|| Validation Accuracy =0.900
epoch 11 	 Training loss=0.206 || Validation loss=0.222|| Validation Accura

In [None]:
# Final fit of the dropout model: retrain a fresh instance on every labelled sample
# using the hyperparameters stored in best_params. No validation split is held out here.
full_dataset = TensorDataset(X_emb, Y_raw)
full_loader = DataLoader(full_dataset, batch_size=50, shuffle=True)
n_batches = len(full_loader)

# Fresh model, loss and SGD optimizer; best_params is (learning rate, weight decay)
best_lr, best_wd = best_params
final_model = MLPClassifier_dropout()
loss_fn = nn.CrossEntropyLoss()
final_optimizer = SGD(final_model.parameters(), lr=best_lr, weight_decay=best_wd)

n_epochs = 50
print(f"Training model with dropout with LR={best_params[0]} and WD={best_params[1]} for {n_epochs} epochs")
print("------------")

# Nothing inside the loop switches the model to eval mode, so train mode is set once.
final_model.train()
for epoch in range(n_epochs):
    running_loss = 0.0
    for x_batch, y_batch in full_loader:
        final_optimizer.zero_grad()
        logits = final_model(x_batch)
        loss = loss_fn(logits, y_batch)
        loss.backward()
        final_optimizer.step()
        running_loss += loss.item() / n_batches  # running mean of the per-batch losses
    print(f"Epoch {epoch} \t Training Loss: {running_loss:.4f}")

print("Final model training complete.")

Training model with dropout with LR=0.1 and WD=0.0001 for 50 epochs
------------
Epoch 0 	 Training Loss: 0.5568
Epoch 1 	 Training Loss: 0.3964
Epoch 2 	 Training Loss: 0.3269
Epoch 3 	 Training Loss: 0.2937
Epoch 4 	 Training Loss: 0.2710
Epoch 5 	 Training Loss: 0.2439
Epoch 6 	 Training Loss: 0.2377
Epoch 7 	 Training Loss: 0.2334
Epoch 8 	 Training Loss: 0.2037
Epoch 9 	 Training Loss: 0.1957
Epoch 10 	 Training Loss: 0.1877
Epoch 11 	 Training Loss: 0.1794
Epoch 12 	 Training Loss: 0.1866
Epoch 13 	 Training Loss: 0.1533
Epoch 14 	 Training Loss: 0.1598
Epoch 15 	 Training Loss: 0.1359
Epoch 16 	 Training Loss: 0.1558
Epoch 17 	 Training Loss: 0.1437
Epoch 18 	 Training Loss: 0.1368
Epoch 19 	 Training Loss: 0.1482
Epoch 20 	 Training Loss: 0.1228
Epoch 21 	 Training Loss: 0.1191
Epoch 22 	 Training Loss: 0.0993
Epoch 23 	 Training Loss: 0.0911
Epoch 24 	 Training Loss: 0.0884
Epoch 25 	 Training Loss: 0.1462
Epoch 26 	 Training Loss: 0.0791
Epoch 27 	 Training Loss: 0.0909
Epoch

In [None]:
# Load the test file, predict labels for its images and write them to predictions.csv
data_test = torch.load("/content/drive/MyDrive/Colab Notebooks/deepfake detector/hw2_test-1.pt")
X_test_raw = data_test[1]   # test images
Y_test_ids = data_test[0]   # sample ids, written back out next to the predictions

X_test_emb = extract_features(X_test_raw)
final_model.eval()  # switches the dropout layers off for inference
with torch.no_grad():
    test_outputs = final_model(X_test_emb)
    predictions = torch.argmax(test_outputs, dim=1).cpu().numpy()

# Save to CSV file.
# Convert the ids to a NumPy array explicitly instead of handing a torch tensor to
# pandas: how pandas coerces a tensor is version-dependent (older releases keep
# 0-d tensor objects, which would be written as "tensor(3)" rather than 3).
if torch.is_tensor(Y_test_ids):
    test_ids = Y_test_ids.cpu().numpy()
else:
    test_ids = np.asarray(Y_test_ids)
df = pd.DataFrame({'id': test_ids, 'label': predictions})
df.to_csv("predictions.csv", index=False)

# 4. Model_4 = Baseline + Dropout + HP tuning

- Dropout =0.3
  - Best Parameter : Learning Rate=0.01, Weight Decay=0 with Validation Accuracy=0.9160
  - Test Set Accuracy 0.908

- Dropout =0.5
  - Best Parameters: Learning Rate=0.05, Weight Decay=0.0001 with Validation Accuracy=0.9205
  - Test Set Accuracy 0.9000



In [None]:
 #A. MLP architecture:

class MLPClassifier_dropout(nn.Module):
    """MLP classifier head with dropout after each hidden activation.

    Layout: Linear -> ReLU -> Dropout -> Linear -> ReLU -> Dropout -> Linear.
    Both hidden layers have ``n_hidden`` units and the last layer emits 2 raw logits.
    This definition uses a default dropout probability of 0.5.
    """

    def __init__(self, input_dim=512, n_hidden=128, dropout_prob=0.5):
        super().__init__()
        layers = [
            nn.Linear(input_dim, n_hidden),   # hidden layer 1
            nn.ReLU(),
            nn.Dropout(p=dropout_prob),
            nn.Linear(n_hidden, n_hidden),    # hidden layer 2
            nn.ReLU(),
            nn.Dropout(p=dropout_prob),
            nn.Linear(n_hidden, 2),           # logits for the 2 classes
        ]
        self.network = nn.Sequential(*layers)

    def forward(self, x):
        """Apply the layer stack to ``x`` and return the logits."""
        return self.network(x)

In [None]:
# D. Model with Hyper parameter tuning with K-fold:
# Grid search over (learning rate, weight decay) for the dropout model. Each pair is
# scored by its mean validation accuracy over 5 folds; the best pair is kept in best_params.

# Hyperparameter Grid:
learning_rates = [0.001, 0.01, 0.05, 0.1]
weight_decays = [0, 1e-4, 1e-3, 1e-2]
print("Hyper parameter Grid")
print(f"Learning Rates: {learning_rates}")
print(f"Weight Decay: {weight_decays}")

n_epochs = 50
cv_seed = 42  # one seed for both the fold split and the per-fold torch RNG

# Prepare K-Fold:
kf = KFold(n_splits=5, shuffle=True, random_state=cv_seed)  # 5-fold CV with shuffling
full_dataset = TensorDataset(X_emb, Y_raw)

# counters for best parameter update:
best_acc = 0
best_params = (None, None)

print("--------------------")
print("Starting Grid Search CV...")
for lr in learning_rates:
    for wd in weight_decays:
        fold_accuracies = []   # validation accuracy of each fold
        train_loss = []        # last-epoch training loss of each fold
        validation_loss = []   # validation loss of each fold

        for fold, (train_idx, val_idx) in enumerate(kf.split(full_dataset)):
            # KFold fixes the split through random_state, but weight initialisation,
            # batch shuffling and dropout masks draw from torch's global RNG.
            # Re-seeding per fold gives every (lr, wd) pair the same starting point
            # on a given fold, so score differences reflect the hyperparameters
            # rather than random noise, and the whole search is reproducible.
            torch.manual_seed(cv_seed + fold)

            # Split data
            train_sub = Subset(full_dataset, train_idx)
            val_sub = Subset(full_dataset, val_idx)

            train_loader = DataLoader(train_sub, batch_size=50, shuffle=True)
            # Evaluation metrics do not depend on sample order, so no shuffling here.
            val_loader = DataLoader(val_sub, batch_size=50, shuffle=False)

            # Fresh model, loss and optimizer for every fold
            model = MLPClassifier_dropout()
            loss_fn = nn.CrossEntropyLoss()
            optimizer = SGD(model.parameters(), lr=lr, weight_decay=wd)

            # Train for n_epochs (train mode keeps dropout active)
            model.train()
            for epoch in range(n_epochs):
                running_loss = 0.0
                for batch_X, batch_y in train_loader:
                    optimizer.zero_grad()
                    logits = model(batch_X)
                    loss = loss_fn(logits, batch_y)
                    loss.backward()
                    optimizer.step()
                    running_loss += loss.item() / len(train_loader)
            # running_loss now holds the mean batch loss of the final epoch only
            train_loss.append(running_loss)

            # Validate (eval mode disables dropout)
            model.eval()
            correct_predictions = 0
            total_predictions = 0
            val_loss = 0.0
            with torch.no_grad():
                for batch_X, batch_y in val_loader:
                    logits = model(batch_X)
                    loss = loss_fn(logits, batch_y)
                    val_loss += loss.item() / len(val_loader)
                    predictions = torch.argmax(logits, dim=1)
                    correct_predictions += (predictions == batch_y).sum().item()
                    total_predictions += batch_y.size(0)
            val_acc = correct_predictions / total_predictions

            validation_loss.append(val_loss)
            fold_accuracies.append(val_acc)

        # Choose hyperparameters by mean validation accuracy across folds
        mean_val_acc = np.mean(fold_accuracies)
        mean_train_loss = np.mean(train_loss)
        mean_val_loss = np.mean(validation_loss)

        print("-----------")
        print(f"Learning Rate: {lr}, Weight Decay: {wd}")
        print(f"Train_Loss ={mean_train_loss:0.4f}|| Validation_loss ={mean_val_loss:0.4f}|| Validation_accurancy ={mean_val_acc:.4f},")

        # Strict '>' keeps the first pair encountered when two pairs tie
        if mean_val_acc > best_acc:
            best_acc = mean_val_acc
            best_params = (lr, wd)
print("\n------------------------------")
print(f"Best Parameters: Learning Rate={best_params[0]}, Weight Decay={best_params[1]} with Validation Accuracy={best_acc:.4f}")

Hyper parameter Grid
Learning Rates: [0.001, 0.01, 0.05, 0.1]
Weight Decay: [0, 0.0001, 0.001, 0.01]
--------------------
Starting Grid Search CV...
-----------
Learning Rate: 0.001, Weight Decay: 0
Train_Loss =0.6031|| Validation_loss =0.5858|| Validation_accurancy =0.7875,
-----------
Learning Rate: 0.001, Weight Decay: 0.0001
Train_Loss =0.6061|| Validation_loss =0.5848|| Validation_accurancy =0.8070,
-----------
Learning Rate: 0.001, Weight Decay: 0.001
Train_Loss =0.5961|| Validation_loss =0.5752|| Validation_accurancy =0.8020,
-----------
Learning Rate: 0.001, Weight Decay: 0.01
Train_Loss =0.6148|| Validation_loss =0.6025|| Validation_accurancy =0.7760,
-----------
Learning Rate: 0.01, Weight Decay: 0
Train_Loss =0.1965|| Validation_loss =0.2299|| Validation_accurancy =0.9080,
-----------
Learning Rate: 0.01, Weight Decay: 0.0001
Train_Loss =0.1943|| Validation_loss =0.2332|| Validation_accurancy =0.9110,
-----------
Learning Rate: 0.01, Weight Decay: 0.001
Train_Loss =0.2051|| 

In [None]:
# Final fit of the tuned dropout model: retrain a fresh instance on every labelled sample
# using the hyperparameters stored in best_params. No validation split is held out here.
full_dataset = TensorDataset(X_emb, Y_raw)
full_loader = DataLoader(full_dataset, batch_size=50, shuffle=True)
n_batches = len(full_loader)

# Fresh model, loss and SGD optimizer; best_params is (learning rate, weight decay)
best_lr, best_wd = best_params
final_model = MLPClassifier_dropout()
loss_fn = nn.CrossEntropyLoss()
final_optimizer = SGD(final_model.parameters(), lr=best_lr, weight_decay=best_wd)

n_epochs = 50
print(f"Training final model with LR={best_params[0]} and WD={best_params[1]} for {n_epochs} epochs")
print("------------")

# Nothing inside the loop switches the model to eval mode, so train mode is set once.
final_model.train()
for epoch in range(n_epochs):
    running_loss = 0.0
    for x_batch, y_batch in full_loader:
        final_optimizer.zero_grad()
        logits = final_model(x_batch)
        loss = loss_fn(logits, y_batch)
        loss.backward()
        final_optimizer.step()
        running_loss += loss.item() / n_batches  # running mean of the per-batch losses
    print(f"Epoch {epoch} \t Training Loss: {running_loss:.4f}")

print("Final model training complete.")

Training final model with LR=0.05 and WD=0.0001 for 50 epochs
------------
Epoch 0 	 Training Loss: 0.6467
Epoch 1 	 Training Loss: 0.4952
Epoch 2 	 Training Loss: 0.3916
Epoch 3 	 Training Loss: 0.3394
Epoch 4 	 Training Loss: 0.3250
Epoch 5 	 Training Loss: 0.2822
Epoch 6 	 Training Loss: 0.2798
Epoch 7 	 Training Loss: 0.2537
Epoch 8 	 Training Loss: 0.2385
Epoch 9 	 Training Loss: 0.2525
Epoch 10 	 Training Loss: 0.2400
Epoch 11 	 Training Loss: 0.2138
Epoch 12 	 Training Loss: 0.2066
Epoch 13 	 Training Loss: 0.2095
Epoch 14 	 Training Loss: 0.1968
Epoch 15 	 Training Loss: 0.1927
Epoch 16 	 Training Loss: 0.1852
Epoch 17 	 Training Loss: 0.1851
Epoch 18 	 Training Loss: 0.1741
Epoch 19 	 Training Loss: 0.1652
Epoch 20 	 Training Loss: 0.1513
Epoch 21 	 Training Loss: 0.1591
Epoch 22 	 Training Loss: 0.1676
Epoch 23 	 Training Loss: 0.1433
Epoch 24 	 Training Loss: 0.1471
Epoch 25 	 Training Loss: 0.1469
Epoch 26 	 Training Loss: 0.1277
Epoch 27 	 Training Loss: 0.1417
Epoch 28 	 

In [None]:
# Load the test file, predict labels for its images and write them to predictions.csv
data_test = torch.load("/content/drive/MyDrive/Colab Notebooks/deepfake detector/hw2_test-1.pt")
X_test_raw = data_test[1]   # test images
Y_test_ids = data_test[0]   # sample ids, written back out next to the predictions

X_test_emb = extract_features(X_test_raw)
final_model.eval()  # switches the dropout layers off for inference
with torch.no_grad():
    test_outputs = final_model(X_test_emb)
    predictions = torch.argmax(test_outputs, dim=1).cpu().numpy()

# Save to CSV file.
# Convert the ids to a NumPy array explicitly instead of handing a torch tensor to
# pandas: how pandas coerces a tensor is version-dependent (older releases keep
# 0-d tensor objects, which would be written as "tensor(3)" rather than 3).
if torch.is_tensor(Y_test_ids):
    test_ids = Y_test_ids.cpu().numpy()
else:
    test_ids = np.asarray(Y_test_ids)
df = pd.DataFrame({'id': test_ids, 'label': predictions})
df.to_csv("predictions.csv", index=False)

# 5. Model_5 = Model_2 with Adam Optimizer

- Best Parameters: Learning Rate=0.001, Weight Decay=0 with Validation Accuracy=0.9055

- Test Data Accuracy 0.9140


In [None]:
# B. MLP architecture:

class MLPClassifier(nn.Module):
    """Two-hidden-layer MLP that maps feature vectors to class logits.

    Args:
        input_dim: Length of each input feature vector (default 512).
        n_hidden: Width of both hidden layers.
        n_classes: Number of output logits (default 2, matching the
            real/fake labels used in this notebook).
    """

    def __init__(self, input_dim=512, n_hidden=128, n_classes=2):
        super().__init__()
        # Layer order (and therefore the state_dict keys network.0/2/4) is
        # kept stable so previously saved weights still load.
        self.network = nn.Sequential(
            nn.Linear(input_dim, n_hidden),  # First hidden layer
            nn.ReLU(),                       # ReLU nonlinearity
            nn.Linear(n_hidden, n_hidden),   # Second hidden layer
            nn.ReLU(),                       # ReLU nonlinearity
            nn.Linear(n_hidden, n_classes),  # Output logits, one per class
        )

    def forward(self, x):
        """Return raw (un-normalized) logits for a batch of feature vectors."""
        return self.network(x)


# D. Model with Hyper parameter tuning with K-fold:
#
# Grid search over (learning rate, weight decay) for MLPClassifier trained with
# Adam. Every combination is trained from scratch on each fold and scored by its
# mean validation accuracy; the winning pair is left in `best_params`.

# Hyperparameter Grid:
learning_rates = [0.001, 0.01, 0.05, 0.1]
weight_decays = [0, 1e-4, 1e-3, 1e-2]
print("Hyper parameter Grid")
print(f"Learning Rates: {learning_rates}")
print(f"Weight Decay: {weight_decays}")

n_epochs = 50

# Prepare K-Fold:
kf = KFold(n_splits=5, shuffle=True, random_state=42)  # 5-fold CV with shuffling
full_dataset = TensorDataset(X_emb, Y_raw)

# The loss module holds no per-run state, so one instance serves every fold.
loss_fn = nn.CrossEntropyLoss()

# Best configuration seen so far:
best_acc = 0
best_params = (None, None)

print("--------------------")
print("Starting Grid Search CV...")
for lr in learning_rates:
    for wd in weight_decays:

        fold_accuracies = []   # validation accuracy of each fold
        train_loss = []        # last-epoch training loss of each fold
        validation_loss = []   # validation loss of each fold

        for train_idx, val_idx in kf.split(full_dataset):
            # Split data
            train_sub = Subset(full_dataset, train_idx)
            val_sub = Subset(full_dataset, val_idx)

            train_loader = DataLoader(train_sub, batch_size=50, shuffle=True)
            # Evaluation does not depend on sample order, so no shuffling here.
            val_loader = DataLoader(val_sub, batch_size=50, shuffle=False)

            # Fresh model and optimizer for every fold
            model = MLPClassifier()
            optimizer = Adam(model.parameters(), lr=lr, weight_decay=wd)

            # Train for n_epochs
            model.train()
            for epoch in range(n_epochs):
                running_loss = 0.0
                for batch_X, batch_y in train_loader:
                    optimizer.zero_grad()
                    logits = model(batch_X)
                    loss = loss_fn(logits, batch_y)
                    loss.backward()
                    optimizer.step()
                    # loss is a per-batch mean; weight it by the batch size so
                    # a smaller final batch does not skew the epoch average.
                    running_loss += loss.item() * batch_y.size(0)
                running_loss /= len(train_sub)
            # Only the final epoch's training loss is recorded for the fold.
            train_loss.append(running_loss)

            # Validate
            model.eval()
            correct_predictions = 0
            total_predictions = 0
            val_loss = 0.0
            with torch.no_grad():
                for batch_X, batch_y in val_loader:
                    logits = model(batch_X)
                    loss = loss_fn(logits, batch_y)
                    val_loss += loss.item() * batch_y.size(0)
                    predictions = torch.argmax(logits, dim=1)
                    correct_predictions += (predictions == batch_y).sum().item()
                    total_predictions += batch_y.size(0)
            val_loss /= total_predictions
            val_acc = correct_predictions / total_predictions

            validation_loss.append(val_loss)
            fold_accuracies.append(val_acc)

        mean_val_acc = np.mean(fold_accuracies)  # Choose hyperparameters by mean validation accuracy
        mean_train_loss = np.mean(train_loss)
        mean_val_loss = np.mean(validation_loss)

        print("-----------")
        print(f"Learning Rate: {lr}, Weight Decay: {wd}")
        print(f"Train_Loss ={mean_train_loss:0.4f}|| Validation_loss ={mean_val_loss:0.4f}|| Validation_accurancy ={mean_val_acc:.4f},")

        # Strict '>' keeps the earliest configuration on ties.
        if mean_val_acc > best_acc:
            best_acc = mean_val_acc
            best_params = (lr, wd)
print("\n------------------------------")
print(f"Best Parameters: Learning Rate={best_params[0]}, Weight Decay={best_params[1]} with Validation Accuracy={best_acc:.4f}")

Hyper parameter Grid
Learning Rates: [0.001, 0.01, 0.05, 0.1]
Weight Decay: [0, 0.0001, 0.001, 0.01]
--------------------
Starting Grid Search CV...
-----------
Learning Rate: 0.001, Weight Decay: 0
Train_Loss =0.0000|| Validation_loss =0.6622|| Validation_accurancy =0.9055,
-----------
Learning Rate: 0.001, Weight Decay: 0.0001
Train_Loss =0.0002|| Validation_loss =0.5577|| Validation_accurancy =0.9040,
-----------
Learning Rate: 0.001, Weight Decay: 0.001
Train_Loss =0.0309|| Validation_loss =0.3796|| Validation_accurancy =0.9040,
-----------
Learning Rate: 0.001, Weight Decay: 0.01
Train_Loss =0.0345|| Validation_loss =0.2921|| Validation_accurancy =0.9000,
-----------
Learning Rate: 0.01, Weight Decay: 0
Train_Loss =0.0001|| Validation_loss =0.9683|| Validation_accurancy =0.9045,
-----------
Learning Rate: 0.01, Weight Decay: 0.0001
Train_Loss =0.0102|| Validation_loss =0.8103|| Validation_accurancy =0.8990,
-----------
Learning Rate: 0.01, Weight Decay: 0.001
Train_Loss =0.0371|| 

In [None]:
# Display the (learning_rate, weight_decay) tuple chosen by the grid search.
best_params

(0.001, 0)

In [None]:
# Final training on all data :
# Refit a fresh MLPClassifier on the complete embedded dataset (no held-out
# split) using the (lr, weight_decay) pair stored in best_params.
full_dataset = TensorDataset(X_emb, Y_raw)
full_loader = DataLoader(full_dataset, batch_size=50, shuffle=True)

# Initialize the final model with best hyperparameters:
final_model = MLPClassifier() # New, untrained instance
loss_fn = nn.CrossEntropyLoss() # Cross-entropy on the raw logits
# best_params[0] is the learning rate, best_params[1] the weight decay.
final_optimizer = Adam(final_model.parameters(), lr=best_params[0], weight_decay=best_params[1])

# Train the final model on the entire dataset
n_epochs = 50
print(f"Training final model with LR={best_params[0]} and WD={best_params[1]} for {n_epochs} epochs")
print("------------")

for epoch in range(n_epochs):
    final_model.train()
    running_loss = 0.0  # mean of the per-batch losses for this epoch

    for x_batch, y_batch in full_loader:
        final_optimizer.zero_grad()          # clear gradients from the previous step
        logits = final_model(x_batch)
        loss = loss_fn(logits, y_batch)
        loss.backward()
        final_optimizer.step()
        # Dividing each batch loss by the number of batches accumulates the
        # epoch's average batch loss.
        running_loss += loss.item() / len(full_loader)

    print(f"Epoch {epoch} \t Training Loss: {running_loss:.4f}")

print("Final model training complete.")



Training final model with LR=0.001 and WD=0 for 50 epochs
------------
Epoch 0 	 Training Loss: 0.4495
Epoch 1 	 Training Loss: 0.2497
Epoch 2 	 Training Loss: 0.2236
Epoch 3 	 Training Loss: 0.1730
Epoch 4 	 Training Loss: 0.1583
Epoch 5 	 Training Loss: 0.1571
Epoch 6 	 Training Loss: 0.1253
Epoch 7 	 Training Loss: 0.0954
Epoch 8 	 Training Loss: 0.0915
Epoch 9 	 Training Loss: 0.0640
Epoch 10 	 Training Loss: 0.0641
Epoch 11 	 Training Loss: 0.0490
Epoch 12 	 Training Loss: 0.0403
Epoch 13 	 Training Loss: 0.0295
Epoch 14 	 Training Loss: 0.0268
Epoch 15 	 Training Loss: 0.0130
Epoch 16 	 Training Loss: 0.0086
Epoch 17 	 Training Loss: 0.0046
Epoch 18 	 Training Loss: 0.0018
Epoch 19 	 Training Loss: 0.0015
Epoch 20 	 Training Loss: 0.0010
Epoch 21 	 Training Loss: 0.0008
Epoch 22 	 Training Loss: 0.0007
Epoch 23 	 Training Loss: 0.0006
Epoch 24 	 Training Loss: 0.0005
Epoch 25 	 Training Loss: 0.0004
Epoch 26 	 Training Loss: 0.0003
Epoch 27 	 Training Loss: 0.0002
Epoch 28 	 Trai

In [None]:
# Score the held-out test set with the trained classifier and export the result.
data_test = torch.load(
    "/content/drive/MyDrive/Colab Notebooks/deepfake detector/hw2_test-1.pt"
)
# Entry 0 is written to the 'id' column; entry 1 is the input fed to the extractor.
Y_test_ids, X_test_raw = data_test[0], data_test[1]

# Turn the raw test inputs into embeddings via extract_features.
X_test_emb = extract_features(X_test_raw)

final_model.eval()  # inference mode for the forward pass
with torch.no_grad():
    test_outputs = final_model(X_test_emb)
    # Predicted class = index of the largest logit in each row.
    predictions = test_outputs.argmax(dim=1).numpy()

# Write one row per test item: its id and the predicted label.
df = pd.DataFrame({'id': Y_test_ids, 'label': predictions})
df.to_csv("predictions.csv", index=False)

# 6. Model_6 = Simple Convolutional NN

 1. Hyperparameter tuning with Adam Optimizer and Dropout_prob =0.5
    - Trained for 50 epochs
    - Best Parameters: Learning Rate=0.001, Weight Decay=0.0001 with Validation Accuracy=0.9500
    - Test set accuracy 0.944

 2. Hyperparameter tuning with SGD Optimizer and Dropout_prob =0.5
    - Trained for 200 epochs  
    - Best Parameters: Learning Rate=0.01, Weight Decay=0.01 with Validation Accuracy=0.7050
    - Test set accuracy 0.9420  



In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class SimpleConvNet(nn.Module):
    """Small CNN: three conv/ReLU/max-pool blocks followed by an MLP head.

    The head's input size (64 * 4 * 4) is fixed, so the network expects
    3-channel 32x32 images; other spatial sizes raise a shape error in
    ``fc1`` instead of being silently reshaped.

    Args:
        n_classes: Number of output logits (default 2).
        dropout_p: Dropout probability applied before the output layer
            (default 0.5).
    """

    def __init__(self, n_classes=2, dropout_p=0.5):
        super().__init__()

        # 1st Convolutional Block:
        # Input: 3 color channels (RGB). Output: 16 feature maps.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, padding=1)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)  # Shrinks 32x32 to 16x16

        # 2nd Convolutional Block:
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, padding=1)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)  # Shrinks 16x16 to 8x8

        # 3rd Convolutional Block:
        self.conv3 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)  # Shrinks 8x8 to 4x4

        # Fully Connected (MLP) Classifier at the end:
        # The feature map is now 64 channels, each 4x4 pixels.
        # Flattened size = 64 * 4 * 4 = 1024
        self.fc1 = nn.Linear(64 * 4 * 4, 128)
        self.dropout = nn.Dropout(dropout_p)  # Regularization before the output layer
        self.fc2 = nn.Linear(128, n_classes)  # One logit per class

    def forward(self, x):
        """Return raw logits of shape (batch, n_classes) for a batch of images."""
        # Pass through Conv layers with ReLU activations
        x = self.pool1(F.relu(self.conv1(x)))
        x = self.pool2(F.relu(self.conv2(x)))
        x = self.pool3(F.relu(self.conv3(x)))

        # Flatten everything except the batch dimension. Unlike
        # view(-1, 64 * 4 * 4), this never folds extra spatial positions into
        # the batch dimension when the input is not 32x32.
        x = x.flatten(start_dim=1)

        # Pass through the linear classifier
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        return self.fc2(x)

In [None]:
# 1. Load Data (skipping ResNet feature extraction completely; the ConvNet
#    below is trained on the raw image tensors instead of embeddings)
print("Loading dataset")
# The saved file unpacks into (inputs, labels).
X_raw, Y_raw = torch.load("/content/drive/MyDrive/Colab Notebooks/deepfake detector/hw2_data.pt")

Loading dataset


In [None]:
# 2. Full dataset: pair each raw image tensor with its label so it can be
#    indexed by Subset / DataLoader.
full_dataset = TensorDataset(X_raw, Y_raw)

In [None]:
# 3. Model with Hyper parameter tuning with K-fold:
#
# Grid search over (learning rate, weight decay) for SimpleConvNet trained with
# SGD on the raw images. Every combination is trained from scratch on each fold
# and scored by its mean validation accuracy; the winning pair is left in
# `best_params`.
# NOTE(review): the final-training cell for this model uses batch_size=50 and
# n_epochs=200, whereas this search uses batch_size=256 and n_epochs=50, so the
# selected hyperparameters may not transfer — confirm this is intended.

# Hyperparameter Grid:
learning_rates = [0.0001, 0.001, 0.01]
weight_decays = [0, 1e-4, 1e-3, 1e-2]
print("Hyper parameter Grid")
print(f"Learning Rates: {learning_rates}")
print(f"Weight Decay: {weight_decays}")

n_epochs = 50

# Prepare K-Fold:
kf = KFold(n_splits=5, shuffle=True, random_state=42)  # 5-fold CV with shuffling
full_dataset = TensorDataset(X_raw, Y_raw)  # raw images, not ResNet embeddings

# The loss module holds no per-run state, so one instance serves every fold.
loss_fn = nn.CrossEntropyLoss()

# Best configuration seen so far:
best_acc = 0
best_params = (None, None)

print("--------------------")
print("Starting Grid Search CV...")
for lr in learning_rates:
    for wd in weight_decays:

        fold_accuracies = []   # validation accuracy of each fold
        train_loss = []        # last-epoch training loss of each fold
        validation_loss = []   # validation loss of each fold

        for train_idx, val_idx in kf.split(full_dataset):
            # Split data
            train_sub = Subset(full_dataset, train_idx)
            val_sub = Subset(full_dataset, val_idx)

            train_loader = DataLoader(train_sub, batch_size=256, shuffle=True)
            # Evaluation does not depend on sample order, so no shuffling here.
            val_loader = DataLoader(val_sub, batch_size=256, shuffle=False)

            # Fresh model and optimizer for every fold
            model = SimpleConvNet()
            # For the Adam experiment, swap in:
            #   Adam(model.parameters(), lr=lr, weight_decay=wd)
            optimizer = SGD(model.parameters(), lr=lr, weight_decay=wd)  # SGD optimizer

            # Train for n_epochs
            model.train()
            for epoch in range(n_epochs):
                running_loss = 0.0
                for batch_X, batch_y in train_loader:
                    optimizer.zero_grad()
                    logits = model(batch_X)
                    loss = loss_fn(logits, batch_y)
                    loss.backward()
                    optimizer.step()
                    # loss is a per-batch mean. With batch_size=256 the last
                    # batch is usually smaller, so weight by batch size to get
                    # a true per-sample epoch average.
                    running_loss += loss.item() * batch_y.size(0)
                running_loss /= len(train_sub)
            # Only the final epoch's training loss is recorded for the fold.
            train_loss.append(running_loss)

            # Validate
            model.eval()
            correct_predictions = 0
            total_predictions = 0
            val_loss = 0.0
            with torch.no_grad():
                for batch_X, batch_y in val_loader:
                    logits = model(batch_X)
                    loss = loss_fn(logits, batch_y)
                    val_loss += loss.item() * batch_y.size(0)
                    predictions = torch.argmax(logits, dim=1)
                    correct_predictions += (predictions == batch_y).sum().item()
                    total_predictions += batch_y.size(0)
            val_loss /= total_predictions
            val_acc = correct_predictions / total_predictions

            validation_loss.append(val_loss)
            fold_accuracies.append(val_acc)

        mean_val_acc = np.mean(fold_accuracies)  # Choose hyperparameters by mean validation accuracy
        mean_train_loss = np.mean(train_loss)
        mean_val_loss = np.mean(validation_loss)

        print("-----------")
        print(f"Learning Rate: {lr}, Weight Decay: {wd}")
        print(f"Train_Loss ={mean_train_loss:0.4f}|| Validation_loss ={mean_val_loss:0.4f}|| Validation_accurancy ={mean_val_acc:.4f},")

        # Strict '>' keeps the earliest configuration on ties.
        if mean_val_acc > best_acc:
            best_acc = mean_val_acc
            best_params = (lr, wd)
print("\n------------------------------")
print(f"Best Parameters: Learning Rate={best_params[0]}, Weight Decay={best_params[1]} with Validation Accuracy={best_acc:.4f}")

Hyper parameter Grid
Learning Rates: [0.0001, 0.001, 0.01]
Weight Decay: [0, 0.0001, 0.001, 0.01]
--------------------
Starting Grid Search CV...
-----------
Learning Rate: 0.0001, Weight Decay: 0
Train_Loss =0.6943|| Validation_loss =0.6939|| Validation_accurancy =0.4950,
-----------
Learning Rate: 0.0001, Weight Decay: 0.0001
Train_Loss =0.6935|| Validation_loss =0.6944|| Validation_accurancy =0.5215,
-----------
Learning Rate: 0.0001, Weight Decay: 0.001
Train_Loss =0.6939|| Validation_loss =0.6928|| Validation_accurancy =0.4860,
-----------
Learning Rate: 0.0001, Weight Decay: 0.01
Train_Loss =0.6932|| Validation_loss =0.6927|| Validation_accurancy =0.5230,
-----------
Learning Rate: 0.001, Weight Decay: 0
Train_Loss =0.6921|| Validation_loss =0.6920|| Validation_accurancy =0.5210,
-----------
Learning Rate: 0.001, Weight Decay: 0.0001
Train_Loss =0.6934|| Validation_loss =0.6917|| Validation_accurancy =0.5190,
-----------
Learning Rate: 0.001, Weight Decay: 0.001
Train_Loss =0.692

In [None]:
# Display the (learning_rate, weight_decay) tuple chosen by the grid search.
best_params

(0.01, 0.01)

In [None]:
# Final training on all data :
# Refit a fresh SimpleConvNet on the complete raw-image dataset (no held-out
# split) using the (lr, weight_decay) pair stored in best_params.
# NOTE(review): batch_size=50 and n_epochs=200 here differ from the grid-search
# cell (batch_size=256, n_epochs=50) that produced best_params — confirm the
# tuned values are still appropriate under this training schedule.
full_dataset = TensorDataset(X_raw, Y_raw)
full_loader = DataLoader(full_dataset, batch_size=50, shuffle=True)

# Initialize the final model with best hyperparameters:
final_model = SimpleConvNet() # New, untrained instance
loss_fn = nn.CrossEntropyLoss() # Cross-entropy on the raw logits
# Adam variant of this experiment (disabled):
#final_optimizer = Adam(final_model.parameters(), lr=best_params[0], weight_decay=best_params[1])
# best_params[0] is the learning rate, best_params[1] the weight decay.
final_optimizer = SGD(final_model.parameters(), lr=best_params[0], weight_decay=best_params[1])
# Train the final model on the entire dataset
# Epoch count used with the Adam variant (disabled):
#n_epochs = 50
n_epochs = 200
print(f"Training final model with LR={best_params[0]} and WD={best_params[1]} for {n_epochs} epochs")
print("------------")

for epoch in range(n_epochs):
    final_model.train()  # keeps dropout active during training
    running_loss = 0.0   # mean of the per-batch losses for this epoch

    for x_batch, y_batch in full_loader:
        final_optimizer.zero_grad()          # clear gradients from the previous step
        logits = final_model(x_batch)
        loss = loss_fn(logits, y_batch)
        loss.backward()
        final_optimizer.step()
        # Dividing each batch loss by the number of batches accumulates the
        # epoch's average batch loss.
        running_loss += loss.item() / len(full_loader)

    print(f"Epoch {epoch} \t Training Loss: {running_loss:.4f}")

print("Final model training complete.")

Training final model with LR=0.01 and WD=0.01 for 100 epochs
------------
Epoch 0 	 Training Loss: 0.6927
Epoch 1 	 Training Loss: 0.6907
Epoch 2 	 Training Loss: 0.6897
Epoch 3 	 Training Loss: 0.6890
Epoch 4 	 Training Loss: 0.6877
Epoch 5 	 Training Loss: 0.6867
Epoch 6 	 Training Loss: 0.6859
Epoch 7 	 Training Loss: 0.6850
Epoch 8 	 Training Loss: 0.6842
Epoch 9 	 Training Loss: 0.6823
Epoch 10 	 Training Loss: 0.6800
Epoch 11 	 Training Loss: 0.6790
Epoch 12 	 Training Loss: 0.6764
Epoch 13 	 Training Loss: 0.6733
Epoch 14 	 Training Loss: 0.6703
Epoch 15 	 Training Loss: 0.6667
Epoch 16 	 Training Loss: 0.6611
Epoch 17 	 Training Loss: 0.6546
Epoch 18 	 Training Loss: 0.6497
Epoch 19 	 Training Loss: 0.6404
Epoch 20 	 Training Loss: 0.6301
Epoch 21 	 Training Loss: 0.6144
Epoch 22 	 Training Loss: 0.5902
Epoch 23 	 Training Loss: 0.5684
Epoch 24 	 Training Loss: 0.5340
Epoch 25 	 Training Loss: 0.5034
Epoch 26 	 Training Loss: 0.4624
Epoch 27 	 Training Loss: 0.4294
Epoch 28 	 T

In [None]:
# Score the held-out test set with the trained ConvNet and export the result.
data_test = torch.load(
    "/content/drive/MyDrive/Colab Notebooks/deepfake detector/hw2_test-1.pt"
)
# Entry 0 is written to the 'id' column; entry 1 is the input fed to the model.
Y_test_ids, X_test_raw = data_test[0], data_test[1]

# No extract_features step here: the model consumes the loaded tensor directly.
final_model.eval()  # inference mode (disables dropout)
with torch.no_grad():
    test_outputs = final_model(X_test_raw)
    # Predicted class = index of the largest logit in each row.
    predictions = test_outputs.argmax(dim=1).numpy()

# Write one row per test item: its id and the predicted label.
df = pd.DataFrame({'id': Y_test_ids, 'label': predictions})
df.to_csv("predictions.csv", index=False)

# Conclusion:

1. The Best Model is:

- Simple Convolutional Net with 3 blocks and one MLP classifier at the end
  - With Hyperparameters
    - Learning Rate=0.001,
    - Weight Decay=0.0001
  - Dropout probability = 0.5
  - Adam optimizer
  - Yields
    - Validation Accuracy=0.9500
    - Test set accuracy 0.944

2. Why a Custom ConvNet Beats the Baseline:

- The ResNet is frozen inside the `extract_features` function: we never backpropagate into it, so its existing convolutional filters cannot be tuned to detect deepfake-specific artifacts. The ResNet is used purely as a fixed feature extractor.

- A ResNet pretrained on ImageNet focuses more on semantic features such as
ears, faces, and object shapes, not on fake artifacts.

- Deepfake detection depends heavily on:
pixel noise, texture inconsistencies, local patterns.

  By directly training a simple ConvNet from scratch on the raw images, my model can learn completely new, custom filters that are 100% focused on finding those unique fake patterns, which is exactly why the simple ConvNet might outperform the pretrained ResNet in this case.


