In [63]:
import os
import glob
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models


In [67]:
# Go up one directory from notebooks/ to Final Dataset/
os.makedirs("../Final Dataset/Real", exist_ok=True)
os.makedirs("../Final Dataset/Fake", exist_ok=True)

# Grab all valid image files
image_exts = ('.jpg', '.jpeg', '.png')

real_images = [f for f in glob.glob("../Final Dataset/Real/*") if f.lower().endswith(image_exts)]
fake_images = [f for f in glob.glob("../Final Dataset/Fake/*") if f.lower().endswith(image_exts)]

print(f"Found {len(real_images)} Real images and {len(fake_images)} Fake images.")

# Build DataFrame
df_real = pd.DataFrame({'path': real_images, 'label': 'Real'})
df_fake = pd.DataFrame({'path': fake_images, 'label': 'Fake'})
df = pd.concat([df_real, df_fake]).reset_index(drop=True)

# Save to CSV
df.to_csv("../Final Dataset/dataset_rebuilt.csv", index=False)
df.head()


Found 5890 Real images and 7000 Fake images.


Unnamed: 0,path,label
0,../Final Dataset/Real/02725.jpg,Real
1,../Final Dataset/Real/real_433_aug_0.jpg,Real
2,../Final Dataset/Real/real_119_aug_0.jpg,Real
3,../Final Dataset/Real/02043.jpg,Real
4,../Final Dataset/Real/real_523_aug_2.jpg,Real


In [69]:
df = pd.read_csv("../Final Dataset/dataset_rebuilt.csv")

print("Label distribution:\n", df['label'].value_counts())

train_df, test_df = train_test_split(df, test_size=0.2, stratify=df['label'], random_state=42)
print(f"Train size: {len(train_df)}, Test size: {len(test_df)}")


Label distribution:
 label
Fake    7000
Real    5890
Name: count, dtype: int64
Train size: 10312, Test size: 2578


In [71]:
class DeepfakeDataset(Dataset):
    def __init__(self, dataframe, transform=None):
        self.dataframe = dataframe.reset_index(drop=True)
        self.transform = transform

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        img_path = self.dataframe.loc[idx, 'path']
        image = Image.open(img_path).convert("RGB")

        label_str = self.dataframe.loc[idx, 'label']
        label = 1 if label_str.lower() == 'fake' else 0

        if self.transform:
            image = self.transform(image)

        return image, label


In [73]:
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.5]*3, [0.5]*3)
])

train_dataset = DeepfakeDataset(train_df, transform=transform)
test_dataset = DeepfakeDataset(test_df, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)


In [75]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = models.resnet50(pretrained=True)
model.fc = nn.Linear(model.fc.in_features, 2)  # binary classification
model = model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)




In [None]:
train_losses = []
val_accuracies = []

for epoch in range(5):  # increase for better performance
    model.train()
    running_loss = 0.0

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    train_losses.append(running_loss / len(train_loader))

    # Validate
    model.eval()
    preds, targets = [], []
    with torch.no_grad():
        for images, labels in test_loader:
            images = images.to(device)
            outputs = model(images)
            preds.extend(torch.argmax(outputs, 1).cpu().numpy())
            targets.extend(labels.numpy())

    acc = accuracy_score(targets, preds)
    val_accuracies.append(acc)
    print(f"Epoch {epoch+1}: Loss={running_loss:.4f}, Val Acc={acc:.4f}")
