In [28]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, models, transforms
from torch.nn import functional as F
import shutil
from tqdm import tqdm
import numpy as np
import json
import matplotlib.pyplot as plt
from sklearn.metrics import (
    ConfusionMatrixDisplay, confusion_matrix, classification_report, roc_curve, auc
)
import seaborn as sns
import os
import time

In [2]:
from google.colab import drive
drive.mount('/content/drive')

drive_dataset_path = '/content/drive/MyDrive/DocuForge/dataset'
local_dataset_path = '/content/dataset'

# Function to copy dataset with progress
def copy_dataset(src, dst):
    if not os.path.exists(dst):
        os.makedirs(dst)

    for root, dirs, files in os.walk(src):
        # Recreate directory structure
        rel_path = os.path.relpath(root, src)
        dest_dir = os.path.join(dst, rel_path)
        os.makedirs(dest_dir, exist_ok=True)

        # Copy files with progress bar
        for file in tqdm(files, desc=f"Copying {rel_path}", unit="file"):
            src_file = os.path.join(root, file)
            dest_file = os.path.join(dest_dir, file)
            if not os.path.exists(dest_file):
                shutil.copy2(src_file, dest_file)

# Run it
copy_dataset(drive_dataset_path, local_dataset_path)

print("✅ Dataset copied successfully!")

Mounted at /content/drive


Copying .: 0file [00:00, ?file/s]

Copying test: 0file [00:00, ?file/s]

Copying test/authentic:   0%|          | 0/300 [00:00<?, ?file/s]

Copying test/forged:   0%|          | 0/300 [00:00<?, ?file/s]

Copying train: 0file [00:00, ?file/s]

Copying train/forged:   0%|          | 0/1400 [00:00<?, ?file/s]

Copying train/authentic:   0%|          | 0/1400 [00:00<?, ?file/s]

Copying val: 0file [00:00, ?file/s]

Copying val/authentic:   0%|          | 0/300 [00:00<?, ?file/s]

Copying val/forged:   0%|          | 0/300 [00:00<?, ?file/s]

✅ Dataset copied successfully!


In [29]:
data_path = "/content/dataset"

IMG_SIZE = 224
BATCH_SIZE = 64
NUM_WORKERS = 2
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device:", DEVICE)

Device: cuda


In [30]:
# -------------------------------
# 3. Transforms & Datasets
# -------------------------------
train_transforms = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.RandomRotation(15),
    transforms.RandomResizedCrop(IMG_SIZE, scale=(0.8, 1.0)),
    transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.3),
    transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),
    transforms.GaussianBlur(3, sigma=(0.1, 1.0)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])
])

val_transforms = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])
])

train_dir = os.path.join(data_path, 'train')
val_dir = os.path.join(data_path, 'val')
test_dir = os.path.join(data_path, 'test')

train_dataset = datasets.ImageFolder(train_dir, transform=train_transforms)
val_dataset = datasets.ImageFolder(val_dir, transform=val_transforms)
test_dataset = datasets.ImageFolder(test_dir, transform=val_transforms)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, pin_memory=True, num_workers=NUM_WORKERS)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, pin_memory=True, num_workers=NUM_WORKERS)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, pin_memory=True, num_workers=NUM_WORKERS)

print(f"Classes: {train_dataset.classes}")
print(f"Train: {len(train_dataset)} | Val: {len(val_dataset)} | Test: {len(test_dataset)}")

Classes: ['authentic', 'forged']
Train: 2800 | Val: 600 | Test: 600


In [31]:
# -------------------------------
# 4. Model setup (ResNet50)
# -------------------------------
model = models.resnet50(weights='IMAGENET1K_V2')

# Replace final FC for binary classification (logit output)
num_features = model.fc.in_features
model.fc = nn.Sequential(
    nn.Linear(num_features, 256),
    nn.ReLU(),
    nn.Dropout(0.3),
    nn.Linear(256, 1),
)

model = model.to(DEVICE)

In [32]:
for name, param in model.named_parameters():
    if "layer2" in name or "layer3" in name or "layer4" in name or "fc" in name:
        param.requires_grad = True
    else:
        param.requires_grad = False

trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
total_params = sum(p.numel() for p in model.parameters())
print(f"Trainable parameters: {trainable_params}/{total_params}")

Trainable parameters: 23807489/24032833


In [33]:
# criterion = nn.BCEWithLogitsLoss()  # balanced dataset -> no pos_weight by default

class FocalLoss(nn.Module):
    def __init__(self, alpha=1, gamma=2, logits=True):
        super().__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.logits = logits
    def forward(self, inputs, targets):
        if self.logits:
            bce = F.binary_cross_entropy_with_logits(inputs, targets, reduction='none')
        else:
            bce = F.binary_cross_entropy(inputs, targets, reduction='none')
        pt = torch.exp(-bce)
        loss = self.alpha * (1 - pt) ** self.gamma * bce
        return loss.mean()
criterion = FocalLoss(alpha=1, gamma=2, logits=True)

In [34]:
HEAD_EPOCHS = 5

# Freeze all conv layers (train only fc)
for name, param in model.named_parameters():
    param.requires_grad = False
for param in model.fc.parameters():
    param.requires_grad = True

trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
total_params = sum(p.numel() for p in model.parameters())
print(f"[Phase1] Trainable parameters: {trainable_params}/{total_params}")

[Phase1] Trainable parameters: 524801/24032833


In [35]:
optimizer = torch.optim.AdamW(filter(lambda p: p.requires_grad, model.parameters()), lr=1e-4, weight_decay=1e-4)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=2, factor=0.5)

In [36]:
SAVE_DIR = "saved_models"
os.makedirs(SAVE_DIR, exist_ok=True)
best_val_acc = 0.0

In [37]:
def run_epoch(model, loader, optimizer=None, criterion=None, train=False, device=DEVICE):
    if train:
        model.train()
    else:
        model.eval()

    running_loss = 0.0
    correct = 0
    total = 0

    if train:
        iterator = tqdm(loader, desc="Train" if train else "Eval")
    else:
        iterator = tqdm(loader, desc="Eval")

    with torch.set_grad_enabled(train):
        for imgs, labels in iterator:
            imgs = imgs.to(device)
            labels = labels.float().unsqueeze(1).to(device)

            outputs = model(imgs)  # logits
            loss = criterion(outputs, labels)
            if train:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            running_loss += loss.item() * imgs.size(0)
            probs = torch.sigmoid(outputs)
            preds = (probs > 0.5).float()
            correct += (preds == labels).sum().item()
            total += labels.size(0)

    avg_loss = running_loss / total
    acc = correct / total
    return avg_loss, acc

In [38]:
# -------------------------------
# Phase 1: Train head only
# -------------------------------
train_losses, val_losses = [], []
train_accs, val_accs = [], []

print("=== Phase 1: training head-only ===")
for epoch in range(HEAD_EPOCHS):
    t0 = time.time()
    train_loss, train_acc = run_epoch(model, train_loader, optimizer=optimizer, criterion=criterion, train=True)
    val_loss, val_acc = run_epoch(model, val_loader, optimizer=None, criterion=criterion, train=False)

    train_losses.append(train_loss)
    val_losses.append(val_loss)
    train_accs.append(train_acc)
    val_accs.append(val_acc)

    print(f"Head Epoch {epoch+1}/{HEAD_EPOCHS} | Train Acc={train_acc:.3f} | Val Acc={val_acc:.3f} | Train Loss={train_loss:.3f} | Val Loss={val_loss:.3f} | time={time.time()-t0:.1f}s")

    # Save current epoch
    torch.save(model.state_dict(), os.path.join(SAVE_DIR, f"head_epoch_{epoch+1}.pth"))

    # Save best
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), os.path.join(SAVE_DIR, "best_model.pth"))
        print(f"🏆 Best head model updated (Val Acc={val_acc:.3f})")

    # Scheduler step (ReduceLROnPlateau expects metric)
    scheduler.step(val_loss)

print("=== Phase 1 complete ===")
print(f"Best val acc so far: {best_val_acc:.3f}")

=== Phase 1: training head-only ===


Train: 100%|██████████| 44/44 [01:00<00:00,  1.38s/it]
Eval: 100%|██████████| 10/10 [00:10<00:00,  1.05s/it]


Head Epoch 1/5 | Train Acc=0.739 | Val Acc=0.667 | Train Loss=0.146 | Val Loss=0.155 | time=71.4s
🏆 Best head model updated (Val Acc=0.667)


Train: 100%|██████████| 44/44 [01:02<00:00,  1.42s/it]
Eval: 100%|██████████| 10/10 [00:11<00:00,  1.17s/it]


Head Epoch 2/5 | Train Acc=0.814 | Val Acc=0.843 | Train Loss=0.116 | Val Loss=0.107 | time=74.1s
🏆 Best head model updated (Val Acc=0.843)


Train: 100%|██████████| 44/44 [01:00<00:00,  1.37s/it]
Eval: 100%|██████████| 10/10 [00:09<00:00,  1.08it/s]


Head Epoch 3/5 | Train Acc=0.814 | Val Acc=0.847 | Train Loss=0.108 | Val Loss=0.102 | time=69.7s
🏆 Best head model updated (Val Acc=0.847)


Train: 100%|██████████| 44/44 [01:00<00:00,  1.38s/it]
Eval: 100%|██████████| 10/10 [00:08<00:00,  1.12it/s]


Head Epoch 4/5 | Train Acc=0.830 | Val Acc=0.857 | Train Loss=0.104 | Val Loss=0.099 | time=69.5s
🏆 Best head model updated (Val Acc=0.857)


Train: 100%|██████████| 44/44 [01:01<00:00,  1.41s/it]
Eval: 100%|██████████| 10/10 [00:11<00:00,  1.18s/it]


Head Epoch 5/5 | Train Acc=0.833 | Val Acc=0.860 | Train Loss=0.101 | Val Loss=0.097 | time=73.7s
🏆 Best head model updated (Val Acc=0.860)
=== Phase 1 complete ===
Best val acc so far: 0.860


In [39]:
# -------------------------------
# Phase 2: Unfreeze deeper layers and fine-tune
# -------------------------------
FT_EPOCHS = 10
print("=== Phase 2: unfreezing layer2, layer3, layer4 and fine-tuning ===")

# Unfreeze layer2, layer3, layer4 and fc
for name, param in model.named_parameters():
    if ("layer2" in name) or ("layer3" in name) or ("layer4" in name) or ("fc" in name):
        param.requires_grad = True
    else:
        param.requires_grad = False

trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"[Phase2] Trainable parameters: {trainable_params}/{total_params}")

=== Phase 2: unfreezing layer2, layer3, layer4 and fine-tuning ===
[Phase2] Trainable parameters: 23807489/24032833


In [40]:
# Re-create optimizer with separate LRs: smaller for pretrained convs, higher for fc
fc_params = list(model.fc.parameters())
pretrained_params = [p for n, p in model.named_parameters() if p.requires_grad and "fc" not in n]

optimizer = torch.optim.AdamW([
    {"params": pretrained_params, "lr": 1e-5},
    {"params": fc_params, "lr": 1e-4}
], weight_decay=1e-4)

# Option: reduce all LRs a bit for gentle fine-tuning
for g in optimizer.param_groups:
    g["lr"] = g["lr"] * 0.5

# Use ReduceLROnPlateau for stability
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=2, factor=0.3)

In [41]:
# Continue training
for epoch in range(FT_EPOCHS):
    t0 = time.time()
    train_loss, train_acc = run_epoch(model, train_loader, optimizer=optimizer, criterion=criterion, train=True)
    val_loss, val_acc = run_epoch(model, val_loader, optimizer=None, criterion=criterion, train=False)

    train_losses.append(train_loss)
    val_losses.append(val_loss)
    train_accs.append(train_acc)
    val_accs.append(val_acc)

    print(f"FT Epoch {epoch+1}/{FT_EPOCHS} | Train Acc={train_acc:.3f} | Val Acc={val_acc:.3f} | Train Loss={train_loss:.3f} | Val Loss={val_loss:.3f} | time={time.time()-t0:.1f}s")

    # Save checkpoint
    torch.save(model.state_dict(), os.path.join(SAVE_DIR, f"ft_epoch_{epoch+1}.pth"))

    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), os.path.join(SAVE_DIR, "best_model.pth"))
        print(f"🏆 Best model updated (Val Acc={val_acc:.3f})")

    scheduler.step(val_loss)

print("✅ Two-phase training complete.")
print(f"Final best val acc: {best_val_acc:.3f}")

Train: 100%|██████████| 44/44 [01:05<00:00,  1.50s/it]
Eval: 100%|██████████| 10/10 [00:09<00:00,  1.00it/s]


FT Epoch 1/10 | Train Acc=0.833 | Val Acc=0.865 | Train Loss=0.100 | Val Loss=0.093 | time=75.9s
🏆 Best model updated (Val Acc=0.865)


Train: 100%|██████████| 44/44 [01:04<00:00,  1.47s/it]
Eval: 100%|██████████| 10/10 [00:10<00:00,  1.05s/it]


FT Epoch 2/10 | Train Acc=0.846 | Val Acc=0.868 | Train Loss=0.097 | Val Loss=0.094 | time=75.1s
🏆 Best model updated (Val Acc=0.868)


Train: 100%|██████████| 44/44 [01:07<00:00,  1.54s/it]
Eval: 100%|██████████| 10/10 [00:09<00:00,  1.01it/s]


FT Epoch 3/10 | Train Acc=0.844 | Val Acc=0.878 | Train Loss=0.096 | Val Loss=0.091 | time=77.8s
🏆 Best model updated (Val Acc=0.878)


Train: 100%|██████████| 44/44 [01:06<00:00,  1.50s/it]
Eval: 100%|██████████| 10/10 [00:09<00:00,  1.09it/s]


FT Epoch 4/10 | Train Acc=0.846 | Val Acc=0.882 | Train Loss=0.093 | Val Loss=0.090 | time=75.3s
🏆 Best model updated (Val Acc=0.882)


Train: 100%|██████████| 44/44 [01:06<00:00,  1.50s/it]
Eval: 100%|██████████| 10/10 [00:10<00:00,  1.06s/it]


FT Epoch 5/10 | Train Acc=0.851 | Val Acc=0.885 | Train Loss=0.090 | Val Loss=0.087 | time=76.7s
🏆 Best model updated (Val Acc=0.885)


Train: 100%|██████████| 44/44 [01:08<00:00,  1.55s/it]
Eval: 100%|██████████| 10/10 [00:10<00:00,  1.05s/it]


FT Epoch 6/10 | Train Acc=0.859 | Val Acc=0.885 | Train Loss=0.089 | Val Loss=0.088 | time=78.6s


Train: 100%|██████████| 44/44 [01:06<00:00,  1.51s/it]
Eval: 100%|██████████| 10/10 [00:09<00:00,  1.10it/s]


FT Epoch 7/10 | Train Acc=0.849 | Val Acc=0.880 | Train Loss=0.090 | Val Loss=0.088 | time=75.5s


Train: 100%|██████████| 44/44 [01:06<00:00,  1.52s/it]
Eval: 100%|██████████| 10/10 [00:10<00:00,  1.03s/it]


FT Epoch 8/10 | Train Acc=0.858 | Val Acc=0.883 | Train Loss=0.089 | Val Loss=0.087 | time=77.1s


Train: 100%|██████████| 44/44 [01:08<00:00,  1.55s/it]
Eval: 100%|██████████| 10/10 [00:10<00:00,  1.04s/it]


FT Epoch 9/10 | Train Acc=0.867 | Val Acc=0.885 | Train Loss=0.086 | Val Loss=0.086 | time=78.8s


Train: 100%|██████████| 44/44 [01:06<00:00,  1.50s/it]
Eval: 100%|██████████| 10/10 [00:08<00:00,  1.12it/s]


FT Epoch 10/10 | Train Acc=0.863 | Val Acc=0.888 | Train Loss=0.087 | Val Loss=0.083 | time=74.9s
🏆 Best model updated (Val Acc=0.888)
✅ Two-phase training complete.
Final best val acc: 0.888


In [42]:
# -------------------------------
# 7. Evaluation & saving metrics
# -------------------------------
def evaluate_and_save(model, test_loader, criterion, device, save_dir="evaluation_results", threshold=0.5):
    os.makedirs(save_dir, exist_ok=True)

    model.eval()
    test_loss, correct, total = 0.0, 0, 0
    all_labels, all_preds, all_probs = [], [], []

    with torch.no_grad():
        for imgs, labels in tqdm(test_loader, desc="Evaluating"):
            imgs = imgs.to(device)
            labels = labels.float().unsqueeze(1).to(device)
            outputs = model(imgs)

            loss = criterion(outputs, labels)
            test_loss += loss.item() * imgs.size(0)

            probs = torch.sigmoid(outputs).cpu().numpy().flatten()
            preds = (probs > threshold).astype(int)
            labels_np = labels.cpu().numpy().flatten()

            all_probs.extend(probs.tolist())
            all_preds.extend(preds.tolist())
            all_labels.extend(labels_np.tolist())

            correct += (preds == labels_np).sum().item()
            total += labels_np.shape[0]

    test_loss = test_loss / total
    test_acc = correct / total

    print(f"\n🧪 Test Loss: {test_loss:.4f} | Test Accuracy: {test_acc:.4f}")

    report = classification_report(all_labels, all_preds, target_names=["Authentic", "Forged"], output_dict=True)
    print("\n📊 Classification Report:")
    print(classification_report(all_labels, all_preds, target_names=["Authentic", "Forged"]))

    # Save report
    with open(os.path.join(save_dir, "classification_report.txt"), "w") as f:
        f.write(classification_report(all_labels, all_preds, target_names=["Authentic", "Forged"]))

    # Confusion matrix
    cm = confusion_matrix(all_labels, all_preds)
    plt.figure(figsize=(5, 4))
    sns.heatmap(cm, annot=True, fmt="d", cmap="Blues",
                xticklabels=["Authentic", "Forged"],
                yticklabels=["Authentic", "Forged"])
    plt.xlabel("Predicted Label")
    plt.ylabel("True Label")
    plt.title("Confusion Matrix")
    cm_path = os.path.join(save_dir, "confusion_matrix.png")
    plt.savefig(cm_path, bbox_inches="tight")
    plt.close()
    print(f"🖼️ Confusion matrix saved to {cm_path}")

    # ROC
    fpr, tpr, _ = roc_curve(all_labels, all_probs)
    roc_auc = auc(fpr, tpr)
    plt.figure(figsize=(5, 4))
    plt.plot(fpr, tpr, lw=2, label=f"AUC = {roc_auc:.3f}")
    plt.plot([0, 1], [0, 1], linestyle="--", color="gray")
    plt.xlabel("False Positive Rate")
    plt.ylabel("True Positive Rate")
    plt.title("ROC Curve")
    plt.legend()
    roc_path = os.path.join(save_dir, "roc_curve.png")
    plt.savefig(roc_path, bbox_inches="tight")
    plt.close()
    print(f"📉 ROC curve saved to {roc_path}")

    # Save metrics JSON
    results = {
        "test_loss": float(test_loss),
        "test_accuracy": float(test_acc),
        "roc_auc": float(roc_auc),
        "precision_authentic": report["Authentic"]["precision"],
        "recall_authentic": report["Authentic"]["recall"],
        "f1_authentic": report["Authentic"]["f1-score"],
        "precision_forged": report["Forged"]["precision"],
        "recall_forged": report["Forged"]["recall"],
        "f1_forged": report["Forged"]["f1-score"]
    }
    with open(os.path.join(save_dir, "metrics.json"), "w") as f:
        json.dump(results, f, indent=4)

    print(f"📦 Metrics saved to {os.path.join(save_dir, 'metrics.json')}")
    return test_loss, test_acc, roc_auc, all_labels, all_probs, all_preds

In [43]:
# Load best model and evaluate
best_path = os.path.join(SAVE_DIR, "best_model.pth")
model.load_state_dict(torch.load(best_path))
model.to(DEVICE)

test_loss, test_acc, roc_auc, all_labels, all_probs, all_preds = evaluate_and_save(model, test_loader, criterion, DEVICE)

Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.05s/it]



🧪 Test Loss: 0.1079 | Test Accuracy: 0.8317

📊 Classification Report:
              precision    recall  f1-score   support

   Authentic       0.76      0.98      0.85       300
      Forged       0.97      0.69      0.80       300

    accuracy                           0.83       600
   macro avg       0.86      0.83      0.83       600
weighted avg       0.86      0.83      0.83       600

🖼️ Confusion matrix saved to evaluation_results/confusion_matrix.png
📉 ROC curve saved to evaluation_results/roc_curve.png
📦 Metrics saved to evaluation_results/metrics.json


In [44]:
# -------------------------------
# 8. Threshold tuning helper
# -------------------------------
from sklearn.metrics import precision_recall_curve

precisions, recalls, thresholds = precision_recall_curve(all_labels, all_probs)
# compute F1 for thresholds (thresholds array length = len(precisions)-1)
f1_scores = []
thr_list = thresholds.tolist()
for t in thr_list:
    preds_t = (np.array(all_probs) > t).astype(int)
    report_t = classification_report(all_labels, preds_t, output_dict=True)
    f1_scores.append(report_t["Forged"]["f1-score"])

best_idx = int(np.argmax(f1_scores))
best_thr = thr_list[best_idx]
print(f"\nBest threshold (w.r.t Forged F1) found ≈ {best_thr:.3f} with Forged F1 = {f1_scores[best_idx]:.3f}")

# Quick plot (optional)
plt.figure(figsize=(6,4))
plt.plot(thr_list, f1_scores, marker='.')
plt.xlabel("Threshold")
plt.ylabel("Forged F1-score")
plt.title("Threshold vs Forged F1")
plt.grid(True)
plt.show()

print("Notebook finished. Best model:", best_path)

KeyError: 'Forged'