In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, models, transforms
from tqdm import tqdm
import numpy as np
import json
import matplotlib.pyplot as plt
from sklearn.metrics import (
    confusion_matrix, classification_report, roc_curve, auc
)
import seaborn as sns
import os

In [16]:
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

IMG_SIZE = 224  # ResNet50 default input size

train_transforms = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.RandomRotation(5),
    transforms.RandomResizedCrop(IMG_SIZE, scale=(0.9, 1.0)),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])
])

val_transforms = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])
])

# Directories
train_dir = "dataset/train"
val_dir = "dataset/val"
test_dir = "dataset/test"

# Datasets
train_dataset = datasets.ImageFolder(train_dir, transform=train_transforms)
val_dataset = datasets.ImageFolder(val_dir, transform=val_transforms)
test_dataset = datasets.ImageFolder(test_dir, transform=val_transforms)  # same transform as val

# DataLoaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

print(f"Classes: {train_dataset.classes}")
print(f"Train: {len(train_dataset)} | Val: {len(val_dataset)} | Test: {len(test_dataset)}")

Classes: ['authentic', 'forged']
Train: 1400 | Val: 300 | Test: 300


In [10]:
param_grid = {
    'learning_rate': [0.001, 0.01, 0.0001],
    'batch_size': [16, 32, 64],
    'optimizer': ['adam', 'sgd', 'adamw'],
    'weight_decay': [1e-4, 1e-3, 0],
    'dropout_rate': [0.3, 0.5, 0.7],
    'hidden_units': [128, 256, 512]
}

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load pre-trained ResNet50
model = models.resnet50(weights='IMAGENET1K_V2')

# Freeze earlier layers (optional fine-tuning)
for param in model.parameters():
    param.requires_grad = False

# Replace final FC layer
num_features = model.fc.in_features
model.fc = nn.Sequential(
    nn.Linear(num_features, 256),
    nn.ReLU(),
    nn.Dropout(0.3),
    nn.Linear(256, 1),
    nn.Sigmoid()
)

model = model.to(device)

In [None]:
criterion = nn.BCEWithLogitsLoss()  # combines sigmoid + binary cross entropy
optimizer = torch.optim.AdamW(filter(lambda p: p.requires_grad, model.parameters()), lr=1e-4, weight_decay=1e-4)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=3, factor=0.5)

In [None]:
EPOCHS = 10
SAVE_DIR = "saved_models"
os.makedirs(SAVE_DIR, exist_ok=True)

best_val_acc = 0.0

train_losses, val_losses = [], []
train_accs, val_accs = [], []

for epoch in range(EPOCHS):
    model.train()
    train_loss, correct, total = 0.0, 0, 0

    for imgs, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{EPOCHS}"):
        imgs, labels = imgs.to(device), labels.float().unsqueeze(1).to(device)
        optimizer.zero_grad()

        outputs = model(imgs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        preds = torch.sigmoid(outputs)
        preds = (preds > 0.5).float()
        correct += (preds == labels).sum().item()
        total += labels.size(0)

    train_acc = correct / total
    train_losses.append(train_loss / len(train_loader))
    train_accs.append(train_acc)

    # -----------------------------
    # Validation phase
    # -----------------------------
    model.eval()
    val_loss, correct, total = 0.0, 0, 0
    with torch.no_grad():
        for imgs, labels in val_loader:
            imgs, labels = imgs.to(device), labels.float().unsqueeze(1).to(device)
            outputs = model(imgs)
            loss = criterion(outputs, labels)
            val_loss += loss.item()

            preds = torch.sigmoid(outputs)
            preds = (preds > 0.5).float()
            correct += (preds == labels).sum().item()
            total += labels.size(0)

    val_acc = correct / total
    val_losses.append(val_loss / len(val_loader))
    val_accs.append(val_acc)

    print(f"Epoch {epoch+1}/{EPOCHS} | Train Acc={train_acc:.3f} | Val Acc={val_acc:.3f} | Train Loss={train_loss/len(train_loader):.3f} | Val Loss={val_loss/len(val_loader):.3f}")

    # -----------------------------
    # Save current epoch model
    # -----------------------------
    model_path = os.path.join(SAVE_DIR, f"model_epoch_{epoch+1}.pth")
    torch.save(model.state_dict(), model_path)

    # -----------------------------
    # Save best model
    # -----------------------------
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        best_model_path = os.path.join(SAVE_DIR, "best_model.pth")
        torch.save(model.state_dict(), best_model_path)
        print(f"🏆 Best model updated (Val Acc={val_acc:.3f})")

print("✅ Training complete.")

Epoch 1/10: 100%|██████████| 44/44 [03:35<00:00,  4.89s/it]


Epoch 1: Train Acc=0.701, Val Acc=0.670, Train Loss=0.642
✅ Model saved to saved_models\model_epoch_1.pth
🏆 Best model updated (Val Acc=0.670)


Epoch 2/10: 100%|██████████| 44/44 [03:12<00:00,  4.37s/it]


Epoch 2: Train Acc=0.819, Val Acc=0.837, Train Loss=0.541
✅ Model saved to saved_models\model_epoch_2.pth
🏆 Best model updated (Val Acc=0.837)


Epoch 3/10: 100%|██████████| 44/44 [03:08<00:00,  4.29s/it]


Epoch 3: Train Acc=0.806, Val Acc=0.823, Train Loss=0.474
✅ Model saved to saved_models\model_epoch_3.pth


Epoch 4/10: 100%|██████████| 44/44 [03:06<00:00,  4.23s/it]


Epoch 4: Train Acc=0.819, Val Acc=0.853, Train Loss=0.450
✅ Model saved to saved_models\model_epoch_4.pth
🏆 Best model updated (Val Acc=0.853)


Epoch 5/10: 100%|██████████| 44/44 [03:08<00:00,  4.29s/it]


Epoch 5: Train Acc=0.834, Val Acc=0.857, Train Loss=0.420
✅ Model saved to saved_models\model_epoch_5.pth
🏆 Best model updated (Val Acc=0.857)


Epoch 6/10: 100%|██████████| 44/44 [03:07<00:00,  4.26s/it]


Epoch 6: Train Acc=0.824, Val Acc=0.857, Train Loss=0.420
✅ Model saved to saved_models\model_epoch_6.pth


Epoch 7/10: 100%|██████████| 44/44 [03:19<00:00,  4.53s/it]


Epoch 7: Train Acc=0.826, Val Acc=0.863, Train Loss=0.402
✅ Model saved to saved_models\model_epoch_7.pth
🏆 Best model updated (Val Acc=0.863)


Epoch 8/10: 100%|██████████| 44/44 [03:06<00:00,  4.24s/it]


Epoch 8: Train Acc=0.825, Val Acc=0.853, Train Loss=0.408
✅ Model saved to saved_models\model_epoch_8.pth


Epoch 9/10: 100%|██████████| 44/44 [03:09<00:00,  4.32s/it]


Epoch 9: Train Acc=0.834, Val Acc=0.867, Train Loss=0.401
✅ Model saved to saved_models\model_epoch_9.pth
🏆 Best model updated (Val Acc=0.867)


Epoch 10/10: 100%|██████████| 44/44 [03:08<00:00,  4.28s/it]


Epoch 10: Train Acc=0.826, Val Acc=0.863, Train Loss=0.409
✅ Model saved to saved_models\model_epoch_10.pth


In [None]:
def evaluate_and_save(model, test_loader, criterion, device, save_dir="evaluation_results"):
    """
    Evaluate the model on test data and save all results (plots + metrics).
    """

    os.makedirs(save_dir, exist_ok=True)

    model.eval()
    test_loss, correct, total = 0.0, 0, 0
    all_labels, all_preds, all_probs = [], [], []

    with torch.no_grad():
        for imgs, labels in tqdm(test_loader, desc="Evaluating"):
            imgs, labels = imgs.to(device), labels.float().unsqueeze(1).to(device)
            outputs = model(imgs)

            # ✅ Use sigmoid consistently for binary output
            probs = torch.sigmoid(outputs).cpu().numpy().flatten()
            preds = (probs > 0.5).astype(int)
            labels_np = labels.cpu().numpy().flatten()

            loss = criterion(torch.tensor(probs).unsqueeze(1).to(device), labels)
            test_loss += loss.item()

            all_probs.extend(probs)
            all_preds.extend(preds)
            all_labels.extend(labels_np)

            correct += (preds == labels_np).sum().item()
            total += labels_np.shape[0]

    test_loss /= len(test_loader)
    test_acc = correct / total

    print(f"\n🧪 Test Loss: {test_loss:.4f} | Test Accuracy: {test_acc:.4f}")

    # -----------------------------------
    # Classification Report
    # -----------------------------------
    report = classification_report(all_labels, all_preds, target_names=["Authentic", "Forged"], output_dict=True)
    print("\n📊 Classification Report:")
    print(classification_report(all_labels, all_preds, target_names=["Authentic", "Forged"]))

    report_path = os.path.join(save_dir, "classification_report.txt")
    with open(report_path, "w") as f:
        f.write(classification_report(all_labels, all_preds, target_names=["Authentic", "Forged"]))
    print(f"📝 Classification report saved to {report_path}")

    # -----------------------------------
    # Confusion Matrix
    # -----------------------------------
    cm = confusion_matrix(all_labels, all_preds)
    plt.figure(figsize=(5, 4))
    sns.heatmap(cm, annot=True, fmt="d", cmap="Blues",
                xticklabels=["Authentic", "Forged"],
                yticklabels=["Authentic", "Forged"])
    plt.xlabel("Predicted Label")
    plt.ylabel("True Label")
    plt.title("Confusion Matrix")
    cm_path = os.path.join(save_dir, "confusion_matrix.png")
    plt.savefig(cm_path, bbox_inches="tight")
    plt.close()
    print(f"🖼️ Confusion matrix saved to {cm_path}")

    # -----------------------------------
    # ROC Curve and AUC
    # -----------------------------------
    fpr, tpr, _ = roc_curve(all_labels, all_probs)
    roc_auc = auc(fpr, tpr)

    plt.figure(figsize=(5, 4))
    plt.plot(fpr, tpr, color="darkorange", lw=2, label=f"ROC Curve (AUC = {roc_auc:.3f})")
    plt.plot([0, 1], [0, 1], color="gray", lw=1, linestyle="--")
    plt.xlabel("False Positive Rate")
    plt.ylabel("True Positive Rate")
    plt.title("ROC Curve for Forgery Detection")
    plt.legend(loc="lower right")

    roc_path = os.path.join(save_dir, "roc_curve.png")
    plt.savefig(roc_path, bbox_inches="tight")
    plt.close()
    print(f"📉 ROC curve saved to {roc_path}")

    # -----------------------------------
    # Save numeric results
    # -----------------------------------
    results = {
        "test_loss": float(test_loss),
        "test_accuracy": float(test_acc),
        "roc_auc": float(roc_auc),
        "precision_authentic": report["Authentic"]["precision"],
        "recall_authentic": report["Authentic"]["recall"],
        "f1_authentic": report["Authentic"]["f1-score"],
        "precision_forged": report["Forged"]["precision"],
        "recall_forged": report["Forged"]["recall"],
        "f1_forged": report["Forged"]["f1-score"]
    }

    results_path = os.path.join(save_dir, "metrics.json")
    with open(results_path, "w") as f:
        json.dump(results, f, indent=4)
    print(f"📦 Metrics saved to {results_path}")

    print("\n✅ Evaluation complete. All results saved in:", os.path.abspath(save_dir))

    return test_loss, test_acc, roc_auc

In [17]:
# Load best model
model.load_state_dict(torch.load("saved_models/best_model.pth"))
model.to(device)

# Run detailed evaluation
test_loss, test_acc, roc_auc = evaluate_and_save(model, test_loader, criterion, device)

Evaluating: 100%|██████████| 10/10 [00:44<00:00,  4.42s/it]



🧪 Test Loss: 0.4044 | Test Accuracy: 0.8400

📊 Classification Report:
              precision    recall  f1-score   support

   Authentic       0.77      0.96      0.86       150
      Forged       0.95      0.72      0.82       150

    accuracy                           0.84       300
   macro avg       0.86      0.84      0.84       300
weighted avg       0.86      0.84      0.84       300

📝 Classification report saved to evaluation_results\classification_report.txt
🖼️ Confusion matrix saved to evaluation_results\confusion_matrix.png
📉 ROC curve saved to evaluation_results\roc_curve.png
📦 Metrics saved to evaluation_results\metrics.json

✅ Evaluation complete. All results saved in: c:\Users\PC\Desktop\Apps\DocuForge\evaluation_results
