In [1]:
import os

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models
from torchvision.datasets import ImageFolder
from torchvision import transforms
import torch.optim as optim
from torch.utils.data import DataLoader, random_split

from sklearn.metrics import classification_report, confusion_matrix
from sklearn.utils.class_weight import compute_class_weight

In [2]:
# Step 1: Set up transformations for the images

train_transform= transforms.Compose([
    transforms.RandomRotation(5),
    transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

test_transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize to fit model input
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),  # Standard ImageNet normalization
])


# Step 2: Load the dataset using ImageFolder
train_dataset = ImageFolder(root="../dataset/train", transform=train_transform)
test_dataset = ImageFolder(root="../dataset/test", transform=test_transform)
out_of_sample_dataset = ImageFolder(root="../dataset/out_of_sample_test", transform=test_transform)

# Creating vaidation set
train_size = int(0.9 * len(train_dataset))
val_size = len(train_dataset) - train_size
train_subset, val_subset = random_split(train_dataset, [train_size, val_size])

# Create DataLoader for batching and shuffling
train_loader = DataLoader(train_subset, batch_size=16, shuffle=True)
val_loader = DataLoader(val_subset, batch_size=16, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)
out_of_sample_loader = DataLoader(out_of_sample_dataset, batch_size=16, shuffle=False)

# Create your custom CNN model
model = models.resnet50(pretrained=True)
model.fc = nn.Linear(model.fc.in_features, 3)

# Move the model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)



ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [3]:
# Step 4: Define the loss function and optimizer
# Calculate class weights inverse to their frequency

# Get all training labels
all_labels = []
for _, labels in train_loader:
    all_labels.extend(labels.cpu().numpy())

# Compute weights
class_weights = compute_class_weight('balanced', classes=np.unique(all_labels), y=all_labels)
class_weights = torch.tensor(class_weights, dtype=torch.float).to(device)

criterion = nn.CrossEntropyLoss(weight=class_weights)
optimizer = optim.Adam(model.parameters(), lr=1.2526746729247135e-05, weight_decay=0.0007394206084128669)

# Create a scheduler to reduce learning rate when validation loss plateaus
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode='min', factor=0.13777011736928238, patience=5, verbose=True
)

# Initialize lists to track metrics
train_losses = []
train_accuracies = []
val_accuracies = []
best_val_accuracy = 0.0

# Step 5: Train the model with validation after each epoch
num_epochs = 40

def evaluate_model(model, data_loader, device, class_names):
    """
    Evaluate model performance with detailed metrics
    """
    model.eval()
    
    all_preds = []
    all_labels = []
    correct = 0
    total = 0
    
    class_correct = [0] * len(class_names)
    class_total = [0] * len(class_names)
    
    with torch.no_grad():
        for inputs, labels in data_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            
            total += labels.size(0)
            correct += (preds == labels).sum().item()
            
            # For per-class accuracy
            for i in range(len(preds)):
                label = labels[i].item()
                class_correct[label] += (preds[i] == labels[i]).item()
                class_total[label] += 1
            
            # For confusion matrix and classification report
            all_labels.extend(labels.cpu().numpy())
            all_preds.extend(preds.cpu().numpy())
    
    # Calculate overall accuracy
    accuracy = correct / total
    
    # Calculate per-class accuracy
    per_class_accuracy = {}
    for i in range(len(class_names)):
        if class_total[i] > 0:
            per_class_accuracy[class_names[i]] = class_correct[i] / class_total[i]
        else:
            per_class_accuracy[class_names[i]] = 0
    
    # Generate confusion matrix and classification report
    conf_matrix = confusion_matrix(all_labels, all_preds)
    class_report = classification_report(all_labels, all_preds, target_names=class_names, output_dict=True,  zero_division=0)
    
    return accuracy, per_class_accuracy, conf_matrix, class_report, all_labels, all_preds


# Get class names from your dataset
class_names = train_dataset.classes  # Should be ['0', '1', '2']

for epoch in range(num_epochs):
    # Training phase
    model.train()
    running_loss = 0.0
    correct_preds = 0
    total_preds = 0
    
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        
        # Zero the gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        
        # Backward pass and optimization
        loss.backward()
        optimizer.step()
        
        # Track the loss and accuracy
        running_loss += loss.item()
        _, preds = torch.max(outputs, 1)
        correct_preds += (preds == labels).sum().item()
        total_preds += labels.size(0)
    
    # Calculate epoch accuracy and loss
    epoch_accuracy = correct_preds / total_preds
    epoch_loss = running_loss / len(train_loader)
    train_losses.append(epoch_loss)
    train_accuracies.append(epoch_accuracy)
    
    # Evaluation on validation set (test set in this case)
    val_accuracy, val_per_class_acc, _, _, _, _ = evaluate_model(model, val_loader, device, class_names)
    val_accuracies.append(val_accuracy)
    
    # Update learning rate based on validation performance
    scheduler.step(epoch_loss)
    
    # Save best model
    if val_accuracy > best_val_accuracy:
        best_val_accuracy = val_accuracy
        torch.save(model.state_dict(), "best_fraud_detection_model.pth")
        print(f"New best model saved with validation accuracy: {best_val_accuracy:.4f}")
    
    # Print epoch statistics
    print(f"Epoch [{epoch+1}/{num_epochs}], "
          f"Loss: {epoch_loss:.4f}, "
          f"Train Accuracy: {epoch_accuracy:.4f}, "
          f"Validation Accuracy: {val_accuracy:.4f}")
    
    # Print per-class accuracy on validation set
    # print("Test Per-class Accuracy:")
    # for cls_name, acc in val_per_class_acc.items():
    #     print(f"  Class {cls_name}: {acc:.4f}")

# Plot training progress
plt.figure(figsize=(12, 4))

plt.subplot(1, 2, 1)
plt.plot(train_losses)
plt.title('Training Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')

plt.subplot(1, 2, 2)
plt.plot(train_accuracies, label='Train')
plt.plot(val_accuracies, label='Validation')
plt.title('Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()

plt.tight_layout()
plt.savefig('training_progress.png')
plt.close()

# Load the best model for final evaluation
model.load_state_dict(torch.load("best_fraud_detection_model.pth"))

# Final evaluation on all datasets
print("\n" + "="*50)
print("FINAL MODEL EVALUATION")
print("="*50)

# Evaluate on training set
print("\nTraining Set Performance:")
train_acc, train_per_class_acc, train_cm, train_report, _, _ = evaluate_model(
    model, train_loader, device, class_names
)
print(f"Overall Accuracy: {train_acc:.4f}")
print("Per-class Accuracy:")
for cls_name, acc in train_per_class_acc.items():
    print(f"  Class {cls_name}: {acc:.4f}")

# Evaluate on test set
print("\nTest Set Performance:")
test_acc, test_per_class_acc, test_cm, test_report, _, _ = evaluate_model(
    model, test_loader, device, class_names
)
print(f"Overall Accuracy: {test_acc:.4f}")
print("Per-class Accuracy:")
for cls_name, acc in test_per_class_acc.items():
    print(f"  Class {cls_name}: {acc:.4f}")

# Evaluate on out-of-sample set
print("\nOut-of-Sample Set Performance:")
oos_acc, oos_per_class_acc, oos_cm, oos_report, _, _ = evaluate_model(
    model, out_of_sample_loader, device, class_names
)
print(f"Overall Accuracy: {oos_acc:.4f}")
print("Per-class Accuracy:")
for cls_name, acc in oos_per_class_acc.items():
    print(f"  Class {cls_name}: {acc:.4f}")

# Create a DataFrame for all metrics to easily compare
metrics_df = pd.DataFrame({
    'Dataset': ['Training', 'Test', 'Out-of-Sample'],
    'Overall Accuracy': [train_acc, test_acc, oos_acc],
    'Class 0 Accuracy': [train_per_class_acc.get('0', 0), test_per_class_acc.get('0', 0), oos_per_class_acc.get('0', 0)],
    'Class 1 Accuracy': [train_per_class_acc.get('1', 0), test_per_class_acc.get('1', 0), oos_per_class_acc.get('1', 0)],
    'Class 2 Accuracy': [train_per_class_acc.get('2', 0), test_per_class_acc.get('2', 0), oos_per_class_acc.get('2', 0)],
    'Class 0 F1-Score': [train_report['0']['f1-score'], test_report['0']['f1-score'], oos_report['0']['f1-score']],
    'Class 1 F1-Score': [train_report['1']['f1-score'], test_report['1']['f1-score'], oos_report['1']['f1-score']],
    'Class 2 F1-Score': [train_report['2']['f1-score'], test_report['2']['f1-score'], oos_report['2']['f1-score']]
})

# Display the metrics table
print("\nComparison of Metrics Across Datasets:")
print(metrics_df.to_string(index=False))

# Save metrics to CSV
metrics_df.to_csv('fraud_detection_metrics.csv', index=False)
print("\nMetrics saved to 'fraud_detection_metrics.csv'")

# Step 7: Save the final model
torch.save(model.state_dict(), "fraud_detection_model.pth")
print("Final model saved as fraud_detection_model.pth")

print("\nModel training and evaluation complete!")



New best model saved with validation accuracy: 0.6190
Epoch [1/40], Loss: 1.0868, Train Accuracy: 0.5352, Validation Accuracy: 0.6190
New best model saved with validation accuracy: 0.7143
Epoch [2/40], Loss: 1.0596, Train Accuracy: 0.6348, Validation Accuracy: 0.7143
Epoch [3/40], Loss: 1.0307, Train Accuracy: 0.6268, Validation Accuracy: 0.6310
Epoch [4/40], Loss: 1.0114, Train Accuracy: 0.5179, Validation Accuracy: 0.5238
Epoch [5/40], Loss: 0.9392, Train Accuracy: 0.6149, Validation Accuracy: 0.6786
Epoch [6/40], Loss: 0.8544, Train Accuracy: 0.7211, Validation Accuracy: 0.6429
New best model saved with validation accuracy: 0.8214
Epoch [7/40], Loss: 0.7864, Train Accuracy: 0.7384, Validation Accuracy: 0.8214
New best model saved with validation accuracy: 0.8810
Epoch [8/40], Loss: 0.5945, Train Accuracy: 0.8526, Validation Accuracy: 0.8810
Epoch [9/40], Loss: 0.5112, Train Accuracy: 0.8911, Validation Accuracy: 0.8095
Epoch [10/40], Loss: 0.4156, Train Accuracy: 0.9163, Validation 

  model.load_state_dict(torch.load("best_fraud_detection_model.pth"))



FINAL MODEL EVALUATION

Training Set Performance:
Overall Accuracy: 0.9947
Per-class Accuracy:
  Class 0: 1.0000
  Class 1: 1.0000
  Class 2: 0.9487

Test Set Performance:
Overall Accuracy: 0.9016
Per-class Accuracy:
  Class 0: 0.9592
  Class 1: 0.8333
  Class 2: 0.5000

Out-of-Sample Set Performance:
Overall Accuracy: 0.8800
Per-class Accuracy:
  Class 0: 0.9500
  Class 1: 1.0000
  Class 2: 0.2000

Comparison of Metrics Across Datasets:
      Dataset  Overall Accuracy  Class 0 Accuracy  Class 1 Accuracy  Class 2 Accuracy  Class 0 F1-Score  Class 1 F1-Score  Class 2 F1-Score
     Training          0.994688          1.000000          1.000000          0.948718          1.000000          0.973684          0.973684
         Test          0.901639          0.959184          0.833333          0.500000          0.979167          0.625000          0.600000
Out-of-Sample          0.880000          0.950000          1.000000          0.200000          0.974359          0.625000          0.3333