In [None]:
import pandas as pd

In [None]:
# Run settings are accumulated in this dict as the notebook executes.
config = {}

In [None]:
# Start from an empty table holding one row per sample:
# the path of the image file and its class label.
dataset = pd.DataFrame(columns=['image_path', 'label'])

In [None]:
from sklearn.model_selection import train_test_split

# Split hyper-parameters. `val_size` is a fraction of what remains after the
# test split has been removed, not a fraction of the full dataset.
seed = 42
test_size = 0.15
val_size = 0.25

# Record the values actually passed to the splitter (the variables, not
# re-typed literals) so the logged config cannot drift from the run.
config['seed'] = seed
config['test_size'] = test_size
config['val_size'] = val_size

# First hold out the test set, stratified on the label column.
train_val_df, test_df = train_test_split(
    dataset,
    test_size=test_size,
    stratify=dataset['label'],
    random_state=seed,
)

# Then carve the validation set out of the remaining rows, again stratified.
train_df, val_df = train_test_split(
    train_val_df,
    test_size=val_size,
    stratify=train_val_df['label'],
    random_state=seed,
)

In [ ]:
# Log how many samples of label 0 ("dni") and label 1 ("fake_dni") ended up in
# each split. The counts are computed once per split and then indexed.
for split_name, split_df in (('train', train_df), ('val', val_df), ('test', test_df)):
    label_counts = split_df['label'].value_counts()
    config[f'{split_name}_n_dni'] = label_counts[0]
    config[f'{split_name}_n_fake_dni'] = label_counts[1]

In [ ]:
from torch.utils.data import Dataset
from torchvision import transforms
from PIL import Image

class IdDataset(Dataset):
    """Map-style dataset pairing image files with their labels.

    Args:
        img_path_list: Sequence of image file paths, one per sample.
        lab_list: Sequence of labels, indexed in parallel with
            ``img_path_list``.
        transform: Optional callable applied to the loaded RGB image
            before it is returned.
    """

    def __init__(self, img_path_list, lab_list, transform=None):
        self.transform = transform
        self.images = img_path_list
        self.labels = lab_list

    def __len__(self):
        """Return the number of samples (one per image path)."""
        return len(self.images)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return it as ``(image, label)``.

        The image is opened from disk and converted to RGB; the label is
        returned as a float32 tensor of shape ``(1,)``.
        """
        # Use a context manager so the underlying file handle is released
        # deterministically instead of waiting for garbage collection.
        # `convert` returns an independent image, safe to use after closing.
        with Image.open(self.images[idx]) as source:
            image = source.convert("RGB")

        # `torch.tensor` with an explicit dtype replaces the legacy
        # `torch.Tensor(...)` constructor; the result is the same float32.
        label = torch.tensor([self.labels[idx]], dtype=torch.float32)

        # Compare against None so a transform object that happens to be
        # falsy (e.g. an empty container) is still applied.
        if self.transform is not None:
            image = self.transform(image)

        return image, label

In [ ]:
# Size every image is resized to before being fed to the network.
input_size = (224, 224)
config['input_size'] = input_size

In [ ]:
# Preprocessing applied to every image: resize, then convert to a tensor.
transform = transforms.Compose([
    transforms.Resize(input_size),
    transforms.ToTensor(),
])

In [ ]:
# One IdDataset per split, all sharing the same preprocessing transform.
train_dataset = IdDataset(
    img_path_list=train_df['image_path'].tolist(),
    lab_list=train_df['label'].tolist(),
    transform=transform,
)
val_dataset = IdDataset(
    img_path_list=val_df['image_path'].tolist(),
    lab_list=val_df['label'].tolist(),
    transform=transform,
)
test_dataset = IdDataset(
    img_path_list=test_df['image_path'].tolist(),
    lab_list=test_df['label'].tolist(),
    transform=transform,
)

In [ ]:
from torch.utils.data import DataLoader

batch_size = 10
config['batch_size'] = batch_size

# Training batches are shuffled; the last incomplete batch is dropped.
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, drop_last=True)

# Validation must read the validation split (it previously wrapped
# `train_dataset`, so "validation" metrics were measured on training data).
# No shuffling and no dropped samples, so every validation image is scored
# and the result is reproducible from epoch to epoch.
val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, drop_last=False)

# Test samples are scored one at a time, in order.
test_dataloader = DataLoader(test_dataset, batch_size=1, shuffle=False, drop_last=False)

In [ ]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models

class Net(nn.Module):
    """ResNet-50 backbone with a single-output head for binary classification.

    ``forward`` returns a probability in ``[0, 1]`` with shape ``(batch, 1)``,
    which is what ``nn.BCELoss`` and a 0.5 decision threshold expect.
    """

    def __init__(self):
        super().__init__()
        # NOTE(review): newer torchvision releases prefer the `weights=`
        # argument over `pretrained=True`; kept as-is for compatibility with
        # the installed version - confirm before upgrading.
        self.model = models.resnet50(pretrained=True)
        # Size the new head from the backbone instead of hard-coding it:
        # ResNet-50 feeds 2048 features into `fc`, so a literal 512 would
        # fail with a shape mismatch on the first forward pass.
        self.model.fc = nn.Linear(self.model.fc.in_features, 1)

    def forward(self, x):
        """Return the predicted probability of the positive class for ``x``."""
        logits = self.model(x)
        # Squash the raw logit into [0, 1]; BCELoss rejects values outside
        # that range and the thresholding downstream assumes probabilities.
        return torch.sigmoid(logits)

In [ ]:
import os

# Pick the best available accelerator by probing the backends directly rather
# than guessing from the OS name: `os.name == 'posix'` is also true on Linux,
# where a CUDA GPU would otherwise be ignored in favour of the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
elif getattr(torch.backends, "mps", None) is not None and torch.backends.mps.is_available():
    # Apple-silicon GPU; `torch.backends.mps` is the documented availability check.
    device = torch.device("mps")
else:
    device = torch.device("cpu")

In [ ]:
# Build the network and move its parameters to the selected device.
model = Net()
model = model.to(device)

config['model'] = 'Net'

In [ ]:
# Binary cross-entropy on probabilities: inputs must already lie in [0, 1].
criterion = nn.BCELoss()

# Stored under its own key; writing to config['model'] here would overwrite
# the model name recorded earlier in the notebook.
config['criterion'] = 'BCELoss'

In [ ]:
# Adam over every parameter of the model, with a fixed learning rate.
lr = 0.001
optimizer = optim.Adam(params=model.parameters(), lr=lr)

config['learning_rate'] = lr
config['optimizer'] = 'Adam'

In [ ]:
def train(model, train_dataloader, criterion, optimizer, device):
    """Run one optimisation pass over ``train_dataloader``.

    For each batch the gradients are cleared, the loss is back-propagated and
    the optimizer takes one step. Predictions are obtained by thresholding the
    model outputs at 0.5.

    Returns:
        A ``(mean_loss, accuracy)`` tuple: the per-batch loss averaged over the
        number of batches, and the fraction of matching predictions over the
        number of samples seen.
    """
    decision_threshold = 0.5

    model.to(device)
    model.train()

    loss_sum = 0.0
    n_correct = 0
    n_seen = 0

    for batch_images, batch_labels in train_dataloader:
        batch_images = batch_images.to(device)
        batch_labels = batch_labels.to(device)

        # Standard step: clear grads, forward, loss, backward, update.
        optimizer.zero_grad()
        batch_outputs = model(batch_images)
        batch_loss = criterion(batch_outputs, batch_labels)
        batch_loss.backward()
        optimizer.step()
        loss_sum += batch_loss.item()

        # Accuracy bookkeeping on detached outputs (no graph needed here).
        hits = (batch_outputs.detach() >= decision_threshold) == batch_labels
        n_correct += hits.sum().item()
        n_seen += batch_labels.size(0)

    return loss_sum / len(train_dataloader), n_correct / n_seen

In [ ]:
def validate(model, val_dataloader, criterion, device):
    """Evaluate ``model`` on ``val_dataloader`` without updating it.

    The model is switched to eval mode and all batches are scored with
    gradient tracking disabled. Predictions are obtained by thresholding the
    model outputs at 0.5.

    Returns:
        A ``(mean_loss, accuracy)`` tuple: the per-batch loss averaged over the
        number of batches, and the fraction of matching predictions over the
        number of samples seen.
    """
    decision_threshold = 0.5
    model.eval()

    loss_sum = 0.0
    n_correct = 0
    n_seen = 0

    with torch.no_grad():
        for batch_images, batch_labels in val_dataloader:
            batch_images = batch_images.to(device)
            batch_labels = batch_labels.to(device)

            batch_outputs = model(batch_images)
            loss_sum += criterion(batch_outputs, batch_labels).item()

            hits = (batch_outputs.detach() >= decision_threshold) == batch_labels
            n_correct += hits.sum().item()
            n_seen += batch_labels.size(0)

    return loss_sum / len(val_dataloader), n_correct / n_seen

In [ ]:
# Upper bound on the number of training epochs.
num_epochs = 15
config['num_epochs'] = num_epochs

# Number of consecutive non-improving epochs tolerated before stopping.
early_stopping_patience = 5
config['early_stopping_patience'] = early_stopping_patience

# Running count of epochs since the validation loss last improved.
epochs_without_improvement = 0

In [ ]:
checkpoint_path = './best_model.pth'

best_val_loss = float('inf')
# Reset the patience counter here, next to `best_val_loss`, so re-running this
# cell always starts a fresh early-stopping window instead of inheriting the
# count left over from a previous run.
epochs_without_improvement = 0

for epoch in range(num_epochs):
    train_loss, train_accuracy = train(model, train_dataloader, criterion, optimizer, device)
    val_loss, val_accuracy = validate(model, val_dataloader, criterion, device)

    print(f'Epoch [{epoch + 1}/{num_epochs}], '
          f'Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.2f}, '
          f'Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.2f}')

    if val_loss < best_val_loss:
        # New best validation loss: keep these weights and restart the count.
        best_val_loss = val_loss
        torch.save(model.state_dict(), checkpoint_path)
        epochs_without_improvement = 0
        print("Checkpoint saved")
    else:
        epochs_without_improvement += 1
        # `>=` rather than `==` so the stop still triggers if the counter is
        # ever past the patience value.
        if epochs_without_improvement >= early_stopping_patience:
            print("Early Stopping")
            break

In [ ]:
# Restore the best weights saved during training. `map_location` remaps the
# stored tensors onto the current device, so a checkpoint written on one
# accelerator still loads when that accelerator is not available.
model.load_state_dict(torch.load(checkpoint_path, map_location=device))
model.to(device)

# Switch to inference mode for the evaluation that follows.
model.eval()

In [ ]:
y_true = []
y_proba = []

# Inference only: the whole pass over the test set runs without autograd.
with torch.no_grad():
    for image, label in test_dataloader:
        image, label = image.to(device), label.to(device)
        output = model(image)

        # Collect results on the CPU as float tensors, one entry per batch.
        y_true.append(label.cpu().float())
        y_proba.append(output.cpu().float())

# Stack the per-batch tensors; ground-truth labels become integers.
y_true = torch.cat(y_true).int()
y_proba = torch.cat(y_proba)

# Hard predictions: scores at or above the threshold count as class 1.
thr = 0.5
y_pred = (y_proba >= thr).int()

In [ ]:
from sklearn.metrics import accuracy_score, confusion_matrix, roc_curve, auc, precision_score, recall_score

# Threshold-dependent metrics, computed from the hard predictions.
accuracy = accuracy_score(y_true=y_true, y_pred=y_pred)
conf_matrix = confusion_matrix(y_true=y_true, y_pred=y_pred)
precision = precision_score(y_true=y_true, y_pred=y_pred)
recall = recall_score(y_true=y_true, y_pred=y_pred)
# Specificity is the recall of the negative class (label 0).
specificity = recall_score(y_true=y_true, y_pred=y_pred, pos_label=0)

# Threshold-free metric: area under the ROC curve built from the raw scores.
fpr, tpr, _ = roc_curve(y_true, y_proba)
roc_auc = auc(x=fpr, y=tpr)