In [16]:
# Detect whether the notebook runs on Google Colab: the `google.colab`
# package can only be imported inside a Colab runtime.
# Only ImportError is treated as "not on Colab"; a failure while mounting
# Drive is left to propagate instead of silently switching to local paths.
try:
    from google.colab import drive
except ImportError:
    IN_COLAB = False
    print("Not running on Google Colab. ")
else:
    IN_COLAB = True
    print("Running on Google Colab. ")
    # Mount Google Drive so the shared dataset/model folders are reachable.
    drive.mount('/content/drive')

Not running on Google Colab. 


In [17]:
import torch 
# Use the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using {device}")

Using cuda


In [18]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

In [19]:
from PIL import Image
import os
import numpy as np
from torch.utils.data import Dataset, DataLoader

class VGGFace2Dataset(Dataset):
    """Image dataset that eagerly loads every file under ``root_dir`` into memory.

    The label of each sample is the name of the directory that directly
    contains the image file, parsed as an integer (e.g. ``.../train/0/x.jpg``
    gets label ``0``).

    Args:
        root_dir: Directory that is walked recursively for image files.
        image_size: Stored on the instance but not used by this class; any
            resizing has to be done by ``transform``.
        transform: Optional callable applied to each RGB PIL image.
        device: Device that tensor images and labels are moved to.

    Attributes:
        file_list: Paths of the successfully loaded images.
        img_list: Loaded (and transformed) images, index-aligned with
            ``file_list``.
    """

    def __init__(self, root_dir, image_size=(160, 160), transform=None, device='cpu'):
        self.root_dir = root_dir
        self.image_size = image_size
        self.transform = transform
        self.device = device
        # Both lists are appended together, only after a sample loaded
        # successfully, so index i always refers to the same file in each.
        self.file_list = []
        self.img_list = []
        for root, dirs, files in os.walk(self.root_dir):
            # os.walk yields entries in arbitrary order; sorting makes the
            # sample order reproducible across runs and machines.
            dirs.sort()
            for file in sorted(files):
                path = os.path.join(root, file)
                try:
                    # The context manager closes the file handle once the
                    # pixel data has been copied by convert().
                    with Image.open(path) as raw:
                        img = raw.convert('RGB')
                    if self.transform:
                        img = self.transform(img)
                    # Without a tensor-producing transform the sample is still
                    # a PIL image, which has no .to(); keep it as is.
                    if isinstance(img, torch.Tensor):
                        img = img.to(self.device)
                except Exception:
                    # Best effort: report unreadable/corrupted files and skip them.
                    print(f"Error loading file {path}")
                    continue
                self.file_list.append(path)
                self.img_list.append(img)

    def __len__(self):
        """Return the number of successfully loaded samples."""
        return len(self.file_list)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for sample ``idx``.

        Raises:
            ValueError: If the parent directory name is not an integer.
        """
        img_path = self.file_list[idx]
        img = self.img_list[idx]
        # The label is the name of the directory that contains the file.
        label = os.path.basename(os.path.dirname(img_path))
        labels = torch.tensor(int(label)).to(self.device)
        return img, labels

Dipende poi da quale rete (NN1 o NN2) viene utilizzata.

In [20]:
import os


# Preprocessing shared by the training and validation sets:
# resize to 160x160 and convert the PIL image to a tensor.
transform = transforms.Compose([
    transforms.Resize((160, 160)),
    transforms.ToTensor(),
])

# Dataset root: shared Google Drive on Colab, mapped drive otherwise.
if IN_COLAB:
    dataset_dir = '/content/drive/Shareddrives/AI4CYBSEC/face_dataset/dataset_prova_prisco'
else:
    # Raw string: a Windows path must not rely on unrecognised escape
    # sequences such as "\D" or "\A" (deprecated, a SyntaxWarning on recent Python).
    dataset_dir = r'G:\Drive condivisi\AI4CYBSEC\face_dataset\train_set_detector_df_cw'

# Single batch size used by both loaders.
batch_size = 32

# --- Training split ---
train_set_path = os.path.join(dataset_dir, 'train')
dataset_train = VGGFace2Dataset(root_dir=train_set_path, transform=transform, device=device)
print("Training Set length:", len(dataset_train))
# Shuffle so each epoch sees the training samples in a different order.
dataloader_train = DataLoader(dataset_train, batch_size=batch_size, shuffle=True)

# --- Validation split ---
val_set_path = os.path.join(dataset_dir, 'val')
dataset_val = VGGFace2Dataset(root_dir=val_set_path, transform=transform, device=device)
print("Validation Set length:", len(dataset_val))
# No shuffling: evaluation does not depend on sample order.
dataloader_val = DataLoader(dataset_val, batch_size=batch_size, shuffle=False)

Error loading file G:\Drive condivisi\AI4CYBSEC\face_dataset\train_set_detector_df_cw\train\0\n008229_0565_01.jpg
Training Set length: 14396
Validation Set length: 3600


## Alternativa con MobileNetV2

In [21]:
import torch
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder

# Start from a pre-trained MobileNetV2 backbone.
model = models.mobilenet_v2(pretrained=True)

# Replace the final classifier layer with a new linear head that has
# 2 outputs (binary classification), keeping the same input width.
model.classifier[1] = nn.Linear(
    in_features=model.classifier[1].in_features,
    out_features=2,
)

# Move the network to the selected device before creating the optimizer.
model = model.to(device)

# Loss function and optimizer (AdamW, learning rate 1e-3).
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=1e-3)



In [22]:
from tqdm import tqdm
import torch
import os
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report


import copy

# ---- Classifier training with early stopping on validation accuracy ----
num_epochs = 100
patience = 5  # maximum number of consecutive epochs without validation improvement
no_improvement_count = 0  # epochs elapsed since the last improvement

best_val_accuracy = 0.0  # best validation accuracy seen so far
best_model_weights = None  # snapshot of the weights that achieved it


for epoch in range(num_epochs):
    running_loss = 0.0
    total_correct = 0
    total_samples = 0

    model.train()  # training mode (dropout active, batch-norm statistics updated)

    for inputs, labels in tqdm(dataloader_train, desc=f"Epoch {epoch+1}/{num_epochs} training"):
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        # Predicted class = index of the largest logit.
        _, predicted = torch.max(outputs, 1)
        total_samples += labels.size(0)
        total_correct += (predicted == labels).sum().item()

    # Mean of the per-batch losses and sample-level accuracy for this epoch.
    train_loss = running_loss / len(dataloader_train)
    train_accuracy = total_correct / total_samples

    # ---- Evaluation on the validation set ----
    model.eval()  # evaluation mode
    all_predictions = []
    all_labels = []

    with torch.no_grad():
        for inputs, labels in tqdm(dataloader_val, desc=f"Epoch {epoch+1}/{num_epochs} validation"):
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)
            all_predictions.extend(predicted.tolist())
            all_labels.extend(labels.tolist())

    val_accuracy = accuracy_score(all_labels, all_predictions)

    print(f'  Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.4f}, Validation Accuracy: {val_accuracy:.4f}')

    # Keep the metrics and weights of the best epoch so far.
    if val_accuracy > best_val_accuracy:
        best_val_accuracy = val_accuracy
        conf_matrix = confusion_matrix(all_labels, all_predictions)
        report = classification_report(all_labels, all_predictions)
        precision_score_value = precision_score(all_labels, all_predictions)
        recall_score_value = recall_score(all_labels, all_predictions)
        f1_score_value = f1_score(all_labels, all_predictions)
        # state_dict() returns references to the live parameter tensors, which
        # later optimizer steps keep modifying. Deep-copy them so the snapshot
        # really holds the weights of this (best) epoch.
        best_model_weights = copy.deepcopy(model.state_dict())
        no_improvement_count = 0  # reset the early-stopping counter
    else:
        no_improvement_count += 1
    print(f'  Best Validation accuracy: {best_val_accuracy:.4f}')

    # Early-stopping criterion.
    if no_improvement_count >= patience:
        print(f'Early stopping at epoch {epoch+1}')
        break

# ---- Save the weights of the best model ----
if IN_COLAB:
    directory = "/content/drive/Shareddrives/AI4CYBSEC/models/"
else:
    # Raw string: "\D", "\A" and "\m" are not valid escape sequences.
    directory = r"G:\Drive condivisi\AI4CYBSEC\models"
os.makedirs(directory, exist_ok=True)

if best_model_weights is not None:
    torch.save(best_model_weights, os.path.join(directory,'best_mobilenetv2_df_cw.pth'))
    print("Best model weights saved")
    print("Best Validation accuracy: ", best_val_accuracy)
    print("Confusion Matrix: ", conf_matrix)
    print("Classification Report: ", report)
    print("Precision Score: ", precision_score_value)
    print("Recall Score: ", recall_score_value)
    print("F1 Score: ", f1_score_value)


Epoch 1/100 training: 100%|██████████| 450/450 [00:07<00:00, 62.56it/s]
Epoch 1/100 validation: 100%|██████████| 113/113 [00:00<00:00, 230.44it/s]


  Epoch [1/100], Train Loss: 0.5993, Train Accuracy: 0.6517, Validation Accuracy: 0.8089
  Best Validation accuracy: 0.8089


Epoch 2/100 training: 100%|██████████| 450/450 [00:06<00:00, 65.63it/s]
Epoch 2/100 validation: 100%|██████████| 113/113 [00:00<00:00, 234.27it/s]


  Epoch [2/100], Train Loss: 0.1925, Train Accuracy: 0.9190, Validation Accuracy: 0.9567
  Best Validation accuracy: 0.9567


Epoch 3/100 training: 100%|██████████| 450/450 [00:06<00:00, 67.16it/s]
Epoch 3/100 validation: 100%|██████████| 113/113 [00:00<00:00, 235.78it/s]


  Epoch [3/100], Train Loss: 0.1049, Train Accuracy: 0.9558, Validation Accuracy: 0.9628
  Best Validation accuracy: 0.9628


Epoch 4/100 training: 100%|██████████| 450/450 [00:06<00:00, 67.04it/s]
Epoch 4/100 validation: 100%|██████████| 113/113 [00:00<00:00, 228.36it/s]


  Epoch [4/100], Train Loss: 0.0787, Train Accuracy: 0.9670, Validation Accuracy: 0.9769
  Best Validation accuracy: 0.9769


Epoch 5/100 training: 100%|██████████| 450/450 [00:06<00:00, 64.41it/s]
Epoch 5/100 validation: 100%|██████████| 113/113 [00:00<00:00, 230.74it/s]


  Epoch [5/100], Train Loss: 0.0663, Train Accuracy: 0.9722, Validation Accuracy: 0.9744
  Best Validation accuracy: 0.9769


Epoch 6/100 training: 100%|██████████| 450/450 [00:06<00:00, 66.06it/s]
Epoch 6/100 validation: 100%|██████████| 113/113 [00:00<00:00, 226.91it/s]


  Epoch [6/100], Train Loss: 0.0564, Train Accuracy: 0.9757, Validation Accuracy: 0.9547
  Best Validation accuracy: 0.9769


Epoch 7/100 training: 100%|██████████| 450/450 [00:07<00:00, 62.09it/s]
Epoch 7/100 validation: 100%|██████████| 113/113 [00:00<00:00, 226.00it/s]


  Epoch [7/100], Train Loss: 0.0500, Train Accuracy: 0.9778, Validation Accuracy: 0.9767
  Best Validation accuracy: 0.9769


Epoch 8/100 training: 100%|██████████| 450/450 [00:07<00:00, 63.16it/s]
Epoch 8/100 validation: 100%|██████████| 113/113 [00:00<00:00, 211.96it/s]


  Epoch [8/100], Train Loss: 0.0493, Train Accuracy: 0.9795, Validation Accuracy: 0.9769
  Best Validation accuracy: 0.9769


Epoch 9/100 training: 100%|██████████| 450/450 [00:06<00:00, 64.94it/s]
Epoch 9/100 validation: 100%|██████████| 113/113 [00:00<00:00, 230.51it/s]

  Epoch [9/100], Train Loss: 0.0485, Train Accuracy: 0.9804, Validation Accuracy: 0.9661
  Best Validation accuracy: 0.9769
Early stopping at epoch 9
Best model weights saved
Best Validation accuracy:  0.9769444444444444
Confusion Matrix:  [[1950   51]
 [  32 1567]]
Classification Report:                precision    recall  f1-score   support

           0       0.98      0.97      0.98      2001
           1       0.97      0.98      0.97      1599

    accuracy                           0.98      3600
   macro avg       0.98      0.98      0.98      3600
weighted avg       0.98      0.98      0.98      3600

Precision Score:  0.9684796044499382
Recall Score:  0.9799874921826142
F1 Score:  0.9741995648119366





In [23]:


def load_model(model, model_path, map_location=None):
    """Load a saved ``state_dict`` into ``model`` and switch it to eval mode.

    Args:
        model: Module whose architecture matches the checkpoint.
        model_path: Path of a file written with ``torch.save(state_dict, ...)``.
        map_location: Passed to ``torch.load``. When ``None`` the tensors are
            mapped to the device of the model's first parameter, so a
            checkpoint saved on a GPU can be loaded on a CPU-only machine.

    Returns:
        The same ``model`` instance, with the weights loaded, in eval mode.
    """
    if map_location is None:
        first_param = next(model.parameters(), None)
        if first_param is not None:
            map_location = first_param.device
    # NOTE(review): torch.load unpickles the file; only load checkpoints
    # that come from a trusted source.
    state_dict = torch.load(model_path, map_location=map_location)
    model.load_state_dict(state_dict)
    model.eval()
    return model

# Rebuild the MobileNetV2 architecture. The ImageNet weights are not
# requested here: load_state_dict (strict by default) overwrites every
# parameter and buffer with the checkpoint loaded below.
model = models.mobilenet_v2(pretrained=False)

# Replace the last classifier layer with a 2-output head so that the
# architecture matches the one the checkpoint was saved from.
model.classifier[1] = nn.Linear(model.classifier[1].in_features, 2)

model = model.to(device)


# Load the weights of the trained model.
model = load_model(model, os.path.join(directory,'best_mobilenetv2_df_cw.pth'))

import torchsummary

# Print a layer-by-layer summary of the model for a 3x160x160 input.
torchsummary.summary(model, (3, 160, 160))

def make_inference(model, img_tensor):
    """Return the predicted class index for each sample of a batch.

    Args:
        model: Network to evaluate; it is switched to eval mode.
        img_tensor: Input batch with shape (N, C, H, W).

    Returns:
        Tensor with, for every sample, the index of the largest model output
        along dimension 1.
    """
    model.eval()
    # Gradients are not needed for inference.
    with torch.no_grad():
        logits = model(img_tensor)
        predicted = logits.max(dim=1)[1]
    return predicted







----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 32, 80, 80]             864
       BatchNorm2d-2           [-1, 32, 80, 80]              64
             ReLU6-3           [-1, 32, 80, 80]               0
            Conv2d-4           [-1, 32, 80, 80]             288
       BatchNorm2d-5           [-1, 32, 80, 80]              64
             ReLU6-6           [-1, 32, 80, 80]               0
            Conv2d-7           [-1, 16, 80, 80]             512
       BatchNorm2d-8           [-1, 16, 80, 80]              32
  InvertedResidual-9           [-1, 16, 80, 80]               0
           Conv2d-10           [-1, 96, 80, 80]           1,536
      BatchNorm2d-11           [-1, 96, 80, 80]             192
            ReLU6-12           [-1, 96, 80, 80]               0
           Conv2d-13           [-1, 96, 40, 40]             864
      BatchNorm2d-14           [-1, 96,