<a href="https://colab.research.google.com/github/Rocco000/OncoVision/blob/main/Scripts/TrainModel.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Mount Google Drive and import the dataset-loader and model-architecture notebooks**

In [None]:
from google.colab import drive
drive.mount('/content/drive') #Connect to Google Drive

#Run the .ipynb file
%run '/content/drive/MyDrive/Colab Notebooks/DatasetLoader.ipynb'
%run '/content/drive/MyDrive/Colab Notebooks/ModelArchitecture1.ipynb'

#Now I can access to the methods of this file
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device: ",device)

Script to EVALUATE the model

In [2]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import confusion_matrix
import torch.nn.functional as F
import numpy as np

def evaluate_model(model, data_loader, decision):
  """Evaluate a binary classifier on every batch of ``data_loader``.

  The model is switched to evaluation mode and left in that mode; callers that
  keep training afterwards must call ``model.train()`` themselves.

  Args:
    model: network returning one score per class with shape [batch, 2].
    data_loader: iterable yielding ``(images, labels)`` batches.
    decision: 1 when evaluating the validation set, any other value for the test set.

  Returns:
    ``(accuracy, recall)`` when ``decision == 1``; otherwise
    ``(cm, tp, tn, fp, fn, accuracy, precision, recall, f1)`` where ``cm`` is the
    2x2 confusion matrix.
  """
  #Set the model in evaluation mode
  if decision == 1:
    print("Start VALIDATION STEP")
  else:
    print("Start TEST STEP")
  model.eval()
  true_labels = []
  predicted_labels = []

  #No gradient is needed during evaluation
  with torch.no_grad():
    for images, labels in data_loader:
      #Move the batch to the device selected at the top of the notebook
      images = images.to(device)
      labels = labels.to(device)

      #Provide the samples to the model
      logits = model(images)

      #Softmax is monotonic, so the arg-max of the raw scores is already the predicted class.
      #dim=1 because the output size is [batch, 2] with one column per class
      predictions = torch.argmax(logits, dim=1)

      #Store the true labels and the predicted labels.
      #.cpu() because a CUDA tensor can't be converted to numpy directly.
      #Every batch is accumulated: the previous debugging `break` limited the metrics to the first batch only.
      true_labels.extend(labels.cpu().numpy())
      predicted_labels.extend(predictions.cpu().numpy())

  #Computing the EVALUATION METRICS
  true_labels = np.array(true_labels)
  predicted_labels = np.array(predicted_labels)

  accuracy = accuracy_score(true_labels, predicted_labels)
  precision = precision_score(true_labels, predicted_labels)
  recall = recall_score(true_labels, predicted_labels)
  f1 = f1_score(true_labels, predicted_labels)

  if decision == 1:
    print("Evaluation metrics in validation set:")
  else:
    print("Evaluation metrics in test set:")
  print("Accuracy: ",accuracy, "; Precision: ",precision, "; Recall: ",recall,"; F1: ",f1)

  if decision == 1:
    return accuracy, recall

  #labels=[0, 1] forces a 2x2 matrix even when only one class occurs, so the unpacking below can't fail
  cm = confusion_matrix(true_labels, predicted_labels, labels=[0, 1])
  tn, fp, fn, tp = cm.ravel()
  return cm, tp, tn, fp, fn, accuracy, precision, recall, f1

Script to TRAIN the model

In [3]:
from torchvision.transforms import transforms
import torch
from google.colab.patches import cv2_imshow #because Google Colab doesn't support cv2.imshow(), it causes Jupyter session to crash
import cv2
import torchvision
import torch.nn as nn

#TRAIN STEP
def train_model(model, data_loader, valid_loader, num_epoch, criterion, optimizer, max_patience=32):
  """Train ``model`` with early stopping driven by validation accuracy and recall.

  After every epoch the model is evaluated on ``valid_loader``. An epoch counts as
  an improvement when neither accuracy nor recall got worse and at least one of
  them got strictly better; in that case a snapshot of the model and optimizer
  state is kept. Training stops early once more than ``max_patience`` consecutive
  epochs pass without improvement, and the best snapshot is then loaded back into
  ``model`` and ``optimizer``.

  Args:
    model: network to train (already moved to ``device``).
    data_loader: iterable yielding ``(images, labels)`` training batches.
    valid_loader: iterable yielding validation batches, passed to ``evaluate_model``.
    num_epoch: maximum number of epochs.
    criterion: loss function called as ``criterion(predictions, labels)``.
    optimizer: optimizer updating the model parameters.
    max_patience: number of consecutive non-improving epochs tolerated before stopping.

  Returns:
    ``(model_state, optimizer_state)``: deep copies of the best state dicts, or
    ``(None, None)`` if no epoch ever improved on the initial zero scores.
  """
  import copy #Local import so the function doesn't depend on another cell's imports

  print("START TRAINING STEP")
  model.train()
  patience = 0 #Consecutive epochs without improvement
  best_recall = 0
  best_accuracy = 0
  best_loss = 0
  best_epoch = 0 #1-based like the epoch messages below; 0 means "no improvement recorded"

  model_state = None #To store the best model status
  optimizer_state = None #To store the best optimizer status

  #Randomly (probability p) sharpen the images. A sharpness_factor of 1 would leave them unchanged; values above 1 sharpen
  up_sharpness = torchvision.transforms.RandomAdjustSharpness(sharpness_factor=1.5, p=0.5)

  #Randomly (probability p) maximize the image contrast
  up_contrast = torchvision.transforms.RandomAutocontrast(p=0.5)

  for e in range(num_epoch):
    loss = None #Stays None when the loader yields no batch
    for images, labels in data_loader:
      images = images.to(device)
      labels = labels.to(device)

      #Improve the image sharpness and the image contrast
      images = up_sharpness(images)
      images = up_contrast(images)

      #Reset the gradients once per batch
      optimizer.zero_grad()

      #FORWARD PASS
      predictions = model(images)

      #Measure the loss function
      loss = criterion(predictions, labels)

      #BACKWARD PASS
      loss.backward()
      optimizer.step()

    #Loss of the last batch of this epoch (nan if there was no batch)
    epoch_loss = loss.item() if loss is not None else float("nan")

    #After training on all the batches, test the model on the VALIDATION SET
    accuracy, recall = evaluate_model(model, valid_loader, 1)

    not_worse = recall >= best_recall and accuracy >= best_accuracy
    strictly_better = recall > best_recall or accuracy > best_accuracy
    if not_worse and strictly_better:
      patience = 0 #The original assigned to a misspelled name, so the counter was never reset
      best_recall = recall
      best_accuracy = accuracy
      best_loss = epoch_loss
      #state_dict() returns references to the live tensors: without a deep copy the
      #"best" snapshot would be silently overwritten by the following epochs
      model_state = copy.deepcopy(model.state_dict())
      optimizer_state = copy.deepcopy(optimizer.state_dict())
      best_epoch = e+1
    else:
      patience = patience+1

    #evaluate_model switched the model to eval mode, restore train mode
    model.train()
    if patience>max_patience:
      print("Train step stopped at epoch ",e+1," because the model doesn't improve!")
      if model_state is not None and optimizer_state is not None:
        model.load_state_dict(model_state) #Set the model with the best configuration
        optimizer.load_state_dict(optimizer_state)
      break
    print(f"Epoch [{e+1}] Loss: {epoch_loss:.4f}")

  print("The best model and optimizer configuration was achieved at the ",best_epoch," epoch")
  print("The best accuracy: ",best_accuracy," best recall: ",best_recall, "best loss: ",best_loss)
  print("Finished Training!")
  return model_state, optimizer_state


**TRAIN THE MODEL**

In [None]:
import os

#Dataset location on Google Drive
dataset_dir = "/content/drive/MyDrive/SE4AI/Data/Datasets/Dataset1/"
path_benign = dataset_dir + "benign"
path_malignant = dataset_dir + "malignant"

#Folder where the trained weights are stored. It is created up front so that a
#missing directory can't make torch.save fail after the whole training has run.
output_dir = '/content/drive/MyDrive/SE4AI/Model/EvaluationOnDB1NoCleaned'
os.makedirs(output_dir, exist_ok=True)

#Get the dataloader for each set
train_loader, valid_loader, test_loader = get_dataset(dataset_dir,percent_train=0.7, percent_valid=0.2, batch_size=64)

#Plot the samples for each set
plot_samples(train_loader, "TRAIN SET")
plot_samples(test_loader, "TEST SET")
plot_samples(valid_loader, "VALID SET")

#Define the CNN
model = ConvModel1().to(device)

#Define the loss function -> CrossEntropyLoss because the CNN does binary classification with two output neurons, one per class.
#This loss already applies the Softmax activation function, so the model must output raw scores.
criterion = nn.CrossEntropyLoss()

#Define the optimizer
optimizer = torch.optim.Adam(params=model.parameters(), lr= 0.001)

#Start the train step
best_model_configuration, best_optimizer_configuration = train_model(model=model, data_loader=train_loader, valid_loader=valid_loader, num_epoch= 64, criterion=criterion, optimizer=optimizer)

#Store the best configurations
torch.save(best_model_configuration, os.path.join(output_dir, 'model_parameters.pth'))
torch.save(best_optimizer_configuration, os.path.join(output_dir, 'optimizer_parameters.pth'))

**EVALUATE THE MODEL**

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
import csv

#Restore the best weights found during training and keep the model on the selected device
model.load_state_dict(best_model_configuration)
model.to(device)

#decision=0 -> test-set evaluation, which also returns the confusion matrix and its four cells
confusion, tp, tn, fp, fn, accuracy, precision, recall, f1 = evaluate_model(model, test_loader, 0)

#Draw the confusion matrix as an annotated heatmap
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(confusion, annot=True, fmt="d", cmap="Blues", cbar=False, ax=ax)
ax.set_title("Confusion Matrix")
ax.set_xlabel("Predicted")
ax.set_ylabel("True")

#Save the figure before showing it
fig.savefig('/content/drive/MyDrive/SE4AI/Model/EvaluationOnDB1NoCleaned/confusion_matrix.png')

plt.show()


print("TP: ",tp," TN: ",tn," FP: ",fp," FN:",fn)

#Write the test metrics as a one-row CSV with a header
with open("/content/drive/MyDrive/SE4AI/Model/EvaluationOnDB1NoCleaned/evaluation_metrics.csv", "w", newline="") as metrics_file:
    metrics_writer = csv.writer(metrics_file)
    metrics_writer.writerows([
        ["Accuracy","Precision","Recall","F1-Score"],
        [accuracy,precision,recall,f1],
    ])


