<a href="https://colab.research.google.com/github/MarcoParola/medical_images_classification/blob/main/utils.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Utils**
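This notebook collects helper functions shared by the other notebooks: data loading, pixel scaling, and plotting of confusion matrices, ROC curves and training history.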

In [25]:
import numpy as np
import os
from sklearn import metrics
import seaborn as sns
import matplotlib.pyplot as plt
import itertools
from sklearn.metrics import roc_curve
from sklearn.metrics import auc

In [26]:
def load_data(dir):
  """Load the training and test arrays stored as ``.npy`` files in ``dir``.

  Args:
    dir: Directory containing ``train_tensor.npy``, ``train_labels.npy``,
      ``public_test_tensor.npy``, ``public_test_labels.npy`` and
      ``private_test_tensor.npy``. A trailing path separator is optional.
      (The name shadows the ``dir`` builtin; it is kept so existing
      keyword callers keep working.)

  Returns:
    Tuple ``(imagesTrain, labelsTrain, imagesTestPublic, labelsTestPublic,
    imagesTestPrivate)`` with the arrays returned by ``np.load``.
  """
  def _load(filename):
    # Pass directory and file name as separate components so os.path.join
    # inserts the separator itself; concatenating them first silently built
    # a wrong path whenever `dir` had no trailing slash.
    return np.load(os.path.join(dir, filename))

  imagesTrain = _load('train_tensor.npy')
  labelsTrain = _load('train_labels.npy')
  imagesTestPublic = _load('public_test_tensor.npy')
  labelsTestPublic = _load('public_test_labels.npy')
  imagesTestPrivate = _load('private_test_tensor.npy')

  return imagesTrain, labelsTrain, imagesTestPublic, labelsTestPublic, imagesTestPrivate

In [27]:
def scaleData(image):
  """Divide ``image`` by 65535, the largest value of an unsigned 16-bit integer.

  Args:
    image: Value or array supporting true division (presumably 16-bit
      pixel intensities, which would end up in [0, 1] -- confirm with callers).

  Returns:
    ``image / 65535``.
  """
  max_uint16 = 2 ** 16 - 1
  return image / max_uint16

## Utility function that plots the confusion matrix

In [None]:
def plot_confusionMatrix(model_, testSet_, testLabels_, classes):
    """Plot the confusion matrix of ``model_`` on a labelled test set.

    The heat-map colours show the raw counts; the number written in each cell
    is the count divided by the total of its row (i.e. of its true class),
    rounded to two decimals.

    Args:
        model_: Model exposing ``predict_classes(testSet_)``.
        testSet_: Samples handed to ``model_.predict_classes``.
        testLabels_: Ground-truth class labels for ``testSet_``.
        classes: Sequence of class names used as tick labels on both axes.
    """
    pred = model_.predict_classes(testSet_)
    # scikit-learn's signature is confusion_matrix(y_true, y_pred): rows are
    # then the true classes and columns the predicted ones, which is what the
    # axis labels below claim.
    cm = metrics.confusion_matrix(testLabels_, pred)
    plt.imshow(cm, interpolation='nearest', cmap='OrRd')
    plt.title('Confusion matrix')
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # Row-normalise; a class without any true sample keeps 0 instead of
    # producing NaN through a division by zero.
    row_totals = cm.sum(axis=1)[:, np.newaxis]
    cm = np.round(
        np.divide(cm.astype('float'), row_totals,
                  out=np.zeros(cm.shape, dtype=float),
                  where=row_totals != 0),
        2)
    print("Normalized confusion matrix")
    # Cells above this normalised value get white text, the others black.
    thresh = 0.6

    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, cm[i, j], horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    # Run the layout after the axis labels exist so they are accounted for.
    plt.tight_layout()
    plt.show()

## Utility function that plots ROC curves

In [44]:
def plotRocCurves(models, test, labels):
  """Plot the ROC curve of every model in ``models`` on one figure.

  Each curve is labelled ``model<k>= <AUC>`` (``k`` starts at 1) and takes its
  colour from matplotlib's default colour cycle. The chance diagonal is drawn
  once as a red dashed line.

  Args:
    models: Sequence of models exposing ``predict_proba(test)``. Column 1 of
      the returned array is used as the score (presumably the positive-class
      probability of a binary classifier -- confirm against callers).
    test: Samples handed to each model's ``predict_proba``.
    labels: Ground-truth labels for ``test``.
  """
  for i, model in enumerate(models):
    probs = model.predict_proba(test)
    preds = probs[:, 1]
    fpr, tpr, _ = metrics.roc_curve(labels, preds)
    roc_auc = metrics.auc(fpr, tpr)
    # No format string / random colour: a 'b' format combined with color=
    # defined the colour twice, and unseeded random colours changed at
    # every run.
    plt.plot(fpr, tpr, label='model' + str(i+1) + '= %0.2f' % roc_auc)

  # Figure decorations do not depend on the model, so set them once.
  plt.plot([0, 1], [0, 1], 'r--')
  plt.title('ROC curve')
  plt.xlim([0, 1])
  plt.ylim([0, 1])
  plt.ylabel('True Positive Rate')
  plt.xlabel('False Positive Rate')
  if len(models) > 0:
    # Skip the legend when nothing was labelled.
    plt.legend(loc = 'lower right')
  plt.show()

In [29]:
def plot_accurancy_loss(hist):
  """Draw training/validation accuracy and loss curves from a fit history.

  The accuracy curves are drawn on the figure that is current when the
  function is called; the loss curves go on a newly created figure. The
  y-axis of both plots is fixed to the range 0-1.

  Args:
    hist: Object whose ``history`` attribute is a mapping with the keys
      ``'accuracy'``, ``'val_accuracy'``, ``'loss'`` and ``'val_loss'``
      (presumably a Keras ``History`` object -- confirm against callers).
  """
  record = hist.history
  train_acc, valid_acc = record['accuracy'], record['val_accuracy']
  train_loss, valid_loss = record['loss'], record['val_loss']
  x_axis = range(len(train_acc))

  # (training series, its label, validation series, its label, plot title)
  panels = [
      (train_acc, 'Training acc', valid_acc, 'Validation acc',
       'Training and validation accuracy'),
      (train_loss, 'Training loss', valid_loss, 'Validation loss',
       'Training and validation loss'),
  ]

  for position, (train, train_label, valid, valid_label, heading) in enumerate(panels):
    if position:
      # Only the second panel opens a figure of its own.
      plt.figure()
    plt.ylim(0, 1)
    plt.plot(x_axis, train, 'bo', label=train_label)
    plt.plot(x_axis, valid, 'b', label=valid_label)
    plt.title(heading)
    plt.legend()

  plt.show()

In [30]:
def plot_roc_curve(model1, model1_name, model2, model2_name, set_, labels=None):
  """Plot the ROC curve of ``model1`` evaluated on ``set_``.

  Args:
    model1: Model exposing ``predict_classes(set_)``.
    model1_name: Name shown for the curve in the legend.
    model2: Accepted for interface compatibility; not plotted.
    model2_name: Accepted for interface compatibility; not used.
    set_: Samples handed to ``model1.predict_classes``.
    labels: Ground-truth labels for ``set_``. Optional only for backward
      compatibility: when omitted, ``set_`` itself is passed to ``roc_curve``
      as the ground truth, exactly as before this parameter existed. New
      callers should always provide it.

  NOTE(review): ``predict_classes`` presumably returns hard class labels, in
  which case the curve has a single operating point; consider feeding
  continuous scores instead -- confirm against the model API.
  """
  y_true = set_ if labels is None else labels
  y_pred_1 = model1.predict_classes(set_)
  fpr_1, tpr_1, _ = roc_curve(y_true, y_pred_1)
  auc_1 = auc(fpr_1, tpr_1)

  plt.figure(1)
  plt.plot([0, 1], [0, 1], 'k--')
  # Use the caller-supplied name; the legend used to show the literal text
  # "model1_name".
  plt.plot(fpr_1, tpr_1, label='{} (area = {:.3f})'.format(model1_name, auc_1))
  plt.xlabel('False positive rate')
  plt.ylabel('True positive rate')
  plt.title('ROC curve')
  plt.legend(loc='best')
  plt.show()

In [31]:
def plot_roc_curve1(model1, model1_name, model2, model2_name, set_):
  """Plot per-class, micro-average and macro-average ROC curves.

  NOTE(review): none of the five parameters is used. The function reads
  ``n_classes``, ``y_test`` and ``y_score`` from the surrounding
  (notebook/module) namespace, so these names must exist before the call.
  ``y_test`` and ``y_score`` are indexed as ``[:, i]`` for every class index
  ``i`` and flattened with ``ravel()`` (presumably one-hot labels and
  per-class scores -- confirm against the calling notebook).

  Args:
    model1: Unused.
    model1_name: Unused.
    model2: Unused.
    model2_name: Unused.
    set_: Unused.
  """
  # Plot linewidth.
  lw = 2

  # Compute ROC curve and ROC area for each class
  fpr = dict()
  tpr = dict()
  roc_auc = dict()
  for i in range(n_classes):
      fpr[i], tpr[i], _ = roc_curve(y_test[:, i], y_score[:, i])
      roc_auc[i] = auc(fpr[i], tpr[i])

  # Compute micro-average ROC curve and ROC area
  fpr["micro"], tpr["micro"], _ = roc_curve(y_test.ravel(), y_score.ravel())
  roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])

  # Compute macro-average ROC curve and ROC area

  # First aggregate all false positive rates
  all_fpr = np.unique(np.concatenate([fpr[i] for i in range(n_classes)]))

  # Then interpolate all ROC curves at these points.
  # np.interp replaces the bare `interp`, which was never imported here.
  mean_tpr = np.zeros_like(all_fpr)
  for i in range(n_classes):
      mean_tpr += np.interp(all_fpr, fpr[i], tpr[i])

  # Finally average it and compute AUC
  mean_tpr /= n_classes

  fpr["macro"] = all_fpr
  tpr["macro"] = mean_tpr
  roc_auc["macro"] = auc(fpr["macro"], tpr["macro"])

  # Plot all ROC curves
  plt.figure(1)
  plt.plot(fpr["micro"], tpr["micro"],
          label='micro-average ROC curve (area = {0:0.2f})'
                ''.format(roc_auc["micro"]),
          color='deeppink', linestyle=':', linewidth=4)

  plt.plot(fpr["macro"], tpr["macro"],
          label='macro-average ROC curve (area = {0:0.2f})'
                ''.format(roc_auc["macro"]),
          color='navy', linestyle=':', linewidth=4)

  # itertools.cycle replaces the bare `cycle`, which was never imported; the
  # three colours repeat when there are more than three classes.
  colors = itertools.cycle(['aqua', 'darkorange', 'cornflowerblue'])
  for i, color in zip(range(n_classes), colors):
      plt.plot(fpr[i], tpr[i], color=color, lw=lw,
              label='ROC curve of class {0} (area = {1:0.2f})'
              ''.format(i, roc_auc[i]))

  plt.plot([0, 1], [0, 1], 'k--', lw=lw)
  plt.xlim([0.0, 1.0])
  plt.ylim([0.0, 1.05])
  plt.xlabel('False Positive Rate')
  plt.ylabel('True Positive Rate')
  plt.title('Some extension of Receiver operating characteristic to multi-class')
  plt.legend(loc="lower right")
  plt.show()