# **IMPORT MODULES**

In [None]:
from sklearn import datasets, svm, metrics
import csv, os
import numpy as np
import matplotlib.pyplot as plt
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier


# **CONNECT TO GOOGLE DRIVE**

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# **MULTI CLASS CONFUSION MATRIX PROCESSOR**

In [None]:
def confusion_matrix_processor(matrix):
  first_tp, first_fp, first_tn, first_fn = matrix[0,0] , matrix[0,1] + matrix[0,2], matrix[1,1,] + matrix[1,2] + matrix[2,1] + matrix[2,2], matrix[1,0] + matrix[2,0]
  second_tp, second_fp, second_tn, second_fn = matrix[1,1] , matrix[1,0] + matrix[1,2], matrix[0,0] + matrix[0,2] + matrix[2,0] + matrix[2,2], matrix[0,1] + matrix[2,1]
  third_tp, third_fp, third_tn, third_fn = matrix[2,2] , matrix[2,0] + matrix[2,1], matrix[0,0] + matrix[0,1] + matrix[1,0] + matrix[1,1], matrix[0,2] + matrix[1,2]

  return [first_tp, first_fp, first_tn, first_fn], [second_tp, second_fp, second_tn, second_fn], [third_tp, third_fp, third_tn, third_fn]

# **MULTI CLASS CONFUSION MATRIX PROCESSOR**
*separate for each class*

In [None]:
def detailed_confusion_matrix_processor(matrix):
  # Confusion matrix processing
  confusion_matrix = metrics.confusion_matrix(y_test, prediction)
  fp = confusion_matrix.sum(axis=0) - np.diag(confusion_matrix) 
  fn = confusion_matrix.sum(axis=1) - np.diag(confusion_matrix)
  tp = np.diag(confusion_matrix)
  tn = confusion_matrix.sum() - (fp + fn + tp)

  # Overall accuracy for each class
  accuracy = (tp+tn)/(tp+fp+fn+tn)
  # Sensitivity, hit rate, recall, or true positive rate
  tpr = tp/(tp+fn)
  # Specificity or true negative rate
  tnr = tn/(tn+fp) 
  # Fall out or false positive rate
  fpr = fp/(fp+tn)
  # False negative rate
  fnr = fn/(tp+fn)
  # Precision or positive predictive value
  precision = tp/(tp+fp)
  # Negative predictive value
  npv = tn/(tn+fn)
  # False discovery rate
  fdr = fp/(tp+fp)

  print(accuracy, tpr, tnr, fpr, fnr, precision, npv, fdr)

# **DIRECTORY CHECKER**

In [None]:
def check_dir(path):
  if os.path.isdir(path) == False:
    os.makedirs(path)

# **GENERATE CONFUSION MATRIX**

In [None]:
# Before executing this cell please excute the cells named "Import Modules" and "Connect to Google Drive"
def generate_confusion_matrix(disease):
  # modify where the folds are in your drive, fold dir should have test_1.csv and Pred_1.csv and so on
  # in this code segment, Dataset is the root folder in my google drive
  #folds_dir = "/content/drive/MyDrive/Dataset/Alzheimer's and Schizophrenia/Dataset/" + disease + "/Folds/"
  folds_dir = "/content/drive/MyDrive/AD and SZ/" + disease + "/Folds/"

  # loops through files 1 to 5
  for i in range(1,6):
    # you can change test and Pred according to your file initials
    y_test_csv = folds_dir + "test_" + str(i) + ".csv"
    y_pred_csv = folds_dir + "Pred_" + str(i) + ".csv"

    y_test = []
    y_pred = []

    # reading file into lists
    with open(y_test_csv, newline='') as f:
      reader = csv.reader(f)
    
      for row in reader:
       y_test.append(row[-1])

    with open(y_pred_csv, newline='') as f:
      reader = csv.reader(f)
      
      for row in reader:
        y_pred.append(row[0])

    confusion_matrix = metrics.confusion_matrix(y_test, y_pred)

    print("--------------------- ","Fold ", str(i), " ---------------------")
    print(confusion_matrix)
    print("-----------------------------------------------------")

    disp = metrics.ConfusionMatrixDisplay(confusion_matrix=confusion_matrix)
    disp.plot()
    plt.show()

generate_confusion_matrix("AD")

#Model-free Metrics

In [None]:
i = 1
folds_dir = "/content/drive/MyDrive/AD and SZ/AD/Folds/"
y_test_csv = folds_dir + "test_" + str(i) + ".csv"
y_pred_csv = folds_dir + "Pred_" + str(i) + ".csv"

y_test = []
y_pred = []

# reading file into lists
with open(y_test_csv, newline='') as f:
  reader = csv.reader(f)
  for row in reader:
    y_test.append(row[-1])
with open(y_pred_csv, newline='') as f:
  reader = csv.reader(f)
  for row in reader:
    y_pred.append(row[0])
accuracy = metrics.accuracy_score(y_test, y_pred)
recall = metrics.recall_score(y_test, y_pred, average="macro")
precision = metrics.precision_score(y_test, y_pred, average="macro")
#roc_auc_score = metrics.roc_auc_score(y_test, classifier.p, multi_class="ovr")
f1 = metrics.f1_score(y_test, y_pred, average='weighted')

# Specificity for Multiclass
first_cm, second_cm, third_cm = confusion_matrix_processor(metrics.confusion_matrix(y_test, y_pred))
first_specificity = first_cm[1] / (first_cm[1] + first_cm[2])
second_specificity = second_cm[1] / (second_cm[1] + second_cm[2])
third_specificity = third_cm[1] / (third_cm[1] + third_cm[2])

print("----------------------------","Fold ", str(i), "---------------------------------------")
print('Accuracy: ', "%.2f" % (accuracy*100), "%")
print("Recall: ", "%.2f" % (recall*100), "%")
print("Precision: ", "%.2f" % (precision*100), "%")
#print("ROC AUC Score: ", "%.2f" % (roc_auc_score*100), "%")
print('F1: ', "%.2f" % (f1*100), "%")
print("First label Specificity: ", "%.2f" % (first_specificity*100), "%")
print("Second label Specificity: ", "%.2f" % (second_specificity*100), "%")
print("Third label Specificity: ", "%.2f" % (third_specificity*100), "%")
print("-------------------------------------------------------------------------------------------------")

---------------------------- Fold  1 ---------------------------------------
Accuracy:  34.51 %
Recall:  30.77 %
Precision:  22.61 %
F1:  29.13 %
First label Specificity:  36.59 %
Second label Specificity:  26.95 %
Third label Specificity:  40.98 %
-------------------------------------------------------------------------------------------------


# **CSV WRITER**

In [1]:
def csv_writer(disease, model_name, fold, data):
  if disease == "AD":
    header = ["Test", 0, 1, 2]
  else:
    header = ["Test", 0, 1]

  result_dir = "/content/drive/MyDrive/Results/" + disease + "/" + model_name + "/Fold " + str(fold) + "/"
  check_dir(result_dir)
  fold_csv = result_dir + "Fold_" + str(fold) + ".csv"

  print(fold_csv)

  with open(fold_csv, 'w', encoding='UTF8') as f:
    writer = csv.writer(f)

    # write the header
    writer.writerow(header)

    # write the data
    for i, row in enumerate(data):
      processed_row = []
      processed_row.append(i+1)
      
      for value in row:
        value = "%.2f" % (value*100) + "%"
        processed_row.append(value)

      writer.writerow(processed_row)
    
    f.close()

# **TEXT WRITER**

In [None]:
def text_writer(disease, model_name, fold, accuracy, recall, precision, roc_auc_score, f1, specificities):
  result_dir = "/content/drive/MyDrive/Results/" + disease + "/" + model_name + "/Fold " + str(fold) + "/"
  check_dir(result_dir)
  fold_txt = result_dir + "Fold_" + str(fold) + ".txt"
  labels = ["First Label ", "Second Label ", "Third Label "]
  specificity = ""

  if len(specificities) > 1:
    for i, s in enumerate(specificities):
      specificity = specificity + labels[i] + "Specificity: " + "%.2f" % (s*100) + "%\n"
  else:
    specificity = "Specificity: " + "%.2f" % (specificities[0]*100) + "%\n"

  text_data = "---------------------------- " + model_name.upper() + " (Without Age) - Fold " + str(fold) + " ---------------------------------------\n" + "Accuracy: " + "%.2f" % (accuracy*100) + "%\n" + "Recall: " + "%.2f" % (recall*100)+ "%\n" + "Precision: " + "%.2f" % (precision*100) + "%\n" + "ROC AUC Score: " + "%.2f" % (roc_auc_score*100) + "%\n" +'F1: ' + "%.2f" % (f1*100)+ "%\n" + specificity + "-------------------------------------------------------------------------------------------------"

  with open(fold_txt, 'w', encoding='UTF8') as f:
    f.write(text_data)
    f.close()

# **GAUSSIAN NAIVE BAYES MULTI CLASSIFIER**

In [None]:
 # gnb = GaussianNB()
  # gnb.fit(X_train, y_train)
  # gnb_pred = gnb.predict(X_test)

  # gnb_accuracy = metrics.accuracy_score(y_test, gnb_pred)
  # gnb_recall = metrics.recall_score(y_test, gnb_pred, average="macro")
  # gnb_precision = metrics.precision_score(y_test, gnb_pred, average="macro")
  # gnb_roc_auc_score = metrics.roc_auc_score(y_test, gnb_pred, multi_class="ovr")
  # gnb_auc = metrics.auc(y_test, gnb_pred)
  # gnb_roc_auc_score = metrics.roc_auc_score(y_test, gnb.predict_proba(X_test), multi_class="ovr")
  # gnb_f1 = metrics.f1_score(y_test, gnb_pred, average='weighted')

  # Specificity for Multiclass
  # first_cm, second_cm, third_cm = confusion_matrix_processor(metrics.confusion_matrix(y_test, gnb_pred))
  # first_specificity = first_cm[1] / (first_cm[1] + first_cm[2])
  # second_specificity = second_cm[1] / (second_cm[1] + second_cm[2])
  # third_specificity = third_cm[1] / (third_cm[1] + third_cm[2])

  # gnb_specificity = tn / (tn+fp)
  # gnb_confusion_matrix_plot = metrics.plot_confusion_matrix(gnb, X_test, y_test)
  # plt.show()
  # print("----------------------------","Fold ", str(i), "---------------------------------------")
  # print('Accuracy (Gaussian Naive Bayes): ', "%.2f" % (gnb_accuracy*100))
  # print("GNB Recall:", gnb_recall*100)
  # print("GNB Precision:", gnb_precision*100)
  # print("GNB Specificity: ", gnb_specificity)
  # print("GNB First label Specificity: ", first_specificity)
  # print("GNB Second label Specificity: ", second_specificity)
  # print("GNB Third label Specificity: ", third_specificity)

  # print("GNB AUC: ", gnb_roc_auc_score)
  # print('F1 (Gaussian Naive Bayes): ', "%.2f" % (gnb_f1*100))
  # print('Confusion Matrix: ')
  # print("-------------------------------------------------------------------")

def gnb_multi_classifier(X_train, y_train, X_test, y_test, disease, fold):
  classifier = GaussianNB().fit(X_train, y_train) # Gaussian Naive Bayes
  prediction = classifier.predict(X_test)

  accuracy = metrics.accuracy_score(y_test, prediction)
  recall = metrics.recall_score(y_test, prediction, average="macro")
  precision = metrics.precision_score(y_test, prediction, average="macro")
  roc_auc_score = metrics.roc_auc_score(y_test, classifier.predict_proba(X_test), multi_class="ovr")
  f1 = metrics.f1_score(y_test, prediction, average='weighted')

  # Specificity for Multiclass
  first_cm, second_cm, third_cm = confusion_matrix_processor(metrics.confusion_matrix(y_test, prediction))
  first_specificity = first_cm[1] / (first_cm[1] + first_cm[2])
  second_specificity = second_cm[1] / (second_cm[1] + second_cm[2])
  third_specificity = third_cm[1] / (third_cm[1] + third_cm[2])

  text_writer(disease, fold, accuracy, recall, precision, roc_auc_score, f1, [first_specificity, second_specificity, third_specificity])
  csv_writer(disease, fold, classifier.predict_proba(X_test))

  confusion_matrix = metrics.plot_confusion_matrix(classifier, X_test, y_test)
  plt.show()

  print("----------------------------","Gaussian Naive Bayes Multi Class (Without Age) - Fold ", str(i), "---------------------------------------")
  print('Accuracy: ', "%.2f" % (accuracy*100), "%")
  print("Recall: ", "%.2f" % (recall*100), "%")
  print("Precision: ", "%.2f" % (precision*100), "%")
  print("ROC AUC Score: ", "%.2f" % (roc_auc_score*100), "%")
  print('F1: ', "%.2f" % (f1*100), "%")
  print("First label Specificity: ", "%.2f" % (first_specificity*100), "%")
  print("Second label Specificity: ", "%.2f" % (second_specificity*100), "%")
  print("Third label Specificity: ", "%.2f" % (third_specificity*100), "%")
  print("-------------------------------------------------------------------------------------------------")

# **GAUSSIAN NAIVE BAYES BINARY CLASSIFIER**

In [None]:
def gnb_binary_classifier(X_train, y_train, X_test, y_test, disease, fold):
  classifier = GaussianNB().fit(X_train, y_train) # Gaussian Naive Bayes
  prediction = classifier.predict(X_test)

  accuracy = metrics.accuracy_score(y_test, prediction)
  recall = metrics.recall_score(y_test, prediction, average="macro")
  precision = metrics.precision_score(y_test, prediction, average="macro")
  roc_auc_score = metrics.roc_auc_score(y_test, classifier.predict_proba(X_test)[:, 1])
  f1 = metrics.f1_score(y_test, prediction, average='weighted')

  # Specificity for binary class
  tn, fp, fn, tp = metrics.confusion_matrix(y_test, prediction).ravel()
  specificity = tn / (tn + fp)

  text_writer(disease, fold, accuracy, recall, precision, roc_auc_score, f1, [specificity])
  csv_writer(disease, fold, classifier.predict_proba(X_test))

  confusion_matrix = metrics.plot_confusion_matrix(classifier, X_test, y_test)
  plt.show()

  print("----------------------------","Gaussian Naive Bayes Binary CLass (Without Age) - Fold ", str(i), "---------------------------------------")
  print('Accuracy: ', "%.2f" % (accuracy*100), "%")
  print("Recall: ", "%.2f" % (recall*100), "%")
  print("Precision: ", "%.2f" % (precision*100), "%")
  print("ROC AUC Score: ", "%.2f" % (roc_auc_score*100), "%")
  print('F1: ', "%.2f" % (f1*100), "%")
  print("Specificity: ", "%.2f" % (specificity*100), "%")
  print("-------------------------------------------------------------------------------------------------")

# **KNN MULTI CLASSIFIER**

In [None]:
def knn_multi_classifier(X_train, y_train, X_test, y_test, disease, model_name, fold):
  classifier = KNeighborsClassifier().fit(X_train, y_train) #KNN Classifier
  prediction = classifier.predict(X_test)

  accuracy = metrics.accuracy_score(y_test, prediction)
  recall = metrics.recall_score(y_test, prediction, average="macro")
  precision = metrics.precision_score(y_test, prediction, average="macro")
  roc_auc_score = metrics.roc_auc_score(y_test, classifier.predict_proba(X_test), multi_class="ovr")
  f1 = metrics.f1_score(y_test, prediction, average='weighted')

  # Specificity for Multiclass
  first_cm, second_cm, third_cm = confusion_matrix_processor(metrics.confusion_matrix(y_test, prediction))
  first_specificity = first_cm[1] / (first_cm[1] + first_cm[2])
  second_specificity = second_cm[1] / (second_cm[1] + second_cm[2])
  third_specificity = third_cm[1] / (third_cm[1] + third_cm[2])

  text_writer(disease, model_name, fold, accuracy, recall, precision, roc_auc_score, f1, [first_specificity, second_specificity, third_specificity])
  csv_writer(disease, model_name, fold, classifier.predict_proba(X_test))

  confusion_matrix = metrics.plot_confusion_matrix(classifier, X_test, y_test)
  plt.show()

  print("----------------------------","SVM RBF (Without Age) - Fold ", str(i), "---------------------------------------")
  print('Accuracy: ', "%.2f" % (accuracy*100), "%")
  print("Recall: ", "%.2f" % (recall*100), "%")
  print("Precision: ", "%.2f" % (precision*100), "%")
  print("ROC AUC Score: ", "%.2f" % (roc_auc_score*100), "%")
  print('F1: ', "%.2f" % (f1*100), "%")
  print("First label Specificity: ", "%.2f" % (first_specificity*100), "%")
  print("Second label Specificity: ", "%.2f" % (second_specificity*100), "%")
  print("Third label Specificity: ", "%.2f" % (third_specificity*100), "%")
  print("-------------------------------------------------------------------------------------------------")

# **KNN CLASSIFIER**

In [None]:
def knn_classifier(X_train, y_train, X_test, y_test, disease, model_name, fold):
  classifier = KNeighborsClassifier().fit(X_train, y_train) #KNN Classifier
  prediction = classifier.predict(X_test)

  accuracy = metrics.accuracy_score(y_test, prediction)
  recall = metrics.recall_score(y_test, prediction, average="macro")
  precision = metrics.precision_score(y_test, prediction, average="macro")
  roc_auc_score = metrics.roc_auc_score(y_test, classifier.predict_proba(X_test)[:, 1])
  f1 = metrics.f1_score(y_test, prediction, average='weighted')

  # Specificity for binary class
  tn, fp, fn, tp = metrics.confusion_matrix(y_test, prediction).ravel()
  specificity = tn / (tn + fp)

  text_writer(disease, model_name, fold, accuracy, recall, precision, roc_auc_score, f1, [specificity])
  csv_writer(disease, model_name, fold, classifier.predict_proba(X_test))

  confusion_matrix = metrics.plot_confusion_matrix(classifier, X_test, y_test)
  plt.show()

  print("----------------------------","SVM RBF (Without Age) - Fold ", str(i), "---------------------------------------")
  print('Accuracy: ', "%.2f" % (accuracy*100), "%")
  print("Recall: ", "%.2f" % (recall*100), "%")
  print("Precision: ", "%.2f" % (precision*100), "%")
  print("ROC AUC Score: ", "%.2f" % (roc_auc_score*100), "%")
  print('F1: ', "%.2f" % (f1*100), "%")
  print("Specificity: ", "%.2f" % (specificity*100), "%")
  print("-------------------------------------------------------------------------------------------------")

# **RANDOM FOREST MULTI CLASSIFIER**

In [None]:
def rf_multi_classifier(X_train, y_train, X_test, y_test, disease, model_name, fold):
  classifier = RandomForestClassifier().fit(X_train, y_train) #Random Forest Classifier
  prediction = classifier.predict(X_test)

  accuracy = metrics.accuracy_score(y_test, prediction)
  recall = metrics.recall_score(y_test, prediction, average="macro")
  precision = metrics.precision_score(y_test, prediction, average="macro")
  roc_auc_score = metrics.roc_auc_score(y_test, classifier.predict_proba(X_test), multi_class="ovr")
  f1 = metrics.f1_score(y_test, prediction, average='weighted')

  # Specificity for Multiclass
  first_cm, second_cm, third_cm = confusion_matrix_processor(metrics.confusion_matrix(y_test, prediction))
  first_specificity = first_cm[1] / (first_cm[1] + first_cm[2])
  second_specificity = second_cm[1] / (second_cm[1] + second_cm[2])
  third_specificity = third_cm[1] / (third_cm[1] + third_cm[2])

  text_writer(disease, model_name, fold, accuracy, recall, precision, roc_auc_score, f1, [first_specificity, second_specificity, third_specificity])
  csv_writer(disease, model_name, fold, classifier.predict_proba(X_test))

  confusion_matrix = metrics.plot_confusion_matrix(classifier, X_test, y_test)
  plt.show()

  print("----------------------------","Random Forest (Without Age) - Fold ", str(i), "---------------------------------------")
  print('Accuracy: ', "%.2f" % (accuracy*100), "%")
  print("Recall: ", "%.2f" % (recall*100), "%")
  print("Precision: ", "%.2f" % (precision*100), "%")
  print("ROC AUC Score: ", "%.2f" % (roc_auc_score*100), "%")
  print('F1: ', "%.2f" % (f1*100), "%")
  print("First label Specificity: ", "%.2f" % (first_specificity*100), "%")
  print("Second label Specificity: ", "%.2f" % (second_specificity*100), "%")
  print("Third label Specificity: ", "%.2f" % (third_specificity*100), "%")
  print("-------------------------------------------------------------------------------------------------")

# **RANDOM FOREST CLASSIFIER**

In [None]:
def rf_classifier(X_train, y_train, X_test, y_test, disease, model_name, fold):
  classifier = RandomForestClassifier().fit(X_train, y_train) #KNN Classifier
  prediction = classifier.predict(X_test)

  accuracy = metrics.accuracy_score(y_test, prediction)
  recall = metrics.recall_score(y_test, prediction, average="macro")
  precision = metrics.precision_score(y_test, prediction, average="macro")
  roc_auc_score = metrics.roc_auc_score(y_test, classifier.predict_proba(X_test)[:, 1])
  f1 = metrics.f1_score(y_test, prediction, average='weighted')

  # Specificity for binary class
  tn, fp, fn, tp = metrics.confusion_matrix(y_test, prediction).ravel()
  specificity = tn / (tn + fp)

  text_writer(disease, model_name, fold, accuracy, recall, precision, roc_auc_score, f1, [specificity])
  csv_writer(disease, model_name, fold, classifier.predict_proba(X_test))

  confusion_matrix = metrics.plot_confusion_matrix(classifier, X_test, y_test)
  plt.show()

  print("----------------------------","Random Forest (Without Age) - Fold ", str(i), "---------------------------------------")
  print('Accuracy: ', "%.2f" % (accuracy*100), "%")
  print("Recall: ", "%.2f" % (recall*100), "%")
  print("Precision: ", "%.2f" % (precision*100), "%")
  print("ROC AUC Score: ", "%.2f" % (roc_auc_score*100), "%")
  print('F1: ', "%.2f" % (f1*100), "%")
  print("Specificity: ", "%.2f" % (specificity*100), "%")
  print("-------------------------------------------------------------------------------------------------")

# **MAIN LOOPING - OUTPUT**

In [None]:
disease = "SZ" # same as directory under google drive
model_name = "Random Forest" # the folder under disease folder
base_dir = "/content/drive/MyDrive/Dataset/Alzheimer's and Schizophrenia/Dataset/" + disease + "/Folds/"

for i in range(1, 6):
  current_test = base_dir + "test_" + str(i) + ".csv"
  current_train = base_dir + "train_" + str(i) + ".csv"

  X_train = []
  y_train = []
  X_test = []
  y_test = []

  with open(current_train, newline='') as f:
    reader = csv.reader(f)
    
    for row in reader:
      X_train.append(row[0:13])
      y_train.append(row[-1])

  with open(current_test, newline='') as f:
    reader = csv.reader(f)
    
    for row in reader:
      X_test.append(row[0:13])
      y_test.append(row[-1])

  
  rf_classifier(X_train, y_train, X_test, y_test, disease, model_name, i)