## Import Libraries, Access Google Drive

In [15]:
# General Libraries
import pandas as pd
import numpy as np

# For validation
from sklearn.model_selection import GroupKFold

# For evaluation
from sklearn.metrics import accuracy_score, precision_score, confusion_matrix

# Supervised Models
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier

# Binary Logistic Regression

In [16]:
def binaryLogisticRegression(feature, label, folds: int, groups, fileName: str, saveLoc: str) -> None:
  """Cross-validate a logistic regression with GroupKFold and save the results.

  For every fold a fresh ``LogisticRegression(solver='liblinear')`` is fitted
  on the training split; accuracy and precision are computed on both the
  training and the test split, and a confusion matrix on the test split.

  Two files are written (the target directories are not created here, so
  they must already exist):

  * ``<saveLoc>Binary Logistic Regression/Per Fold Results/<fileName>.csv``
    with one row per fold.
  * ``<saveLoc>Binary Logistic Regression/ConfusionMatrices.csv``, to which
    one line ``<fileName>,<rows of the fold-averaged confusion matrix>`` is
    appended.

  Args:
    feature: Model inputs; must support positional ``.iloc`` indexing
      (presumably a pandas DataFrame).
    label: Targets; must support positional ``.iloc`` indexing (presumably a
      pandas Series). ``precision_score`` is called with its defaults.
    folds: Number of splits handed to ``GroupKFold``.
    groups: Group identifiers handed to ``GroupKFold.split``.
    fileName: Name used for the per-fold CSV and as the row label in
      ``ConfusionMatrices.csv``.
    saveLoc: Path prefix that is concatenated as-is in front of the output
      paths, so it must already end with a path separator.
  """
  # Per-fold results, one entry per split
  foldNumber = []
  trainAccuracy = []
  trainPrecision = []
  testAccuracy = []
  testPrecision = []
  confusionMatrixList = []

  # Pin the label set so every fold yields a confusion matrix of the same
  # shape. Without it, a test fold that lacks one class produces a smaller
  # matrix and the averaging below breaks.
  classLabels = np.unique(label)

  group_kfold = GroupKFold(n_splits=folds)
  splits = group_kfold.split(feature, label, groups)
  for fold, (train_index, test_index) in enumerate(splits, start=1):
    # Getting Train and Test Sets
    inputTrain, inputTest = feature.iloc[train_index], feature.iloc[test_index]
    targetTrain, targetTest = label.iloc[train_index], label.iloc[test_index]

    # Creating and fitting Model
    model = LogisticRegression(solver='liblinear', max_iter=100000)
    model.fit(inputTrain, targetTrain)

    # Fold Number (1-based)
    foldNumber.append(fold)

    # Train Accuracy and Precision
    trainPrediction = model.predict(inputTrain)
    trainAccuracy.append(accuracy_score(targetTrain, trainPrediction))
    trainPrecision.append(precision_score(targetTrain, trainPrediction))

    # Test Accuracy and Precision
    testPrediction = model.predict(inputTest)
    testAccuracy.append(accuracy_score(targetTest, testPrediction))
    testPrecision.append(precision_score(targetTest, testPrediction))

    # Confusion Matrix on the held-out fold
    confusionMatrixList.append(confusion_matrix(targetTest, testPrediction, labels=classLabels))

  # Save per fold results
  csvFile = pd.DataFrame({
    "foldNumber": foldNumber,
    "trainAccuracy": trainAccuracy,
    "trainPrecision": trainPrecision,
    "testAccuracy": testAccuracy,
    "testPrecision": testPrecision,
    "confusionMatrix": confusionMatrixList,
  })
  csvFile.to_csv(saveLoc + "Binary Logistic Regression/Per Fold Results/" + fileName + ".csv", index=False)

  # Append the confusion matrix averaged element-wise over all folds.
  # The `with` block closes the file, so no explicit close is needed.
  averageMatrix = np.mean(confusionMatrixList, axis=0)
  matrixText = "".join(f"{row}" for row in averageMatrix)
  with open(saveLoc + "Binary Logistic Regression/ConfusionMatrices.csv", "a") as file:
    file.write(fileName + "," + matrixText + "\n")

# Random Forest

In [17]:
def randomForest(feature, label, folds: int, groups, fileName: str, saveLoc: str) -> None:
  """Cross-validate a random forest with GroupKFold and save the results.

  For every fold a fresh ``RandomForestClassifier(n_estimators=50,
  random_state=42)`` is fitted on the training split; accuracy and precision
  are computed on both the training and the test split, and a confusion
  matrix on the test split.

  Two files are written (the target directories are not created here, so
  they must already exist):

  * ``<saveLoc>Random Forest/Per Fold Results/<fileName>.csv`` with one row
    per fold.
  * ``<saveLoc>Random Forest/ConfusionMatrices.csv``, to which one line
    ``<fileName>,<rows of the fold-averaged confusion matrix>`` is appended.

  Args:
    feature: Model inputs; must support positional ``.iloc`` indexing
      (presumably a pandas DataFrame).
    label: Targets; must support positional ``.iloc`` indexing (presumably a
      pandas Series). ``precision_score`` is called with its defaults.
    folds: Number of splits handed to ``GroupKFold``.
    groups: Group identifiers handed to ``GroupKFold.split``.
    fileName: Name used for the per-fold CSV and as the row label in
      ``ConfusionMatrices.csv``.
    saveLoc: Path prefix that is concatenated as-is in front of the output
      paths, so it must already end with a path separator.
  """
  # Per-fold results, one entry per split
  foldNumber = []
  trainAccuracy = []
  trainPrecision = []
  testAccuracy = []
  testPrecision = []
  confusionMatrixList = []

  # Pin the label set so every fold yields a confusion matrix of the same
  # shape. Without it, a test fold that lacks one class produces a smaller
  # matrix and the averaging below breaks.
  classLabels = np.unique(label)

  group_kfold = GroupKFold(n_splits=folds)
  splits = group_kfold.split(feature, label, groups)
  for fold, (train_index, test_index) in enumerate(splits, start=1):
    # Getting Train and Test Sets
    inputTrain, inputTest = feature.iloc[train_index], feature.iloc[test_index]
    targetTrain, targetTest = label.iloc[train_index], label.iloc[test_index]

    # Creating and fitting Model
    model = RandomForestClassifier(n_estimators=50, random_state=42)
    model.fit(inputTrain, targetTrain)

    # Fold Number (1-based)
    foldNumber.append(fold)

    # Train Accuracy and Precision
    trainPrediction = model.predict(inputTrain)
    trainAccuracy.append(accuracy_score(targetTrain, trainPrediction))
    trainPrecision.append(precision_score(targetTrain, trainPrediction))

    # Test Accuracy and Precision
    testPrediction = model.predict(inputTest)
    testAccuracy.append(accuracy_score(targetTest, testPrediction))
    testPrecision.append(precision_score(targetTest, testPrediction))

    # Confusion Matrix on the held-out fold
    confusionMatrixList.append(confusion_matrix(targetTest, testPrediction, labels=classLabels))

  # Save per fold results
  csvFile = pd.DataFrame({
    "foldNumber": foldNumber,
    "trainAccuracy": trainAccuracy,
    "trainPrecision": trainPrecision,
    "testAccuracy": testAccuracy,
    "testPrecision": testPrecision,
    "confusionMatrix": confusionMatrixList,
  })
  csvFile.to_csv(saveLoc + "Random Forest/Per Fold Results/" + fileName + ".csv", index=False)

  # Append the confusion matrix averaged element-wise over all folds.
  # The `with` block closes the file, so no explicit close is needed.
  averageMatrix = np.mean(confusionMatrixList, axis=0)
  matrixText = "".join(f"{row}" for row in averageMatrix)
  with open(saveLoc + "Random Forest/ConfusionMatrices.csv", "a") as file:
    file.write(fileName + "," + matrixText + "\n")

# Neural Network  
  

In [18]:
def neuralNetwork(feature, label, folds: int, groups, fileName: str, saveLoc: str) -> None:
  """Cross-validate a one-hidden-layer MLP with GroupKFold and save the results.

  For every fold a fresh ``MLPClassifier(hidden_layer_sizes=(32,),
  max_iter=10000, random_state=42)`` is fitted on the training split;
  accuracy and precision are computed on both the training and the test
  split, and a confusion matrix on the test split.

  Two files are written (the target directories are not created here, so
  they must already exist):

  * ``<saveLoc>Neural Network/Per Fold Results/<fileName>.csv`` with one row
    per fold.
  * ``<saveLoc>Neural Network/ConfusionMatrices.csv``, to which one line
    ``<fileName>,<rows of the fold-averaged confusion matrix>`` is appended.

  Args:
    feature: Model inputs; must support positional ``.iloc`` indexing
      (presumably a pandas DataFrame).
    label: Targets; must support positional ``.iloc`` indexing (presumably a
      pandas Series). ``precision_score`` is called with its defaults.
    folds: Number of splits handed to ``GroupKFold``.
    groups: Group identifiers handed to ``GroupKFold.split``.
    fileName: Name used for the per-fold CSV and as the row label in
      ``ConfusionMatrices.csv``.
    saveLoc: Path prefix that is concatenated as-is in front of the output
      paths, so it must already end with a path separator.
  """
  # Per-fold results, one entry per split
  foldNumber = []
  trainAccuracy = []
  trainPrecision = []
  testAccuracy = []
  testPrecision = []
  confusionMatrixList = []

  # Pin the label set so every fold yields a confusion matrix of the same
  # shape. Without it, a test fold that lacks one class produces a smaller
  # matrix and the averaging below breaks.
  classLabels = np.unique(label)

  group_kfold = GroupKFold(n_splits=folds)
  splits = group_kfold.split(feature, label, groups)
  for fold, (train_index, test_index) in enumerate(splits, start=1):
    # Getting Train and Test Sets
    inputTrain, inputTest = feature.iloc[train_index], feature.iloc[test_index]
    targetTrain, targetTest = label.iloc[train_index], label.iloc[test_index]

    # Creating and fitting Model
    model = MLPClassifier(hidden_layer_sizes=(32,), max_iter=10000, random_state=42)
    model.fit(inputTrain, targetTrain)

    # Fold Number (1-based)
    foldNumber.append(fold)

    # Train Accuracy and Precision
    trainPrediction = model.predict(inputTrain)
    trainAccuracy.append(accuracy_score(targetTrain, trainPrediction))
    trainPrecision.append(precision_score(targetTrain, trainPrediction))

    # Test Accuracy and Precision
    testPrediction = model.predict(inputTest)
    testAccuracy.append(accuracy_score(targetTest, testPrediction))
    testPrecision.append(precision_score(targetTest, testPrediction))

    # Confusion Matrix on the held-out fold
    confusionMatrixList.append(confusion_matrix(targetTest, testPrediction, labels=classLabels))

  # Save per fold results
  csvFile = pd.DataFrame({
    "foldNumber": foldNumber,
    "trainAccuracy": trainAccuracy,
    "trainPrecision": trainPrecision,
    "testAccuracy": testAccuracy,
    "testPrecision": testPrecision,
    "confusionMatrix": confusionMatrixList,
  })
  csvFile.to_csv(saveLoc + "Neural Network/Per Fold Results/" + fileName + ".csv", index=False)

  # Append the confusion matrix averaged element-wise over all folds.
  # The `with` block closes the file, so no explicit close is needed.
  averageMatrix = np.mean(confusionMatrixList, axis=0)
  matrixText = "".join(f"{row}" for row in averageMatrix)
  with open(saveLoc + "Neural Network/ConfusionMatrices.csv", "a") as file:
    file.write(fileName + "," + matrixText + "\n")