In [25]:
# 01/25

In [26]:
from ipynb.fs.full.performance import performanceEval as perf
from sklearn import tree
import matplotlib.pyplot as plt
class baseModel:
  """Pairs a model object with a short name tag and exposes train/test/evaluate.

  The wrapped object only needs `fit(X, y)` and `predict(X)`; the name tag is
  passed on to the performance evaluator and also decides whether the fitted
  model is drawn as a tree after training.
  """

  def __init__(self, model, modelName):
    """Store the wrapped model object and its name tag."""
    self.modelName = modelName
    self.model = model

  def train(self, Xtrain, Ytrain):
    """Fit the wrapped model on the training data.

    When the name tag is "dtModel", the fitted tree is also plotted.

    Returns:
      A two-element list: the status string "training complete" and this
      wrapper instance.
    """
    self.model.fit(Xtrain, Ytrain)
    isDecisionTree = self.modelName == "dtModel"
    if isDecisionTree:
      self._showTree()
    return ["training complete", self]

  def _showTree(self):
    """Draw the fitted model with sklearn's tree plotter on a large canvas."""
    # Oversized, high-DPI figure so that the node labels stay legible.
    plt.figure(figsize=(20, 10), dpi=200)
    tree.plot_tree(self.model, filled=True)
    plt.show()

  def test(self, Xtest):
    """Return the wrapped model's predictions for Xtest."""
    return self.model.predict(Xtest)

  def perfEval(self, Ypred, Ytest):
    """Forward predictions, reference labels and the name tag to `perf`.

    `perf` is `performanceEval` imported from the `performance` notebook; its
    result is returned unchanged.
    """
    return perf(Ypred, Ytest, self.modelName)

In [27]:
# 10/24
######### LOGISTIC REGRESSION #########
from sklearn.linear_model import LogisticRegression

class logReg(baseModel):
  """Logistic-regression classifier registered under the "regModel" tag."""

  def __init__(self):
    # Seed fixed at 0 so that repeated fits give the same model.
    estimator = LogisticRegression(random_state=0)
    super().__init__(estimator, "regModel")

In [28]:
######### SVM #########
# https://scikit-learn.org/1.5/modules/svm.html#classification
# https://scikit-learn.org/1.5/modules/svm.html#svc (mathematical theory)

from sklearn import svm

class supVecMac(baseModel):
  """Linear support-vector classifier registered under the "svmModel" tag."""

  def __init__(self):
    # LinearSVC shuffles the data with a pseudo-random generator when it solves
    # the dual problem, so the seed is pinned (to the same value logReg uses)
    # to keep fits repeatable across Restart & Run All.
    super().__init__(svm.LinearSVC(random_state=0), "svmModel")


In [29]:
######### Naive Bayes #########
from sklearn import naive_bayes

class naiBayClass(baseModel):
  """Gaussian naive-Bayes classifier registered under the "nbcModel" tag."""

  def __init__(self):
    estimator = naive_bayes.GaussianNB()
    super().__init__(estimator, "nbcModel")


In [30]:
######### K-NN #########
# https://scikit-learn.org/1.5/modules/neighbors.html#nearest-neighbors-classification
from sklearn import neighbors

class kNearNeigh(baseModel):
  """k-nearest-neighbours classifier registered under the "knnModel" tag."""

  def __init__(self):
    # Neighbour search uses the ball-tree algorithm; rationale:
    #   https://scikit-learn.org/1.5/modules/neighbors.html#Ball_Tree
    # The distance metric is Manhattan; see the lit review for the reasoning.
    # List of available metrics:
    #   https://scikit-learn.org/1.5/modules/generated/sklearn.metrics.pairwise.distance_metrics.html#sklearn.metrics.pairwise.distance_metrics
    knnSettings = {'algorithm': 'ball_tree', 'metric': 'manhattan'}
    super().__init__(neighbors.KNeighborsClassifier(**knnSettings), "knnModel")


# Below was last updated 27/12/2024
# Manhattan distance: {'accuracy': 96.49122807017544, 'specificity': 100.0, 'sensitivity': 89.74358974358975, 'recall': 89.74358974358975, 'precision': 100.0}
# Euclidean distance: {'accuracy': 95.6140350877193, 'specificity': 97.33333333333334, 'sensitivity': 92.3076923076923, 'recall': 92.3076923076923, 'precision': 94.73684210526315}
# Cityblock distance: {'accuracy': 96.49122807017544, 'specificity': 100.0, 'sensitivity': 89.74358974358975, 'recall': 89.74358974358975, 'precision': 100.0}
# Haversine distance: Not valid for > 2 dimensions
# L1 distance: {'accuracy': 96.49122807017544, 'specificity': 100.0, 'sensitivity': 89.74358974358975, 'recall': 89.74358974358975, 'precision': 100.0}
# L2 distance: {'accuracy': 95.6140350877193, 'specificity': 97.33333333333334, 'sensitivity': 92.3076923076923, 'recall': 92.3076923076923, 'precision': 94.73684210526315}

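# From the above, it can be seen that Manhattan/L1/Cityblock (identical scores, as expected
# for aliases of the same metric) yield the best accuracy, specificity and precision,
# whereas Euclidean/L2 yields the higher sensitivity/recall.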

In [31]:
######### Decision Tree #########
# https://scikit-learn.org/1.5/modules/tree.html#classification
# https://scikit-learn.org/1.5/modules/tree.html#mathematical-formulation (mathematical theory)

# scikit-learn's decision trees use an optimised implementation of the CART algorithm.
from sklearn import tree

class decTree(baseModel):
  """Decision-tree classifier (Gini criterion, best splitter) tagged "dtModel".

  The "dtModel" tag is what makes baseModel.train plot the fitted tree.
  """

  def __init__(self):
    # scikit-learn permutes the features at every split even with
    # splitter='best', so equally good splits can be chosen differently from
    # run to run. Pinning the seed keeps the fitted tree (and its plot) stable.
    super().__init__(
      tree.DecisionTreeClassifier(criterion='gini', splitter='best', random_state=0),
      "dtModel",
    )


In [32]:
# 06/02/2025
def runModels(Xtrain, Ytrain, Xtest, modes = ["train"], inModels = None):
  """Train and/or run the five classifier wrappers defined in this notebook.

  Args:
    Xtrain: Training inputs, forwarded to each wrapper's train().
    Ytrain: Training labels, forwarded to each wrapper's train().
    Xtest: Inputs forwarded to each wrapper's test() when "test" is requested.
    modes: Container of mode names; "train" and "test" are recognised. It is
      only read, never modified.
    inModels: Optional dict mapping a model name to an already-trained wrapper
      (any object exposing test()). When it is non-empty, training is skipped
      even if "train" is in modes. The dict itself is not modified.

  Returns:
    [Ypreds, models]. `models` maps each model name to its wrapper. `Ypreds`
    maps "Ypred_<name>" to the output of test() when "test" is requested;
    after a train-only run it holds the status value returned by train().

  Raises:
    ValueError: If "test" is requested without "train" and no trained models
      were supplied (inModels is None or empty).
  """
  wantsTrain = "train" in modes
  wantsTest = "test" in modes

  # Testing needs trained models from somewhere: either this call trains them
  # or the caller supplies them. An empty dict is as useless as None here.
  if not inModels and wantsTest and not wantsTrain:
    raise ValueError("Error: Cannot test models without training them. Please either add 'train' to the modes list, or supply the trained models via the `inModels` parameter.")

  # Work on a shallow copy so the caller's dict is never mutated.
  models = dict(inModels) if inModels is not None else {}
  Ypreds = {}

  if wantsTrain and not models:
    # Wrappers are only constructed when they are actually going to be trained.
    wrapperClasses = (
      ("regModel", logReg),
      ("svmModel", supVecMac),
      ("nbcModel", naiBayClass),
      ("knnModel", kNearNeigh),
      ("dtModel", decTree),
    )
    for name, wrapperClass in wrapperClasses:
      # train() returns [status, wrapper]; the status is a placeholder that is
      # overwritten below when "test" is also requested.
      Ypreds["Ypred_" + name], models[name] = wrapperClass().train(Xtrain, Ytrain)

  if wantsTest:
    for name, wrapper in models.items():
      Ypreds["Ypred_" + name] = wrapper.test(Xtest)

  return [Ypreds, models]
