<a href="https://colab.research.google.com/github/ImTeddyGraham/Data-ML-Projects/blob/main/KNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np

from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import log_loss
from sklearn.metrics import DistanceMetric
from scipy.spatial.distance import mahalanobis
import seaborn as sns

In [None]:
# Read the heart-failure clinical records, then separate the DEATH_EVENT target
# column from the remaining predictor columns.
dataset = pd.read_csv('/content/heart_failure_clinical_records_dataset.csv')
features = dataset.drop(columns='DEATH_EVENT')
labels = dataset['DEATH_EVENT']

In [None]:
# Hold out 20% of the rows for testing; random_state is fixed so the split is
# the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    labels,
    test_size=0.2,
    random_state=69,
)

In [None]:
# Show how many held-out rows fall into each DEATH_EVENT class.
y_test.value_counts()

0    40
1    20
Name: DEATH_EVENT, dtype: int64

In [None]:
# Compare KNN test accuracy across four distance metrics at three neighbourhood
# sizes: k = 1, k = 25 and k = len(X_train) (every training row is a neighbour).

# The inverse covariance matrix needed by the Mahalanobis metric depends only on
# X_train, so it is computed once here rather than once per value of k.
inverse_covariance = np.linalg.inv(np.cov(X_train.T))

for n in (1, 25, len(X_train)):
  neigh_mahalanobis = KNeighborsClassifier(
      n_neighbors=n,
      metric=mahalanobis,
      metric_params={'VI': inverse_covariance},
  )
  neigh_cityblock = KNeighborsClassifier(n_neighbors=n, metric='cityblock')
  neigh_cosine = KNeighborsClassifier(n_neighbors=n, metric='cosine')
  neigh_minkowski = KNeighborsClassifier(n_neighbors=n, metric='minkowski')

  # (report label, classifier) pairs, listed in the order they are reported.
  labelled_classifiers = (
      ('Mahalanobis', neigh_mahalanobis),
      ('City Block', neigh_cityblock),
      ('Cosine', neigh_cosine),
      ('Minkowski', neigh_minkowski),
  )

  report_lines = [f'MODEL ACCURACY FOR K = {n}']
  for metric_label, classifier in labelled_classifiers:
    classifier.fit(X_train, y_train)
    test_accuracy = classifier.score(X_test, y_test)
    report_lines.append(f'{metric_label} Distance Model: {test_accuracy:.2f}')

  # The second positional argument adds a space plus five newlines after the
  # report, visually separating the blocks for successive values of k.
  print('\n'.join(report_lines), '\n'*5)

MODEL ACCURACY FOR K = 1
Mahalanobis Distance Model: 0.63
City Block Distance Model: 0.57
Cosine Distance Model: 0.63
Minkowski Distance Model: 0.52 





MODEL ACCURACY FOR K = 25
Mahalanobis Distance Model: 0.70
City Block Distance Model: 0.67
Cosine Distance Model: 0.75
Minkowski Distance Model: 0.67 





MODEL ACCURACY FOR K = 239
Mahalanobis Distance Model: 0.67
City Block Distance Model: 0.67
Cosine Distance Model: 0.67
Minkowski Distance Model: 0.67 







In [None]:
# For k = 5, 10 and 20, fit a KNN classifier with the library's default distance
# metric and report log loss plus a classification report on both the training
# and the test split.
for k_value in (5,10,20):
  neigh = KNeighborsClassifier(n_neighbors=k_value)
  neigh.fit(X_train, y_train)
  y_pred_train = neigh.predict(X_train)
  y_pred_test =  neigh.predict(X_test)

  # Log loss scores probability estimates. Feeding it hard 0/1 predictions only
  # yields the error rate multiplied by the clipping penalty, so it is computed
  # from predict_proba instead.
  y_proba_train = neigh.predict_proba(X_train)
  y_proba_test = neigh.predict_proba(X_test)

  # labels=neigh.classes_ ties the probability columns to their classes even if
  # a split happens to contain only one class.
  train_log_loss = log_loss(y_train, y_proba_train, labels=neigh.classes_)
  test_log_loss = log_loss(y_test, y_proba_test, labels=neigh.classes_)

  print(f'TRAINING SET METRICS FOR K = {k_value}:\n\n'
        f'Log Loss = {train_log_loss:.2f}\n\n'
        f'{classification_report(y_train, y_pred_train, zero_division=0)}\n\n\n'
        f'TEST SET METRICS FOR K = {k_value}:\n\n'
        f'Log Loss = {test_log_loss:.2f}\n\n'
        f'{classification_report(y_test, y_pred_test, zero_division=0)}\n\n\n', '-'*53)

TRAINING SET METRICS FOR K = 5:

Log Loss = 9.3934479448766

              precision    recall  f1-score   support

           0       0.75      0.90      0.82       163
           1       0.62      0.37      0.46        76

    accuracy                           0.73       239
   macro avg       0.69      0.63      0.64       239
weighted avg       0.71      0.73      0.71       239



TEST SET METRICS FOR K = 5:

Log Loss = 13.239970897706455

              precision    recall  f1-score   support

           0       0.68      0.80      0.74        40
           1       0.38      0.25      0.30        20

    accuracy                           0.62        60
   macro avg       0.53      0.53      0.52        60
weighted avg       0.58      0.62      0.59        60



 -----------------------------------------------------
TRAINING SET METRICS FOR K = 10:

Log Loss = 10.838538194186578

              precision    recall  f1-score   support

           0       0.69      0.98      0.81   