**Train Test and Cross Validation**

In [10]:
import pandas as pd
from sklearn.model_selection import KFold, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.neighbors import KNeighborsClassifier

data_path = '/workspaces/Final-Year-Project/Cleaned Data/TrainTestData.csv'
df = pd.read_csv(data_path)

df = df.dropna()

X = df.drop('Diabetes Status', axis=1)
y = df['Diabetes Status']

scaler = StandardScaler()
X = scaler.fit_transform(X)

knn = KNeighborsClassifier()

param_grid = {
    'n_neighbors': [11],  
}

kf = KFold(n_splits=5, shuffle=True, random_state=42)
grid_search = GridSearchCV(knn, param_grid, scoring='accuracy', cv=kf, n_jobs=-1, verbose=1)

grid_search.fit(X, y)

best_knn = grid_search.best_estimator_

y_pred = best_knn.predict(X)

print("Best Parameters for KNN:", grid_search.best_params_)
print("Mean Accuracy with Best Parameters:", grid_search.best_score_)

print("KNN Classification Report:")
print(classification_report(y, y_pred))
print("Confusion Matrix:")
print(confusion_matrix(y, y_pred))


Fitting 5 folds for each of 1 candidates, totalling 5 fits
Best Parameters for KNN: {'n_neighbors': 11}
Mean Accuracy with Best Parameters: 0.6723926380368098
KNN Classification Report:
              precision    recall  f1-score   support

           0       0.76      0.58      0.66       367
           1       0.71      0.85      0.77       448

    accuracy                           0.73       815
   macro avg       0.74      0.71      0.72       815
weighted avg       0.73      0.73      0.72       815

Confusion Matrix:
[[212 155]
 [ 67 381]]


In [11]:
from sklearn.metrics import classification_report, confusion_matrix

valid_path = "/workspaces/Final-Year-Project/Cleaned Data/ValidationData.csv"

df_valid = pd.read_csv(valid_path)
df_valid = df_valid.dropna()

X_valid = df_valid.drop('Diabetes Status', axis=1)
y_valid = df_valid['Diabetes Status']

X_valid_scaled = scaler.transform(X_valid)

print(f"ðŸ§ª KNN - Validation dataset shape: {X_valid.shape}")

y_pred_valid = best_knn.predict(X_valid_scaled)

print(f"ðŸ“Œ KNN - Validation Classification Report:")
print(classification_report(y_valid, y_pred_valid))

print("ðŸŸ¦ KNN - Validation Confusion Matrix:")
print(confusion_matrix(y_valid, y_pred_valid))


ðŸ§ª KNN - Validation dataset shape: (92, 11)
ðŸ“Œ KNN - Validation Classification Report:
              precision    recall  f1-score   support

           0       0.84      0.64      0.73        42
           1       0.75      0.90      0.82        50

    accuracy                           0.78        92
   macro avg       0.80      0.77      0.77        92
weighted avg       0.79      0.78      0.78        92

ðŸŸ¦ KNN - Validation Confusion Matrix:
[[27 15]
 [ 5 45]]


best: nn = 5

ðŸ§ª KNN - Validation dataset shape: (335, 9)
ðŸ“Œ KNN - Validation Classification Report:
              precision    recall  f1-score   support

           0       0.72      0.68      0.70       145
           1       0.76      0.79      0.78       190

    accuracy                           0.74       335
   macro avg       0.74      0.74      0.74       335
weighted avg       0.74      0.74      0.74       335

ðŸŸ¦ KNN - Validation Confusion Matrix:
[[ 98  47]
 [ 39 151]]

nn = 7

ðŸ§ª KNN - Validation dataset shape: (335, 9)
ðŸ“Œ KNN - Validation Classification Report:
              precision    recall  f1-score   support

           0       0.68      0.66      0.67       145
           1       0.75      0.76      0.75       190

    accuracy                           0.72       335
   macro avg       0.71      0.71      0.71       335
weighted avg       0.72      0.72      0.72       335

ðŸŸ¦ KNN - Validation Confusion Matrix:
[[ 96  49]
 [ 46 144]]

nn = 3

ðŸ§ª KNN - Validation dataset shape: (335, 9)
ðŸ“Œ KNN - Validation Classification Report:
              precision    recall  f1-score   support

           0       0.66      0.68      0.67       145
           1       0.75      0.73      0.74       190

    accuracy                           0.71       335
   macro avg       0.70      0.70      0.70       335
weighted avg       0.71      0.71      0.71       335

ðŸŸ¦ KNN - Validation Confusion Matrix:
[[ 98  47]
 [ 51 139]]

nn = 9

ðŸ§ª KNN - Validation dataset shape: (335, 9)
ðŸ“Œ KNN - Validation Classification Report:
              precision    recall  f1-score   support

           0       0.67      0.62      0.64       145
           1       0.72      0.76      0.74       190

    accuracy                           0.70       335
   macro avg       0.70      0.69      0.69       335
weighted avg       0.70      0.70      0.70       335

ðŸŸ¦ KNN - Validation Confusion Matrix:
[[ 90  55]
 [ 45 145]]
