<h3>To implement a Machine Learning Classification model using a K Nearest Neighbors Classifier algorithm and enhance the model by K Fold and GridSearchCV cross-validation.</h3>

<h4>Import Libraries</h4>

In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split, KFold, GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.preprocessing import StandardScaler

<h3>Load Data</h3>

In [4]:
# Read the diabetes CSV (path is relative to the notebook's working directory)
# into a DataFrame that the following cells operate on.
df = pd.read_csv(filepath_or_buffer='./CSV files/diabetes.csv')

<h3>Print Data</h3>

In [5]:
# Preview the first five rows (5 is also the default for head()).
df.head(n=5)

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


<h3>Preparing the Data for the K Nearest Neighbors Classifier (Train/Test Split and Scaling)</h3>

In [6]:
# Separate the target column ('Outcome') from the remaining feature columns.
y = df['Outcome']
X = df.drop(columns='Outcome')

# Hold out 20% of the rows as a test set; the fixed random_state keeps the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Learn the scaling statistics from the training features only, then apply
# that same fitted scaler to the test features.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

<h3>Hyperparameter Tuning with GridSearchCV</h3>

In [15]:
# Candidate neighbour counts to evaluate.
param_grid = {'n_neighbors': [3, 5, 7, 9]}

# Search the grid with 5-fold cross-validation, ranking candidates by accuracy.
knn = KNeighborsClassifier()
grid_search = GridSearchCV(
    estimator=knn,
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
)
grid_search.fit(X_train_scaled, y_train)

# Keep the winning parameter combination for the cross-validation cell below.
# NOTE(review): X_train_scaled was standardised with a scaler fit on all
# training rows before these folds were formed, so each validation fold has
# influenced the scaling statistics. Consider wrapping the scaler and the
# classifier in a Pipeline if strictly leak-free CV scores are needed - confirm.
best_params = grid_search.best_params_

<h3>Cross-Validation with K Fold</h3>

In [16]:
# Manual 5-fold cross-validation over the scaled training data, using the
# neighbour count selected by the grid search.
k_fold = KFold(n_splits=5)
accuracies, precisions, recalls, f1_scores = [], [], [], []

# Pair each metric function with the list that collects its per-fold value.
metric_sinks = (
    (accuracy_score, accuracies),
    (precision_score, precisions),
    (recall_score, recalls),
    (f1_score, f1_scores),
)

for train_idx, val_idx in k_fold.split(X_train_scaled):
    # X_train_scaled is indexed positionally; y_train is a pandas object, so
    # .iloc is used to select the same positions.
    X_train_fold = X_train_scaled[train_idx]
    X_val_fold = X_train_scaled[val_idx]
    y_train_fold = y_train.iloc[train_idx]
    y_val_fold = y_train.iloc[val_idx]

    # Train a fresh classifier on this fold's training part and predict on
    # its validation part.
    knn = KNeighborsClassifier(n_neighbors=best_params['n_neighbors'])
    knn.fit(X_train_fold, y_train_fold)
    y_pred_fold = knn.predict(X_val_fold)

    # Record every metric for this fold.
    for metric_fn, sink in metric_sinks:
        sink.append(metric_fn(y_val_fold, y_pred_fold))

<h3>Evaluating Model Performance</h3>

In [17]:
# Report the mean of each per-fold metric collected during cross-validation.
for label, fold_values in (
    ("Average Accuracy:", accuracies),
    ("Average Precision:", precisions),
    ("Average Recall:", recalls),
    ("Average F1 Score:", f1_scores),
):
    print(label, sum(fold_values) / len(fold_values))

Average Accuracy: 0.744368919099027
Average Precision: 0.6498736947900305
Average Recall: 0.5673835537665325
Average F1 Score: 0.6012838002178441
