In [1]:
# Import Libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score

# ---------------------------------------------------------------------
# Data loading
# ---------------------------------------------------------------------
# Read the CSV (path is relative to the current working directory).
df = pd.read_csv("diabetes.csv")

# Purely positional split: the last column is taken as the label and
# every column before it as a feature.
X = df.iloc[:, :-1]
Y = df.iloc[:, -1]

# ---------------------------------------------------------------------
# Train / test partition
# ---------------------------------------------------------------------
# Hold out 25% of the rows; the fixed seed keeps the shuffle repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.25,
    random_state=42,
)

# ---------------------------------------------------------------------
# Feature scaling
# ---------------------------------------------------------------------
# KNN is distance-based, so features are standardized first. The scaler
# is fitted on the training rows only and then applied to both splits,
# so no statistics from the held-out rows are used.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# ---------------------------------------------------------------------
# Model: k-nearest neighbours with k = 5
# ---------------------------------------------------------------------
# `fit` returns the estimator itself, so construction and training chain.
knn = KNeighborsClassifier(n_neighbors=5).fit(X_train, Y_train)

# Predicted labels for the held-out rows.
Y_pred = knn.predict(X_test)

# ---------------------------------------------------------------------
# Evaluation
# ---------------------------------------------------------------------
cm = confusion_matrix(Y_test, Y_pred)
accuracy = accuracy_score(Y_test, Y_pred)
precision = precision_score(Y_test, Y_pred)
recall = recall_score(Y_test, Y_pred)
# Error rate is the complement of accuracy.
error_rate = 1 - accuracy

# ---------------------------------------------------------------------
# Report
# ---------------------------------------------------------------------
print("Confusion Matrix:\n", cm)
# Each scalar metric is printed rounded to three decimals; the leading
# newline on the first label separates the metrics from the matrix.
for metric_name, metric_value in (
    ("\nAccuracy:", accuracy),
    ("Error Rate:", error_rate),
    ("Precision:", precision),
    ("Recall:", recall),
):
    print(metric_name, round(metric_value, 3))


Confusion Matrix:
 [[94 29]
 [32 37]]

Accuracy: 0.682
Error Rate: 0.318
Precision: 0.561
Recall: 0.536
