In [1]:
# Importing required libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score


In [2]:
# Load the dataset from a CSV file located in the current working directory.
csv_path = "diabetes.csv"
df = pd.read_csv(csv_path)

In [3]:
# Show a heading followed by the first five rows (DataFrame.head defaults to n=5).
preview = df.head()
print("First 5 rows of dataset:", preview, sep="\n")

First 5 rows of dataset:
   Pregnancies  Glucose  BloodPressure  SkinThickness  Insulin   BMI  \
0            6      148             72             35        0  33.6   
1            1       85             66             29        0  26.6   
2            8      183             64              0        0  23.3   
3            1       89             66             23       94  28.1   
4            0      137             40             35      168  43.1   

   Pedigree  Age  Outcome  
0     0.627   50        1  
1     0.351   31        0  
2     0.672   32        1  
3     0.167   21        0  
4     2.288   33        1  


In [4]:
# Pull the label column out of the frame; every remaining column is a feature.
# NOTE(review): the preview printed above shows 0 for Insulin and SkinThickness
# in some rows — presumably missing readings encoded as zero; confirm before
# relying on a distance-based model.
target_col = 'Outcome'
y = df[target_col]
X = df.drop(columns=[target_col])


In [5]:
# Hold out 25% of the rows for evaluation; the fixed random_state makes the
# shuffle (and therefore the split) repeatable across runs.
# NOTE(review): no `stratify` argument is passed, so class proportions may
# differ between the two parts — consider stratify=y (this would change the
# metrics recorded further down).
split_parts = train_test_split(X, y, test_size=0.25, random_state=42)
X_train, X_test, y_train, y_test = split_parts

In [6]:
# Standardize the features. The scaler is fitted on the training rows only and
# then applied to both parts, so no statistics from the test rows leak into it.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [7]:
# k-nearest-neighbours classifier; each prediction uses the 5 nearest training points.
n_neighbors = 5
knn = KNeighborsClassifier(n_neighbors=n_neighbors)

In [8]:
# Fit the classifier on the scaled training features and their labels.
knn.fit(X=X_train, y=y_train)

In [9]:
# Predict a class label for every row of the scaled test set.
y_pred = knn.predict(X=X_test)

In [10]:
# Confusion matrix of test labels vs. predictions
# (scikit-learn convention: rows are true classes, columns are predicted classes).
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print("\nConfusion Matrix:\n", cm)


Confusion Matrix:
 [[94 29]
 [32 37]]


In [11]:
# Summary metrics computed on the held-out test set.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
precision = precision_score(y_true=y_test, y_pred=y_pred)
recall = recall_score(y_true=y_test, y_pred=y_pred)
# Error rate is the complement of accuracy.
error_rate = 1 - accuracy

In [12]:
# Print each metric rounded to four decimals, one per line, under a heading.
print("\nModel Evaluation Metrics:")
report_rows = (
    ("Accuracy      :", accuracy),
    ("Error Rate    :", error_rate),
    ("Precision     :", precision),
    ("Recall        :", recall),
)
for label, value in report_rows:
    print(label, round(value, 4))


Model Evaluation Metrics:
Accuracy      : 0.6823
Error Rate    : 0.3177
Precision     : 0.5606
Recall        : 0.5362
