# In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score


# Load the raw dataset (expects 'diabetes.csv' in the working directory).
data = pd.read_csv(filepath_or_buffer='diabetes.csv')

# Report the number of missing (NaN) entries found in each column.
print(data.isna().sum(axis=0))


def boxplot(df, n_cols=8):
    """Draw one box plot per column, each on its own figure.

    Args:
        df: DataFrame whose columns are plotted.
        n_cols: Maximum number of leading columns to plot. Defaults to 8,
            the count this function was previously hard-coded to; frames
            with fewer columns are plotted in full. Pass ``None`` to plot
            every column.
    """
    columns = df.columns if n_cols is None else df.columns[:n_cols]
    for position, column in enumerate(columns):
        # Open a fresh figure for every column so each plot gets the
        # requested size, not just the first one drawn before plt.show().
        plt.figure(figsize=(10, 5))
        # Select the column by position (keeps a one-column DataFrame) and
        # pass it explicitly as `data=` so it is never bound to `x`.
        sns.boxplot(data=df.iloc[:, [position]], color="#99004C")
        plt.title(column)
        plt.show()

# Plot the columns as loaded, before any cleaning is applied.
boxplot(data)

# Separate the label column from the predictor columns.
y = data['Outcome']  # Target variable
X = data.drop(columns='Outcome')  # Features


def outlier(df):
    """Replace IQR outliers in every numeric column with the column median.

    A value counts as an outlier when it lies more than 1.5 * IQR below the
    first quartile or above the third quartile of its column. Non-numeric
    columns are left untouched.

    Args:
        df: DataFrame to clean. It is modified in place.

    Returns:
        The same DataFrame object that was passed in.
    """
    for col in df.select_dtypes(include="number").columns:
        values = df[col]
        q1 = values.quantile(0.25)
        q3 = values.quantile(0.75)
        iqr = q3 - q1
        min_range = q1 - 1.5 * iqr
        max_range = q3 + 1.5 * iqr

        # Boolean mask marking the values outside the accepted range.
        outliers_mask = (values < min_range) | (values > max_range)
        if not outliers_mask.any():
            continue

        # Series.median() ignores NaN, so missing values no longer poison
        # the replacement value.
        replacement = values.median()
        if pd.api.types.is_integer_dtype(values.dtype):
            # Integer columns keep their dtype: a fractional median is
            # truncated. Float columns keep the exact median instead of
            # having it truncated to a whole number.
            replacement = int(replacement)

        df.loc[outliers_mask, col] = replacement

    return df

# Replace outliers in the feature matrix (outlier() modifies X in place and
# returns it), then re-plot the features to inspect the result.
X = outlier(X)
boxplot(X)

# Correlation heatmap of `data` (all columns, including 'Outcome'), drawn on
# its own figure and shown explicitly so it also appears when this file is
# run as a plain script rather than in a notebook.
plt.figure(figsize=(10, 8))
sns.heatmap(data.corr(), annot=True)
plt.show()

# Hold out 30% of the samples for evaluation; the fixed seed makes the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Fit the scaler on the training split only, then apply that same
# transformation to both splits.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)


# Ask for k until the answer is usable: it must parse as an integer and lie
# between 1 and the number of training samples, otherwise the classifier
# cannot find that many neighbors.
max_k = len(X_train)
while True:
    raw_k = input("Enter the value of k for K-Nearest Neighbors: ")
    try:
        k = int(raw_k)
    except ValueError:
        print(f"Invalid input {raw_k!r}: k must be a whole number.")
        continue
    if 1 <= k <= max_k:
        break
    print(f"k must be between 1 and {max_k} (the number of training samples).")

# Initialize the KNN classifier with the user-defined k value
knn = KNeighborsClassifier(n_neighbors=k)
# Train the classifier and predict labels for the held-out test split
knn.fit(X_train, y_train)
y_pred = knn.predict(X_test)


# Score the test-set predictions against the true labels.
accuracy = accuracy_score(y_test, y_pred)
error_rate = 1 - accuracy  # share of misclassified samples
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)

# Assemble the report first, then emit it in a single write; the scalar
# metrics are printed as percentages with two decimals.
report_lines = [
    f"Confusion Matrix:\n{conf_matrix}",
    f"Accuracy: {accuracy * 100:.2f}%",
    f"Error Rate: {error_rate * 100:.2f}%",
    f"Precision: {precision * 100:.2f}%",
    f"Recall: {recall * 100:.2f}%",
]
print("\n".join(report_lines))
