In [1]:
#Importing Required Libraries
import os

import joblib
import numpy as np
import pandas as pd
from sklearn.metrics import (
    accuracy_score,
    roc_auc_score,
    precision_score,
    recall_score,
    f1_score,
    matthews_corrcoef,
    confusion_matrix,
    classification_report
)
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler


In [2]:
# Load the dataset and, in the same expression, discard the two columns that
# are not used as features ("id" and "Unnamed: 32").
raw_csv_path = "C:/Users/Lingesh.L/Downloads/data.csv"
data = pd.read_csv(raw_csv_path).drop(columns=["id", "Unnamed: 32"])

In [3]:
# Encode the target variable: "B" -> 0, "M" -> 1.
DIAGNOSIS_CODES = {"B": 0, "M": 1}

# Only encode while the column still holds the raw labels. Series.map() turns
# every value that is not a key of the mapping into NaN, so mapping a column
# that was already encoded (e.g. when this cell is re-run) would wipe the
# target out.
if not pd.api.types.is_numeric_dtype(data["diagnosis"]):
    encoded_diagnosis = data["diagnosis"].map(DIAGNOSIS_CODES)

    # Fail loudly instead of letting unmapped labels flow on as NaN.
    unmapped_rows = encoded_diagnosis.isna()
    if unmapped_rows.any():
        unexpected_labels = data.loc[unmapped_rows, "diagnosis"].unique().tolist()
        raise ValueError(
            f"Unexpected values in 'diagnosis' column: {unexpected_labels}"
        )

    data["diagnosis"] = encoded_diagnosis.astype("int64")

X = data.drop(columns="diagnosis")  # Features: every column except the target
y = data["diagnosis"]               # Target: encoded diagnosis

In [4]:
# Train–test split: hold out 20% of the rows for evaluation. Stratifying on y
# keeps the class proportions the same in both partitions, and the fixed
# random_state makes the split reproducible.
split_options = {"test_size": 0.2, "random_state": 42, "stratify": y}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

In [5]:
# Feature scaling: the scaler is fitted on the training rows only, and that
# same fitted transformation is then applied to both partitions, so the test
# rows never influence the scaling parameters.
scaler = StandardScaler().fit(X_train)

X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [6]:
# k-nearest-neighbours classifier using 5 neighbours. The Minkowski metric
# with p=2 is the Euclidean distance.
knn_settings = {"n_neighbors": 5, "metric": "minkowski", "p": 2}
knn_model = KNeighborsClassifier(**knn_settings)

# Fitting stays the final expression of the cell so the notebook displays the
# fitted estimator.
knn_model.fit(X_train_scaled, y_train)


KNeighborsClassifier()

In [7]:
# Predicted class labels for the held-out rows.
y_pred = knn_model.predict(X_test_scaled)

# predict_proba returns one column per class; column index 1 is kept as the
# score used for the AUC computation.
class_probabilities = knn_model.predict_proba(X_test_scaled)
y_pred_proba = class_probabilities[:, 1]

In [8]:
# Metrics computed from the predicted labels.
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
mcc = matthews_corrcoef(y_test, y_pred)

# AUC is the only metric computed from the predicted probabilities.
auc = roc_auc_score(y_test, y_pred_proba)

# Labels are pre-padded to a common width so the colons line up in the output.
metric_rows = (
    ("Accuracy  ", accuracy),
    ("AUC       ", auc),
    ("Precision ", precision),
    ("Recall    ", recall),
    ("F1 Score  ", f1),
    ("MCC       ", mcc),
)

print("KNN Performance")
for metric_label, metric_value in metric_rows:
    print(f"{metric_label}: {metric_value:.4f}")

KNN Performance
Accuracy  : 0.9561
AUC       : 0.9823
Precision : 0.9744
Recall    : 0.9048
F1 Score  : 0.9383
MCC       : 0.9058


In [9]:
# Build both summaries first, then print each one under its own heading.
test_confusion = confusion_matrix(y_test, y_pred)
test_report = classification_report(y_test, y_pred)

print("\nConfusion Matrix:")
print(test_confusion)

print("\nClassification Report:")
print(test_report)


Confusion Matrix:
[[71  1]
 [ 4 38]]

Classification Report:
              precision    recall  f1-score   support

           0       0.95      0.99      0.97        72
           1       0.97      0.90      0.94        42

    accuracy                           0.96       114
   macro avg       0.96      0.95      0.95       114
weighted avg       0.96      0.96      0.96       114



In [10]:
# Persist the fitted classifier together with the scaler it was trained with.
MODEL_DIR = "C:/Users/Lingesh.L/Downloads/pkl"

# joblib.dump does not create missing directories, so make sure the target
# folder exists before writing (no-op when it is already there).
os.makedirs(MODEL_DIR, exist_ok=True)

joblib.dump(knn_model, f"{MODEL_DIR}/knn.pkl")
joblib.dump(scaler, f"{MODEL_DIR}/scaler_knn.pkl")

['C:/Users/Lingesh.L/Downloads/pkl/scaler_knn.pkl']