In [4]:
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import (
    accuracy_score,
    roc_auc_score,
    precision_score,
    recall_score,
    f1_score,
    matthews_corrcoef
)
from sklearn.pipeline import Pipeline
from utils.UtilityFunctions import save_model, write_json_to_file
#-- Read Test and Train Files Generated in DataPrep.ipynb -------------------------
X_train = pd.read_csv("../data/split/X_train.csv")
X_test = pd.read_csv("../data/split/X_test.csv")  
y_train = pd.read_csv("../data/split/y_train.csv")
y_test = pd.read_csv("../data/split/y_test.csv")

# -----------------------------
# Scale Features (VERY IMPORTANT FOR KNN)
# -----------------------------
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# -----------------------------
# Initialize KNN Model
# -----------------------------
knn_model = KNeighborsClassifier(
    n_neighbors=7,      # good default for multi-class
    metric="minkowski", # Euclidean distance
    p=2
)

# -----------------------------
# Train Model
# -----------------------------
knn_model.fit(X_train_scaled, y_train)


#-- Save the trained model using pickle
pipeline = Pipeline([
    ("scaler", scaler),
    ("model", knn_model)
])

trained_model_path="trained_models/KNNModel.pkl"
save_model(knn_model, trained_model_path)



# -----------------------------
# Predictions
# -----------------------------
y_pred_knn = knn_model.predict(X_test_scaled)
y_prob_knn = knn_model.predict_proba(X_test_scaled)

# -----------------------------
# Evaluation Metrics
# -----------------------------
knn_metrics = {
    "ML Model Name": "kNN",
    "Accuracy": accuracy_score(y_test, y_pred_knn),
    "AUC": roc_auc_score(
        y_test,
        y_prob_knn,
        multi_class="ovr",
        average="macro"
    ),
    "Precision": precision_score(y_test, y_pred_knn, average="macro"),
    "Recall": recall_score(y_test, y_pred_knn, average="macro"),
    "F1": f1_score(y_test, y_pred_knn, average="macro"),
    "MCC": matthews_corrcoef(y_test, y_pred_knn)
}

knn_metrics

write_json_to_file(knn_metrics, "../metrics/KNNMetrics.json")


  return self._fit(X, y)


True