In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import RandomOverSampler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report

In [None]:
# 1) Load dataset
# The file is read as header-less (first line is data), so the ten feature
# names and the trailing label column are supplied explicitly.
cols = [
    "fLength", "fWidth", "fSize", "fConc", "fConc1",
    "fAsym", "fM3Long", "fM3Trans", "fAlpha", "fDist",
    "class",
]
df = pd.read_csv("magic04.data", header=None, names=cols)

In [None]:
# Convert target to binary (1 = gamma, 0 = hadron).
# The guard makes this cell safe to re-run: once the column already holds
# integers, comparing it to "g" again would silently set every label to 0.
if not pd.api.types.is_integer_dtype(df["class"]):
    df["class"] = (df["class"] == "g").astype(int)

In [None]:
# Separate features & target.
# Everything except "class" is a feature; the same result could be obtained by
# selecting the ten feature columns by name instead of dropping the label.
feature_frame = df.drop(columns="class")

# Downstream cells work with plain NumPy arrays rather than pandas objects.
X = feature_frame.to_numpy()
y = df["class"].to_numpy()

In [None]:
# Both cuts share the same seed and shuffle setting so the partition is
# deterministic across runs.
split_opts = {"random_state": 42, "shuffle": True}

# First cut: Train = 70%, Temp = 30% of the data.
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, **split_opts)

# Second cut: halve Temp into Validation / Test (each 15% of the original data).
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, **split_opts)

In [None]:
# Oversample only the training set; validation and test data are left untouched.
# random_state is fixed (same seed as the splits) so the randomly duplicated
# rows -- and therefore the fitted scaler and every metric computed later --
# are reproducible under Restart & Run All.
ros = RandomOverSampler(random_state=42)
X_train, y_train = ros.fit_resample(X_train, y_train)

# Scale using training set stats only: the scaler is fitted on the (resampled)
# training data and merely applied to validation/test, so no information from
# the held-out sets leaks into the preprocessing.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled   = scaler.transform(X_val)
X_test_scaled  = scaler.transform(X_test)

In [None]:
# Fit a 5-nearest-neighbours classifier on the scaled training data
# (fit() returns the estimator itself, so construction and fitting are chained).
knn_model = KNeighborsClassifier(n_neighbors=5).fit(X_train_scaled, y_train)

# Predict on the held-out test split and print per-class precision/recall/F1.
# print() is used because the report is a pre-formatted multi-line string.
y_pred = knn_model.predict(X_test_scaled)
test_report = classification_report(y_test, y_pred)
print(test_report)

              precision    recall  f1-score   support

           0       0.75      0.75      0.75      1007
           1       0.86      0.87      0.87      1846

    accuracy                           0.83      2853
   macro avg       0.81      0.81      0.81      2853
weighted avg       0.82      0.83      0.82      2853

