In [1]:
# Import the modules
import numpy as np
import pandas as pd
from pathlib import Path
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
from io import StringIO
from imblearn.over_sampling import ADASYN

In [2]:
# Train a KNN classifier for the "Madness" label and predict on a held-out split.
# Steps: load CSV -> convert text-encoded numeric columns -> train/test split ->
# ADASYN oversampling of the training split -> standardize -> fit KNN -> predict.

# Settings for this cell, gathered in one place so they are easy to tune.
DATA_PATH = "Resources/madness_label_final_data.csv"
TARGET_COLUMN = "Madness"
NON_FEATURE_COLUMNS = ["Years", "Madness", "Team"]
# Columns that are read as text and need commas removed before the float cast
# (presumably thousands separators such as "1,234" -- confirm against the CSV).
COMMA_FORMATTED_COLUMNS = ["PTS", "DRebs", "REB", "FGM_y", "FGA_y", "FTA"]
RANDOM_STATE = 42  # shared by the split and by ADASYN
N_NEIGHBORS = 5    # adjust the number of neighbors as needed

# Read the dataset
super_data_df = pd.read_csv(DATA_PATH)

# Separate the features from the target. `drop` returns a new frame, so the
# column assignments below do not touch `super_data_df`.
X = super_data_df.drop(columns=NON_FEATURE_COLUMNS)
y = super_data_df[TARGET_COLUMN]

# Convert the text-encoded numeric columns to float, one column at a time
# (replaces six copy-pasted statements; column order and values are unchanged).
for column in COMMA_FORMATTED_COLUMNS:
    # regex=False: the comma is matched literally regardless of the pandas
    # version's default for `regex`.
    X[column] = X[column].str.replace(",", "", regex=False).astype(float)

# Split the data using train_test_split
# NOTE(review): the split is not stratified; with an imbalanced target,
# consider `stratify=y` (this would change the reported metrics).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=RANDOM_STATE
)

# Apply ADASYN to handle class imbalance. Only the training split is
# resampled; the test split keeps its original class distribution.
# NOTE(review): ADASYN runs here on unscaled features; if its neighbour search
# is distance-based, large-magnitude columns may dominate -- consider scaling
# before resampling (this would change the reported metrics).
adasyn = ADASYN(random_state=RANDOM_STATE)
X_train_resampled, y_train_resampled = adasyn.fit_resample(X_train, y_train)

# Feature scaling: fit the scaler on the resampled training data only, then
# apply the same transform to the test data.
scaler = StandardScaler()
X_train_resampled_scaled = scaler.fit_transform(X_train_resampled)
X_test_scaled = scaler.transform(X_test)

# Initialize and train the KNN model
knn_model = KNeighborsClassifier(n_neighbors=N_NEIGHBORS)
knn_model.fit(X_train_resampled_scaled, y_train_resampled)

# Make predictions on the scaled test split
knn_prediction = knn_model.predict(X_test_scaled)


In [4]:
# Confusion matrix of the KNN predictions on the test split.
# Rows are the actual classes and columns the predicted classes.
# NOTE(review): the `imbalanced` in these names presumably refers to the
# un-resampled test split; the model itself was fit on ADASYN-resampled data.
cm_imbalanced = confusion_matrix(y_true=y_test, y_pred=knn_prediction)
cm_imbalanced_df = pd.DataFrame(data=cm_imbalanced)
cm_imbalanced_df

Unnamed: 0,0,1
0,177,95
1,47,94


In [5]:
# Build the per-class precision / recall / F1 summary for the KNN predictions
# on the test split, then print it so the text table keeps its line breaks.
report1 = classification_report(y_true=y_test, y_pred=knn_prediction)
print(report1)

              precision    recall  f1-score   support

         0.0       0.79      0.65      0.71       272
         1.0       0.50      0.67      0.57       141

    accuracy                           0.66       413
   macro avg       0.64      0.66      0.64       413
weighted avg       0.69      0.66      0.66       413

