In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from mlxtend.evaluate import PredefinedHoldoutSplit
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report

In [None]:
X_full = pd.read_csv('New_Train_Features_Full.csv')
X_test_full = pd.read_csv('New_Test_Features_Full.csv')
y = pd.read_csv('Train_Target.csv').iloc[:,1]

X_full_no = pd.read_csv('New_Train_Features.csv')
X_test_full_no = pd.read_csv('New_Test_Features.csv')

X_RFE = pd.read_csv('New_Train_Features_RFE.csv')
X_RFE_test = pd.read_csv('New_Test_Features_RFE.csv')

X_LASSO = pd.read_csv('New_Train_Features_LASSO.csv')
X_LASSO_test = pd.read_csv('New_Test_Features_LASSO.csv')

X_manual = pd.read_csv('New_Train_Features_Selected.csv')
X_manual_test = pd.read_csv('New_Test_Features_Selected.csv')

# Full Dataset:

In [None]:
np.random.seed(2025)
X_train, X_temp, y_train, y_temp = train_test_split(X_full, y, test_size = 0.3, random_state = 2025, stratify = y)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size = 0.5, random_state = 2025, stratify = y_temp)

In [None]:
np.random.seed(2025)
train_ind, val_ind = train_test_split(np.arange(X_temp.shape[0]), test_size = 0.3, random_state = 2025, stratify = y_temp)
split = PredefinedHoldoutSplit(valid_indices = val_ind)

In [None]:
params = {
    'n_neighbors': [3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27],
    'weights': ['uniform', 'distance'],
    'metric': ['euclidean', 'manhattan'],
    'algorithm': ['auto', 'brute']
    }

knn = KNeighborsClassifier()

In [None]:
np.random.seed(2025)
knn_grid = GridSearchCV(KNeighborsClassifier(), param_grid = params, cv = split, n_jobs = -1, scoring = 'f1')
knn_grid.fit(X_temp, y_temp)
print('Best Parameters:', knn_grid.best_params_)

Best Parameters: {'algorithm': 'auto', 'metric': 'manhattan', 'n_neighbors': 3, 'weights': 'uniform'}


In [None]:
np.random.seed(2025)
knn_model = KNeighborsClassifier(metric = 'manhattan', n_neighbors = 3, weights = 'uniform', algorithm = 'auto')
knn_model.fit(X_temp, y_temp)
y_pred = knn_model.predict(X_test)

print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.84      0.86      0.85        36
           1       0.38      0.33      0.35         9

    accuracy                           0.76        45
   macro avg       0.61      0.60      0.60        45
weighted avg       0.75      0.76      0.75        45



In [None]:
np.random.seed(2025)
knn_model.fit(X_full, y)
y_pred_knn = knn_model.predict(X_test_full)

In [None]:
from google.colab import files

pred_knn = pd.DataFrame({'ID': range(1, 92), 'Placement': y_pred_knn})

with open('KNN_Full_Cov.csv', 'w') as file:
    pred_knn.to_csv(file, index = False, header = True)

files.download('KNN_Full_Cov.csv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# RFE Dataset:

In [None]:
np.random.seed(2025)
X_train_rfe, X_temp_rfe, y_train_rfe, y_temp_rfe = train_test_split(X_RFE, y, test_size = 0.3, random_state = 2025, stratify = y)
X_val_rfe, X_test_rfe, y_val_rfe, y_test_rfe = train_test_split(X_temp_rfe, y_temp_rfe, test_size = 0.5, random_state = 2025, stratify = y_temp_rfe)

In [None]:
np.random.seed(2025)
train_rfe_ind, val_rfe_ind = train_test_split(np.arange(X_temp_rfe.shape[0]), test_size = 0.3, random_state = 2025, stratify = y_temp_rfe)
split_rfe = PredefinedHoldoutSplit(valid_indices = val_rfe_ind)

In [None]:
np.random.seed(2025)
knn_grid_rfe = GridSearchCV(KNeighborsClassifier(), param_grid = params, cv = split_rfe, n_jobs = -1, scoring = 'f1')
knn_grid_rfe.fit(X_temp_rfe, y_temp_rfe)
print('Best Parameters:', knn_grid_rfe.best_params_)

Best Parameters: {'algorithm': 'auto', 'metric': 'euclidean', 'n_neighbors': 3, 'weights': 'uniform'}


In [None]:
np.random.seed(2025)
knn_model_rfe = KNeighborsClassifier(metric = 'euclidean', n_neighbors = 3, weights = 'uniform', algorithm = 'auto')
knn_model_rfe.fit(X_temp_rfe, y_temp_rfe)
y_pred_rfe = knn_model_rfe.predict(X_test_rfe)

print(classification_report(y_test, y_pred_rfe))

              precision    recall  f1-score   support

           0       0.84      0.86      0.85        36
           1       0.38      0.33      0.35         9

    accuracy                           0.76        45
   macro avg       0.61      0.60      0.60        45
weighted avg       0.75      0.76      0.75        45



In [None]:
np.random.seed(2025)
knn_model_rfe.fit(X_RFE, y)
y_pred_rfe_knn = knn_model_rfe.predict(X_RFE_test)

In [None]:
from google.colab import files

pred_knn = pd.DataFrame({'ID': range(1, 92), 'Placement': y_pred_rfe_knn})

with open('KNN_RFE_Cov.csv', 'w') as file:
    pred_knn.to_csv(file, index = False, header = True)

files.download('KNN_RFE_Cov.csv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# LASSO:

In [None]:
np.random.seed(2025)
X_train_lasso, X_temp_lasso, y_train_lasso, y_temp_lasso = train_test_split(X_LASSO, y, test_size = 0.3, random_state = 2025, stratify = y)
X_val_lasso, X_test_lasso, y_val_lasso, y_test_lasso = train_test_split(X_temp_lasso, y_temp_lasso, test_size = 0.5, random_state = 2025, stratify = y_temp_lasso)

In [None]:
np.random.seed(2025)
train_lasso_ind, val_lasso_ind = train_test_split(np.arange(X_temp_lasso.shape[0]), test_size = 0.3, random_state = 2025, stratify = y_temp_lasso)
split_lasso = PredefinedHoldoutSplit(valid_indices = val_lasso_ind)

In [None]:
np.random.seed(2025)
knn_grid_lasso = GridSearchCV(KNeighborsClassifier(), param_grid = params, cv = split_lasso, n_jobs = -1, scoring = 'f1')
knn_grid_lasso.fit(X_temp_lasso, y_temp_lasso)
print('Best Parameters:', knn_grid_lasso.best_params_)

Best Parameters: {'algorithm': 'auto', 'metric': 'euclidean', 'n_neighbors': 3, 'weights': 'uniform'}


In [None]:
np.random.seed(2025)
knn_model_lasso = KNeighborsClassifier(metric = 'euclidean', n_neighbors = 3, weights = 'uniform', algorithm = 'auto')
knn_model_lasso.fit(X_temp_lasso, y_temp_lasso)
y_pred_lasso = knn_model_lasso.predict(X_test_lasso)

print(classification_report(y_test, y_pred_lasso))

              precision    recall  f1-score   support

           0       0.83      0.97      0.90        36
           1       0.67      0.22      0.33         9

    accuracy                           0.82        45
   macro avg       0.75      0.60      0.62        45
weighted avg       0.80      0.82      0.78        45



In [None]:
np.random.seed(2025)
knn_model_lasso.fit(X_LASSO, y)
y_pred_lasso_knn = knn_model_lasso.predict(X_LASSO_test)

In [None]:
from google.colab import files

pred_knn = pd.DataFrame({'ID': range(1, 92), 'Placement': y_pred_lasso_knn})

with open('KNN_LASSO_Cov.csv', 'w') as file:
    pred_knn.to_csv(file, index = False, header = True)

files.download('KNN_LASSO_Cov.csv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# Full - Without Standardization

In [None]:
np.random.seed(2025)
X_train, X_temp, y_train, y_temp = train_test_split(X_full_no, y, test_size = 0.3, random_state = 2025, stratify = y)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size = 0.5, random_state = 2025, stratify = y_temp)

In [None]:
np.random.seed(2025)
train_ind, val_ind = train_test_split(np.arange(X_temp.shape[0]), test_size = 0.3, random_state = 2025, stratify = y_temp)
split = PredefinedHoldoutSplit(valid_indices = val_ind)

In [None]:
params = {
    'n_neighbors': [3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27],
    'weights': ['uniform', 'distance'],
    'metric': ['euclidean', 'manhattan'],
    'algorithm': ['auto', 'brute']
    }

knn = KNeighborsClassifier()

In [None]:
np.random.seed(2025)
knn_grid = GridSearchCV(KNeighborsClassifier(), param_grid = params, cv = split, n_jobs = -1, scoring = 'f1')
knn_grid.fit(X_temp, y_temp)
print('Best Parameters:', knn_grid.best_params_)

Best Parameters: {'algorithm': 'auto', 'metric': 'manhattan', 'n_neighbors': 3, 'weights': 'distance'}


In [None]:
np.random.seed(2025)
knn_model = KNeighborsClassifier(metric = 'manhattan', n_neighbors = 3, weights = 'uniform', algorithm = 'auto')
knn_model.fit(X_temp, y_temp)
y_pred_knn_no = knn_model.predict(X_test)

print(classification_report(y_test, y_pred_knn_no))

              precision    recall  f1-score   support

           0       0.83      0.94      0.88        36
           1       0.50      0.22      0.31         9

    accuracy                           0.80        45
   macro avg       0.66      0.58      0.60        45
weighted avg       0.76      0.80      0.77        45



In [None]:
np.random.seed(2025)
knn_model.fit(X_full_no, y)
y_pred_knn_no = knn_model.predict(X_test_full_no)

In [None]:
from google.colab import files

pred_knn = pd.DataFrame({'ID': range(1, 92), 'Placement': y_pred_knn_no})

with open('KNN_Full_No_Cov.csv', 'w') as file:
    pred_knn.to_csv(file, index = False, header = True)

files.download('KNN_Full_No_Cov.csv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# Manual Selection:

In [None]:
np.random.seed(2025)
X_train, X_temp, y_train, y_temp = train_test_split(X_manual, y, test_size = 0.3, random_state = 2025, stratify = y)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size = 0.5, random_state = 2025, stratify = y_temp)

In [None]:
np.random.seed(2025)
train_ind, val_ind = train_test_split(np.arange(X_temp.shape[0]), test_size = 0.3, random_state = 2025, stratify = y_temp)
split = PredefinedHoldoutSplit(valid_indices = val_ind)

In [None]:
params = {
    'n_neighbors': [3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27],
    'weights': ['uniform', 'distance'],
    'metric': ['euclidean', 'manhattan'],
    'algorithm': ['auto', 'brute']
    }

knn = KNeighborsClassifier()

In [None]:
np.random.seed(2025)
knn_grid = GridSearchCV(KNeighborsClassifier(), param_grid = params, cv = split, n_jobs = -1, scoring = 'f1')
knn_grid.fit(X_temp, y_temp)
print('Best Parameters:', knn_grid.best_params_)

Best Parameters: {'algorithm': 'auto', 'metric': 'euclidean', 'n_neighbors': 3, 'weights': 'uniform'}


In [None]:
np.random.seed(2025)
knn_model = KNeighborsClassifier(metric = 'euclidean', n_neighbors = 3, weights = 'uniform', algorithm = 'auto')
knn_model.fit(X_temp, y_temp)
y_pred_manual = knn_model.predict(X_test)

print(classification_report(y_test, y_pred_manual))

              precision    recall  f1-score   support

           0       0.81      0.97      0.89        36
           1       0.50      0.11      0.18         9

    accuracy                           0.80        45
   macro avg       0.66      0.54      0.53        45
weighted avg       0.75      0.80      0.75        45



In [None]:
np.random.seed(2025)
knn_model.fit(X_manual, y)
y_pred_knn_manual = knn_model.predict(X_manual_test)

In [None]:
from google.colab import files

pred_knn = pd.DataFrame({'ID': range(1, 92), 'Placement': y_pred_knn_manual})

with open('KNN_Manual_Cov.csv', 'w') as file:
    pred_knn.to_csv(file, index = False, header = True)

files.download('KNN_Manual_Cov.csv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# SMOTE FOR BEST KNN:

In [None]:
from imblearn.over_sampling import SMOTE

In [None]:
smote = SMOTE(random_state = 2025)
X_resampled, y_resampled = smote.fit_resample(X_temp_lasso, y_temp_lasso)

In [None]:
np.random.seed(2025)
knn_model_smote = KNeighborsClassifier(metric = 'euclidean', n_neighbors = 3, weights = 'uniform', algorithm = 'auto')
knn_model_smote.fit(X_resampled, y_resampled)
y_pred_lasso_smote = knn_model_smote.predict(X_test_lasso)

print(classification_report(y_test, y_pred_lasso_smote))
y_pred_lasso_smote_knn = knn_model_smote.predict(X_LASSO_test)

              precision    recall  f1-score   support

           0       0.96      0.69      0.81        36
           1       0.42      0.89      0.57         9

    accuracy                           0.73        45
   macro avg       0.69      0.79      0.69        45
weighted avg       0.85      0.73      0.76        45



In [None]:
from google.colab import files

pred_knn = pd.DataFrame({'ID': range(1, 92), 'Placement': y_pred_lasso_smote_knn})

with open('KNN_SMOTE_Cov.csv', 'w') as file:
    pred_knn.to_csv(file, index = False, header = True)

files.download('KNN_SMOTE_Cov.csv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>