In [15]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from mlxtend.evaluate import PredefinedHoldoutSplit
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report

In [16]:
X_full = pd.read_csv('New_Train_Features_Full.csv')
X_test_full = pd.read_csv('New_Test_Features_Full.csv')
y = pd.read_csv('Train_Target.csv').iloc[:,1]

X_full_no = pd.read_csv('New_Train_Features.csv')
X_test_full_no = pd.read_csv('New_Test_Features.csv')

X_no_manual = pd.read_csv('New_Train_Features_No_Selected.csv')
X_test_no_manual = pd.read_csv('New_Test_Features_No_Selected.csv')

X_RFE = pd.read_csv('New_Train_Features_RFE.csv')
X_RFE_test = pd.read_csv('New_Test_Features_RFE.csv')

X_LASSO = pd.read_csv('New_Train_Features_LASSO.csv')
X_LASSO_test = pd.read_csv('New_Test_Features_LASSO.csv')

X_manual = pd.read_csv('New_Train_Features_Selected.csv')
X_manual_test = pd.read_csv('New_Test_Features_Selected.csv')

# Full Dataset:

In [17]:
np.random.seed(2025)
X_train, X_temp, y_train, y_temp = train_test_split(X_full, y, test_size = 0.3, random_state = 2025, stratify = y)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size = 0.5, random_state = 2025, stratify = y_temp)

In [18]:
np.random.seed(2025)
train_ind, val_ind = train_test_split(np.arange(X_temp.shape[0]), test_size = 0.3, random_state = 2025, stratify = y_temp)
split = PredefinedHoldoutSplit(valid_indices = val_ind)

In [19]:
params = {
    'hidden_layer_sizes': [(64,), (128,), (64, 32), (128, 64), (128, 64, 32)],
    'activation': ['relu', 'tanh'],
    'alpha': [0.0001, 0.001, 0.01, 0.1],
    'learning_rate': ['constant', 'invscaling', 'adaptive'],
    'learning_rate_init': [0.001, 0.01, 0.1],
    'batch_size': [16, 32, 64]
}


mlp = MLPClassifier(random_state = 2025)

In [20]:
np.random.seed(2025)
mlp_grid = GridSearchCV(mlp, param_grid = params, cv = split, n_jobs = -1, scoring = 'f1_macro')
mlp_grid.fit(X_temp, y_temp)
print('Best Parameters:', mlp_grid.best_params_)

Best Parameters: {'activation': 'relu', 'alpha': 0.1, 'batch_size': 32, 'hidden_layer_sizes': (128, 64, 32), 'learning_rate': 'constant', 'learning_rate_init': 0.1}


In [59]:
np.random.seed(2025)
mlp_model = MLPClassifier(activation = 'relu', alpha = 0.1, batch_size = 32, hidden_layer_sizes = (128, 64, 32), learning_rate = 'constant', learning_rate_init = 0.1, random_state = 2025)
mlp_model.fit(X_temp, y_temp,)
y_pred = mlp_model.predict(X_test)

print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       1.00      0.92      0.96        36
           1       0.75      1.00      0.86         9

    accuracy                           0.93        45
   macro avg       0.88      0.96      0.91        45
weighted avg       0.95      0.93      0.94        45



In [23]:
np.random.seed(2025)
mlp_model.fit(X_full, y)
y_pred_mlp = mlp_model.predict(X_test_full)

In [24]:
from google.colab import files

pred_mlp = pd.DataFrame({'ID': range(1, 92), 'Placement': y_pred_mlp})

with open('MLP_Full_Cov.csv', 'w') as file:
    pred_mlp.to_csv(file, index = False, header = True)

files.download('MLP_Full_Cov.csv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# RFE Dataset:

In [25]:
np.random.seed(2025)
X_train_rfe, X_temp_rfe, y_train_rfe, y_temp_rfe = train_test_split(X_RFE, y, test_size = 0.3, random_state = 2025, stratify = y)
X_val_rfe, X_test_rfe, y_val_rfe, y_test_rfe = train_test_split(X_temp_rfe, y_temp_rfe, test_size = 0.5, random_state = 2025, stratify = y_temp_rfe)

In [26]:
np.random.seed(2025)
train_rfe_ind, val_rfe_ind = train_test_split(np.arange(X_temp_rfe.shape[0]), test_size = 0.3, random_state = 2025, stratify = y_temp_rfe)
split_rfe = PredefinedHoldoutSplit(valid_indices = val_rfe_ind)

In [27]:
np.random.seed(2025)
mlp_grid_rfe = GridSearchCV(mlp, param_grid = params, cv = split_rfe, n_jobs = -1, scoring = 'f1_macro')
mlp_grid_rfe.fit(X_temp_rfe, y_temp_rfe)
print('Best Parameters:', mlp_grid_rfe.best_params_)

Best Parameters: {'activation': 'tanh', 'alpha': 0.0001, 'batch_size': 32, 'hidden_layer_sizes': (64, 32), 'learning_rate': 'constant', 'learning_rate_init': 0.1}


In [28]:
np.random.seed(2025)
mlp_model_rfe = MLPClassifier(activation = 'tanh', alpha = 0.0001, batch_size = 32, hidden_layer_sizes = (64, 32), learning_rate = 'constant', learning_rate_init = 0.1, random_state = 2025)
mlp_model_rfe.fit(X_temp_rfe, y_temp_rfe)
y_pred_rfe = mlp_model_rfe.predict(X_test_rfe)

print(classification_report(y_test, y_pred_rfe, zero_division = 1))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        36
           1       1.00      1.00      1.00         9

    accuracy                           1.00        45
   macro avg       1.00      1.00      1.00        45
weighted avg       1.00      1.00      1.00        45



In [29]:
np.random.seed(2025)
mlp_model_rfe.fit(X_RFE, y)
y_pred_rfe_mlp = mlp_model_rfe.predict(X_RFE_test)

In [30]:
from google.colab import files

pred_mlp = pd.DataFrame({'ID': range(1, 92), 'Placement': y_pred_rfe_mlp})

with open('MLP_RFE_Cov.csv', 'w') as file:
    pred_mlp.to_csv(file, index = False, header = True)

files.download('MLP_RFE_Cov.csv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# LASSO:

In [31]:
np.random.seed(2025)
X_train_lasso, X_temp_lasso, y_train_lasso, y_temp_lasso = train_test_split(X_LASSO, y, test_size = 0.3, random_state = 2025, stratify = y)
X_val_lasso, X_test_lasso, y_val_lasso, y_test_lasso = train_test_split(X_temp_lasso, y_temp_lasso, test_size = 0.5, random_state = 2025, stratify = y_temp_lasso)

In [32]:
np.random.seed(2025)
train_lasso_ind, val_lasso_ind = train_test_split(np.arange(X_temp_lasso.shape[0]), test_size = 0.3, random_state = 2025, stratify = y_temp_lasso)
split_lasso = PredefinedHoldoutSplit(valid_indices = val_lasso_ind)

In [33]:
np.random.seed(2025)
mlp_grid_lasso = GridSearchCV(mlp, param_grid = params, cv = split_lasso, n_jobs = -1, scoring = 'f1_macro')
mlp_grid_lasso.fit(X_temp_lasso, y_temp_lasso)
print('Best Parameters:', mlp_grid_lasso.best_params_)

Best Parameters: {'activation': 'tanh', 'alpha': 0.0001, 'batch_size': 64, 'hidden_layer_sizes': (128, 64, 32), 'learning_rate': 'constant', 'learning_rate_init': 0.1}


In [34]:
np.random.seed(2025)
mlp_model_lasso = MLPClassifier(activation = 'tanh', alpha = 0.0001, batch_size = 64, hidden_layer_sizes = (128, 64, 32), learning_rate = 'constant', learning_rate_init = 0.1, random_state = 2025)
mlp_model_lasso.fit(X_temp_lasso, y_temp_lasso)
y_pred_lasso = mlp_model_lasso.predict(X_test_lasso)

print(classification_report(y_test, y_pred_lasso,zero_division = 1))

              precision    recall  f1-score   support

           0       0.80      1.00      0.89        36
           1       1.00      0.00      0.00         9

    accuracy                           0.80        45
   macro avg       0.90      0.50      0.44        45
weighted avg       0.84      0.80      0.71        45



In [35]:
np.random.seed(2025)
mlp_model_lasso.fit(X_LASSO, y)
y_pred_lasso_mlp = mlp_model_lasso.predict(X_LASSO_test)

In [36]:
from google.colab import files

pred_mlp = pd.DataFrame({'ID': range(1, 92), 'Placement': y_pred_lasso_mlp})

with open('MLP_LASSO_Cov.csv', 'w') as file:
    pred_mlp.to_csv(file, index = False, header = True)

files.download('MLP_LASSO_Cov.csv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# Full - Without Standardization




In [37]:
np.random.seed(2025)
X_train, X_temp, y_train, y_temp = train_test_split(X_full_no, y, test_size = 0.3, random_state = 2025, stratify = y)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size = 0.5, random_state = 2025, stratify = y_temp)

In [38]:
np.random.seed(2025)
train_ind, val_ind = train_test_split(np.arange(X_temp.shape[0]), test_size = 0.3, random_state = 2025, stratify = y_temp)
split = PredefinedHoldoutSplit(valid_indices = val_ind)

In [39]:
np.random.seed(2025)
mlp_grid = GridSearchCV(mlp, param_grid = params, cv = split, n_jobs = -1, scoring = 'f1_macro')
mlp_grid.fit(X_temp, y_temp)
print('Best Parameters:', mlp_grid.best_params_)

Best Parameters: {'activation': 'tanh', 'alpha': 0.01, 'batch_size': 32, 'hidden_layer_sizes': (128, 64), 'learning_rate': 'constant', 'learning_rate_init': 0.01}


In [41]:
np.random.seed(2025)
mlp_model = MLPClassifier(activation = 'tanh', alpha = 0.01, batch_size = 32, hidden_layer_sizes = (128, 64), learning_rate = 'constant', learning_rate_init = 0.01, random_state = 2025)
mlp_model.fit(X_temp, y_temp)
y_pred_mlp_no = mlp_model.predict(X_test)

print(classification_report(y_test, y_pred_mlp_no, zero_division = 1))

              precision    recall  f1-score   support

           0       0.80      1.00      0.89        36
           1       1.00      0.00      0.00         9

    accuracy                           0.80        45
   macro avg       0.90      0.50      0.44        45
weighted avg       0.84      0.80      0.71        45



In [42]:
np.random.seed(2025)
mlp_model.fit(X_full_no, y)
y_pred_mlp_no = mlp_model.predict(X_test_full_no)

In [43]:
from google.colab import files

pred_mlp = pd.DataFrame({'ID': range(1, 92), 'Placement': y_pred_mlp_no})

with open('MLP_Full_No_Cov.csv', 'w') as file:
    pred_mlp.to_csv(file, index = False, header = True)

files.download('MLP_Full_No_Cov.csv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# Full - Without Standardization - Manual



In [44]:
np.random.seed(2025)
X_train, X_temp, y_train, y_temp = train_test_split(X_no_manual, y, test_size = 0.3, random_state = 2025, stratify = y)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size = 0.5, random_state = 2025, stratify = y_temp)

In [45]:
np.random.seed(2025)
train_ind, val_ind = train_test_split(np.arange(X_temp.shape[0]), test_size = 0.3, random_state = 2025, stratify = y_temp)
split = PredefinedHoldoutSplit(valid_indices = val_ind)

In [46]:
np.random.seed(2025)
mlp_grid = GridSearchCV(mlp, param_grid = params, cv = split, n_jobs = -1, scoring = 'f1_macro')
mlp_grid.fit(X_temp, y_temp)
print('Best Parameters:', mlp_grid.best_params_)

Best Parameters: {'activation': 'tanh', 'alpha': 0.1, 'batch_size': 64, 'hidden_layer_sizes': (128, 64, 32), 'learning_rate': 'constant', 'learning_rate_init': 0.01}


In [48]:
np.random.seed(2025)
mlp_model = MLPClassifier(activation = 'tanh', alpha = 0.1, batch_size = 64, hidden_layer_sizes = (128, 64, 32), learning_rate = 'constant', learning_rate_init = 0.01, random_state = 2025)
mlp_model.fit(X_temp, y_temp)
y_pred_manual = mlp_model.predict(X_test)

print(classification_report(y_test, y_pred_manual, zero_division = 1))

              precision    recall  f1-score   support

           0       0.80      1.00      0.89        36
           1       1.00      0.00      0.00         9

    accuracy                           0.80        45
   macro avg       0.90      0.50      0.44        45
weighted avg       0.84      0.80      0.71        45



In [49]:
np.random.seed(2025)
mlp_model.fit(X_no_manual, y)
y_pred_mlp_no_manual = mlp_model.predict(X_test_no_manual)

In [50]:
from google.colab import files

pred_mlp = pd.DataFrame({'ID': range(1, 92), 'Placement': y_pred_mlp_no_manual})

with open('MLP_No_Manual_Cov.csv', 'w') as file:
    pred_mlp.to_csv(file, index = False, header = True)

files.download('MLP_No_Manual_Cov.csv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# Manual Selection:

In [51]:
np.random.seed(2025)
X_train, X_temp, y_train, y_temp = train_test_split(X_manual, y, test_size = 0.3, random_state = 2025, stratify = y)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size = 0.5, random_state = 2025, stratify = y_temp)

In [52]:
np.random.seed(2025)
train_ind, val_ind = train_test_split(np.arange(X_temp.shape[0]), test_size = 0.3, random_state = 2025, stratify = y_temp)
split = PredefinedHoldoutSplit(valid_indices = val_ind)

In [53]:
np.random.seed(2025)
mlp_grid = GridSearchCV(mlp, param_grid = params, cv = split, n_jobs = -1, scoring = 'f1_macro')
mlp_grid.fit(X_temp, y_temp)
print('Best Parameters:', mlp_grid.best_params_)

Best Parameters: {'activation': 'tanh', 'alpha': 0.001, 'batch_size': 32, 'hidden_layer_sizes': (128, 64, 32), 'learning_rate': 'constant', 'learning_rate_init': 0.1}


In [54]:
np.random.seed(2025)
mlp_model = MLPClassifier(activation = 'tanh', alpha = 0.001, batch_size = 32, hidden_layer_sizes = (128, 64, 32), learning_rate = 'constant', learning_rate_init = 0.1, random_state = 2025)
mlp_model.fit(X_temp, y_temp)
y_pred_manual = mlp_model.predict(X_test)

print(classification_report(y_test, y_pred_manual))

              precision    recall  f1-score   support

           0       0.82      1.00      0.90        36
           1       1.00      0.11      0.20         9

    accuracy                           0.82        45
   macro avg       0.91      0.56      0.55        45
weighted avg       0.85      0.82      0.76        45



In [55]:
np.random.seed(2025)
mlp_model.fit(X_manual, y)
y_pred_mlp_manual = mlp_model.predict(X_manual_test)

In [56]:
from google.colab import files

pred_mlp = pd.DataFrame({'ID': range(1, 92), 'Placement': y_pred_mlp_manual})

with open('MLP_Manual_Cov.csv', 'w') as file:
    pred_mlp.to_csv(file, index = False, header = True)

files.download('MLP_Manual_Cov.csv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# SMOTE FOR BEST MLP:

In [62]:
from imblearn.over_sampling import SMOTE

In [63]:
np.random.seed(2025)
X_train, X_temp, y_train, y_temp = train_test_split(X_RFE, y, test_size = 0.3, random_state = 2025, stratify = y)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size = 0.5, random_state = 2025, stratify = y_temp)

np.random.seed(2025)
train_ind, val_ind = train_test_split(np.arange(X_temp.shape[0]), test_size = 0.3, random_state = 2025, stratify = y_temp)
split = PredefinedHoldoutSplit(valid_indices = val_ind)

smote = SMOTE(random_state = 2025)
X_resampled, y_resampled = smote.fit_resample(X_temp, y_temp)

In [64]:
np.random.seed(2025)
mlp_grid = GridSearchCV(MLPClassifier(random_state = 2025), param_grid = params, cv = split, n_jobs = -1, scoring = 'f1_macro')
mlp_grid.fit(X_temp, y_temp)
print('Best Parameters:', mlp_grid.best_params_)

Best Parameters: {'activation': 'tanh', 'alpha': 0.0001, 'batch_size': 32, 'hidden_layer_sizes': (64, 32), 'learning_rate': 'constant', 'learning_rate_init': 0.1}


In [66]:
np.random.seed(2025)
mlp_model_smote = MLPClassifier(activation = 'tanh', alpha = 0.0001, batch_size = 32, hidden_layer_sizes = (64, 32), learning_rate = 'constant', learning_rate_init = 0.1, random_state = 2025)
mlp_model_smote.fit(X_resampled, y_resampled)
y_pred_smote = mlp_model_smote.predict(X_test)

print(classification_report(y_test, y_pred_smote))
y_pred_smote_mlp = mlp_model_smote.predict(X_RFE_test)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        36
           1       1.00      1.00      1.00         9

    accuracy                           1.00        45
   macro avg       1.00      1.00      1.00        45
weighted avg       1.00      1.00      1.00        45



In [67]:
from google.colab import files

pred_mlp = pd.DataFrame({'ID': range(1, 92), 'Placement': y_pred_smote_mlp})

with open('MLP_SMOTE_Cov.csv', 'w') as file:
    pred_mlp.to_csv(file, index = False, header = True)

files.download('MLP_SMOTE_Cov.csv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>