In [None]:
import pandas as pd
import pickle as pkl
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error

In [None]:
# Optional Google Colab setup (disabled). Uncomment these lines to mount
# Google Drive and change into the data directory, so that the relative
# CSV / pickle paths used by the following cells resolve.
# import os
# from google.colab import drive
# drive.mount('/content/drive') # Added shortcut to drive
# %cd "/content/drive/My Drive/IS4242/notebooks/data"

In [None]:
# Read the four pre-split CSV files (features / target, train / test).
X_train, X_test, y_train, y_test = (
    pd.read_csv(csv_path)
    for csv_path in ('X_train.csv', 'X_test.csv', 'y_train.csv', 'y_test.csv')
)

# Every file carries an 'Unnamed: 0' column that is not part of the data;
# remove it from each frame.
X_train, X_test, y_train, y_test = (
    frame.drop(columns=['Unnamed: 0'])
    for frame in (X_train, X_test, y_train, y_test)
)

In [None]:
def _load_pickle(path):
    """Return the object stored in the pickle file at `path`."""
    with open(path, 'rb') as handle:
        return pkl.load(handle)


# Previously saved preprocessing objects: the feature scaler and the two
# SelectKBest feature selectors.
# NOTE(review): pickle.load executes arbitrary code from the file, so only
# load pickles from a trusted source. The recorded cell output links to
# scikit-learn's model-persistence limitations page, which suggests a
# version-mismatch warning on load -- confirm the pickles were created with
# the installed scikit-learn version.
scaler = _load_pickle('scaler.pkl')
selectkbest_f_regression = _load_pickle('selectkbest_f_regression.pkl')
selectkbest_mutual_info_regression = _load_pickle('selectkbest_mutual_info_regression.pkl')

https://scikit-learn.org/stable/modules/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/modules/model_persistence.html#security-maintainability-limitations


In [None]:
# Apply the pre-fitted scaler to both splits.
# NOTE: X_train / X_test are overwritten here, so re-running this cell would
# scale already-scaled data; re-run the loading cell first if needed.
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

# Alternative (disabled): MinMax scaling fitted on the training split.
# mm_scaler = MinMaxScaler().fit(X_train)
# X_train = mm_scaler.transform(X_train)
# X_test = mm_scaler.transform(X_test)

# Feature subsets chosen by the pre-fitted SelectKBest (F regression) selector.
X_train_f, X_test_f = (
    selectkbest_f_regression.transform(split) for split in (X_train, X_test)
)

# Feature subsets chosen by the pre-fitted SelectKBest (mutual info regression) selector.
X_train_MI, X_test_MI = (
    selectkbest_mutual_info_regression.transform(split) for split in (X_train, X_test)
)

In [None]:
# Train MLPRegressor model on Scaled training data and report the RMSE of
# its predictions on the scaled test data.
# The target is flattened to 1-D before fitting: passing the y_train frame
# directly triggers the `column_or_1d` conversion warning seen in the
# recorded output.
mlp_reg = MLPRegressor(
    hidden_layer_sizes=(128, 64, 32),
    activation="relu",
    random_state=0,
    max_iter=400,
).fit(X_train, np.ravel(y_train))
y_pred = mlp_reg.predict(X_test)
# RMSE is computed as sqrt(MSE) explicitly because the `squared=False`
# argument of mean_squared_error is deprecated since scikit-learn 1.4 and
# removed in later releases; for a single target the value is identical.
rmse_scaled = np.sqrt(mean_squared_error(y_test, y_pred))
print('Root Mean Squared Error on scaled training data: ', rmse_scaled)

  y = column_or_1d(y, warn=True)


Root Mean Squared Error on scaled training data:  3158141.4037107513


In [None]:
# Train MLPRegressor model on Scaled training data after F Regression
# feature selection and report the RMSE of its predictions on the matching
# test features.
# The target is flattened to 1-D before fitting: passing the y_train frame
# directly triggers the `column_or_1d` conversion warning seen in the
# recorded output.
mlp_reg_f = MLPRegressor(
    hidden_layer_sizes=(128, 64, 32),
    activation="relu",
    random_state=0,
    max_iter=400,
).fit(X_train_f, np.ravel(y_train))
y_pred_f = mlp_reg_f.predict(X_test_f)
# RMSE is computed as sqrt(MSE) explicitly because the `squared=False`
# argument of mean_squared_error is deprecated since scikit-learn 1.4 and
# removed in later releases; for a single target the value is identical.
rmse_f = np.sqrt(mean_squared_error(y_test, y_pred_f))
print('Root Mean Squared Error on training data after F Regression: ', rmse_f)

  y = column_or_1d(y, warn=True)


Root Mean Squared Error on training data after F Regression:  3215463.26626019


In [None]:
# Train MLPRegressor model on Scaled training data after Mutual Info
# Regression feature selection and report the RMSE of its predictions on the
# matching test features.
# The target is flattened to 1-D before fitting: passing the y_train frame
# directly triggers the `column_or_1d` conversion warning seen in the
# recorded output.
mlp_reg_MI = MLPRegressor(
    hidden_layer_sizes=(128, 64, 32),
    activation="relu",
    random_state=0,
    max_iter=400,
).fit(X_train_MI, np.ravel(y_train))
y_pred_MI = mlp_reg_MI.predict(X_test_MI)
# RMSE is computed as sqrt(MSE) explicitly because the `squared=False`
# argument of mean_squared_error is deprecated since scikit-learn 1.4 and
# removed in later releases; for a single target the value is identical.
rmse_MI = np.sqrt(mean_squared_error(y_test, y_pred_MI))
print('Root Mean Squared Error on training data after Mutual Info Regression: ', rmse_MI)

  y = column_or_1d(y, warn=True)


Root Mean Squared Error on training data after Mutual Info Regression:  3138585.7843748564


In [None]:
# Hyperparameter Tuning using Grid Search for Scaled data
from sklearn.model_selection import GridSearchCV

# Seed the estimator (same seed as the baseline MLP cells) so that repeated
# runs of the search give the same weight initialisation and batch order.
mlp_reg = MLPRegressor(random_state=0)

# 2 * 3 * 2 * 2 * 2 * 2 = 96 candidate configurations.
param_grid = {
    'hidden_layer_sizes': [(128, 64, 32), (256, 128, 64)],
    'max_iter': [200, 400, 600],
    'activation': ['tanh', 'relu'],
    'solver': ['sgd', 'adam'],
    'alpha': [0.0001, 0.05],
    'learning_rate': ['constant', 'adaptive'],
}

# scoring='r2' is what a regressor's default scorer computes; it is spelled
# out so the meaning of best_score_ is explicit.
# NOTE(review): the recorded output contains nan / extremely negative scores
# for some candidates -- presumably diverging fits on the unscaled target;
# consider scaling y (e.g. TransformedTargetRegressor) -- confirm.
mlp_grid = GridSearchCV(mlp_reg, param_grid, scoring='r2', n_jobs=-1, cv=5, verbose=3)
# Flatten the target to 1-D to avoid the column-vector conversion warning.
mlp_grid.fit(X_train, np.ravel(y_train))

# Best model hyperparameters
print('Best model hyperparameters: ', mlp_grid.best_params_)
# Mean cross-validated R^2 of the best model (a regressor has no accuracy)
print('Best model mean CV R^2 score: ', mlp_grid.best_score_)

Fitting 5 folds for each of 96 candidates, totalling 480 fits


  1.09663203e-002 -6.16207438e-001  1.21478718e-002 -6.16839283e-001
  1.20775284e-002 -6.16521839e-001  2.11938671e-002 -6.16203453e-001
  1.25510930e-002 -6.16529527e-001  2.32830001e-002 -6.15909511e-001
  7.23067672e-003 -6.15299329e-001  2.32650482e-002 -6.16529361e-001
  2.08872064e-002 -6.15904799e-001  2.52882282e-002 -6.15389775e-001
  1.93449236e-002 -6.16838835e-001  1.45524411e-002 -6.16520752e-001
  2.03122376e-002 -6.16202509e-001  8.35139510e-003 -6.16839027e-001
  2.06212201e-002 -6.16522681e-001  1.03195845e-002 -6.16204429e-001
  7.16385444e-003 -6.16529352e-001  1.75536071e-002 -6.15903088e-001
  2.92389530e-002 -6.15690119e-001  2.08482534e-002 -6.16528763e-001
  2.85160594e-002 -6.15902297e-001  2.80519746e-002 -6.15324178e-001
 -5.62799105e+125 -1.01683771e+000              nan -1.30501120e+000
              nan -1.57896977e+000 -3.63378154e+184 -9.99127981e-001
              nan -1.37267825e+000              nan -1.73819794e+000
              nan -1.27925703e+000

Best model hyperparameters:  {'activation': 'tanh', 'alpha': 0.05, 'hidden_layer_sizes': (256, 128, 64), 'learning_rate': 'constant', 'max_iter': 600, 'solver': 'sgd'}
Best model accuracy:  0.029238952989894585


In [None]:
# TODO: Feature importance scores for the MLPRegressor model on scaled data (not yet implemented)


In [None]:
# Hyperparameter Tuning using Grid Search for training data after F Regression
from sklearn.model_selection import GridSearchCV

# Seed the estimator (same seed as the baseline MLP cells) so that repeated
# runs of the search give the same weight initialisation and batch order.
mlp_reg_f = MLPRegressor(random_state=0)

# 2 * 3 * 2 * 2 * 2 * 2 = 96 candidate configurations.
param_grid = {
    'hidden_layer_sizes': [(128, 64, 32), (256, 128, 64)],
    'max_iter': [200, 400, 600],
    'activation': ['tanh', 'relu'],
    'solver': ['sgd', 'adam'],
    'alpha': [0.0001, 0.05],
    'learning_rate': ['constant', 'adaptive'],
}

# scoring='r2' is what a regressor's default scorer computes; it is spelled
# out so the meaning of best_score_ is explicit.
# NOTE(review): the recorded output contains nan scores for some candidates
# -- presumably diverging fits on the unscaled target; consider scaling y
# (e.g. TransformedTargetRegressor) -- confirm.
mlp_grid_f = GridSearchCV(mlp_reg_f, param_grid, scoring='r2', n_jobs=-1, cv=5, verbose=3)
# Flatten the target to 1-D to avoid the column-vector conversion warning.
mlp_grid_f.fit(X_train_f, np.ravel(y_train))

# Best model hyperparameters
print('Best model hyperparameters: ', mlp_grid_f.best_params_)
# Mean cross-validated R^2 of the best model (a regressor has no accuracy)
print('Best model mean CV R^2 score: ', mlp_grid_f.best_score_)

Fitting 5 folds for each of 96 candidates, totalling 480 fits


  0.02157692 -0.61683934  0.04338086 -0.61652223  0.03290938 -0.61620315
  0.05367436 -0.61652889  0.0445328  -0.61590429  0.06897503 -0.61527666
  0.06490286 -0.61653003  0.05819868 -0.61590437  0.03958667 -0.61527674
  0.01308827 -0.61683981  0.01936918 -0.61652073 -0.00602141 -0.61620286
  0.04273437 -0.61683856  0.08260931 -0.61652089  0.0235392  -0.61620352
  0.03820673 -0.61653034  0.01332533 -0.61590301  0.05444821 -0.61527696
  0.03578746 -0.61653014  0.0261694  -0.61590387  0.02428969 -0.61527669
         nan -0.93887081         nan -0.9418617          nan -0.93115769
         nan -0.90733335         nan -0.93909458         nan -0.95332131
         nan -0.93845248         nan -0.95220398         nan -0.93951413
         nan -0.92313838         nan -0.92279403         nan -0.90649561
         nan -0.92749434         nan -0.9448944          nan -0.91483749
         nan -0.95223227         nan -0.9431185          nan -0.95342024
         nan -0.93952268         nan -0.9517376    

Best model hyperparameters:  {'activation': 'tanh', 'alpha': 0.05, 'hidden_layer_sizes': (128, 64, 32), 'learning_rate': 'adaptive', 'max_iter': 400, 'solver': 'sgd'}
Best model accuracy:  0.08260931183673392


In [None]:
# Hyperparameter Tuning using Grid Search for training data after Mutual Info Regression
from sklearn.model_selection import GridSearchCV

# Seed the estimator (same seed as the baseline MLP cells) so that repeated
# runs of the search give the same weight initialisation and batch order.
mlp_reg_MI = MLPRegressor(random_state=0)

# 2 * 3 * 2 * 2 * 2 * 2 = 96 candidate configurations.
param_grid = {
    'hidden_layer_sizes': [(128, 64, 32), (256, 128, 64)],
    'max_iter': [200, 400, 600],
    'activation': ['tanh', 'relu'],
    'solver': ['sgd', 'adam'],
    'alpha': [0.0001, 0.05],
    'learning_rate': ['constant', 'adaptive'],
}

# scoring='r2' is what a regressor's default scorer computes; it is spelled
# out so the meaning of best_score_ is explicit.
# NOTE(review): the recorded output contains nan scores for some candidates
# -- presumably diverging fits on the unscaled target; consider scaling y
# (e.g. TransformedTargetRegressor) -- confirm.
mlp_grid_MI = GridSearchCV(mlp_reg_MI, param_grid, scoring='r2', n_jobs=-1, cv=5, verbose=3)
# Flatten the target to 1-D to avoid the column-vector conversion warning.
mlp_grid_MI.fit(X_train_MI, np.ravel(y_train))

# Best model hyperparameters
print('Best model hyperparameters: ', mlp_grid_MI.best_params_)
# Mean cross-validated R^2 of the best model. The original label here
# repeated "hyperparameters" although the value printed is best_score_.
print('Best model mean CV R^2 score: ', mlp_grid_MI.best_score_)

Fitting 5 folds for each of 96 candidates, totalling 480 fits


  2.39203494e-02 -6.16203349e-01  2.73563312e-02 -6.16838961e-01
  2.13562445e-02 -6.16521997e-01  1.33859267e-02 -6.16203136e-01
  3.06893441e-02 -6.16528288e-01 -3.41388113e-03 -6.15902772e-01
  9.83248688e-05 -6.15276596e-01  1.02451538e-02 -6.16528502e-01
  1.56546460e-02 -6.15902627e-01  9.62596555e-03 -6.15276851e-01
  1.48542350e-02 -6.16839342e-01  1.62759093e-02 -6.16521437e-01
  4.17052953e-03 -6.16203667e-01  1.02333546e-02 -6.16838693e-01
  3.56573205e-03 -6.16521111e-01  1.87491282e-03 -6.16203629e-01
  1.17031369e-03 -6.16528380e-01  8.92093916e-03 -6.15902251e-01
 -1.74603469e-04 -6.15277017e-01  8.98204310e-03 -6.16528907e-01
  6.66318509e-03 -6.15903211e-01  1.73434985e-03 -6.15277213e-01
             nan -9.15967084e-01             nan -9.33816994e-01
             nan -9.12519595e-01             nan -9.10034116e-01
             nan -9.45450467e-01             nan -8.97688398e-01
             nan -9.06451040e-01             nan -9.05006801e-01
             nan -9.17711

Best model hyperparameters:  {'activation': 'tanh', 'alpha': 0.0001, 'hidden_layer_sizes': (256, 128, 64), 'learning_rate': 'constant', 'max_iter': 200, 'solver': 'sgd'}
Best model hyperparameters:  0.03068934414760698
