In [1]:
import pandas as pd
import numpy as np
from scipy.stats import mode
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import StratifiedKFold
from skopt import BayesSearchCV
from sklearn.neural_network import MLPClassifier

In [2]:
data_path = 'procDF2.csv'
processed_df = pd.read_csv(data_path)
processed_df

Unnamed: 0,DIS,FLO,LOY,PRI,PIM,FEA,LAG,CHO
0,-0.43600,-0.056,1.0,0.1429,0.03510,-0.175710,1.0,9
1,3.85290,-0.576,1.0,0.8136,-0.48915,-0.588930,1.0,9
2,4.58810,0.260,1.0,0.1502,-0.09050,-0.195670,1.0,9
3,61.55460,-0.139,1.0,0.0817,-0.08200,-0.435940,1.0,9
4,1.70100,0.259,1.0,-0.1534,-0.05960,0.015470,1.0,9
...,...,...,...,...,...,...,...,...
3146269,4.10573,0.135,0.0,-2.1534,0.08141,5.524900,1.0,5
3146270,37.22023,2.173,0.0,-2.5829,0.11651,0.619060,1.0,5
3146271,0.03146,0.348,0.0,-1.2892,-0.19316,0.032053,1.0,5
3146272,5.57263,0.168,0.0,-2.1721,0.12531,1.746300,1.0,5


In [4]:
shops_features = ['DIS','FLO','LOY','PRI','PIM','FEA','LAG']
X = processed_df[shops_features]
y = processed_df.CHO


transformer_num = make_pipeline(
    StandardScaler()
)


X_train, X_valid, y_train, y_valid = \
    train_test_split(X, y,stratify=y, test_size=0.33)



X_train = transformer_num.fit_transform(X_train)
X_valid = transformer_num.transform(X_valid)

Untuned model parameters

In [6]:
default_model = MLPClassifier(random_state=1)
print("Default parameters in model: \n", default_model.get_params())

Default parameters in model: 
 {'activation': 'relu', 'alpha': 0.0001, 'batch_size': 'auto', 'beta_1': 0.9, 'beta_2': 0.999, 'early_stopping': False, 'epsilon': 1e-08, 'hidden_layer_sizes': (100,), 'learning_rate': 'constant', 'learning_rate_init': 0.001, 'max_fun': 15000, 'max_iter': 200, 'momentum': 0.9, 'n_iter_no_change': 10, 'nesterovs_momentum': True, 'power_t': 0.5, 'random_state': 1, 'shuffle': True, 'solver': 'adam', 'tol': 0.0001, 'validation_fraction': 0.1, 'verbose': False, 'warm_start': False}


Test of early stopping

In [9]:
MLP_model = MLPClassifier(random_state=1, early_stopping=True)

MLP_model.fit(X_train, y_train)

y_pred_valid = MLP_model.predict(X_valid)
y_pred_train = MLP_model.predict(X_train)

acc_valid = accuracy_score(y_valid, y_pred_valid)
acc_train = accuracy_score(y_train, y_pred_train)
mae_valid = mean_absolute_error(y_valid, y_pred_valid)
mae_train = mean_absolute_error(y_train, y_pred_train)

print("Validation accuracy: ", acc_valid)
print("Training accuracy: ", acc_train)
print("Validation mae: ", mae_valid)
print("Training mae: ", mae_train)

Validation accuracy:  0.6693136955573256
Training accuracy:  0.6696612860607883
Validation mae:  1.6655622664988234
Training mae:  1.6658614812218009


Testing different hidden layer sizes

In [11]:
MLP_model = MLPClassifier(random_state=1, early_stopping=True, hidden_layer_sizes=(50,50,50))

MLP_model.fit(X_train, y_train)

y_pred_valid = MLP_model.predict(X_valid)
y_pred_train = MLP_model.predict(X_train)

acc_valid = accuracy_score(y_valid, y_pred_valid)
acc_train = accuracy_score(y_train, y_pred_train)
mae_valid = mean_absolute_error(y_valid, y_pred_valid)
mae_train = mean_absolute_error(y_train, y_pred_train)

print("Validation accuracy: ", acc_valid)
print("Training accuracy: ", acc_train)
print("Validation mae: ", mae_valid)
print("Training mae: ", mae_train)

Validation accuracy:  0.7101411866458757
Training accuracy:  0.710809709473848
Validation mae:  1.471899918229441
Training mae:  1.4683299786575257


In [14]:
MLP_model = MLPClassifier(random_state=1, early_stopping=True, hidden_layer_sizes=(125, 100, 75))

MLP_model.fit(X_train, y_train)

y_pred_valid = MLP_model.predict(X_valid)
y_pred_train = MLP_model.predict(X_train)

acc_valid = accuracy_score(y_valid, y_pred_valid)
acc_train = accuracy_score(y_train, y_pred_train)
mae_valid = mean_absolute_error(y_valid, y_pred_valid)
mae_train = mean_absolute_error(y_train, y_pred_train)

print("Validation accuracy: ", acc_valid)
print("Training accuracy: ", acc_train)
print("Validation mae: ", mae_valid)
print("Training mae: ", mae_train)

Validation accuracy:  0.8107295686771565
Training accuracy:  0.8135420110882196
Validation mae:  0.9302118618356865
Training mae:  0.9169773477551977


Checking adaptive learning rate

In [15]:
MLP_model = MLPClassifier(random_state=1, early_stopping=True, hidden_layer_sizes=(125, 100, 75), learning_rate='adaptive')

MLP_model.fit(X_train, y_train)

y_pred_valid = MLP_model.predict(X_valid)
y_pred_train = MLP_model.predict(X_train)

acc_valid = accuracy_score(y_valid, y_pred_valid)
acc_train = accuracy_score(y_train, y_pred_train)
mae_valid = mean_absolute_error(y_valid, y_pred_valid)
mae_train = mean_absolute_error(y_train, y_pred_train)

print("Validation accuracy: ", acc_valid)
print("Training accuracy: ", acc_train)
print("Validation mae: ", mae_valid)
print("Training mae: ", mae_train)

Validation accuracy:  0.8107295686771565
Training accuracy:  0.8135420110882196
Validation mae:  0.9302118618356865
Training mae:  0.9169773477551977


Checking different size of alpha

In [5]:
MLP_model = MLPClassifier(random_state=1, early_stopping=True, hidden_layer_sizes=(125, 100, 75), alpha=0.05)

MLP_model.fit(X_train, y_train)

y_pred_valid = MLP_model.predict(X_valid)
y_pred_train = MLP_model.predict(X_train)

acc_valid = accuracy_score(y_valid, y_pred_valid)
acc_train = accuracy_score(y_train, y_pred_train)
mae_valid = mean_absolute_error(y_valid, y_pred_valid)
mae_train = mean_absolute_error(y_train, y_pred_train)

print("Validation accuracy: ", acc_valid)
print("Training accuracy: ", acc_train)
print("Validation mae: ", mae_valid)
print("Training mae: ", mae_train)

Validation accuracy:  0.7584628675942986
Training accuracy:  0.760935824095127
Validation mae:  1.2106636899229584
Training mae:  1.1969897576047093
