# Multi-layer Perceptron

In [70]:
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import PredefinedSplit
from sklearn.model_selection import GridSearchCV
import pickle
import os
import scipy.stats as st

In [3]:
model_name = "mlp"
aml_dir = os.environ['AZUREML_NATIVE_SHARE_DIRECTORY']
train = pd.read_csv(aml_dir + 'nyc_demand_train.csv', parse_dates=['timeStamp'])
X = train.drop(['demand', 'timeStamp'], axis=1)

Due to the relatively long training times of neural networks, in this example we use one validation set in place of cross-validation. The validation set will be the final 25% of the training dataset.

In [72]:
len_train = int(len(X) * 0.75)
len_valid = len(X) - len_train
test_fold = [-1]*len_train + [0]*len_valid
ps = PredefinedSplit(test_fold)

In [74]:
regr = MLPRegressor(solver='lbfgs', verbose=True)

In [75]:
hidden_layer_size = [(5,), (10,), (15,), (20,), (25,), (30,), (35,), (40,), (10,10), (20,20), (30,30), (40,40), (10,10,10), (20,20,20), (30,30,30), (40,40,40)]

In [76]:
param_grid = {'hidden_layer_sizes': hidden_layer_size,
             'alpha': [0.0001, 0.001, 0.01, 0.1]}
regr_cv = GridSearchCV(estimator=regr,
            param_grid=param_grid,
            cv=ps,
            scoring='neg_mean_squared_error',
            verbose=2,
            n_jobs=-1)

In [81]:
regr_pipe = Pipeline([('regr_cv', regr_cv)])
regr_pipe.fit(X, y=train['demand'])

Fitting 1 folds for each of 64 candidates, totalling 64 fits


[Parallel(n_jobs=-1)]: Done  33 tasks      | elapsed:  1.8min
[Parallel(n_jobs=-1)]: Done  64 out of  64 | elapsed:  3.9min finished


Pipeline(memory=None,
     steps=[('regr_cv', GridSearchCV(cv=PredefinedSplit(test_fold=array([-1, -1, ...,  0,  0])),
       error_score='raise',
       estimator=MLPRegressor(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(1...*n_jobs', refit=True, return_train_score=True,
       scoring='neg_mean_squared_error', verbose=2))])

Save model to AML shared directory

In [83]:
with open(aml_dir + model_name + '.pkl', 'wb') as f:
    pickle.dump(regr_pipe, f)

### Cross validation results

In [82]:
cv_results = pd.DataFrame(regr_pipe.named_steps['regr_cv'].cv_results_)
cv_results.sort_values(by='rank_test_score', inplace=True)
cv_results.head()

Unnamed: 0,mean_fit_time,mean_score_time,mean_test_score,mean_train_score,param_alpha,param_hidden_layer_sizes,params,rank_test_score,split0_test_score,split0_train_score,std_fit_time,std_score_time,std_test_score,std_train_score
63,25.703727,0.01451,-6545.191157,-7504.598997,0.1,"(40, 40, 40)","{'hidden_layer_sizes': (40, 40, 40), 'alpha': ...",1,-6545.191157,-7504.598997,0.0,0.0,0.0,0.0
58,19.732062,0.030499,-6909.698549,-8102.512347,0.1,"(30, 30)","{'hidden_layer_sizes': (30, 30), 'alpha': 0.1}",2,-6909.698549,-8102.512347,0.0,0.0,0.0,0.0
27,22.725273,0.016504,-6936.20457,-8098.076132,0.001,"(40, 40)","{'hidden_layer_sizes': (40, 40), 'alpha': 0.001}",3,-6936.20457,-8098.076132,0.0,0.0,0.0,0.0
60,13.073128,0.0085,-6937.873643,-8074.213731,0.1,"(10, 10, 10)","{'hidden_layer_sizes': (10, 10, 10), 'alpha': ...",4,-6937.873643,-8074.213731,0.0,0.0,0.0,0.0
4,8.230503,0.007001,-6939.629724,-7879.820117,0.0001,"(25,)","{'hidden_layer_sizes': (25,), 'alpha': 0.0001}",5,-6939.629724,-7879.820117,0.0,0.0,0.0,0.0
