# Multi-layer perceptron
**Important**: Change the kernel to *PROJECT_NAME local*. You can do this from the *Kernel* menu under *Change kernel*.

In [1]:
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import PredefinedSplit
from sklearn.model_selection import GridSearchCV
import pickle
import os

In [2]:
from azureml.logging import get_azureml_logger
run_logger = get_azureml_logger()
run_logger.log('amlrealworld.timeseries.mlp','true')

<azureml.logging.script_run_request.ScriptRunRequest at 0x7ff5a5fa8f28>

In [3]:
model_name = "mlp"
aml_dir = os.environ['AZUREML_NATIVE_SHARE_DIRECTORY']
train = pd.read_csv(os.path.join(aml_dir, 'nyc_demand_train.csv'), parse_dates=['timeStamp'])
X = train.drop(['demand', 'timeStamp'], axis=1)

Due to the relatively long training times of neural networks, in this example we use one validation set in place of cross-validation. The validation set will be the final 25% of the training dataset. Training takes about 3 minutes.

In [4]:
len_train = int(len(X) * 0.75)
len_valid = len(X) - len_train
test_fold = [-1]*len_train + [0]*len_valid
ps = PredefinedSplit(test_fold)

In [5]:
regr = MLPRegressor(solver='lbfgs', verbose=True)

We first specify a list of network structures to try. Each tuple in this list specifies the number of hidden layers and the number of units in each. For example, (5,0) specifies a network with one hidden layer containing 5 units while (10,10) creates a network with 2 hidden layers, each with 10 units. We then perform grid search to test the various network structures and regularization parameters. 

In [6]:
hidden_layer_size = [(5,), (10,), (15,), (20,), (25,), (30,), (35,), (40,), (10,10), (20,20), (30,30), (40,40)]

In [7]:
param_grid = {'hidden_layer_sizes': hidden_layer_size,
             'alpha': [0.01, 0.1, 1.0, 10.0]}
regr_cv = GridSearchCV(estimator=regr,
            param_grid=param_grid,
            cv=ps,
            scoring='neg_mean_squared_error',
            verbose=2,
            n_jobs=-1)

In [8]:
regr_pipe = Pipeline([('regr_cv', regr_cv)])
regr_pipe.fit(X, y=train['demand'])

Fitting 1 folds for each of 48 candidates, totalling 48 fits
[CV] alpha=0.01, hidden_layer_sizes=(5,) .............................
[CV] alpha=0.01, hidden_layer_sizes=(10,) ............................
[CV] .............. alpha=0.01, hidden_layer_sizes=(5,), total=   0.2s
[CV] alpha=0.01, hidden_layer_sizes=(15,) ............................
[CV] ............. alpha=0.01, hidden_layer_sizes=(10,), total=   3.0s
[CV] alpha=0.01, hidden_layer_sizes=(20,) ............................
[CV] ............. alpha=0.01, hidden_layer_sizes=(15,), total=   4.8s
[CV] alpha=0.01, hidden_layer_sizes=(25,) ............................
[CV] ............. alpha=0.01, hidden_layer_sizes=(20,), total=   8.6s
[CV] alpha=0.01, hidden_layer_sizes=(30,) ............................
[CV] ............. alpha=0.01, hidden_layer_sizes=(25,), total=   8.4s
[CV] alpha=0.01, hidden_layer_sizes=(35,) ............................
[CV] ............. alpha=0.01, hidden_layer_sizes=(30,), total=   7.2s
[CV] alpha=0.01,

[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed:  2.6min


[CV] ............. alpha=10.0, hidden_layer_sizes=(15,), total=   3.9s
[CV] alpha=10.0, hidden_layer_sizes=(20,) ............................
[CV] ........... alpha=1.0, hidden_layer_sizes=(40, 40), total=  20.9s
[CV] alpha=10.0, hidden_layer_sizes=(25,) ............................
[CV] ............. alpha=10.0, hidden_layer_sizes=(20,), total=   4.7s
[CV] alpha=10.0, hidden_layer_sizes=(30,) ............................
[CV] ............. alpha=10.0, hidden_layer_sizes=(25,), total=   6.0s
[CV] alpha=10.0, hidden_layer_sizes=(35,) ............................
[CV] ............. alpha=10.0, hidden_layer_sizes=(30,), total=   7.6s
[CV] alpha=10.0, hidden_layer_sizes=(40,) ............................
[CV] ............. alpha=10.0, hidden_layer_sizes=(35,), total=   8.5s
[CV] alpha=10.0, hidden_layer_sizes=(10, 10) .........................
[CV] ............. alpha=10.0, hidden_layer_sizes=(40,), total=   8.5s
[CV] alpha=10.0, hidden_layer_sizes=(20, 20) .........................
[CV] .

[Parallel(n_jobs=-1)]: Done  48 out of  48 | elapsed:  3.4min finished


Pipeline(memory=None,
     steps=[('regr_cv', GridSearchCV(cv=PredefinedSplit(test_fold=array([-1, -1, ...,  0,  0])),
       error_score='raise',
       estimator=MLPRegressor(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(1..._jobs', refit=True, return_train_score='warn',
       scoring='neg_mean_squared_error', verbose=2))])

In [9]:
with open(os.path.join(aml_dir, model_name + '.pkl'), 'wb') as f:
    pickle.dump(regr_pipe, f)

Cross validation results

In [10]:
cv_results = pd.DataFrame(regr_pipe.named_steps['regr_cv'].cv_results_)
cv_results.sort_values(by='rank_test_score', inplace=True)
cv_results.head()

Unnamed: 0,mean_fit_time,mean_score_time,mean_test_score,mean_train_score,param_alpha,param_hidden_layer_sizes,params,rank_test_score,split0_test_score,split0_train_score,std_fit_time,std_score_time,std_test_score,std_train_score
41,7.551556,0.011675,-6219.410426,-7470.614132,10.0,"(30,)","{'alpha': 10.0, 'hidden_layer_sizes': (30,)}",1,-6219.410426,-7470.614132,0.0,0.0,0.0,0.0
44,5.79677,0.006628,-6392.601883,-7315.619267,10.0,"(10, 10)","{'alpha': 10.0, 'hidden_layer_sizes': (10, 10)}",2,-6392.601883,-7315.619267,0.0,0.0,0.0,0.0
19,8.678896,0.014193,-6696.008485,-7790.304639,0.1,"(40,)","{'alpha': 0.1, 'hidden_layer_sizes': (40,)}",3,-6696.008485,-7790.304639,0.0,0.0,0.0,0.0
2,4.809886,0.00667,-6945.595519,-8050.22084,0.01,"(15,)","{'alpha': 0.01, 'hidden_layer_sizes': (15,)}",4,-6945.595519,-8050.22084,0.0,0.0,0.0,0.0
18,8.441728,0.008543,-6998.60951,-8207.820822,0.1,"(35,)","{'alpha': 0.1, 'hidden_layer_sizes': (35,)}",5,-6998.60951,-8207.820822,0.0,0.0,0.0,0.0
