In [38]:
%load_ext autoreload
import pandas as pd
import numpy as np
from sklearn.model_selection import RandomizedSearchCV
from sklearn.neural_network import MLPRegressor
from scipy.stats import randint as sp_randint
from scipy.stats import uniform as sp_uniform

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Loading the Data

In [2]:
X_train_1 = pd.read_csv('data/dengue_features_train.csv')
y_train = pd.read_csv('data/dengue_labels_train.csv')['total_cases']
attr = list(X_train_1)
attr

['city',
 'year',
 'weekofyear',
 'week_start_date',
 'ndvi_ne',
 'ndvi_nw',
 'ndvi_se',
 'ndvi_sw',
 'precipitation_amt_mm',
 'reanalysis_air_temp_k',
 'reanalysis_avg_temp_k',
 'reanalysis_dew_point_temp_k',
 'reanalysis_max_air_temp_k',
 'reanalysis_min_air_temp_k',
 'reanalysis_precip_amt_kg_per_m2',
 'reanalysis_relative_humidity_percent',
 'reanalysis_sat_precip_amt_mm',
 'reanalysis_specific_humidity_g_per_kg',
 'reanalysis_tdtr_k',
 'station_avg_temp_c',
 'station_diur_temp_rng_c',
 'station_max_temp_c',
 'station_min_temp_c',
 'station_precip_mm']

## Cleaning the noisy training data

In [3]:
def bools_to_indexes(booleans):
    r = []
    for idx, x in enumerate(booleans):
        if x:
            r.append(idx)
    return r

idx = bools_to_indexes(X_train_1['weekofyear'] == 53)
y_train.drop(idx, inplace=True)
y_train.reset_index(drop=True, inplace=True)
X_train_1.drop(idx, inplace=True)
X_train_1.reset_index(drop=True, inplace=True)
X_train_1.shape

(1451, 24)

# Trying models

In [None]:
score_metric='neg_mean_absolute_error'
n_jobs=-1
iid = False
verbose_level = 2
k_folds=10 
n_iter_search=20

## Simple MLP
* First we will try with a simple Multilayer Perceptron.
* According to the book 'Hands-On Machine Learning with Scikit-Learn and TensorFlow: Concepts, Tools, and Techniques to Build Intelligent Systems', applying the RELU activation function to all the hidden layers is a good idea.
* Here we will simply be searching through the different initial learning rates
* The ideal number of layers appears to be 7, however the scores are still very high.

### Preparing the data

In [9]:
%autoreload
from utils.OurPipeline import create_pipeline

pipeline = create_pipeline(attr, n_weeks=1)
X_train = pipeline.fit_transform(X_train_1)

#### Defining generic values

In [69]:
activation = 'relu'
train_algorithm = 'adam'
learn_rate_mode = 'adaptive'
learn_rate_val = 0.001
batch_size = 200
max_iter = 5000
random_n = 42
tol_val = 1e-4
n_iter_tol = 20
verb=False

early_stop=True
val_faction = 0.1

params = {
    #'hidden_layer_sizes': [sp_randint(30, 52), sp_randint(16, 30), sp_randint(4, 16)],
    'learning_rate_init': sp_uniform(1e-4, 1)
    
}

### 7-layer perceptron

In [77]:
neurons_per_layer = [35, 25, 20, 15, 10]

mlp = MLPRegressor(hidden_layer_sizes=neurons_per_layer, activation=activation, max_iter=max_iter, solver=train_algorithm, learning_rate=learn_rate_mode, batch_size=batch_size, random_state=random_n, tol=tol_val, verbose=verb, early_stopping=early_stop, validation_fraction=val_faction, n_iter_no_change=n_iter_tol)
MLP_Optimizer = RandomizedSearchCV(mlp, param_distributions=params, cv=k_folds, n_iter=n_iter_search, scoring=score_metric, n_jobs=n_jobs, verbose=verbose_level, iid=iid, return_train_score=True)
MLP_Optimizer.fit(X_train, y_train)
MLP_Optimizer.best_estimator_, MLP_Optimizer.best_score_

Fitting 10 folds for each of 20 candidates, totalling 200 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  33 tasks      | elapsed:    3.8s
[Parallel(n_jobs=-1)]: Done 154 tasks      | elapsed:   30.3s
[Parallel(n_jobs=-1)]: Done 200 out of 200 | elapsed:   35.0s finished


(MLPRegressor(activation='relu', alpha=0.0001, batch_size=200, beta_1=0.9,
        beta_2=0.999, early_stopping=True, epsilon=1e-08,
        hidden_layer_sizes=[35, 25, 20, 15, 10], learning_rate='adaptive',
        learning_rate_init=0.04694728278293715, max_iter=5000, momentum=0.9,
        n_iter_no_change=20, nesterovs_momentum=True, power_t=0.5,
        random_state=42, shuffle=True, solver='adam', tol=0.0001,
        validation_fraction=0.1, verbose=False, warm_start=False),
 -20.78080881244646)

### 6-layer perceptron

In [64]:
neurons_per_layer = [35, 25, 15, 10]

mlp = MLPRegressor(hidden_layer_sizes=neurons_per_layer, activation=activation, max_iter=max_iter, solver=train_algorithm, learning_rate=learn_rate_mode, batch_size=batch_size, random_state=random_n, tol=tol_val, verbose=verb, early_stopping=early_stop, validation_fraction=val_faction, n_iter_no_change=n_iter_tol)
MLP_Optimizer = RandomizedSearchCV(mlp, param_distributions=params, cv=k_folds, n_iter=n_iter_search, scoring=score_metric, n_jobs=n_jobs, verbose=verbose_level, iid=iid, return_train_score=True)
MLP_Optimizer.fit(X_train, y_train)
MLP_Optimizer.best_estimator_, MLP_Optimizer.best_score_

Fitting 10 folds for each of 20 candidates, totalling 200 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  33 tasks      | elapsed:    2.9s
[Parallel(n_jobs=-1)]: Done 154 tasks      | elapsed:   24.0s
[Parallel(n_jobs=-1)]: Done 200 out of 200 | elapsed:   28.4s finished


(MLPRegressor(activation='relu', alpha=0.0001, batch_size=200, beta_1=0.9,
        beta_2=0.999, early_stopping=True, epsilon=1e-08,
        hidden_layer_sizes=[35, 25, 15, 10], learning_rate='adaptive',
        learning_rate_init=0.0866318376475216, max_iter=5000, momentum=0.9,
        n_iter_no_change=20, nesterovs_momentum=True, power_t=0.5,
        random_state=42, shuffle=True, solver='adam', tol=0.0001,
        validation_fraction=0.1, verbose=False, warm_start=False),
 -19.513873846292523)

### 5-layer perceptron

In [71]:
neurons_per_layer = [35, 20, 10]

mlp = MLPRegressor(hidden_layer_sizes=neurons_per_layer, activation=activation, max_iter=max_iter, solver=train_algorithm, learning_rate=learn_rate_mode, batch_size=batch_size, random_state=random_n, tol=tol_val, verbose=verb, early_stopping=early_stop, validation_fraction=val_faction, n_iter_no_change=n_iter_tol)
MLP_Optimizer = RandomizedSearchCV(mlp, param_distributions=params, cv=k_folds, n_iter=n_iter_search, scoring=score_metric, n_jobs=n_jobs, verbose=verbose_level, iid=iid, return_train_score=True)
MLP_Optimizer.fit(X_train, y_train)
MLP_Optimizer.best_estimator_, MLP_Optimizer.best_score_

Fitting 10 folds for each of 20 candidates, totalling 200 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  33 tasks      | elapsed:    9.2s
[Parallel(n_jobs=-1)]: Done 154 tasks      | elapsed:   33.8s
[Parallel(n_jobs=-1)]: Done 200 out of 200 | elapsed:   40.3s finished


(MLPRegressor(activation='relu', alpha=0.0001, batch_size=200, beta_1=0.9,
        beta_2=0.999, early_stopping=True, epsilon=1e-08,
        hidden_layer_sizes=[35, 20, 10], learning_rate='adaptive',
        learning_rate_init=0.134954482930001, max_iter=5000, momentum=0.9,
        n_iter_no_change=20, nesterovs_momentum=True, power_t=0.5,
        random_state=42, shuffle=True, solver='adam', tol=0.0001,
        validation_fraction=0.1, verbose=False, warm_start=False),
 -21.349379403662667)

### 4-layer perceptron

In [73]:
neurons_per_layer = [30, 15]

mlp = MLPRegressor(hidden_layer_sizes=neurons_per_layer, activation=activation, max_iter=max_iter, solver=train_algorithm, learning_rate=learn_rate_mode, batch_size=batch_size, random_state=random_n, tol=tol_val, verbose=verb, early_stopping=early_stop, validation_fraction=val_faction, n_iter_no_change=n_iter_tol)
MLP_Optimizer = RandomizedSearchCV(mlp, param_distributions=params, cv=k_folds, n_iter=n_iter_search, scoring=score_metric, n_jobs=n_jobs, verbose=verbose_level, iid=iid, return_train_score=True)
MLP_Optimizer.fit(X_train, y_train)
MLP_Optimizer.best_estimator_, MLP_Optimizer.best_score_

Fitting 10 folds for each of 20 candidates, totalling 200 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  33 tasks      | elapsed:    3.0s
[Parallel(n_jobs=-1)]: Done 154 tasks      | elapsed:   15.8s
[Parallel(n_jobs=-1)]: Done 200 out of 200 | elapsed:   21.0s finished


(MLPRegressor(activation='relu', alpha=0.0001, batch_size=200, beta_1=0.9,
        beta_2=0.999, early_stopping=True, epsilon=1e-08,
        hidden_layer_sizes=[30, 15], learning_rate='adaptive',
        learning_rate_init=0.09571433102280714, max_iter=5000, momentum=0.9,
        n_iter_no_change=20, nesterovs_momentum=True, power_t=0.5,
        random_state=42, shuffle=True, solver='adam', tol=0.0001,
        validation_fraction=0.1, verbose=False, warm_start=False),
 -21.32883347845756)

In [75]:
pd.DataFrame(MLP_Optimizer.cv_results_)[['params','mean_test_score','std_test_score', 'mean_train_score']]

Unnamed: 0,params,mean_test_score,std_test_score,mean_train_score
0,{'learning_rate_init': 0.8315891950432374},-22.302382,9.035754,-21.971457
1,{'learning_rate_init': 0.037523578901123025},-20.126625,13.36253,-14.724124
2,{'learning_rate_init': 0.6643309158762516},-26.809253,16.331691,-27.055716
3,{'learning_rate_init': 0.08089179566977546},-20.658345,13.547042,-15.583598
4,{'learning_rate_init': 0.20921007320386353},-22.518933,8.826091,-22.245434
5,{'learning_rate_init': 0.756712543872126},-21.661455,9.592722,-23.415124
6,{'learning_rate_init': 0.03326783490960829},-19.907474,13.097476,-14.390898
7,{'learning_rate_init': 0.37005066532944875},-22.516135,8.821664,-22.244133
8,{'learning_rate_init': 0.4228326653937886},-22.521593,8.820328,-22.242738
9,{'learning_rate_init': 0.8657256207998367},-24.127015,11.297909,-22.320675
