In [42]:
import collections
import math

import pandas as pd

from keras import regularizers
from keras.models import Model, Sequential
from keras.layers import *
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.metrics import matthews_corrcoef, make_scorer
from sklearn.model_selection import GridSearchCV, TimeSeriesSplit

import params
from utils.sequence_data import data_to_sequences_and_labels
from utils.metrics import print_report_for_binary_classfier
from utils.preprocessing import probs_to_binary_classes
from utils.hyperparams import hyperparams_search
from utils.plot import plot_train_validation_metric

In [43]:
# Load the model-ready dataset. The first CSV row is the header and the first
# column is parsed as dates and used as the index.
DATASET_CSV = "../datasets/data_for_models/dataset_1996-01-01_2019-08-22.csv"
dataset = pd.read_csv(DATASET_CSV, header=0, index_col=0, parse_dates=[0])

# Shape of a single model input: (LOOKBACK // STEP, number of dataset columns).
# The column count includes the last column, which is also used as the label
# column further down -- NOTE(review): confirm the sequences really carry it.
steps_per_sequence = params.LOOKBACK // params.STEP
INPUT_SHAPE = (steps_per_sequence, dataset.shape[-1])

In [44]:
# Params for generating sequences: row indices where the train and validation
# ranges end, derived from the configured split ratios.
n_rows = len(dataset)
train_max_idx = math.ceil(n_rows * params.TRAIN_RATIO)
val_max_idx = math.ceil(n_rows * (params.TRAIN_RATIO + params.VAL_RATIO))
# Index of the last column, handed to the sequence builder as the label column.
label_index = len(dataset.columns) - 1


def _sequences_between(min_idx, max_idx):
    """Return the (sequences, labels) pair built from one index range.

    Thin wrapper around ``data_to_sequences_and_labels`` that forwards the
    dataset values together with the fixed LOOKBACK / STEP / DELAY settings
    and the label column index, so that only the range varies per split.

    Args:
        min_idx: lower index bound forwarded to the builder.
        max_idx: upper index bound forwarded to the builder (``None`` is
            passed through unchanged for the final split).
    """
    return data_to_sequences_and_labels(dataset.to_numpy(), params.LOOKBACK,
                                        params.STEP,
                                        min_idx, max_idx,
                                        params.DELAY,
                                        label_index)


# prepare data
# NOTE(review): validation starts at train_max_idx + 1 and test at
# val_max_idx + 1. If the builder treats its upper bound as exclusive, one row
# is skipped at each boundary -- confirm against data_to_sequences_and_labels.
train_X, train_Y = _sequences_between(0, train_max_idx)
val_X, val_Y = _sequences_between(train_max_idx + 1, val_max_idx)
test_X, test_Y = _sequences_between(val_max_idx + 1, None)

# Train and validation ranges combined into a single set.
X, Y = _sequences_between(0, val_max_idx)

In [47]:
# Prepare for GridSearchCV.
# TimeSeriesSplit, GridSearchCV, make_scorer and matthews_corrcoef are imported
# in the notebook's top import cell rather than here, so a fresh
# "Restart & Run All" shows every dependency in one place.

# Three ordered folds: TimeSeriesSplit always places the test indices of a
# split after its training indices, so no fold is validated on earlier rows.
cv = TimeSeriesSplit(n_splits=3)

# Candidates are ranked by the Matthews correlation coefficient (higher is better).
matthews_score = make_scorer(matthews_corrcoef, greater_is_better=True)


In [48]:
def fc_model(input_shape, hidden_unit=32, n_layer=2, l2_weight=0.01):
    """Build and compile a fully connected binary classifier.

    The network flattens each input sample, applies ``n_layer`` ReLU dense
    layers of ``hidden_unit`` units each (L2 kernel regularisation with factor
    ``l2_weight``), and ends in a single sigmoid unit.

    Args:
        input_shape: shape of one input sample, passed to the Flatten layer.
        hidden_unit: number of units in every hidden Dense layer.
        n_layer: number of hidden Dense layers.
        l2_weight: L2 regularisation factor applied to hidden-layer kernels.

    Returns:
        A Sequential model compiled with the Adam optimizer, binary
        cross-entropy loss and the accuracy metric.
    """
    # Layers are created in the same order the network applies them.
    stack = [Flatten(input_shape=input_shape)]
    stack.extend(
        Dense(hidden_unit, activation='relu', kernel_regularizer=regularizers.l2(l2_weight))
        for _ in range(n_layer)
    )
    stack.append(Dense(1, activation='sigmoid'))

    model = Sequential(stack)
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

In [None]:
# Hyper-parameter grid searched exhaustively: 3 * 3 * 3 * 4 = 108 combinations.
# hidden_unit, n_layer and l2_weight are keyword arguments of fc_model.
fc_params = {
    'hidden_unit': [32, 48, 64],
    'n_layer': [2, 3, 4],
    'epochs': [100, 250, 500],
    'l2_weight': [0, 0.0001, 0.001, 0.01],
}

# Wrap the Keras builder as a scikit-learn estimator so GridSearchCV can drive it.
fc_estimator = KerasClassifier(build_fn=fc_model, input_shape=INPUT_SHAPE, verbose=0)
fc_clf = GridSearchCV(
    estimator=fc_estimator,
    param_grid=fc_params,
    cv=cv,
    scoring=matthews_score,
)
grid_result = fc_clf.fit(X, Y)

  % delta_t_median)


In [None]:
# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

cv_results = grid_result.cv_results_
means = cv_results['mean_test_score']
stds = cv_results['std_test_score']
# Deliberately not named `params`: that name is the imported configuration
# module (params.LOOKBACK, params.STEP, ...). Rebinding it to this list would
# break every cell that reads the module once this cell has run.
param_sets = cv_results['params']

# One line per grid candidate: mean score, standard deviation, parameter set.
for mean, stdev, param in zip(means, stds, param_sets):
    print("%f (%f) with: %r" % (mean, stdev, param))

# best result 
best_params = {'epochs': 1048, 'n_layer': 3, 'hidden_unit': 32, 'l2_weight': 0}

### We have 2984 positive and 984 negative cases. 

In [None]:
# Make predictions for the train set.
# NOTE(review): fc_clf was fitted on X, Y, which are built from the row range
# that contains the training rows, so this is an in-sample report.
train_label_counts = collections.Counter(train_Y)
print(train_label_counts)

train_predictions = fc_clf.predict(train_X)
print_report_for_binary_classfier(train_Y, train_predictions)

In [None]:
# Make predictions for the dev (validation) set.
# NOTE(review): fc_clf was fitted on X, Y, which are built from rows
# 0..val_max_idx -- the range that also contains the validation rows -- so
# this report is probably not a held-out measurement. Confirm before quoting it.
val_label_counts = collections.Counter(val_Y)
print(val_label_counts)

val_predictions = fc_clf.predict(val_X)
print_report_for_binary_classfier(val_Y, val_predictions)

In [None]:
# Make predictions for the test set (rows after the validation range).
test_label_counts = collections.Counter(test_Y)
print(test_label_counts)

test_predictions = fc_clf.predict(test_X)
print_report_for_binary_classfier(test_Y, test_predictions)

# Save the best model 

In [16]:
# from keras.models import load_model

# model.save(params.BEST_FC_MODEL_PATH)  # creates a HDF5 file 'my_model.h5'