In [1]:
import math
import collections

import pandas as pd

from keras import regularizers
from keras.models import Model, Sequential
from keras.layers import *

import params
from utils.sequence_data import data_to_sequences_and_labels
from utils.metrics import print_report_for_binary_classfier
from utils.preprocessing import probs_to_binary_classes
from utils.hyperparams import hyperparams_search
from utils.plot import plot_train_validation_metric

Using TensorFlow backend.


In [2]:
# Load the modelling dataset. The first CSV row is the header and the first
# column is parsed as dates and used as the DataFrame index.
dataset = pd.read_csv(
    "../datasets/data_for_models/dataset_1996-01-01_2019-08-22.csv",
    header=0,
    parse_dates=[0],
    index_col=0,
)

# Shape of a single model input: (LOOKBACK // STEP, number of columns in `dataset`).
# NOTE(review): presumably LOOKBACK // STEP is the number of timesteps per
# generated sequence -- confirm against data_to_sequences_and_labels.
input_shape = (params.LOOKBACK // params.STEP, dataset.shape[-1])

In [3]:
# Row boundaries used to cut `dataset` into train / validation / test ranges.
train_max_idx = math.ceil(params.TRAIN_RATIO * len(dataset))
val_max_idx = math.ceil((params.TRAIN_RATIO + params.VAL_RATIO) * len(dataset))
# Index of the last column; handed to the helper as the label column index.
label_index = dataset.shape[1] - 1


def _make_split(min_idx, max_idx):
    """Call data_to_sequences_and_labels for one (min_idx, max_idx) row range.

    How the bounds are interpreted (inclusive / exclusive, meaning of None)
    is defined by the helper, not here.
    """
    return data_to_sequences_and_labels(
        dataset.to_numpy(),
        params.LOOKBACK,
        params.STEP,
        min_idx,
        max_idx,
        params.DELAY,
        label_index,
    )


# Prepare data: each split is an (X, Y) pair of sequences and labels.
train_X, train_Y = _make_split(0, train_max_idx)
# NOTE(review): the later splits start one past the previous bound; whether
# that skips a row depends on how the helper treats max_idx -- confirm.
val_X, val_Y = _make_split(train_max_idx + 1, val_max_idx)
test_X, test_Y = _make_split(val_max_idx + 1, None)

In [6]:
def fc_model(hidden_unit, n_layer, l2_weight, input_shape):
    """Build and compile a fully connected binary classifier.

    The network flattens each input sample, applies `n_layer` ReLU Dense
    layers of `hidden_unit` units each (with L2 kernel regularization), and
    ends with a single sigmoid unit.

    Args:
        hidden_unit: number of units in every hidden Dense layer.
        n_layer: number of hidden Dense layers.
        l2_weight: factor passed to regularizers.l2 for each hidden layer.
        input_shape: shape of one input sample, given to the Flatten layer.

    Returns:
        A Sequential model compiled with the Adam optimizer, binary
        cross-entropy loss and the accuracy metric.
    """
    stack = [Flatten(input_shape=input_shape)]
    # A fresh regularizer instance is created for every hidden layer.
    stack.extend(
        Dense(hidden_unit,
              activation='relu',
              kernel_regularizer=regularizers.l2(l2_weight))
        for _ in range(n_layer)
    )
    stack.append(Dense(1, activation='sigmoid'))

    model = Sequential(stack)
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    return model

In [None]:
# Search over the candidate hidden sizes, depths and L2 weights using the
# project helper (presumably it trains fc_model for each combination and
# returns the best setting -- confirm in utils.hyperparams).
best_params = hyperparams_search(
    model_func=fc_model,
    input_shape=input_shape,
    train_X=train_X,
    train_Y=train_Y,
    val_X=val_X,
    val_Y=val_Y,
    num_epoch=2000,
    hidden_units=[32, 64],
    n_layers=[2, 3],
    l2_weights=[0, 0.0001, 0.001, 0.01],
    is_verbose=0,
)
print("\nbest params = {0}".format(best_params))

# Best result, hard-coded. This assignment replaces whatever the search above
# returned, so the following cells always use these values.
# The 'hidden_unti' key is misspelled but is read back under the same
# spelling when the final model is built, so it must stay as is.
best_params = {
    'epochs': 1048,
    'n_layer': 3,
    'hidden_unti': 32,
    'l2_weight': 0,
}

In [None]:
# Unpack the selected hyperparameters.
hidden_unit = best_params['hidden_unti']  # key spelling matches best_params
n_layer = best_params['n_layer']
l2_weight = best_params['l2_weight']
num_epoch = best_params['epochs']

# Build a fresh model with those settings.
model = fc_model(hidden_unit=hidden_unit,
                 n_layer=n_layer,
                 l2_weight=l2_weight,
                 input_shape=input_shape)

# Fit on the training split, evaluating on the validation split each epoch;
# the returned history object is kept for plotting.
history = model.fit(
    train_X,
    train_Y,
    epochs=num_epoch,
    validation_data=(val_X, val_Y),
    verbose=1,
)



In [None]:
# Hand the fit history to the plotting helper with the 'loss' / 'val_loss'
# metric names (presumably draws training vs. validation loss per epoch).
plot_train_validation_metric(history, 'loss', 'val_loss')

### We have 2984 positive and 984 negative cases.

In [None]:
# Training set: print the label counts, then the classification report for
# the model's predictions converted to binary classes.
train_label_counts = collections.Counter(train_Y)
print(train_label_counts)
train_pred = probs_to_binary_classes(model.predict(train_X))
print_report_for_binary_classfier(train_Y, train_pred)

In [None]:
# Validation (dev) set: print the label counts, then the classification
# report for the model's predictions converted to binary classes.
val_label_counts = collections.Counter(val_Y)
print(val_label_counts)
val_pred = probs_to_binary_classes(model.predict(val_X))
print_report_for_binary_classfier(val_Y, val_pred)

In [None]:
# Test set: print the label counts, then the classification report for the
# model's predictions converted to binary classes.
test_label_counts = collections.Counter(test_Y)
print(test_label_counts)
test_pred = probs_to_binary_classes(model.predict(test_X))
print_report_for_binary_classfier(test_Y, test_pred)

# Save the best model 

In [None]:
# load_model is imported here but not used in this cell (presumably kept for
# reloading the saved model later).
from keras.models import load_model

model.save(params.BEST_FC_MODEL_PATH)  # writes the trained model to params.BEST_FC_MODEL_PATH (presumably an HDF5 file -- confirm the path's extension)