In [1]:
import math

import pandas as pd
import numpy as np
from keras import Input,backend
from keras.models import Model, Sequential
from keras.layers import *
from keras.callbacks import Callback
from sklearn.metrics import confusion_matrix, f1_score, precision_score, recall_score, accuracy_score, matthews_corrcoef

Using TensorFlow backend.


In [39]:
# Load the full table: the first CSV row supplies the column names, and the
# first column is parsed as dates and used as the index.
dataset = pd.read_csv(
    "../datasets/train_test_data.csv",
    header=0,
    index_col=0,
    parse_dates=[0],
)

In [40]:
# Hyperparameters
batch_size = 64

# Parameters for the batch generator
label_index = len(dataset.columns) - 1  # the label is the last column
delay = 0      # target row is `row + delay`, `row` being the first row after the window
step = 1       # sampling stride inside a window; 1 timestep = 1 day
lookback = 10  # number of past rows covered by one sample window

# Ratios for the train/val/test split (test gets whatever is left)
train_ratio = 0.7
val_ratio = 0.15

train_max_idx = math.ceil(len(dataset) * train_ratio)
val_max_idx = math.ceil(len(dataset) * (train_ratio + val_ratio))

# 1 step = 1 batch of samples.
#
# In sequential mode `generator` starts at `min_index + lookback` and jumps
# back to that row as soon as `i + batch_size >= max_index`, so one pass over
# a split yields exactly
#     (max_index - min_index - lookback - 1) // batch_size
# distinct batches. Asking for more steps than that silently re-reads the
# first batch and double-counts it in the metrics, so each step count below
# uses this formula with the (min_index, max_index) pair that is passed to
# the corresponding generator:
#     train: (0, train_max_idx)
#     val:   (train_max_idx + 1, val_max_idx)
#     test:  (val_max_idx + 1, len(dataset) - delay - 1)   # max_index=None
train_steps = (train_max_idx - lookback - 1) // batch_size
val_steps = (val_max_idx - (train_max_idx + 1) - lookback - 1) // batch_size
test_steps = ((len(dataset) - delay - 1) - (val_max_idx + 1) - lookback - 1) // batch_size

In [6]:
def generator(data, label_index, lookback, delay, min_index, max_index,
              shuffle=False, batch_size=64, step=1):
    """Yield (samples, targets) batches of sliding windows over `data`, forever.

    For a chosen row ``r`` the sample is the window of rows ``[r - lookback, r)``
    taken every ``step`` rows with the label column removed, and the target is
    the label column of row ``r + delay``.

    Parameters
    ----------
    data : 2-D np.ndarray
        One row per timestep; column `label_index` holds the label.
    label_index : int
        Column of `data` used as the target and dropped from the samples.
    lookback : int
        Number of rows before ``r`` covered by a window.
    delay : int
        Offset added to ``r`` to locate the target row.
    min_index, max_index : int or None
        Rows ``r`` are taken from ``[min_index + lookback, max_index)``.
        ``max_index=None`` means ``len(data) - delay - 1``.
    shuffle : bool
        If True, every batch is `batch_size` rows drawn at random (with
        replacement) via ``np.random.randint``. If False, batches are
        consecutive rows in order.
    batch_size : int
        Number of rows per batch.
    step : int
        Stride between the rows sampled inside a window.

    Yields
    ------
    samples : float64 np.ndarray, shape (n, lookback // step, data.shape[-1] - 1)
    targets : float64 np.ndarray, shape (n,)

    Notes
    -----
    In sequential mode the position resets to ``min_index + lookback`` whenever
    ``i + batch_size >= max_index``, so the last (up to `batch_size`) rows
    before `max_index` are never yielded.
    """
    if max_index is None:
        max_index = len(data) - delay - 1

    # Loop-invariant work: split features from labels once instead of calling
    # np.delete for every single sample.
    features = np.delete(data, label_index, axis=1)
    labels = data[:, label_index]
    # Row offsets of one window relative to its row r: r - lookback, ..., < r.
    window_offsets = np.arange(-lookback, 0, step)

    first_row = min_index + lookback
    i = first_row
    while True:
        if shuffle:
            rows = np.random.randint(first_row, max_index, size=batch_size)
        else:
            if i + batch_size >= max_index:
                i = first_row  # wrap around to the start of the range
            rows = np.arange(i, min(i + batch_size, max_index))
            i += len(rows)

        # Pre-allocated float64 outputs keep dtype and shape independent of
        # the dtype of `data`.
        samples = np.zeros((len(rows), lookback // step, features.shape[-1]))
        targets = np.zeros((len(rows),))
        # (n, 1) + (w,) broadcasts to an (n, w) grid of row indices, which
        # gathers all windows of the batch in one indexing operation.
        samples[...] = features[rows[:, None] + window_offsets]
        targets[...] = labels[rows + delay]
        yield samples, targets

In [7]:
# Training batches sampled at random from the training range.
train_rand_gen = generator(
    dataset.to_numpy(),
    label_index=label_index,
    lookback=lookback,
    delay=delay,
    min_index=0,
    max_index=train_max_idx,
    shuffle=True,
    batch_size=batch_size,
    step=step,
)

# The same training range, read sequentially.
train_gen = generator(
    dataset.to_numpy(),
    label_index=label_index,
    lookback=lookback,
    delay=delay,
    min_index=0,
    max_index=train_max_idx,
    shuffle=False,
    batch_size=batch_size,
    step=step,
)

# Validation range: the rows after the training range, up to val_max_idx.
val_gen = generator(
    dataset.to_numpy(),
    label_index=label_index,
    lookback=lookback,
    delay=delay,
    min_index=train_max_idx + 1,
    max_index=val_max_idx,
    shuffle=False,
    batch_size=batch_size,
    step=step,
)

# Test range: everything after the validation range (max_index=None lets the
# generator derive the upper bound from the data length).
test_gen = generator(
    dataset.to_numpy(),
    label_index=label_index,
    lookback=lookback,
    delay=delay,
    min_index=val_max_idx + 1,
    max_index=None,
    shuffle=False,
    batch_size=batch_size,
    step=step,
)

In [8]:
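# Debugging aid (disabled): print the first batch produced by a generator.
# NOTE: `t_gen` is not defined anywhere in this notebook; substitute one of
# the generators created above before uncommenting.
# count = 0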
# max_count=1
# for samples, targets in t_gen:
#     if(count >= max_count):
#         break
#     print(samples.shape)
#     print(targets)
#     print(samples[0])
#     print(samples[1])
#     count+=1

In [9]:
def print_metric(y_true, y_pred):
    """Print binary-classification scores followed by the confusion matrix.

    y_true: ground-truth 0/1 labels
    y_pred: predicted 0/1 labels (hard classes, not probabilities)

    Prints F1, precision, recall, accuracy and the Matthews correlation
    coefficient, one per line, then the confusion matrix computed with the
    label order fixed to [0, 1]. Returns nothing.
    """
    # (format string, scoring function) pairs, in the order they are printed.
    report = (
        ('F1 score: %f', f1_score),
        ('precision score: %f', precision_score),
        ('recall score: %f', recall_score),
        ('accuracy score: %f', accuracy_score),
        ('matthews_corrcoef: %f', matthews_corrcoef),
    )
    for template, scorer in report:
        print(template % scorer(y_true, y_pred))

    print('\nConfusion matrix:')
    print(confusion_matrix(y_true, y_pred, labels=[0, 1]))

In [10]:
def probs_to_binary_classes(preds, threshold=0.5):
    """Convert predicted probabilities into hard 0/1 class labels.

    preds: array-like of probabilities (np array, nested list or scalar)
    threshold: scalar cut-off; values strictly greater than it become 1,
        everything else (values equal to it, and NaN) becomes 0

    Returns an integer np array with the same shape as `preds`.
    """
    # np.asarray lets plain Python lists through; a list cannot be compared
    # with a scalar directly.
    return np.where(np.asarray(preds) > threshold, 1, 0)

In [11]:
def print_unique_counts(x):
    """Print a two-column table: each distinct value of `x` and its count."""
    values, tallies = np.unique(x, return_counts=True)
    # Stack as two rows, then transpose so every printed row is (value, count).
    table = np.array([values, tallies]).transpose()
    print(table)


In [12]:
# Build a fully connected (FC) model from the book

# One sample is a window of `lookback // step` timesteps; every timestep
# carries all dataset columns except the label.
input_shape = (lookback // step, dataset.shape[-1] - 1)

model = Sequential()
model.add(Flatten(input_shape=input_shape))
model.add(Dense(32, activation='relu'))
model.add(Dense(1, activation='sigmoid'))  # single sigmoid output in (0, 1)

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
# summary() prints the table itself and returns None; wrapping it in print()
# would add a stray "None" line to the output.
model.summary()

# Fit the model on randomly sampled training batches
model.fit_generator(train_rand_gen,
                    steps_per_epoch=train_steps,
                    epochs=100, verbose=1)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_1 (Flatten)          (None, 250)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 32)                8032      
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 33        
Total params: 8,065
Trainable params: 8,065
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/400
Epoch 2/400
Epoch 3/400
Epoch 4/400
Epoch 5/400
Epoch 6/400
Epoch 7/400
Epoch 8/400
Epoch 9/400
Epoch 10/400
Epoch 11/400
Epoch 12/400
Epoch 13/400
Epoch 14/400
Epoch 15/400
Epoch 16/400
Epoch 17/400
Epoch 18/400
Epoch 19/400
Epoch 20/400
Epoch 21/400
Epoch 22/400
Epoch 23/400
Epoch 24/400
Epoch 25/400
Epoch 26/400
Epoch 27/400
Epoch 28/400
Epoch 29/400
Epoch 30/400
Epoch 31/400
Epoch

Epoch 80/400
Epoch 81/400
Epoch 82/400
Epoch 83/400
Epoch 84/400
Epoch 85/400
Epoch 86/400
Epoch 87/400
Epoch 88/400
Epoch 89/400
Epoch 90/400
Epoch 91/400
Epoch 92/400
Epoch 93/400
Epoch 94/400
Epoch 95/400
Epoch 96/400
Epoch 97/400
Epoch 98/400
Epoch 99/400
Epoch 100/400
Epoch 101/400
Epoch 102/400
Epoch 103/400
Epoch 104/400
Epoch 105/400
Epoch 106/400
Epoch 107/400
Epoch 108/400
Epoch 109/400
Epoch 110/400
Epoch 111/400
Epoch 112/400
Epoch 113/400
Epoch 114/400
Epoch 115/400
Epoch 116/400
Epoch 117/400
Epoch 118/400
Epoch 119/400
Epoch 120/400
Epoch 121/400
Epoch 122/400
Epoch 123/400
Epoch 124/400
Epoch 125/400
Epoch 126/400
Epoch 127/400
Epoch 128/400
Epoch 129/400
Epoch 130/400
Epoch 131/400
Epoch 132/400
Epoch 133/400
Epoch 134/400
Epoch 135/400
Epoch 136/400
Epoch 137/400
Epoch 138/400
Epoch 139/400
Epoch 140/400
Epoch 141/400
Epoch 142/400
Epoch 143/400
Epoch 144/400
Epoch 145/400
Epoch 146/400
Epoch 147/400
Epoch 148/400
Epoch 149/400
Epoch 150/400
Epoch 151/400
Epoch 152/40

Epoch 163/400
Epoch 164/400
Epoch 165/400
Epoch 166/400
Epoch 167/400
Epoch 168/400
Epoch 169/400
Epoch 170/400
Epoch 171/400
Epoch 172/400
Epoch 173/400
Epoch 174/400
Epoch 175/400
Epoch 176/400
Epoch 177/400
Epoch 178/400
Epoch 179/400
Epoch 180/400
Epoch 181/400
Epoch 182/400
Epoch 183/400
Epoch 184/400
Epoch 185/400
Epoch 186/400
Epoch 187/400
Epoch 188/400
Epoch 189/400
Epoch 190/400
Epoch 191/400
Epoch 192/400
Epoch 193/400
Epoch 194/400
Epoch 195/400
Epoch 196/400
Epoch 197/400
Epoch 198/400
Epoch 199/400
Epoch 200/400
Epoch 201/400
Epoch 202/400
Epoch 203/400
Epoch 204/400
Epoch 205/400
Epoch 206/400
Epoch 207/400
Epoch 208/400
Epoch 209/400
Epoch 210/400
Epoch 211/400
Epoch 212/400
Epoch 213/400
Epoch 214/400
Epoch 215/400
Epoch 216/400
Epoch 217/400
Epoch 218/400
Epoch 219/400
Epoch 220/400
Epoch 221/400
Epoch 222/400
Epoch 223/400
Epoch 224/400
Epoch 225/400
Epoch 226/400
Epoch 227/400
Epoch 228/400
Epoch 229/400
Epoch 230/400
Epoch 231/400
Epoch 232/400
Epoch 233/400
Epoch 

Epoch 248/400
Epoch 249/400
Epoch 250/400
Epoch 251/400
Epoch 252/400
Epoch 253/400
Epoch 254/400
Epoch 255/400
Epoch 256/400
Epoch 257/400
Epoch 258/400
Epoch 259/400
Epoch 260/400
Epoch 261/400
Epoch 262/400
Epoch 263/400
Epoch 264/400
Epoch 265/400
Epoch 266/400
Epoch 267/400
Epoch 268/400
Epoch 269/400
Epoch 270/400
Epoch 271/400
Epoch 272/400
Epoch 273/400
Epoch 274/400
Epoch 275/400
Epoch 276/400
Epoch 277/400
Epoch 278/400
Epoch 279/400
Epoch 280/400
Epoch 281/400
Epoch 282/400
Epoch 283/400
Epoch 284/400
Epoch 285/400
Epoch 286/400
Epoch 287/400
Epoch 288/400
Epoch 289/400
Epoch 290/400
Epoch 291/400
Epoch 292/400
Epoch 293/400
Epoch 294/400
Epoch 295/400
Epoch 296/400
Epoch 297/400
Epoch 298/400
Epoch 299/400
Epoch 300/400
Epoch 301/400
Epoch 302/400
Epoch 303/400
Epoch 304/400
Epoch 305/400
Epoch 306/400
Epoch 307/400
Epoch 308/400
Epoch 309/400
Epoch 310/400
Epoch 311/400
Epoch 312/400
Epoch 313/400
Epoch 314/400
Epoch 315/400
Epoch 316/400
Epoch 317/400
Epoch 318/400
Epoch 

Epoch 333/400
Epoch 334/400
Epoch 335/400
Epoch 336/400
Epoch 337/400
Epoch 338/400
Epoch 339/400
Epoch 340/400
Epoch 341/400
Epoch 342/400
Epoch 343/400
Epoch 344/400
Epoch 345/400
Epoch 346/400
Epoch 347/400
Epoch 348/400
Epoch 349/400
Epoch 350/400
Epoch 351/400
Epoch 352/400
Epoch 353/400
Epoch 354/400
Epoch 355/400
Epoch 356/400
Epoch 357/400
Epoch 358/400
Epoch 359/400
Epoch 360/400
Epoch 361/400
Epoch 362/400
Epoch 363/400
Epoch 364/400
Epoch 365/400
Epoch 366/400
Epoch 367/400
Epoch 368/400
Epoch 369/400
Epoch 370/400
Epoch 371/400
Epoch 372/400
Epoch 373/400
Epoch 374/400
Epoch 375/400
Epoch 376/400
Epoch 377/400
Epoch 378/400
Epoch 379/400
Epoch 380/400
Epoch 381/400
Epoch 382/400
Epoch 383/400
Epoch 384/400
Epoch 385/400
Epoch 386/400
Epoch 387/400
Epoch 388/400
Epoch 389/400
Epoch 390/400
Epoch 391/400
Epoch 392/400
Epoch 393/400
Epoch 394/400
Epoch 395/400
Epoch 396/400
Epoch 397/400
Epoch 398/400
Epoch 399/400
Epoch 400/400


<keras.callbacks.History at 0x7f413527acc0>

In [77]:
def get_data_from_generator(generator, steps):
    """Collect `steps` batches from a batch generator into two arrays.

    Parameters
    ----------
    generator : iterable
        Yields ``(samples, targets)`` pairs of np arrays.
    steps : int
        Number of batches to draw (a fractional value is rounded up).

    Returns
    -------
    (X, Y) : tuple of np.ndarray
        The samples and the targets of the drawn batches, each concatenated
        along axis 0.

    Raises
    ------
    ValueError
        If no batch could be drawn (`steps` <= 0 or the generator is empty).
    """
    import math
    from itertools import islice

    # islice pulls exactly this many items. A `for ... break` loop would fetch
    # one extra batch before noticing the limit and throw it away, which
    # advances a stateful generator and skews any later call on it.
    n_batches = max(0, math.ceil(steps))
    batches = list(islice(generator, n_batches))
    if not batches:
        raise ValueError(
            "no batches collected: steps must be positive and the generator "
            "must yield at least one batch")

    X, Y = zip(*batches)
    return np.concatenate(X, axis=0), np.concatenate(Y, axis=0)


In [84]:
# Class counts hard-coded from the rows of the train-set confusion matrix
# printed further down in this notebook (row 0: actual negatives, row 1:
# actual positives).
neg = 2762 + 158
pos = 177 + 807
print("We have {0} neg cases and {1} pos cases from train data".format(neg, pos))
# Accuracy reached by always predicting the majority (negative) class.
print("the common sense baseline (accuracy score) is {0}".format(neg / (neg + pos)))

We have 2920 neg cases and 984 pos cases from train data
the common sense baseline (accuracy score) is 0.7479508196721312


In [79]:
# Make predictions for the train set
X_train, Y_train = get_data_from_generator(train_gen, train_steps)
# Predict and score on the arrays collected above. The previous code used
# `X`/`Y`, which this cell never defines, so it depended on state left over
# from other cells.
y_pred = model.predict(X_train)
print_metric(Y_train, probs_to_binary_classes(y_pred))

F1 score: 0.828117
precision score: 0.836269
recall score: 0.820122
accuracy score: 0.914191
matthews_corrcoef: 0.771012

Confusion matrix:
[[2762  158]
 [ 177  807]]


In [80]:
# Make predictions for the dev (validation) set: collect `val_steps` batches
# sequentially, predict probabilities, threshold them and print the scores.
X, Y = get_data_from_generator(generator=val_gen, steps=val_steps)
y_pred = model.predict(X)
print_metric(y_true=Y, y_pred=probs_to_binary_classes(preds=y_pred))


F1 score: 0.081633
precision score: 0.045977
recall score: 0.363636
accuracy score: 0.891827
matthews_corrcoef: 0.098000

Confusion matrix:
[[738  83]
 [  7   4]]


In [81]:
# Make predictions for the test set: collect `test_steps` batches
# sequentially, predict probabilities, threshold them and print the scores.
X, Y = get_data_from_generator(generator=test_gen, steps=test_steps)
y_pred = model.predict(X)
print_metric(y_true=Y, y_pred=probs_to_binary_classes(preds=y_pred))


F1 score: 0.000000
precision score: 0.000000
recall score: 0.000000
accuracy score: 0.854567
matthews_corrcoef: -0.067777

Confusion matrix:
[[711  91]
 [ 30   0]]
