In [15]:
import pandas as pd
import numpy as np
from keras import Input,backend
from keras.models import Model
from keras.layers import *
from keras.callbacks import Callback
from sklearn.metrics import confusion_matrix, f1_score, precision_score, recall_score, accuracy_score, matthews_corrcoef

In [16]:
# Read the indicator table; the first CSV column is parsed as dates and becomes the index.
dataset = pd.read_csv(
    "../datasets/train_test_data.csv",
    header=0,
    parse_dates=[0],
    index_col=0,
)

# Columns from this position to the end are labels; everything before it is a feature.
label_strt_index = -1
data = dataset.iloc[:, :label_strt_index]
labels = dataset.iloc[:, label_strt_index:]

In [23]:
# Show only the rows whose "Past 20-day events" value is strictly positive.
dataset.loc[dataset["Past 20-day events"].gt(0)]

Unnamed: 0,^SSEC,^GDAXHI,^PSI,^HSI,^TWII,^GSPC,^KLSE,^AORD,^N225,^SET,...,USLTGBY,TWINDEXSCORE,TWTRADESURPLUS,TWRIR,TWFERC,TWDINDEX,TWM2YOY,Past 5-day events,Past 20-day events,Crisis in next 10 days
1997-02-27,0.470925,0.447339,0.410245,0.422330,0.595548,0.374391,0.470603,0.482441,0.452704,0.483013,...,0.672196,0.761905,0.454044,0.594583,0.373358,0.407287,0.300396,0.333333,0.166667,0.0
1997-02-28,0.575238,0.396141,0.407266,0.387672,0.538787,0.412342,0.501103,0.492838,0.350665,0.456254,...,0.672196,0.761905,0.454044,0.594583,0.373358,0.407287,0.300396,0.333333,0.166667,0.0
1997-03-01,0.612154,0.408283,0.432117,0.446161,0.600191,0.465813,0.463060,0.450593,0.419327,0.456254,...,0.672196,0.761905,0.454044,0.594583,0.373358,0.407287,0.300396,0.333333,0.166667,0.0
1997-03-03,0.612154,0.408283,0.432117,0.446161,0.401271,0.465813,0.463060,0.450593,0.419327,0.456254,...,0.649267,0.170507,0.387853,0.590024,0.373323,0.424107,0.258919,0.333333,0.166667,0.0
1997-03-04,0.525695,0.531801,0.408308,0.408220,0.569612,0.411679,0.457613,0.570574,0.475192,0.455121,...,0.649267,0.170507,0.387853,0.590024,0.373323,0.424107,0.258919,0.333333,0.166667,0.0
1997-03-05,0.425558,0.447163,0.415655,0.412257,0.556535,0.505979,0.490189,0.540003,0.384984,0.525376,...,0.649267,0.170507,0.387853,0.590024,0.373323,0.424107,0.258919,0.000000,0.166667,0.0
1997-03-06,0.631096,0.435908,0.413549,0.422524,0.554819,0.417525,0.476742,0.581055,0.396426,0.514014,...,0.649267,0.170507,0.387853,0.590024,0.373323,0.424107,0.258919,0.000000,0.166667,0.0
1997-03-07,0.494769,0.441620,0.433766,0.403136,0.572953,0.477209,0.483569,0.510361,0.480585,0.699265,...,0.649267,0.170507,0.387853,0.590024,0.373323,0.424107,0.258919,0.000000,0.166667,0.0
1997-03-08,0.604456,0.424088,0.429793,0.405426,0.515678,0.490579,0.456507,0.608263,0.428041,0.699265,...,0.649267,0.170507,0.387853,0.590024,0.373323,0.424107,0.258919,0.000000,0.166667,0.0
1997-03-10,0.604456,0.424088,0.429793,0.405426,0.490120,0.490579,0.456507,0.608263,0.428041,0.699265,...,0.649267,0.170507,0.387853,0.590024,0.373323,0.424107,0.258919,0.000000,0.166667,1.0


In [6]:
def print_metric(y_true, y_pred):
    """
    Print classification scores followed by the confusion matrix.

    Input
        y_true: ground-truth labels
        y_pred: predicted labels

    Output
        None; every result is written to stdout
    """
    # Each entry pairs a report line's format string with the scorer that fills it.
    report_lines = (
        ('F1 score: %f', f1_score),
        ('precision score: %f', precision_score),
        ('recall score: %f', recall_score),
        ('accuracy score: %f', accuracy_score),
        ('matthews_corrcoef: %f', matthews_corrcoef),
    )
    for template, scorer in report_lines:
        print(template % scorer(y_true, y_pred))

    # The label order is pinned to [0, 1] so the matrix layout does not
    # depend on which classes happen to occur in the inputs.
    print('\nConfusion matrix:')
    print(confusion_matrix(y_true, y_pred, labels=[0,1]))

In [7]:
def train_dev_test_split(dataset, train_ratio, dev_ratio, test_ratio):
    """
    Split a dataset positionally (no shuffling) into train, dev and test parts.

    Input
        dataset: any object supporting len() and slice indexing
                 (e.g. DataFrame, numpy array, list)
        train_ratio: fraction of all rows that goes to the training part
        dev_ratio: weight of the dev part within the remaining rows
        test_ratio: weight of the test part within the remaining rows

    Output
        tuple (train_data, dev_data, test_data) of consecutive slices of dataset

    Only the proportion dev_ratio : test_ratio matters for the second cut.
    When both are zero the dev part is empty and any rows left after the
    training part are returned as the test part.
    """
    size_of_data = len(dataset)

    # Compute the cut index once so both slices are guaranteed to agree.
    train_end = int(size_of_data * train_ratio)
    train_data, remainder = dataset[:train_end], dataset[train_end:]

    # Share of the remainder that goes to dev; guarded so a train-only split
    # (dev_ratio == test_ratio == 0) does not raise ZeroDivisionError.
    holdout_ratio = dev_ratio + test_ratio
    dev_split_ratio = dev_ratio / holdout_ratio if holdout_ratio else 0.0

    dev_end = int(len(remainder) * dev_split_ratio)
    dev_data, test_data = remainder[:dev_end], remainder[dev_end:]

    return train_data, dev_data, test_data

In [8]:
def predict_classes(model, X, threshold=0.5):
    """
    Turn the model's predicted scores into hard 0/1 class labels.

    Input
        model: keras model (any object exposing predict(X) works)
        X: input data
        threshold: scores strictly greater than this value map to class 1,
                   everything else to class 0 (default 0.5)

    Output
        numpy array of 0/1 values with the same shape as model.predict(X);
        for a single-output model that is (X.shape[0], 1)
    """
    y_prob = model.predict(X)

    #convert probs to binary classes
    return np.where(y_prob > threshold, 1, 0)

In [9]:
def print_unique_counts(x):
    """
    Print every distinct value in x next to its number of occurrences.

    Input
        x: array-like of values

    Output
        None; a 2-column table (value, count) is written to stdout
    """
    values, frequencies = np.unique(x, return_counts=True)
    # Stack as two rows, then transpose so each printed row reads [value count].
    table = np.array([values, frequencies]).transpose()
    print(table)


In [10]:
#Split ratios: the training share is whatever dev and test leave over
DEV_RATIO = 0.15
TEST_RATIO = 0.15
TRAIN_RATIO = 1 - DEV_RATIO - TEST_RATIO

#Features and labels are cut with identical ratios so their rows stay aligned
train_data, dev_data, test_data = train_dev_test_split(
    data, TRAIN_RATIO, DEV_RATIO, TEST_RATIO
)
train_label, dev_label, test_label = train_dev_test_split(
    labels, TRAIN_RATIO, DEV_RATIO, TEST_RATIO
)

In [11]:
#One input unit per feature column
input_dim = train_data.shape[1]


#Build layers: two ReLU hidden layers on top of the feature vector
econ_indicator_input = Input(shape=(input_dim,))
x = Dense(32,activation='relu')(econ_indicator_input)
x = Dense(16,activation='relu')(x)

#output activations: a single sigmoid unit, trained against train_label
#(presumably the "Crisis in next 10 days" column - confirm against the CSV)
crisis_in_next_N_days = [Dense(1, activation='sigmoid')(x)]

#compile the model
model = Model(econ_indicator_input, crisis_in_next_N_days)
model.compile(optimizer='adam', loss='binary_crossentropy')


#summary() prints the table itself and returns None, so wrapping it in
#print() only appended a stray "None" line to the output
model.summary()

#fit the model
model.fit(train_data, train_label, epochs=10, batch_size=64, verbose=1)


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 169)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 32)                5440      
_________________________________________________________________
dense_2 (Dense)              (None, 16)                528       
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 17        
Total params: 5,985
Trainable params: 5,985
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f400c246d68>

In [12]:
#Score the model on the data it was trained on
y_pred = predict_classes(model=model, X=train_data)
print_metric(y_true=train_label, y_pred=y_pred)

F1 score: 0.194660
precision score: 0.638418
recall score: 0.114837
accuracy score: 0.763531
matthews_corrcoef: 0.195049

Confusion matrix:
[[2906   64]
 [ 871  113]]


In [13]:
#Score the model on the held-out dev split
y_pred = predict_classes(model=model, X=dev_data)
print_metric(y_true=dev_label, y_pred=y_pred)


F1 score: 0.000000
precision score: 0.000000
recall score: 0.000000
accuracy score: 0.981132
matthews_corrcoef: -0.008829

Confusion matrix:
[[832   5]
 [ 11   0]]


In [14]:
#Score the model on the held-out test split
y_pred = predict_classes(model=model, X=test_data)
print_metric(y_true=test_label, y_pred=y_pred)

F1 score: 0.000000
precision score: 0.000000
recall score: 0.000000
accuracy score: 0.964623
matthews_corrcoef: 0.000000

Confusion matrix:
[[818   0]
 [ 30   0]]


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
