In [1]:
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.layers import Dense, Input, LSTM, Embedding, Activation, GRU
from keras.layers import Dropout, SpatialDropout1D
from keras.layers import Bidirectional,TimeDistributed, concatenate
from keras.layers import GlobalMaxPool1D, GlobalAvgPool1D, Masking
from keras.models import Model,Sequential
from keras import initializers, regularizers, constraints, optimizers, layers
from keras import backend as K
from keras.callbacks import TensorBoard
from keras import metrics

Using TensorFlow backend.


In [2]:
import pandas as pd
import numpy as np
import scipy
import sys

In [4]:
from process_data import load_data,get_rank_of_size
from layers import AttentionLayer,MyMeanPool
from data_util import unpack_news_data,DATE_INTERVAL_NEWS,MAX_NEWS_NUM,EMBEDDING_SIZE

In [5]:
# Ranking returned by process_data.get_rank_of_size(); its entries are used below as keys into `data`.
# NOTE(review): presumably ticker codes ordered by company size — confirm in process_data.
rank = get_rank_of_size()

In [6]:
# Load data for the first-ranked entry only (rank[:1]).
# Later cells index the result as data[<rank entry>]['train' | 'val'][0 | 1 | 2].
data = load_data(rank[:1])

data of FB is loaded


In [7]:
# Sanity check: densify one innermost element of the training news data and show its shape.
# NOTE(review): presumably (MAX_NEWS_NUM, EMBEDDING_SIZE), matching the last two axes of the
# news model's input — confirm against data_util.
data[rank[0]]['train'][0][0][0].toarray().shape

(50, 768)

# News model

In [14]:
from keras.callbacks import Callback
from sklearn.metrics import f1_score, precision_score, recall_score

class Metrics(Callback):
    """Keras callback that records macro-averaged validation metrics per epoch.

    At the end of every epoch the model predicts on the validation inputs and
    the macro-averaged F1, precision and recall are appended to ``val_f1s``,
    ``val_precisions`` and ``val_recalls``. The F1 score is also printed.

    The callback reads ``self.validation_data``, which the training loop must
    populate (inputs at index 0, one-hot targets at index 1).
    NOTE(review): newer Keras releases may no longer set this attribute on
    callbacks — confirm against the installed version.
    """

    def on_train_begin(self, logs=None):
        """Reset the per-epoch metric histories when training starts."""
        self.val_f1s = []
        self.val_recalls = []
        self.val_precisions = []

    def on_epoch_end(self, epoch, logs=None):
        """Score the validation set and append this epoch's F1/precision/recall.

        Args:
            epoch: Index of the epoch that just finished (unused).
            logs: Metric dict supplied by Keras (unused).
        """
        # Turn softmax outputs and one-hot targets into class indices.
        val_predict = np.argmax(np.asarray(self.model.predict(self.validation_data[0])), axis=1)
        val_targ = np.argmax(self.validation_data[1], axis=1)

        _val_f1 = f1_score(val_targ, val_predict, average='macro')
        _val_recall = recall_score(val_targ, val_predict, average='macro')
        _val_precision = precision_score(val_targ, val_predict, average='macro')

        # Previously only the F1 history was filled; the other two lists stayed empty.
        self.val_f1s.append(_val_f1)
        self.val_recalls.append(_val_recall)
        self.val_precisions.append(_val_precision)

        print(' — val_f1:' ,_val_f1)
        return

In [8]:
def build_model(code='Default', gru_units=100, dropout_rate=0.4, num_classes=2):
    """Build and compile the news-based classifier.

    The network takes a tensor of shape
    ``(DATE_INTERVAL_NEWS, MAX_NEWS_NUM, EMBEDDING_SIZE)`` per sample, applies
    masking, attention and dropout independently to every step of the first
    axis via ``TimeDistributed``, runs a GRU over the resulting sequence,
    applies a second attention layer and dropout, and ends in a softmax layer.

    Args:
        code: Accepted for interface compatibility; not used by the body.
        gru_units: Number of units in the GRU layer.
        dropout_rate: Dropout rate used after both attention layers.
        num_classes: Size of the softmax output layer.

    Returns:
        A compiled Keras ``Model`` (categorical cross-entropy loss, Adam
        optimizer, accuracy metric).
    """
    news_input = Input(shape=(DATE_INTERVAL_NEWS, MAX_NEWS_NUM, EMBEDDING_SIZE))

    # Per-step branch: each slice along the first axis is processed independently.
    x = news_input
    x = TimeDistributed(Masking(mask_value=0.))(x)  # timesteps whose values are all 0. are masked
    x = TimeDistributed(AttentionLayer())(x)
    x = TimeDistributed(Dropout(dropout_rate))(x)

    # Sequence branch: GRU keeps the full sequence so attention can pool over it.
    x = GRU(gru_units, return_sequences=True)(x)
    x = AttentionLayer()(x)
    x = Dropout(dropout_rate)(x)

    x = Dense(num_classes, activation='softmax')(x)
    model = Model(inputs=news_input, outputs=x)
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

In [15]:
# Instantiate the news model using build_model's default arguments.
model = build_model()

In [16]:
# summary() prints the layer table itself and returns None, so wrapping it in
# print() would emit a stray "None" after the table.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         (None, 7, 50, 768)        0         
_________________________________________________________________
time_distributed_4 (TimeDist (None, 7, 50, 768)        0         
_________________________________________________________________
time_distributed_5 (TimeDist (None, 7, 768)            7700      
_________________________________________________________________
time_distributed_6 (TimeDist (None, 7, 768)            0         
_________________________________________________________________
gru_2 (GRU)                  (None, 7, 100)            260700    
_________________________________________________________________
attention_layer_4 (Attention (None, 100)               1020      
_________________________________________________________________
dropout_4 (Dropout)          (None, 100)               0         
__________

In [17]:
# Train the news model on each selected rank entry (currently only the first one).
for i in range(1):
    ticker_splits = data[rank[i]]
    train_split = ticker_splits['train']
    val_split = ticker_splits['val']
    # Index 0 of a split is unpacked into model inputs; index 2 is used as the targets.
    model.fit(
        x=unpack_news_data(train_split[0]),
        y=train_split[2],
        batch_size=64,
        epochs=10,
        verbose=1,
        validation_data=(unpack_news_data(val_split[0]), val_split[2]),
        callbacks=[TensorBoard(log_dir='model_log/'), Metrics()]
    )

Train on 1150 samples, validate on 56 samples
Epoch 1/10
 — val_f1: 0.2911392405063291
Epoch 2/10
  64/1150 [>.............................] - ETA: 3s - loss: 0.7338 - acc: 0.5000

  'precision', 'predicted', average, warn_for)


 — val_f1: 0.2911392405063291
Epoch 3/10
 — val_f1: 0.2911392405063291
Epoch 4/10
 — val_f1: 0.48059086224665065
Epoch 5/10
 — val_f1: 0.2911392405063291
Epoch 6/10
 — val_f1: 0.38596491228070173
Epoch 7/10
 — val_f1: 0.3242835595776772
Epoch 8/10
 — val_f1: 0.41477477477477476
Epoch 9/10
 — val_f1: 0.41477477477477476
Epoch 10/10
 — val_f1: 0.3242835595776772


In [None]:
# Evaluate the news model on the validation split of the first rank entry.
news_val_split = data[rank[0]]['val']
model.evaluate(
    x=unpack_news_data(news_val_split[0]),
    y=news_val_split[2],
    batch_size=64
)

# NUMERICAL MODEL

In [None]:
numerical_timestep = 20 # time steps per sample, i.e. the window size; first axis of the numerical model's input
attribute_num = 5 # features per time step: Open/High/Low/AdjClose/Volume

def build_numerical_model(code='Default', gru_units=50, dropout_rate=0.5,
                          hidden_units=10, num_classes=2):
    """Build and compile the classifier that works on numerical series.

    The network takes a ``(numerical_timestep, attribute_num)`` tensor per
    sample and stacks two GRU layers, each followed by dropout, then a dense
    layer and a softmax output layer.

    Args:
        code: Accepted for interface compatibility; not used by the body.
        gru_units: Number of units in each of the two GRU layers.
        dropout_rate: Dropout rate applied after each GRU layer.
        hidden_units: Width of the dense layer before the output layer.
        num_classes: Size of the softmax output layer.

    Returns:
        A compiled Keras ``Model`` (categorical cross-entropy loss, Adam
        optimizer, accuracy metric).
    """
    numerical_input = Input(shape=(numerical_timestep, attribute_num))

    # First GRU returns the whole sequence so the second GRU can consume it.
    x = GRU(gru_units, return_sequences=True)(numerical_input)
    x = Dropout(dropout_rate)(x)
    x = GRU(gru_units)(x)
    x = Dropout(dropout_rate)(x)

    # NOTE(review): no activation is passed here, so this layer is linear —
    # confirm that is intended.
    x = Dense(hidden_units)(x)
    x = Dense(num_classes, activation='softmax')(x)

    model = Model(inputs=numerical_input, outputs=x)
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

In [None]:
# Instantiate the numerical model using build_numerical_model's default arguments.
nmodel = build_numerical_model()

In [None]:
# summary() prints the layer table itself and returns None, so wrapping it in
# print() would emit a stray "None" after the table.
nmodel.summary()

In [None]:
# Train the numerical model on the first rank entry.
# Index 1 of a split is fed as the model input; index 2 is used as the targets.
numerical_splits = data[rank[0]]
nmodel.fit(
    x=numerical_splits['train'][1],
    y=numerical_splits['train'][2],
    batch_size=16,
    epochs=100,
    verbose=2,
    validation_data=(numerical_splits['val'][1], numerical_splits['val'][2]),
    callbacks=[TensorBoard(log_dir='num_log/')]
)