In [None]:
import pandas as pd
import numpy as np

import keras
from keras.models import Sequential
from keras.utils import to_categorical
from keras.layers.recurrent import LSTM
from keras_self_attention import SeqSelfAttention
from keras.layers.core import Activation, Dense, Dropout
from keras.callbacks import EarlyStopping

from sklearn.preprocessing import MinMaxScaler

In [None]:
# Load the pre-processed price/indicator table. The distinct ticker ids are
# captured while 'Ticker' is still a regular column, then the frame is
# re-indexed by (Ticker, Date) so a single ticker can be selected with .loc.
df = pd.read_csv('processed_data.csv')
tickers = df['Ticker'].unique()
df = df.set_index(['Ticker', 'Date'])

In [None]:
# Number of input features per time step: every data column of `df` plus the
# 'Change_Indicator' column that is inserted into each per-ticker frame before
# it is windowed. Derived from the column list directly rather than by slicing
# one hard-coded ticker, so it no longer fails when that ticker is absent.
dimensions = len(df.columns) + 1

In [None]:
def load_data(df, sequence_length=10, test_size=700):
    """Scale a table and cut it into sliding-window samples for training.

    Every column is min-max scaled, then each run of ``sequence_length + 1``
    consecutive rows becomes one sample: the first ``sequence_length`` rows are
    the model input and column 0 of the final row is the label.

    The last ``test_size`` samples are held out as the test set and are
    returned in their original row order, so test sample ``i`` is labelled by
    row ``len(df) - test_size + i``. Only the training samples are shuffled.

    Args:
        df: 2-D numeric table (DataFrame or array-like). Rows are assumed to
            be in time order; column 0 is used as the label column.
        sequence_length: number of consecutive rows in each model input.
        test_size: number of trailing samples reserved for testing.

    Returns:
        Tuple ``(train_x, train_y, test_x, test_y, scaler)`` where the ``x``
        arrays have shape ``(samples, sequence_length, columns)``, the ``y``
        arrays have shape ``(samples,)`` and ``scaler`` is the fitted
        ``MinMaxScaler``.

    Raises:
        ValueError: if the table does not yield more than ``test_size``
            samples, i.e. nothing would be left for training.
    """
    data_all = np.array(df).astype(float)
    # NOTE(review): the scaler is fitted on every row, including the rows that
    # end up in the test set. Consider fitting on the training rows only.
    scaler = MinMaxScaler()
    data_all = scaler.fit_transform(data_all)

    window = sequence_length + 1
    # Include the final window so the most recent row is used as a target.
    n_windows = len(data_all) - window + 1
    if n_windows <= test_size:
        raise ValueError(
            "Need more than %d windows of length %d, got %d"
            % (test_size, window, max(n_windows, 0))
        )

    samples = np.array(
        [data_all[start:start + window] for start in range(n_windows)]
    ).astype('float64')

    # Split chronologically first; shuffling before the split would scatter
    # the test samples across the whole history and mix them with training.
    split_boundary = n_windows - test_size
    train = samples[:split_boundary].copy()
    test = samples[split_boundary:]
    np.random.shuffle(train)

    train_x, train_y = train[:, :-1], train[:, -1, 0]
    test_x, test_y = test[:, :-1], test[:, -1, 0]
    return train_x, train_y, test_x, test_y, scaler

In [None]:
def build_model(using_attention = False):
    """Assemble and compile the sequence classifier.

    The network is two stacked 200-unit LSTM layers followed by a 200-unit
    dense layer and a 2-unit softmax output, with 0.2 dropout after each of
    the first three. The softmax output is followed by a linear (identity)
    activation layer.

    Args:
        using_attention: when true, a sigmoid-activated ``SeqSelfAttention``
            layer and a dropout layer are placed in front of the first LSTM.

    Returns:
        The model, compiled with binary cross-entropy loss, the Adam
        optimizer and the accuracy metric.
    """
    stack = []
    if using_attention:
        stack += [SeqSelfAttention(attention_activation='sigmoid'), Dropout(.2)]

    # `dimensions` is the module-level feature count per time step.
    stack += [
        LSTM(200, input_dim=dimensions, return_sequences=True),
        Dropout(.2),
        LSTM(200, return_sequences=False),
        Dropout(.2),
        Dense(200),
        Dropout(.2),
        Dense(2, activation='softmax'),
        Activation('linear'),
    ]

    network = Sequential()
    for layer in stack:
        network.add(layer)

    network.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return network

In [None]:
def train_model(train_x, train_y, test_x, test_y, using_attention = False):
    """Build a fresh model, fit it, and predict on the test inputs.

    Args:
        train_x: training inputs passed straight to ``model.fit``.
        train_y: integer class labels (0 or 1); one-hot encoded before fitting.
        test_x: inputs passed to ``model.predict`` after training.
        test_y: test labels; returned unchanged for the caller's convenience.
        using_attention: forwarded to ``build_model``.

    Returns:
        Tuple ``(predict, test_y, stopped_epoch)``. ``predict`` is the model
        output flattened to 1-D, so the two class scores of sample ``k`` sit at
        positions ``2k`` and ``2k + 1``. ``stopped_epoch`` is read from the
        early-stopping callback after training.

    A ``KeyboardInterrupt`` raised while fitting or predicting propagates to
    the caller; there are no partial results to return in that case.
    """
    model = build_model(using_attention)

    labels = to_categorical(train_y, num_classes=2)
    # NOTE(review): early stopping watches the training loss even though a
    # validation split is held out. Confirm 'val_loss' was not intended.
    callbacks = [EarlyStopping(monitor='loss', min_delta=0.001, patience=3, verbose=0, mode='auto')]
    model.fit(train_x, labels, batch_size=512, epochs=300, callbacks=callbacks, verbose=0, validation_split=0.3)

    predict = model.predict(test_x)
    predict = np.reshape(predict, (predict.size, ))

    return predict, test_y, callbacks[0].stopped_epoch

In [None]:
# Empty results table keyed by (name, ticker); rows are filled in later, one
# per model/ticker pair.
model_results = pd.DataFrame(
    columns=['name', 'ticker', 'accuracy', 'total_change', 'predicted_investment', 'early_stop', 'time_in_seconds']
).set_index(['name', 'ticker'])

In [None]:
# Train and evaluate both model variants for every ticker, recording the
# directional accuracy and a simulated long/short return over the test period.
for tick in tickers:
    # Drop the first 35 rows of each ticker (presumably indicator warm-up;
    # TODO confirm), then remove rows with infinities, NaNs or zero volume.
    df_stock = df.loc[tick][35:]
    df_stock = df_stock.replace([np.inf, -np.inf], np.nan)
    df_stock = df_stock.dropna()
    df_stock = df_stock[df_stock['Volume'] > 0]

    if len(df_stock) < 1060:
        print("Not enough data for " + str(tick))
        continue

    # Label column: 1 when the row's ROCP_Close is non-negative, else 0.
    # Inserted at position 0 because load_data reads the label from column 0.
    df_stock.insert(0, 'Change_Indicator', np.where(df_stock['ROCP_Close'] >= 0, 1, 0))

    for using_attention in (False, True):
        name = 'Attention-LSTM' if using_attention else 'LSTM'
        print("Starting " + name + " " + str(tick))

        train_x, train_y, test_x, test_y, scaler = load_data(df_stock.copy())
        train_x = np.reshape(train_x, (train_x.shape[0], train_x.shape[1], dimensions))
        test_x = np.reshape(test_x, (test_x.shape[0], test_x.shape[1], dimensions))
        predict_y, test_y, early_stop_time = train_model(train_x, train_y, test_x, test_y, using_attention)

        # predict_y is flat: consecutive pairs are the two class scores of one
        # sample. Predict "up" (1) when the second score is at least the first.
        indicator = [
            1 if predict_y[i] >= predict_y[i - 1] else 0
            for i in range(1, len(predict_y), 2)
        ]

        # Per-sample hit (1) / miss (0) against the true labels.
        comp = [1 if indicator[i] == test_y[i] else 0 for i in range(len(indicator))]
        correct = sum(comp)

        # Simulated position: go long on a predicted "up" day and short
        # otherwise, compounding each day's open-to-close move. 700 mirrors the
        # number of samples load_data holds out for testing.
        # NOTE(review): this treats test sample i as the i-th of the final 700
        # rows, which only holds if load_data returns the test samples in
        # chronological order. Verify against load_data.
        test_position = 1
        for i in range(len(indicator)):
            open_price = df_stock['Open'].iloc[-700 + i]
            daily_change = (df_stock['Close'].iloc[-700 + i] - open_price) / open_price
            test_position *= 1 + (daily_change * (1 if indicator[i] == 1 else -1))

        # Buy-and-hold benchmark over the same period. Positional access must
        # go through .iloc; plain [] with an integer is label-based.
        total_change = df_stock['Close'].iloc[-1] / df_stock['Close'].iloc[-700]
        model_results.loc[(name, tick), ('accuracy', 'total_change', 'predicted_investment', 'early_stop')] = [correct/len(indicator), total_change, test_position, early_stop_time]

In [None]:
# Write the results table to a CSV file in the working directory.
model_results.to_csv('keras_validation_results.csv')