In [24]:
import sys
from pathlib import Path
import pandas as pd 
import numpy as np
import keras
import math 
from keras.layers import LSTM, Dense, Dropout, LeakyReLU, GRU, BatchNormalization, Input, LayerNormalization
from keras.regularizers import l1, l2, l1_l2
from tensorflow.keras.optimizers import SGD
from keras.models import Sequential
from keras.callbacks import LearningRateScheduler, EarlyStopping
from keras.preprocessing.sequence import TimeseriesGenerator
sys.path.append(str(Path("../..").resolve()))
from src.constants import model_data_dir
from src.utils import use_target

In [4]:
df_train = pd.read_csv(model_data_dir / "train_classification.csv")
df_test = pd.read_csv(model_data_dir / "test_classification.csv")
test_dates = pd.to_datetime(df_test["date"])
df_train = df_train.drop(columns=["date"])
df_test = df_test.drop("date", axis=1)
df_train = df_train.loc[:, ~df_train.columns.str.contains("_mv_")]
df_test = df_test.loc[:, ~df_test.columns.str.contains("_mv_")]

## LSTM

In [5]:
win_length = 40
batch_size = 120 
num_features = df_train.shape[1] - 3

In [28]:
model = Sequential()
model.add(Input(shape=(win_length, num_features)))
model.add(LSTM(64, return_sequences=True, bias_initializer="zeros", unit_forget_bias=True, kernel_regularizer=l1(1e-4), recurrent_regularizer=l2(2e-4)))
model.add(LeakyReLU(alpha=0.5)) 
model.add(LayerNormalization())
model.add(Dropout(0.3)) 
model.add(LSTM(32, return_sequences=False, bias_initializer="zeros", unit_forget_bias=True, kernel_regularizer=l1(1e-4), recurrent_regularizer=l2(2e-4)))
model.add(LeakyReLU(alpha=0.5)) 
model.add(LayerNormalization())
model.add(Dropout(0.3)) 
model.add(Dense(1, activation="sigmoid", kernel_regularizer=l1_l2(1e-4, 2e-4)))

In [29]:
early_stop = EarlyStopping(monitor = "val_loss",
                           patience = 5)
initial_learning_rate = 0.001

def lr_exp_decay(epoch, lr):
    k = 0.1
    return initial_learning_rate * math.exp(-k*epoch)

model.compile(loss=keras.losses.binary_crossentropy,
              optimizer=SGD(learning_rate=initial_learning_rate, momentum=0.2),
              metrics=["accuracy"])

## size

In [30]:
y_train_size, x_train_size = use_target(df_train, "sc_1d_fwd_rel_d", "classification")
y_test_size, x_test_size = use_target(df_test, "sc_1d_fwd_rel_d", "classification")

train_generator_size = TimeseriesGenerator(x_train_size, y_train_size, length=win_length, sampling_rate=1, batch_size=batch_size)
test_generator_size = TimeseriesGenerator(x_test_size, y_test_size, length=win_length, sampling_rate=1, batch_size=batch_size)

In [31]:
history = model.fit(train_generator_size, epochs=50,
                    validation_data=test_generator_size,
                    shuffle=False,
                    callbacks=[LearningRateScheduler(lr_exp_decay, verbose=1), early_stop])


Epoch 1: LearningRateScheduler setting learning rate to 0.001.
Epoch 1/50

Epoch 2: LearningRateScheduler setting learning rate to 0.0009048374180359595.
Epoch 2/50

Epoch 3: LearningRateScheduler setting learning rate to 0.0008187307530779819.
Epoch 3/50

Epoch 4: LearningRateScheduler setting learning rate to 0.0007408182206817179.
Epoch 4/50

Epoch 5: LearningRateScheduler setting learning rate to 0.0006703200460356394.
Epoch 5/50

Epoch 6: LearningRateScheduler setting learning rate to 0.0006065306597126335.
Epoch 6/50

Epoch 7: LearningRateScheduler setting learning rate to 0.0005488116360940264.
Epoch 7/50


In [21]:
preds = model.predict(test_generator_size).flatten()
true = y_test_size[0:len(y_test_size) - 40]
np.mean(np.where(preds, ))

0.0

## momentum

In [16]:
y_train_mom, x_train_mom = use_target(df_train, "mom_1d_fwd_rel_d", "classification")
y_test_mom, x_test_mom = use_target(df_test, "mom_1d_fwd_rel_d", "classification")

train_generator_mom = TimeseriesGenerator(x_train_mom, y_train_mom, length=win_length, sampling_rate=1, batch_size=batch_size)
test_generator_mom = TimeseriesGenerator(x_test_mom, y_test_mom, length=win_length, sampling_rate=1, batch_size=batch_size)

In [17]:
history = model.fit(train_generator_mom, epochs=50,
                    validation_data=test_generator_mom,
                    shuffle=False,
                    callbacks=[LearningRateScheduler(lr_exp_decay, verbose=1), early_stop])


Epoch 1: LearningRateScheduler setting learning rate to 0.001.
Epoch 1/50

Epoch 2: LearningRateScheduler setting learning rate to 0.0009048374180359595.
Epoch 2/50

Epoch 3: LearningRateScheduler setting learning rate to 0.0008187307530779819.
Epoch 3/50

Epoch 4: LearningRateScheduler setting learning rate to 0.0007408182206817179.
Epoch 4/50

Epoch 5: LearningRateScheduler setting learning rate to 0.0006703200460356394.
Epoch 5/50

Epoch 6: LearningRateScheduler setting learning rate to 0.0006065306597126335.
Epoch 6/50

Epoch 7: LearningRateScheduler setting learning rate to 0.0005488116360940264.
Epoch 7/50

Epoch 8: LearningRateScheduler setting learning rate to 0.0004965853037914095.
Epoch 8/50

Epoch 9: LearningRateScheduler setting learning rate to 0.0004493289641172216.
Epoch 9/50

Epoch 10: LearningRateScheduler setting learning rate to 0.00040656965974059914.
Epoch 10/50

Epoch 11: LearningRateScheduler setting learning rate to 0.00036787944117144236.
Epoch 11/50

Epoch 12

## value

In [18]:
y_train_value, x_train_value = use_target(df_train, "value_1d_fwd_rel_d", "classification")
y_test_value, x_test_value = use_target(df_test, "value_1d_fwd_rel_d", "classification")

train_generator_value = TimeseriesGenerator(x_train_value, y_train_value, length=win_length, sampling_rate=1, batch_size=batch_size)
test_generator_value = TimeseriesGenerator(x_test_value, y_test_value, length=win_length, sampling_rate=1, batch_size=batch_size)

In [21]:
model.compile(loss=keras.losses.binary_crossentropy,
              optimizer=SGD(learning_rate=initial_learning_rate, momentum=0.2),
              metrics=["accuracy"])
history = model.fit(train_generator_value, epochs=50,
                    validation_data=test_generator_value,
                    shuffle=False,
                    callbacks=[LearningRateScheduler(lr_exp_decay, verbose=1), early_stop])


Epoch 1: LearningRateScheduler setting learning rate to 0.001.
Epoch 1/50

Epoch 2: LearningRateScheduler setting learning rate to 0.0009048374180359595.
Epoch 2/50

Epoch 3: LearningRateScheduler setting learning rate to 0.0008187307530779819.
Epoch 3/50

Epoch 4: LearningRateScheduler setting learning rate to 0.0007408182206817179.
Epoch 4/50

Epoch 5: LearningRateScheduler setting learning rate to 0.0006703200460356394.
Epoch 5/50

Epoch 6: LearningRateScheduler setting learning rate to 0.0006065306597126335.
Epoch 6/50

Epoch 7: LearningRateScheduler setting learning rate to 0.0005488116360940264.
Epoch 7/50

Epoch 8: LearningRateScheduler setting learning rate to 0.0004965853037914095.
Epoch 8/50

Epoch 9: LearningRateScheduler setting learning rate to 0.0004493289641172216.
Epoch 9/50

Epoch 10: LearningRateScheduler setting learning rate to 0.00040656965974059914.
Epoch 10/50

Epoch 11: LearningRateScheduler setting learning rate to 0.00036787944117144236.
Epoch 11/50

Epoch 12