In [1]:
import sys
import os
import pickle
import pandas as pd
import json
import datetime
import time
import copy
import numpy as np

py_file_location = '../PrivatePackages'
sys.path.append(os.path.abspath(py_file_location))
from sklearn.model_selection import train_test_split

import JiaoCheng
import NingXiang



In [2]:
# Experiment identifiers: both are interpolated into the file names used for
# the tuning-result CSV and the saved best model.
MODEL = 'lstmc'   # model family being tuned
LABEL = 'trend'   # name of the target column

In [3]:
# Load the curated dataset, keep only the columns used for modelling (in the
# listed order) and parse the date column into datetimes.
curated_data = (
    pd.read_csv('../data/curated_data.csv')
    .loc[:, [
        'date',
        'stock',
        'trend',
        'closePrice_lag_1',
        'log_ret_normalised_by_day_lag_1',
        'closePriceNorm_lag_1',
        'log_ret_lag_1',
        'mean_log_ret_lag_1',
        'mean_closePrice_lag_1',
        'mean_closePriceNorm_lag_1',
        'mean_log_ret_normalised_by_day_lag_1',
        'pos_log_ret_lag_1',
        'pos_closePrice_lag_1',
        'pos_closePriceNorm_lag_1',
        'pos_log_ret_normalised_by_day_lag_1',
    ]]
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
)

In [4]:
# Calendar of 250 consecutive days starting on 2022-12-31; the validation and
# test periods begin at fixed offsets into that calendar.
date = [
    datetime.datetime(2022, 12, 31) + datetime.timedelta(days=offset)
    for offset in range(250)
]
val_date_start = date[175]         # first day of the validation period
test_date_start = date[175 + 38]   # validation period covers 38 calendar days

In [5]:
# Make sure dates are datetimes and discard rows with any missing value
# before splitting.
curated_data = curated_data.assign(date=pd.to_datetime(curated_data['date'])).dropna()

# Chronological split:
#   train: date <  val_date_start
#   val:   val_date_start  <= date < test_date_start
#   test:  date >= test_date_start
train = curated_data.loc[curated_data['date'].lt(val_date_start)]
val = curated_data.loc[
    curated_data['date'].lt(test_date_start) & curated_data['date'].ge(val_date_start)
]
test = curated_data.loc[curated_data['date'].ge(test_date_start)]

In [6]:
def data_factory(data, lag):
    """Build sliding-window samples for every stock in ``data``.

    Rows are grouped by ``'stock'`` and ordered by ``'date'``; both columns are
    then dropped so each window holds only the remaining columns, in their
    original order. For every position ``i >= lag - 1`` in a stock's history,
    a window of ``lag`` consecutive rows ending at row ``i`` is emitted and
    labelled with row ``i``'s ``'trend'`` value. The ``'trend'`` entry of the
    window's final row is set to 0 so the label itself is not part of the
    features.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain ``'stock'``, ``'date'`` and ``'trend'`` columns. The
        frame is not modified.
    lag : int
        Number of consecutive rows per window; must be at least 1.

    Returns
    -------
    tuple[list, list]
        ``(x_list, y_list)``: a list of ``(lag, n_columns)`` NumPy arrays and
        the list of matching labels. Stocks with fewer than ``lag`` rows
        contribute no samples.

    Raises
    ------
    ValueError
        If ``lag`` is smaller than 1.
    """
    if lag < 1:
        raise ValueError(f"lag must be a positive integer, got {lag!r}")

    x_list, y_list = [], []

    for _, stock_data in data.groupby('stock'):
        # Work on new frames instead of mutating the group in place.
        features = (
            stock_data
            .sort_values(by='date')
            .drop(['stock', 'date'], axis=1)
        )

        # Locate the label column by name rather than assuming it is column 0,
        # so the masking below stays correct for any column ordering.
        trend_col = features.columns.get_loc('trend')

        # One array for the whole stock; windows are slices of it.
        values = features.values

        for end in range(lag - 1, len(values)):
            window = values[end - lag + 1:end + 1].copy()
            window[-1, trend_col] = 0  # hide the label of the predicted row

            x_list.append(window)
            y_list.append(values[end, trend_col])

    return x_list, y_list

In [7]:
# Number of consecutive rows in every sample window.
LAG = 5

# Convert each chronological split into its (windows, labels) pair.
(train_x, train_y), (val_x, val_y), (test_x, test_y) = (
    data_factory(split, LAG) for split in (train, val, test)
)

In [9]:
# --- Hyperparameter tuning of the LSTM classifier with the JiaoCheng tuner ---
jiaocheng = JiaoCheng.JiaoCheng()

from models.LSTM import LSTMC_pt as LSTMC

# Register the model class (as a classification model) and the three splits.
jiaocheng.read_in_model(LSTMC, 'Classification')

jiaocheng.read_in_data(train_x, train_y, val_x, val_y, test_x, test_y)

# Candidate values for every tuneable hyperparameter.
parameter_choices = {
    'lstm_hidden_layer_n_neurons': [6, 12, 25, 50, 100],
    'lstm_n_hidden_layers': [1, 2, 3],
    'bi_lstm': [False, True],
    'n_hidden_layers': [1, 2, 3],
    'batch_size': [64, 128, 256, 512],
    'learning_rate': [0.001, 0.01, 0.1, 1],
    'dense_hidden_layer_n_neurons': [6, 12, 25, 50, 100],
    'activation': ['relu', 'sigmoid', 'tanh', 'softmax'],
    'dropout_prob': [0, 0.1, 0.3, 0.5],
    'batch_normalisation': [False, True],
    'dense_layer_type': ['Dense', 'Residual'],
    'attention_num_heads': [0, 1],

}

jiaocheng.set_hyperparameters(parameter_choices)

# Settings passed to the tuner as fixed (non-tuneable) values.
jiaocheng.set_non_tuneable_hyperparameters({'verbose' : True, 'random_state' : 19260817, 'loss_function':'CrossEntropy', 'num_epochs':1000, 'grad_clip': False})

# Order in which the tuneable hyperparameters are handed to the tuner.
jiaocheng.set_tuning_order(['lstm_n_hidden_layers', 'n_hidden_layers', 'lstm_hidden_layer_n_neurons', 'dense_hidden_layer_n_neurons', 
                            'bi_lstm', 'attention_num_heads', 'activation', 'dense_layer_type',
                            'dropout_prob', 'batch_size', 'batch_normalisation', 'learning_rate'])

# Default value of each tuneable hyperparameter.
jiaocheng.set_hyperparameter_default_values({
    'lstm_hidden_layer_n_neurons': 6,
    'lstm_n_hidden_layers': 1,
    'bi_lstm': False,
    'n_hidden_layers': 1,
    'batch_size': 128,
    'learning_rate': 0.01,
    'dense_hidden_layer_n_neurons': 6,
    'activation': 'relu',
    'dropout_prob': 0.1,
    'batch_normalisation': False,
    'dense_layer_type': 'Dense',
    'attention_num_heads': 0}
)

# The same CSV is both read (if loadable) and used as the saving address.
tuning_result_address = f'../models/tuning/jiaocheng_{MODEL}_{LABEL}.csv'

# Best-effort load of earlier tuning results (presumably so a previous run can
# be continued - confirm against JiaoCheng). Failure is tolerated, but it is
# reported instead of being silently swallowed, and KeyboardInterrupt /
# SystemExit are no longer caught.
try:
    jiaocheng.read_in_tuning_result_df(tuning_result_address)
except Exception as exc:
    print(f'Could not read previous tuning results from {tuning_result_address}: {exc!r}')

jiaocheng.set_tuning_result_saving_address(tuning_result_address)
jiaocheng.set_best_model_saving_address(f'../models/tmp_models/jiaocheng_{MODEL}_{LABEL}.pickle')

jiaocheng.tune()

print('\n\n')

JiaoCheng Initialised
Successfully read in model <class 'models.LSTM.LSTMC_pt'>, which is a Classification model
Read in Train X data
Read in Train y data
Read in Val X data
Read in Val y data
Read in Test X data
Read in Test y data
