In [1]:
import sys
import os
import pickle
import pandas as pd
import json
import datetime
import time
import copy
import numpy as np

py_file_location = '../PrivatePackages'
sys.path.append(os.path.abspath(py_file_location))
from sklearn.model_selection import train_test_split

import JiaoCheng
import NingXiang



In [2]:
# Short tags for this experiment; they are interpolated into the
# tuning-result and saved-model file names further down the notebook.
MODEL = 'tcnnr'
LABEL = 'logret'

In [3]:
# Columns kept from the curated CSV. The order of this list is the column
# order of `curated_data` after loading.
CURATED_COLUMNS = [
    'date',
    'stock',
    'log_ret',
    'closePrice_lag_1',
    'log_ret_normalised_by_day_lag_1',
    'closePriceNorm_lag_1',
    'log_ret_lag_1',
    'mean_log_ret_lag_1',
    'mean_closePrice_lag_1',
    'mean_closePriceNorm_lag_1',
    'mean_log_ret_normalised_by_day_lag_1',
    'pos_log_ret_lag_1',
    'pos_closePrice_lag_1',
    'pos_closePriceNorm_lag_1',
    'pos_log_ret_normalised_by_day_lag_1',
]

# Read only the columns that are needed. `usecols` ignores the order of the
# names it is given, so the frame is re-selected with the same list to pin the
# column layout. `assign` returns a new frame, which avoids writing into a
# column-subset of another DataFrame (the usual SettingWithCopyWarning source).
curated_data = (
    pd.read_csv('../data/curated_data.csv', usecols=CURATED_COLUMNS)[CURATED_COLUMNS]
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
)

In [4]:
# 250 consecutive calendar days, the first one being 2022-12-31.
date = [datetime.datetime(year=2022, month=12, day=31)]
while len(date) < 250:
    date.append(date[-1] + datetime.timedelta(days=1))

# Validation starts 175 days after the first day; test starts 38 days later.
val_date_start, test_date_start = date[175], date[175 + 38]

In [5]:
# Make sure `date` is a datetime column and discard rows with any missing value.
curated_data['date'] = pd.to_datetime(curated_data['date'])
curated_data = curated_data.dropna()

# Chronological three-way split on the two boundary dates:
#   train: date <  val_date_start
#   val:   val_date_start <= date < test_date_start
#   test:  date >= test_date_start
row_dates = curated_data['date']
in_train = row_dates < val_date_start
in_val = (row_dates >= val_date_start) & (row_dates < test_date_start)
in_test = row_dates >= test_date_start

train = curated_data[in_train]
val = curated_data[in_val]
test = curated_data[in_test]

In [6]:
def data_factory(data, lag, target_col='log_ret'):
    """Build sliding-window samples per stock.

    For every stock in ``data`` the rows are ordered by ``date``, the
    ``stock`` and ``date`` columns are removed, and one sample is produced for
    each row that has at least ``lag - 1`` earlier rows of the same stock.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns ``'stock'``, ``'date'`` and ``target_col``.
        The frame itself is not modified.
    lag : int
        Number of consecutive rows in each window (the window ends at the row
        whose target is being predicted).
    target_col : str, default 'log_ret'
        Name of the target column. Its value in the last row of every window
        is set to 0 so the window does not contain the value being predicted.

    Returns
    -------
    (x_list, y_list) : tuple of lists
        ``x_list[k]`` is a 2-D NumPy array with one row per window step and one
        column per remaining column of ``data``; ``y_list[k]`` is the target
        value of that window's last row.

    Raises
    ------
    KeyError
        If ``'stock'``, ``'date'`` or ``target_col`` is missing from ``data``.
    """
    x_list, y_list = [], []

    for _, stock_rows in data.groupby('stock'):
        # Work on fresh frames rather than mutating the groupby slice in place.
        features = stock_rows.sort_values(by='date').drop(columns=['stock', 'date'])

        # Locate the target by name instead of assuming it is column 0, so the
        # mask below always hits the target regardless of column order.
        target_idx = features.columns.get_loc(target_col)
        values = features.to_numpy()

        # First usable row index is lag - 1 (never below 0).
        for i in range(max(lag - 1, 0), len(values)):
            window = values[i - lag + 1:i + 1].copy()
            window[-1:, target_idx] = 0  # mask last day's target value

            x_list.append(window)
            y_list.append(values[i, target_idx])

    return x_list, y_list

In [7]:
# Window length (number of rows per sample) handed to data_factory.
LAG = 5

# Build the (windows, targets) pair for each split with the same window length.
(train_x, train_y), (val_x, val_y), (test_x, test_y) = (
    data_factory(split, LAG) for split in (train, val, test)
)

In [8]:
# Hyperparameter search for the TCNN regressor, driven by the JiaoCheng tuner.
jiaocheng = JiaoCheng.JiaoCheng()

from models.TemporalConvolutionalNeuralNetwork_pt import TemporalConvolutionalNeuralNetworkRegressor_pt as TCNNR

jiaocheng.read_in_model(TCNNR, 'Regression')

jiaocheng.read_in_data(train_x, train_y, val_x, val_y, test_x, test_y)

# Candidate values for every tuneable hyperparameter.
parameter_choices = {

    'cnn_n_hidden_layers': [1, 2, 3],
    'output_channels_per_input_channel': [1, 2, 3, 4],
    'convolution_kernel_dim': [2, 3],
    'activation': ['relu', 'sigmoid', 'tanh'],
    'pool_kernel': [2, 3],
    'dense_n_hidden_layers': [1, 2, 3],
    'dense_hidden_layer_embed_dim': [10, 20, 30, 40],
    'dropout_prob': [0.1, 0.2, 0.3, 0.4, 0.5],
    'batch_size': [32, 64, 128, 256],
    'learning_rate': [0.01, 0.001, 0.0001],
    'dense_layer_type' : ['Dense', 'Residual'],
    'batch_normalisation' : [False, True]

}

jiaocheng.set_hyperparameters(parameter_choices)

# Settings that stay fixed for every candidate model.
jiaocheng.set_non_tuneable_hyperparameters({
    'num_epochs' : 200,
    'verbose' : True,
    'loss_function':'MSE',
    'grad_clip' : False,
    'eval_metric' : 'R2',
    'random_state' : 19260817,
    'lookback_window_size' : LAG,  # same window length used to build the samples
    'convolution_stride' : 1,
    'pool_type': 'MaxPool',})

jiaocheng.set_tuning_order(['cnn_n_hidden_layers', 
                            'output_channels_per_input_channel',
                            'convolution_kernel_dim',
                            'pool_kernel',
                            'dense_n_hidden_layers',
                            'dense_hidden_layer_embed_dim',
                            'batch_normalisation',
                            'dense_layer_type',
                            'activation',
                            'batch_size',
                            'dropout_prob',
                            'learning_rate'])

jiaocheng.set_hyperparameter_default_values({
    'cnn_n_hidden_layers': 1,
    'output_channels_per_input_channel': 2,
    'convolution_kernel_dim': 2,
    'activation': 'relu',
    'pool_kernel': 2,
    'dense_n_hidden_layers': 1,
    'dense_hidden_layer_embed_dim': 10,
    'dropout_prob': 0.1,
    'batch_size': 32,
    'learning_rate': 0.01,
    'dense_layer_type' : 'Dense',
    'batch_normalisation' : False}
)

# Output locations, built once so the read and write paths cannot drift apart.
tuning_result_path = f'../models/tuning/jiaocheng_{MODEL}_{LABEL}.csv'
best_model_path = f'../models/tmp_models/jiaocheng_{MODEL}_{LABEL}.pickle'

# Resuming from earlier results is best-effort: a failure here must not stop
# the run, but it is reported instead of being silently discarded, and
# KeyboardInterrupt / SystemExit are no longer swallowed.
try:
    jiaocheng.read_in_tuning_result_df(tuning_result_path)
except Exception as exc:
    print(f'Previous tuning results not loaded from {tuning_result_path}: {exc!r}')

jiaocheng.set_tuning_result_saving_address(tuning_result_path)
jiaocheng.set_best_model_saving_address(best_model_path)

# NOTE(review): the saved output of this cell ends, right after round 1 starts,
# with a torch TypeError ("empty(): argument 'size' must be tuple of SymInts,
# but found element of type numpy.float64 at pos 2"). Presumably a size-like
# hyperparameter reaches torch as a numpy float somewhere inside JiaoCheng or
# the TCNNR model - TODO confirm there; it cannot be determined from this cell.
jiaocheng.tune()

jiaocheng.view_best_combo_and_score()

print('\n\n')

JiaoCheng Initialised
Successfully read in model <class 'models.TemporalConvolutionalNeuralNetwork_pt.TemporalConvolutionalNeuralNetworkRegressor_pt'>, which is a Regression model
Read in Train X data
Read in Train y data
Read in Val X data
Read in Val y data
Read in Test X data
Read in Test y data
Successfully recorded hyperparameter choices
Successfully recorded non_tuneable_hyperparameter choices
Successfully set tuning output address
Successfully set best model output address

Default combo: [0, 1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0] 


ROUND 1

Round 1 
Hyperparameter: cnn_n_hidden_layers (index: 0) 



TypeError: empty(): argument 'size' must be tuple of SymInts, but found element of type numpy.float64 at pos 2