In [1]:
import sys
from google.colab import drive
drive.mount('/content/drive')
# to use the packages in google drive
sys.path.append('/content/drive/My Drive/6000M_proj2/proj2')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
from tensorflow import keras

In [3]:
# !pip install loguru

In [4]:
import yfinance as yf
import pandas as pd
import seaborn as sns
import numpy as np
import tensorflow as tf

from tensorflow import keras
from tensorflow.keras import layers
from keras.layers import Dense, Conv1D, Flatten, MaxPooling1D, Dropout, LeakyReLU, LSTM, BatchNormalization
from keras.models import Sequential
from sklearn.model_selection import train_test_split
from tqdm import tqdm
from pathlib import Path
from concurrent.futures import ProcessPoolExecutor
from scipy.stats import skew, kurtosis
from os.path import join

from config import *
from src.universe import Universe
from src.utils import time_series_generator
from src.metrics import plot_mse

In [5]:
# !pip install keras-tuner --upgrade

In [6]:
import keras_tuner

def build_model(hp):
    model = keras.Sequential()
    # Tune the number of layers.
    for i in range(hp.Int("num_layers", 1, 3)):
        model.add(
            Conv1D(
                # Tune number of units separately.
                filters=hp.Int(f"units_{i}", min_value=32, max_value=512, step=32),
                kernel_size = hp.Int(f"kernel_{i}", min_value=1, max_value=3, step=1),
                activation=hp.Choice("activation", ["relu", "tanh"]),
                padding='same',
                data_format="channels_last")
        )
    model.add(Flatten())
    model.add(layers.Dense(hp.Int("dense", min_value=32, max_value=512, step=32), activation="relu"))
    if hp.Boolean("dropout"):
        model.add(layers.Dropout(rate=hp.Int("dropout", min_value=1, max_value=9, step=1)/10))
    model.add(layers.Dense(2500))

    model.compile(
        optimizer='rmsprop',
        loss="mse",
        metrics=["mse"],
    )
    return model


build_model(keras_tuner.HyperParameters())

<keras.engine.sequential.Sequential at 0x7f06cc843fa0>

In [7]:
tuner = keras_tuner.RandomSearch(
    hypermodel=build_model,
    objective="val_loss",
    max_trials=10,
    executions_per_trial=10,
    overwrite=True,
    directory="my_dir",
    project_name="CNN1D",
)

In [8]:
tuner.search_space_summary()

Search space summary
Default search space size: 6
num_layers (Int)
{'default': None, 'conditions': [], 'min_value': 1, 'max_value': 3, 'step': 1, 'sampling': 'linear'}
units_0 (Int)
{'default': None, 'conditions': [], 'min_value': 32, 'max_value': 512, 'step': 32, 'sampling': 'linear'}
kernel_0 (Int)
{'default': None, 'conditions': [], 'min_value': 1, 'max_value': 3, 'step': 1, 'sampling': 'linear'}
activation (Choice)
{'default': 'relu', 'conditions': [], 'values': ['relu', 'tanh'], 'ordered': False}
dense (Int)
{'default': None, 'conditions': [], 'min_value': 32, 'max_value': 512, 'step': 32, 'sampling': 'linear'}
dropout (Boolean)
{'default': False, 'conditions': []}


In [9]:
inception_date = '2022-03-31'
ONE_YEAR_TRADE_DAYS = 252
TWO_YEAR_TRADE_DAYS = ONE_YEAR_TRADE_DAYS * 2
WIN_LEN = 30
UNIVERSE_SIZE = 2500
EPOCH = 20
BATCH_SIZE = 16
training_path = Path(join(data_path, 'train_set'))
ret_train = pd.read_csv(join(training_path, '2022-03-31.csv'), index_col=0)
X, y = time_series_generator(ret_train, WIN_LEN)
train_index = ret_train.loc[:inception_date].iloc[-TWO_YEAR_TRADE_DAYS:].index
X_train, X_val, y_train, y_val = train_test_split(X,y)
X_train.shape, X_val.shape, y_train.shape, y_val.shape

((378, 30, 2500), (126, 30, 2500), (378, 2500), (126, 2500))

In [10]:
tuner.search(X_train, y_train, epochs=2, validation_data=(X_val, y_val))

Trial 10 Complete [00h 01m 24s]
val_loss: 0.0027173767331987618

Best val_loss So Far: 0.0027173767331987618
Total elapsed time: 00h 17m 15s


In [11]:
models = tuner.get_best_models(num_models=2)
best_model = models[0]

In [13]:
best_model.build(input_shape=(None, 30, 2500))

In [14]:
best_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d (Conv1D)             (None, 30, 32)            240032    
                                                                 
 conv1d_1 (Conv1D)           (None, 30, 512)           33280     
                                                                 
 conv1d_2 (Conv1D)           (None, 30, 320)           491840    
                                                                 
 flatten (Flatten)           (None, 9600)              0         
                                                                 
 dense (Dense)               (None, 64)                614464    
                                                                 
 dense_1 (Dense)             (None, 2500)              162500    
                                                                 
Total params: 1,542,116
Trainable params: 1,542,116
Non-

In [15]:
tuner.results_summary()

Results summary
Results in my_dir/CNN1D
Showing 10 best trials
Objective(name="val_loss", direction="min")

Trial 09 summary
Hyperparameters:
num_layers: 3
units_0: 32
kernel_0: 3
activation: relu
dense: 64
dropout: False
units_1: 512
kernel_1: 2
units_2: 320
kernel_2: 3
Score: 0.0027173767331987618

Trial 04 summary
Hyperparameters:
num_layers: 3
units_0: 96
kernel_0: 1
activation: relu
dense: 512
dropout: False
units_1: 32
kernel_1: 3
units_2: 320
kernel_2: 2
Score: 0.0027410766342654824

Trial 08 summary
Hyperparameters:
num_layers: 1
units_0: 448
kernel_0: 2
activation: tanh
dense: 64
dropout: False
units_1: 288
kernel_1: 2
units_2: 224
kernel_2: 3
Score: 0.002887316280975938

Trial 01 summary
Hyperparameters:
num_layers: 1
units_0: 256
kernel_0: 3
activation: relu
dense: 224
dropout: False
units_1: 256
kernel_1: 3
Score: 0.0029107048176229

Trial 05 summary
Hyperparameters:
num_layers: 1
units_0: 448
kernel_0: 3
activation: relu
dense: 384
dropout: True
units_1: 224
kernel_1: 1
un