In [1]:
from classes.Ann import Ann
from classes.DataLoader import DataLoader
from classes.DataLoader_batch import DataLoader_batch
from classes.Simulation import Simulation

import sklearn
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.ensemble import GradientBoostingRegressor

import pandas as pd
import numpy as np

from utils.init_functions import random_normal
from arch import arch_model

import warnings
warnings.filterwarnings("ignore")


  from .autonotebook import tqdm as notebook_tqdm


### Data

In [2]:
index = "S&P500"
data = pd.read_csv("data/dataset.csv", index_col=0)
data.index = pd.to_datetime(data.index)
df_trv = data.rolling(5).std()
df_trv.index = pd.to_datetime(df_trv.index)
df_trv = df_trv.iloc[4:]
dfe = df_trv[index].dropna() 
mi = dfe.min()
ma = dfe.max()
dfe = (dfe - mi)/(ma-mi)
dfe_r = data["S&P500"][dfe.index]

am = arch_model(dfe_r.values, p=1, q=1)
res = am.fit(disp='off')
dfe_garch = pd.Series(res.conditional_volatility, index=dfe_r.index)
mi_garch = dfe_garch.min()
ma_garch = dfe_garch.max()
dfe_garch = (dfe_garch - mi_garch)/(ma_garch-mi_garch)

am = arch_model(dfe_r.values, vol='EGARCH' , p = 1 , o = 0 , q = 1)
res = am.fit(disp='off')
dfe_egarch = pd.Series(res.conditional_volatility, index=dfe_r.index)
mi_egarch = dfe_egarch.min()
ma_egarch = dfe_egarch.max()
df_egarch = (dfe_egarch - mi_egarch)/(ma_egarch-mi_egarch)

periods = {
    "2000-2007":"2008",
    "2001-2008":"2009",
    "2002-2009":"2010",
    "2009-2016":"2017",
    "2010-2017":"2018",
}

params_stacked = {
    "2000-2007":[0.0033,0],
    "2001-2008":[0.0059, 0.01],
    "2002-2009":[0.0136, 0],
    "2009-2016":[0.085, 0.02],
    "2010-2017":[0.01, 0.011],
}

params_ml = {
    "2000-2007":[10, 24, 1479, 0.003, 0.0001, 0.45],
    "2001-2008":[10, 107, 3000, 0.001, 0.0001, 0.55],
    "2002-2009":[1, 37, 3583, 0.001, 0.0004, 0.17],
    "2009-2016":[30, 118, 1000, 0.009, 0.0002, 0.13],
    "2010-2017":[7, 175, 1000, 0.003, 0.0001, 0.54],
}

### Stacked Ann

In [10]:
for period in periods:

    df = dfe[period[:4]:period[5:]]

    pivot_index_training_1 = round(df.shape[0] * 0.25)
    pivot_index_training_2 = round(df.shape[0] * 0.75)

    df_train_1 = df.iloc[:pivot_index_training_1]
    df_ann = df.iloc[pivot_index_training_1:]

    ite_train_1 = DataLoader(df=df_train_1, Y=df_train_1.values, window_size=30, batch_size=df_train_1.shape[0])
    for batch in ite_train_1: # Only one batch there
        X_train_1,y_train_1 = batch

    rf_rgs = RandomForestRegressor(n_estimators=params_ml[period][0], max_depth=params_ml[period][1])  # RandomForestRegressor(max_features = 10, min_samples_split = 24)   
    rf_rgs.fit(X_train_1,y_train_1)

    sv_rgs = SVR(kernel='rbf', gamma=params_ml[period][4], epsilon=params_ml[period][5])
    sv_rgs.fit(X_train_1,y_train_1)

    gb_rgs = GradientBoostingRegressor(learning_rate=params_ml[period][3], n_estimators=params_ml[period][2])  # GradientBoostingRegressor(learning_rate = 0.003, n_estimators=1479)
    gb_rgs.fit(X_train_1,y_train_1)

    ite = DataLoader(df=df_ann, Y=df_ann.values, window_size=30, batch_size=df_ann.shape[0])
    for batch in ite: # Only one batch there
        X,y = batch

    df_train_ann = pd.DataFrame(X, index=df_ann.index[30:])
    df_train_ann["rf"] = rf_rgs.predict(X)
    df_train_ann["sv"] = sv_rgs.predict(X)
    df_train_ann["gb"] = gb_rgs.predict(X)

    kwargs = dict(
        learning_rate = params_stacked[period][0],
        period = period,
        batch_size = df_train_ann.shape[0],
        num_epochs = 10000, 
        window_size = 30,
        weight_decay = 0,
        tab = df_train_ann.values,
        #l2_lambda = params_stacked[period][1],
        y = y,
    )

    sim = Simulation(**kwargs)
    sim.Ann.init_weights(random_normal)
    sim.make_dataloaders(pivot_index=pivot_index_training_2-pivot_index_training_1)
    sim.train(verbose=1)
    sim.Ann.save("models/torch/stacked_ann/{}".format(period))

Train loss: 0.0366 | Test loss: 0.0746: 100%|██████████| 10000/10000 [01:22<00:00, 121.55it/s]
Train loss: 0.0256 | Test loss: 0.5177: 100%|██████████| 10000/10000 [01:14<00:00, 133.68it/s]
Train loss: 0.0307 | Test loss: 1.9208: 100%|██████████| 10000/10000 [01:16<00:00, 131.40it/s]
Train loss: 0.0176 | Test loss: 0.1917: 100%|██████████| 10000/10000 [01:16<00:00, 130.85it/s]
Train loss: 0.0340 | Test loss: 0.0425: 100%|██████████| 10000/10000 [01:18<00:00, 127.22it/s]


### Feed forward-Ann

In [95]:
for period in periods:

    df = dfe[period[:4]:period[5:]]

    pivot_index_training_1 = round(df.shape[0] * 0.25)
    pivot_index_training_2 = round(df.shape[0] * 0.75)

    df_ann = df.iloc[pivot_index_training_1:]

    ite = DataLoader(df=df_ann, Y=df_ann.values, window_size=30, batch_size=df_ann.shape[0])
    for batch in ite: # Only one batch there
        X,y = batch

    kwargs = dict(
        learning_rate = params_stacked[period][0],
        period = period,
        batch_size = len(X),
        num_epochs = 10000, 
        window_size = 30,
        weight_decay = 0,
        #l2_lambda = params_stacked[period][1],
        tab = X,
        y = y,
    )

    sim = Simulation(**kwargs)
    sim.Ann.init_weights(random_normal)
    sim.make_dataloaders(pivot_index=pivot_index_training_2-pivot_index_training_1)
    sim.train(verbose=1)
    sim.Ann.save("models/torch/feed_forward_ann/{}".format(period))

Train loss: 0.0380 | Test loss: 0.0613: 100%|██████████| 10000/10000 [01:10<00:00, 142.01it/s]
Train loss: 0.0249 | Test loss: 0.4544: 100%|██████████| 10000/10000 [01:15<00:00, 132.71it/s]
Train loss: 0.0266 | Test loss: 3.7517: 100%|██████████| 10000/10000 [01:15<00:00, 131.89it/s]
Train loss: 0.0171 | Test loss: 0.2022: 100%|██████████| 10000/10000 [01:17<00:00, 129.76it/s]
Train loss: 0.0394 | Test loss: 0.0402: 100%|██████████| 10000/10000 [01:12<00:00, 137.30it/s]


### Ann-Garch

In [107]:
for period in periods:

    df = dfe[period[:4]:period[5:]]
    df_garch = dfe_garch[period[:4]:period[5:]]

    pivot_index_training_1 = round(df.shape[0] * 0.25)
    pivot_index_training_2 = round(df.shape[0] * 0.75)

    df_ann = df.iloc[pivot_index_training_1:]
    df_ann_garch = df_garch.iloc[pivot_index_training_1:]

    ite = DataLoader(df=df_ann, Y=df_ann.values, window_size=30, batch_size=df_ann.shape[0])
    for batch in ite: # Only one batch there
        X,y = batch

    ite_garch = DataLoader(df=df_ann_garch, Y=df_ann_garch.values, window_size=30, batch_size=df_ann_garch.shape[0])
    for batch in ite_garch: # Only one batch there
        X_garch,y_garch = batch

    kwargs = dict(
        learning_rate = params_stacked[period][0],
        period = period,
        batch_size = len(X_garch),
        num_epochs = 10000, 
        window_size = 30,
        weight_decay = 0,
        #l2_lambda = params_stacked[period][1],
        tab = X_garch,
        y = y,
    )

    sim = Simulation(**kwargs)
    sim.Ann.init_weights(random_normal)
    sim.make_dataloaders(pivot_index=pivot_index_training_2-pivot_index_training_1)
    sim.train(verbose=1)
    sim.Ann.save("models/torch/ann_garch/{}".format(period))

Train loss: 0.0543 | Test loss: 0.0734: 100%|██████████| 10000/10000 [01:05<00:00, 151.82it/s]
Train loss: 0.0334 | Test loss: 0.8235: 100%|██████████| 10000/10000 [01:10<00:00, 142.31it/s]
Train loss: 0.0331 | Test loss: 3.9470: 100%|██████████| 10000/10000 [01:13<00:00, 135.26it/s]
Train loss: 0.0223 | Test loss: 0.2113: 100%|██████████| 10000/10000 [01:13<00:00, 135.44it/s]
Train loss: 0.0623 | Test loss: 0.0556: 100%|██████████| 10000/10000 [01:16<00:00, 131.24it/s]


### Ann-egarch

In [108]:
for period in periods:

    df = dfe[period[:4]:period[5:]]
    df_egarch = dfe_egarch[period[:4]:period[5:]]

    pivot_index_training_1 = round(df.shape[0] * 0.25)
    pivot_index_training_2 = round(df.shape[0] * 0.75)

    df_ann = df.iloc[pivot_index_training_1:]
    df_ann_egarch = df_egarch.iloc[pivot_index_training_1:]

    ite = DataLoader(df=df_ann, Y=df_ann.values, window_size=30, batch_size=df_ann.shape[0])
    for batch in ite: # Only one batch there
        X,y = batch

    ite_garch = DataLoader(df=df_ann_egarch, Y=df_ann_egarch.values, window_size=30, batch_size=df_ann_egarch.shape[0])
    for batch in ite_garch: # Only one batch there
        X_egarch,y_egarch = batch

    kwargs = dict(
        learning_rate = params_stacked[period][0],
        period = period,
        batch_size = len(X_egarch),
        num_epochs = 10000, 
        window_size = 30,
        weight_decay = 0,
        #l2_lambda = params_stacked[period][1],
        tab = X_egarch,
        y = y,
    )

    sim = Simulation(**kwargs)
    sim.Ann.init_weights(random_normal)
    sim.make_dataloaders(pivot_index=pivot_index_training_2-pivot_index_training_1)
    sim.train(verbose=1)
    sim.Ann.save("models/torch/ann_egarch/{}".format(period))

Train loss: 0.0636 | Test loss: 0.0653: 100%|██████████| 10000/10000 [01:08<00:00, 146.49it/s]
Train loss: 0.0398 | Test loss: 0.4191: 100%|██████████| 10000/10000 [01:39<00:00, 100.24it/s]
Train loss: 0.0554 | Test loss: 0.5598: 100%|██████████| 10000/10000 [01:21<00:00, 123.01it/s]
Train loss: 0.0760 | Test loss: 0.1084: 100%|██████████| 10000/10000 [01:19<00:00, 125.63it/s]
Train loss: 0.0665 | Test loss: 0.0538: 100%|██████████| 10000/10000 [01:17<00:00, 128.31it/s]
