In [3]:
from classes.Ann import Ann
from classes.DataLoader import DataLoader
from classes.DataLoader_batch import DataLoader_batch
from classes.Simulation import Simulation

import sklearn
from classes.GradientBoosting import GradientBoosting as GradientBoostingRegressor
from sklearn.svm import SVR
from classes.RandomForestClassifier import RandomForest as RandomForestRegressor

import pandas as pd
import numpy as np

from utils.init_functions import random_normal
from classes.garchpq import GARCH
from arch import arch_model

import warnings
warnings.filterwarnings("ignore")


### Data

In [6]:
# Build the target series: a 5-observation rolling standard deviation of the
# selected column of data/dataset.csv, min-max scaled to [0, 1].
index = "S&P500"
ROLLING_WINDOW = 5  # number of observations in each rolling-std window

data = pd.read_csv("data/dataset.csv", index_col=0)
data.index = pd.to_datetime(data.index)

# rolling() keeps the (already datetime) index of `data`, so no second conversion is needed.
df_trv = data.rolling(ROLLING_WINDOW).std()
# The first ROLLING_WINDOW - 1 rows have incomplete windows and are NaN by construction.
df_trv = df_trv.iloc[ROLLING_WINDOW - 1:]
dfe = df_trv[index].dropna()

# NOTE(review): min and max are taken over the whole sample, so every period
# sliced from `dfe` later is scaled with statistics from outside that period.
mi = dfe.min()
ma = dfe.max()
dfe = (dfe - mi)/(ma-mi)

# Raw (un-rolled) values of the same column, aligned on the dates kept in `dfe`.
# Uses `index` rather than a second hard-coded column name so both series stay in sync.
dfe_r = data[index].loc[dfe.index]

# Fit the project's GARCH implementation on the raw series and min-max scale
# the fitted conditional standard deviation (sqrt of am.sigma2) to [0, 1].
am = GARCH()
# Starting values passed to fit(). The second entry used to be
# `dfe_r.values.mean().var() * 0.01`: the variance of a single scalar, which is
# always 0. It is now 1% of the sample variance of the series.
# NOTE(review): parameter order is presumably [mu, omega, alpha, beta] - confirm against classes/garchpq.
res = am.fit(dfe_r.values, [dfe_r.values.mean(), dfe_r.values.var() * 0.01, 0.078, 0.815])
dfe_garch = pd.Series(np.sqrt(am.sigma2), index=dfe_r.index)
mi_garch = dfe_garch.min()
ma_garch = dfe_garch.max()
dfe_garch = (dfe_garch - mi_garch)/(ma_garch-mi_garch)

# Fit an EGARCH(1, 1) model (no asymmetry term, o = 0) with the `arch` package
# and min-max scale its conditional volatility to [0, 1].
# Note: `am` and `res` are reused here and overwrite the GARCH objects above.
am = arch_model(dfe_r.values, vol='EGARCH' , p = 1 , o = 0 , q = 1)
res = am.fit(disp='off')
dfe_egarch = pd.Series(res.conditional_volatility, index=dfe_r.index)
mi_egarch = dfe_egarch.min()
ma_egarch = dfe_egarch.max()
# The scaled series was previously stored only under `df_egarch`, leaving
# `dfe_egarch` un-normalised, unlike `dfe_garch`. Store it under the
# consistent name and keep the old name as an alias for existing readers.
dfe_egarch = (dfe_egarch - mi_egarch)/(ma_egarch-mi_egarch)
df_egarch = dfe_egarch

# Period key "<start>-<start+7>" (an 8-calendar-year span) mapped to the year
# that follows it, "<start+8>".
periods = {
    f"{first_year}-{first_year + 7}": str(first_year + 8)
    for first_year in (2000, 2001, 2002, 2009, 2010)
}

# Per-period ANN settings. Entry [0] is passed as `learning_rate` by the
# training cells; entry [1] is only referenced by a commented-out `l2_lambda`.
params_stacked = {
    "2000-2007": [0.0033, 0],
    "2001-2008": [0.0059, 0.01],
    "2002-2009": [0.0136, 0],
    "2009-2016": [0.085, 0.02],
    "2010-2017": [0.01, 0.011],
}

# Per-period settings for the base learners. Entries [4] and [5] are passed to
# SVR as `gamma` and `epsilon`. Entries [0]-[3] are not read by the visible
# cells (presumably RF max_features / min_samples_split and GB n_estimators /
# learning_rate, judging by the reference comments in the stacked cell - TODO confirm).
params_ml = {
    "2000-2007": [10, 24, 1479, 0.003, 0.0001, 0.45],
    "2001-2008": [10, 107, 3000, 0.001, 0.0001, 0.55],
    "2002-2009": [1, 37, 3583, 0.001, 0.0004, 0.17],
    "2009-2016": [30, 118, 1000, 0.009, 0.0002, 0.13],
    "2010-2017": [7, 175, 1000, 0.003, 0.0001, 0.54],
}

### Stacked Ann

In [3]:
# Stacked ANN: for each period, fit three base regressors (random forest, SVR,
# gradient boosting) on the first 25% of the series, append their predictions
# to the DataLoader features of the remaining 75%, train the ANN on that table
# and save it under models/torch/stacked_ann_prop/<period>.
WINDOW_SIZE = 30  # single source for the DataLoader window, the index offset and the Simulation window

for period in periods:

    # Period keys look like "2000-2007": chars 0-3 are the start year, chars 5+ the end year.
    df = dfe[period[:4]:period[5:]]

    pivot_index_training_1 = round(df.shape[0] * 0.25)
    pivot_index_training_2 = round(df.shape[0] * 0.75)

    df_train_1 = df.iloc[:pivot_index_training_1]
    df_ann = df.iloc[pivot_index_training_1:]

    # batch_size equals the series length, so the loader is expected to yield one batch.
    ite_train_1 = DataLoader(df=df_train_1, Y=df_train_1.values, window_size=WINDOW_SIZE, batch_size=df_train_1.shape[0])
    for batch in ite_train_1: # Only one batch there
        X_train_1,y_train_1 = batch

    rf_rgs = RandomForestRegressor(n_estimators=30, max_depth=3)  # RandomForestRegressor(max_features = 10, min_samples_split = 24)   
    rf_rgs.fit(X_train_1,y_train_1)

    # Only the SVR reads its hyper-parameters from params_ml (entries 4 and 5).
    sv_rgs = SVR(kernel='rbf', gamma=params_ml[period][4], epsilon=params_ml[period][5])
    sv_rgs.fit(X_train_1,y_train_1)

    gb_rgs = GradientBoostingRegressor(learning_rate=0.01, n_estimators=30, max_depth=3)  # GradientBoostingRegressor(learning_rate = 0.003, n_estimators=1479)
    gb_rgs.fit(X_train_1,y_train_1)

    ite = DataLoader(df=df_ann, Y=df_ann.values, window_size=WINDOW_SIZE, batch_size=df_ann.shape[0])
    for batch in ite: # Only one batch there
        X,y = batch

    # X is indexed from the WINDOW_SIZE-th date of df_ann onwards, so this
    # offset must stay equal to the window size given to DataLoader.
    df_train_ann = pd.DataFrame(X, index=df_ann.index[WINDOW_SIZE:])
    df_train_ann["rf"] = rf_rgs.predict(X)
    df_train_ann["sv"] = sv_rgs.predict(X)
    df_train_ann["gb"] = gb_rgs.predict(X)

    kwargs = dict(
        learning_rate = params_stacked[period][0],
        period = period,
        batch_size = df_train_ann.shape[0],
        num_epochs = 10000, 
        window_size = WINDOW_SIZE,
        weight_decay = 0,
        tab = df_train_ann.values,
        #l2_lambda = params_stacked[period][1],
        y = y,
    )

    sim = Simulation(**kwargs)
    sim.Ann.init_weights(random_normal)
    # The 75% mark of `df`, re-expressed relative to the start of df_ann.
    # NOTE(review): df_train_ann has WINDOW_SIZE fewer rows than df_ann; confirm
    # that make_dataloaders expects the pivot in df_ann coordinates.
    sim.make_dataloaders(pivot_index=pivot_index_training_2-pivot_index_training_1)
    sim.train(verbose=1)
    sim.Ann.save("models/torch/stacked_ann_prop/{}".format(period))

Train loss: 0.0372 | Test loss: 0.0673: 100%|███████████████████████████████████| 10000/10000 [01:37<00:00, 102.58it/s]
Train loss: 0.0260 | Test loss: 0.5068: 100%|███████████████████████████████████| 10000/10000 [01:24<00:00, 117.75it/s]
Train loss: 0.0251 | Test loss: 3.7496: 100%|███████████████████████████████████| 10000/10000 [01:34<00:00, 105.66it/s]
Train loss: 0.0183 | Test loss: 0.1522: 100%|████████████████████████████████████| 10000/10000 [01:40<00:00, 99.89it/s]
Train loss: 0.0379 | Test loss: 0.0428: 100%|███████████████████████████████████| 10000/10000 [01:15<00:00, 133.23it/s]


### Feed-forward Ann

In [4]:
# Feed-forward ANN: for each period, train the ANN directly on the DataLoader
# features of the last 75% of the series (no base-learner columns) and save it
# under models/torch/feed_forward_ann_prop/<period>.
WINDOW_SIZE = 30  # single source for the DataLoader window and the Simulation window

for period in periods:

    # Period keys look like "2000-2007": chars 0-3 are the start year, chars 5+ the end year.
    df = dfe[period[:4]:period[5:]]

    pivot_index_training_1 = round(df.shape[0] * 0.25)
    pivot_index_training_2 = round(df.shape[0] * 0.75)

    # The first 25% is skipped here too, matching the slice the stacked cell trains its ANN on.
    df_ann = df.iloc[pivot_index_training_1:]

    # batch_size equals the series length, so the loader is expected to yield one batch.
    ite = DataLoader(df=df_ann, Y=df_ann.values, window_size=WINDOW_SIZE, batch_size=df_ann.shape[0])
    for batch in ite: # Only one batch there
        X,y = batch

    kwargs = dict(
        learning_rate = params_stacked[period][0],
        period = period,
        batch_size = len(X),
        num_epochs = 10000, 
        window_size = WINDOW_SIZE,
        weight_decay = 0,
        #l2_lambda = params_stacked[period][1],
        tab = X,
        y = y,
    )

    sim = Simulation(**kwargs)
    sim.Ann.init_weights(random_normal)
    # The 75% mark of `df`, re-expressed relative to the start of df_ann.
    # NOTE(review): X has WINDOW_SIZE fewer rows than df_ann; confirm that
    # make_dataloaders expects the pivot in df_ann coordinates.
    sim.make_dataloaders(pivot_index=pivot_index_training_2-pivot_index_training_1)
    sim.train(verbose=1)
    sim.Ann.save("models/torch/feed_forward_ann_prop/{}".format(period))

Train loss: 0.0457 | Test loss: 0.0574: 100%|███████████████████████████████████| 10000/10000 [01:13<00:00, 136.49it/s]
Train loss: 0.0245 | Test loss: 0.7881: 100%|███████████████████████████████████| 10000/10000 [01:31<00:00, 108.91it/s]
Train loss: 0.0252 | Test loss: 2.8038: 100%|███████████████████████████████████| 10000/10000 [01:30<00:00, 110.17it/s]
Train loss: 0.0189 | Test loss: 0.2278: 100%|███████████████████████████████████| 10000/10000 [01:31<00:00, 109.74it/s]
Train loss: 0.0359 | Test loss: 0.0432: 100%|███████████████████████████████████| 10000/10000 [01:30<00:00, 109.90it/s]


### Ann-Garch

In [7]:
# ANN-GARCH: for each period, train the ANN on DataLoader features built from
# the scaled GARCH volatility series, with targets taken from the scaled
# rolling-std series, and save it under models/torch/ann_garch_prop/<period>.
WINDOW_SIZE = 30  # single source for both DataLoader windows and the Simulation window

for period in periods:

    # Period keys look like "2000-2007": chars 0-3 are the start year, chars 5+ the end year.
    df = dfe[period[:4]:period[5:]]
    df_garch = dfe_garch[period[:4]:period[5:]]

    pivot_index_training_1 = round(df.shape[0] * 0.25)
    pivot_index_training_2 = round(df.shape[0] * 0.75)

    df_ann = df.iloc[pivot_index_training_1:]
    df_ann_garch = df_garch.iloc[pivot_index_training_1:]

    # This loader is only used for its targets `y`; its features `X` are not fed to the ANN below.
    ite = DataLoader(df=df_ann, Y=df_ann.values, window_size=WINDOW_SIZE, batch_size=df_ann.shape[0])
    for batch in ite: # Only one batch there
        X,y = batch

    # This loader is only used for its features `X_garch`; `y_garch` is not read below.
    ite_garch = DataLoader(df=df_ann_garch, Y=df_ann_garch.values, window_size=WINDOW_SIZE, batch_size=df_ann_garch.shape[0])
    for batch in ite_garch: # Only one batch there
        X_garch,y_garch = batch

    kwargs = dict(
        learning_rate = params_stacked[period][0],
        period = period,
        batch_size = len(X_garch),
        num_epochs = 10000, 
        window_size = WINDOW_SIZE,
        weight_decay = 0,
        #l2_lambda = params_stacked[period][1],
        tab = X_garch,
        y = y,
    )

    sim = Simulation(**kwargs)
    sim.Ann.init_weights(random_normal)
    # The 75% mark of `df`, re-expressed relative to the start of df_ann.
    # NOTE(review): X_garch has WINDOW_SIZE fewer rows than df_ann_garch; confirm
    # that make_dataloaders expects the pivot in df_ann coordinates.
    sim.make_dataloaders(pivot_index=pivot_index_training_2-pivot_index_training_1)
    sim.train(verbose=1)
    sim.Ann.save("models/torch/ann_garch_prop/{}".format(period))

Train loss: 0.0436 | Test loss: 0.0867: 100%|███████████████████████████████████| 10000/10000 [01:29<00:00, 111.25it/s]
Train loss: 0.0345 | Test loss: 0.9236: 100%|███████████████████████████████████| 10000/10000 [01:34<00:00, 105.56it/s]
Train loss: 0.0357 | Test loss: 1.7842: 100%|███████████████████████████████████| 10000/10000 [01:30<00:00, 111.10it/s]
Train loss: 0.0320 | Test loss: 0.1896: 100%|███████████████████████████████████| 10000/10000 [01:35<00:00, 105.07it/s]
Train loss: 0.0565 | Test loss: 0.0605: 100%|███████████████████████████████████| 10000/10000 [01:37<00:00, 103.05it/s]
