In [1]:
import os
import NN
import utils
from torch import save as save_model
from torch import load as load_model
import config as cfg
import pandas as pd

import warnings
# Silence every warning for the rest of the session.
# NOTE(review): this blanket filter also hides deprecation and runtime warnings
# raised by the libraries used below; consider narrowing it to specific categories.
warnings.filterwarnings('ignore')

## Train and Evaluate a Single Model

### Setup Parameters

In [2]:
# Select the experiment to run. Editing ETF and/or NNtype is sufficient:
# both the hyper-parameters and the lagged-return features are looked up
# from the config module using these two keys.
ETF = 'SPY' # 'SPY', 'DIA', 'QQQ'
NNtype = 'MLP' # 'MLP', 'RNN', 'PSN'
params = cfg.train_parameters[ETF][NNtype]
# The config module exposes one feature table per ETF (SPYfeatures,
# DIAfeatures, QQQfeatures). Resolving it from ETF keeps it in step with
# `params`, instead of relying on a second manual edit of this line.
inputs_lag = getattr(cfg, ETF + "features")[NNtype]

### Load Dataframes

In [3]:
# Read the pickled train / validation / test splits stored under
# data/<ETF>/<NNtype>/ for the selected experiment.
traindf, validdf, testdf = [
    utils.load_file(os.path.join("data", ETF, NNtype, split_file))
    for split_file in ("Train.pkl", "Valid.pkl", "Test.pkl")
]

### Initialize DataLoaders

In [4]:
# Input columns are named "Return_<lag>", one per lag listed in inputs_lag.
target_col = 'Target'
features_col = ["Return_" + str(lag) for lag in inputs_lag]

# Not referenced again in this cell; presumably placeholders for
# normalization statistics (normalize=False below) -- TODO confirm.
mu = sigma = None

# One loader per split; training uses batches of 10, validation and test use 8.
trainloader = utils.DataFrame2DataLoader(
    traindf, features_col, target_col, batch_size=10, normalize=False
)
validloader = utils.DataFrame2DataLoader(
    validdf, features_col, target_col, batch_size=8, normalize=False
)
testloader = utils.DataFrame2DataLoader(
    testdf, features_col, target_col, batch_size=8, normalize=False
)

### Instantiate Model

In [5]:
# The epoch count is fixed at 1000 here rather than taken from params["epochs"].
n_epochs = 1000

# Build the network wrapper and configure it from the selected hyper-parameters.
model = NN.Model(NNtype)
model.setup(
    params["input_size"],
    params["hidden_size"],
    params["output_size"],
    n_epochs,
    params["optim"],
    params["lr"],
    params["momentum"],
)

Sequential(
  (0): Linear(in_features=7, out_features=6, bias=True)
  (1): Sigmoid()
  (2): Linear(in_features=6, out_features=1, bias=True)
)
SGD (
Parameter Group 0
    dampening: 0
    lr: 0.003
    momentum: 0.004
    nesterov: False
    weight_decay: 0
)


### Training

In [7]:
# Fit the model on the training loader; the validation loader is passed alongside
# and its loss is reported next to the training loss in the log below.
model.train(trainloader, validloader)

[00:00] Epoch 1/1000 - Train Loss : 0.005514 / Val Loss : 0.002338
[00:04] Epoch 100/1000 - Train Loss : 0.000140 / Val Loss : 0.000048
[00:08] Epoch 200/1000 - Train Loss : 0.000140 / Val Loss : 0.000048
[00:12] Epoch 300/1000 - Train Loss : 0.000140 / Val Loss : 0.000048
[00:17] Epoch 400/1000 - Train Loss : 0.000140 / Val Loss : 0.000048
[00:22] Epoch 500/1000 - Train Loss : 0.000140 / Val Loss : 0.000048
[00:26] Epoch 600/1000 - Train Loss : 0.000140 / Val Loss : 0.000048
[00:30] Epoch 700/1000 - Train Loss : 0.000140 / Val Loss : 0.000048
[00:34] Epoch 800/1000 - Train Loss : 0.000140 / Val Loss : 0.000048
[00:39] Epoch 900/1000 - Train Loss : 0.000140 / Val Loss : 0.000048
[00:43] Epoch 1000/1000 - Train Loss : 0.000140 / Val Loss : 0.000048
Train MAE : 0.0081 | Train MAPE  : 2.5607 | Train RSME : 0.0001 | Train Theil-U 0.9263
Valid MAE : 0.0054 | Valid MAPE  : 4.7942 | Valid RSME : 0.0000 | Valid Theil-U 0.8998


### Evaluation

In [8]:
# Score the trained model on the held-out test loader (prints MAE, MAPE, RMSE and Theil-U).
model.evaluate(testloader)

Test MAE : 0.005511 | Test MAPE  : 1.194393 | Test RSME : 0.000054 | Test Theil-U 0.904277


## Train and Evaluate All Models

In [9]:
# Train and evaluate every (ETF, network type) combination, keeping each
# fitted model in `models[ETF][NNtype]` so that it can be saved afterwards.
etf_names = ['SPY', 'DIA', 'QQQ']
nn_types = ['MLP', 'RNN', 'PSN']

# Lagged-return feature tables from the config module, keyed by ETF. A lookup
# table raises KeyError for an ETF it does not know, whereas an if/elif chain
# without a fallback would silently reuse the previous iteration's inputs_lag.
features_by_etf = {'SPY': cfg.SPYfeatures,
                   'DIA': cfg.DIAfeatures,
                   'QQQ': cfg.QQQfeatures}

# Same nested layout as before: one slot per ETF and network type.
models = {etf: {nn_type: None for nn_type in nn_types} for etf in etf_names}

for ETF in etf_names:
    for NNtype in nn_types:
        print("="*10 + ETF + " - " + NNtype + "="*10)
        params = cfg.train_parameters[ETF][NNtype]
        inputs_lag = features_by_etf[ETF][NNtype]

        # Pickled splits for this combination.
        traindf = utils.load_file(os.path.join("data", ETF, NNtype, "Train.pkl"))
        validdf = utils.load_file(os.path.join("data", ETF, NNtype, "Valid.pkl"))
        testdf  = utils.load_file(os.path.join("data", ETF, NNtype, "Test.pkl"))

        # Input columns are named "Return_<lag>", one per configured lag.
        features_col = ["Return_" + str(lag) for lag in inputs_lag]
        target_col   = 'Target'

        # Kept for parity with the single-model cell; not used in this loop.
        mu, sigma = None, None

        trainloader = utils.DataFrame2DataLoader(traindf, features_col, target_col, batch_size=10, normalize=False)
        validloader = utils.DataFrame2DataLoader(validdf, features_col, target_col, batch_size=8, normalize=False)
        testloader  = utils.DataFrame2DataLoader(testdf, features_col, target_col, batch_size=8, normalize=False)

        model = NN.Model(NNtype)
        model.setup(params["input_size"],
                    params["hidden_size"],
                    params["output_size"],
                    1000,  # fixed epoch count used instead of params["epochs"]
                    params["optim"],
                    params["lr"],
                    params["momentum"]
                   )

        model.train(trainloader, validloader)
        model.evaluate(testloader)

        models[ETF][NNtype] = model
        print("\n"*2)

Sequential(
  (0): Linear(in_features=7, out_features=6, bias=True)
  (1): Sigmoid()
  (2): Linear(in_features=6, out_features=1, bias=True)
)
SGD (
Parameter Group 0
    dampening: 0
    lr: 0.003
    momentum: 0.004
    nesterov: False
    weight_decay: 0
)
[00:00] Epoch 1/1000 - Train Loss : 0.116676 / Val Loss : 0.047273
[00:04] Epoch 100/1000 - Train Loss : 0.000143 / Val Loss : 0.000049
[00:08] Epoch 200/1000 - Train Loss : 0.000143 / Val Loss : 0.000049
[00:12] Epoch 300/1000 - Train Loss : 0.000143 / Val Loss : 0.000049
[00:17] Epoch 400/1000 - Train Loss : 0.000143 / Val Loss : 0.000049
[00:22] Epoch 500/1000 - Train Loss : 0.000143 / Val Loss : 0.000049
[00:26] Epoch 600/1000 - Train Loss : 0.000143 / Val Loss : 0.000049
[00:30] Epoch 700/1000 - Train Loss : 0.000143 / Val Loss : 0.000049
[00:34] Epoch 800/1000 - Train Loss : 0.000143 / Val Loss : 0.000049
[00:38] Epoch 900/1000 - Train Loss : 0.000143 / Val Loss : 0.000049
[00:43] Epoch 1000/1000 - Train Loss : 0.000143 / Va

### Save Models

In [12]:
# Write one checkpoint per trained model to models/<ETF>/<NNtype>/ckpt.pth.
for ETF in ("SPY", "DIA", "QQQ"):
    for NNtype in ("MLP", "RNN", "PSN"):
        ckpt_dir = os.path.join("models", ETF, NNtype)
        # Create the target folder on first use; reruns are harmless.
        os.makedirs(ckpt_dir, exist_ok=True)
        # Only the underlying network's weights are stored, not the wrapper object.
        weights = models[ETF][NNtype].model.state_dict()
        save_model(weights, os.path.join(ckpt_dir, "ckpt.pth"))

### Load Checkpoint

In [6]:
# Rebuild one network from its saved checkpoint and score it on the test split.
ETF = "DIA"
NNtype = "RNN"
params = cfg.train_parameters[ETF][NNtype]
# Resolve the per-ETF feature table (SPYfeatures / DIAfeatures / QQQfeatures)
# from ETF itself, so editing ETF above cannot leave a mismatched table behind.
inputs_lag = getattr(cfg, ETF + "features")[NNtype]

# Input columns are named "Return_<lag>", one per configured lag.
features_col = ["Return_" + str(lag) for lag in inputs_lag]
target_col   = 'Target'
df  = utils.load_file(os.path.join("data", ETF, NNtype, "Test.pkl"))
dataloader = utils.DataFrame2DataLoader(df, features_col, target_col, batch_size=8, normalize=False)

# Configure the wrapper with the same hyper-parameters before loading the
# stored weights into its underlying network.
modelloaded = NN.Model(NNtype)
modelloaded.setup(params["input_size"],
                  params["hidden_size"],
                  params["output_size"],
                  1000,  # fixed epoch count used instead of params["epochs"]
                  params["optim"],
                  params["lr"],
                  params["momentum"]
                 )
ckpt_path = os.path.join("models", ETF, NNtype, "ckpt.pth")
modelloaded.model.load_state_dict(load_model(ckpt_path))
modelloaded.evaluate(dataloader)

RNN(
  (rnn): RNN(8, 7)
  (fc): Linear(in_features=7, out_features=1, bias=True)
)
SGD (
Parameter Group 0
    dampening: 0
    lr: 0.005
    momentum: 0.006
    nesterov: False
    weight_decay: 0
)
Test MAE : 0.005624 | Test MAPE  : 10.524327 | Test RSME : 0.000054 | Test Theil-U 0.769070


# Generating Table 4

In [9]:
# Build Table 4: test-set statistics of every saved model, with one block of
# four rows (MAE, MAPE, RMSE, Theil-U) per ETF and one column per network type.

# Lagged-return feature tables from the config module, keyed by ETF.
features_by_etf = {'SPY': cfg.SPYfeatures,
                   'DIA': cfg.DIAfeatures,
                   'QQQ': cfg.QQQfeatures}

# Per-ETF tables are collected here and concatenated once at the end, rather
# than growing a DataFrame from an empty frame inside the loop.
etf_tables = []

for ETF in ['SPY', 'DIA', 'QQQ']:
    Table = pd.DataFrame({"Statistic":["MAE", "MAPE", "RMSE", "THEIL-U"]})
    # Same label on all four rows; set once per ETF instead of once per network.
    Table["ETF"] = ETF
    for NNtype in ['MLP', 'RNN', 'PSN']:
        params = cfg.train_parameters[ETF][NNtype]
        inputs_lag = features_by_etf[ETF][NNtype]

        # Input columns are named "Return_<lag>", one per configured lag.
        features_col = ["Return_" + str(lag) for lag in inputs_lag]
        target_col   = 'Target'
        df  = utils.load_file(os.path.join("data", ETF, NNtype, "Test.pkl"))
        dataloader = utils.DataFrame2DataLoader(df, features_col, target_col, batch_size=8, normalize=False)

        # Rebuild the network with its hyper-parameters, then load the saved weights.
        modelloaded = NN.Model(NNtype)
        modelloaded.setup(params["input_size"],
                          params["hidden_size"],
                          params["output_size"],
                          1000,  # fixed epoch count used instead of params["epochs"]
                          params["optim"],
                          params["lr"],
                          params["momentum"])
        modelloaded.model.load_state_dict(load_model(os.path.join("models", ETF, NNtype, "ckpt.pth")))
        modelloaded.evaluate(dataloader)

        # evaluate() leaves the four test statistics on the wrapper; row order
        # matches the "Statistic" column above.
        Table[NNtype] = [modelloaded.testMAE, modelloaded.testMAPE, modelloaded.testRMSE, modelloaded.testTheilU]

    etf_tables.append(Table)

Table4 = pd.concat(etf_tables, ignore_index=True).set_index(["ETF"])

# Make sure the output folder exists before writing, as the model-saving cell does.
os.makedirs("table", exist_ok=True)
Table4.to_csv("table/Table4.csv", index=True)

Sequential(
  (0): Linear(in_features=7, out_features=6, bias=True)
  (1): Sigmoid()
  (2): Linear(in_features=6, out_features=1, bias=True)
)
SGD (
Parameter Group 0
    dampening: 0
    lr: 0.003
    momentum: 0.004
    nesterov: False
    weight_decay: 0
)
Test MAE : 0.005540 | Test MAPE  : 1.100655 | Test RSME : 0.000054 | Test Theil-U 0.929284
RNN(
  (rnn): RNN(9, 6)
  (fc): Linear(in_features=6, out_features=1, bias=True)
)
SGD (
Parameter Group 0
    dampening: 0
    lr: 0.003
    momentum: 0.005
    nesterov: False
    weight_decay: 0
)
Test MAE : 0.006091 | Test MAPE  : 2.762408 | Test RSME : 0.000062 | Test Theil-U 0.736161
PSN(
  (fc): Linear(in_features=9, out_features=5, bias=True)
)
SGD (
Parameter Group 0
    dampening: 0
    lr: 0.4
    momentum: 0.5
    nesterov: False
    weight_decay: 0
)
Test MAE : 0.005535 | Test MAPE  : 1.503290 | Test RSME : 0.000055 | Test Theil-U 0.858845
Sequential(
  (0): Linear(in_features=7, out_features=9, bias=True)
  (1): Sigmoid()
  (2)

In [10]:
# Display the assembled statistics table.
Table4

Unnamed: 0_level_0,Statistic,MLP,RNN,PSN
ETF,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
SPY,MAE,0.00554,0.006091,0.005535
SPY,MAPE,1.100655,2.762408,1.50329
SPY,RMSE,5.4e-05,6.2e-05,5.5e-05
SPY,THEIL-U,0.929284,0.736161,0.858845
DIA,MAE,0.00539,0.005624,0.005406
DIA,MAPE,3.646228,10.524327,5.425974
DIA,RMSE,5.2e-05,5.4e-05,5.4e-05
DIA,THEIL-U,0.926919,0.76907,0.855376
QQQ,MAE,0.006587,0.006885,0.00659
QQQ,MAPE,2.376193,18.319845,9.227055


# Work in Progress: Investigating the MAPE Computation

In [11]:
# Presumably returns the predictions, the targets and an evaluation summary for the
# most recently loaded model -- TODO confirm against NN.Model.
# NOTE(review): the recorded output is AttributeError: 'Model' object has no attribute
# 'Getevaluation', so `ypred` / `ytrue` used by the following cells must come from an
# earlier kernel state; verify the method name before re-running from a fresh kernel.
ypred, ytrue, evaluation = modelloaded.Getevaluation(dataloader)

AttributeError: 'Model' object has no attribute 'Getevaluation'

In [14]:
import numpy as np

# Copy predictions and targets into NumPy arrays for element-wise inspection.
y_pred, y_true = (np.array(values) for values in (ypred, ytrue))

In [51]:
# Re-run the test-set evaluation of the last loaded model.
# NOTE(review): the recorded output reports a MAPE of inf for this run.
modelloaded.evaluate(dataloader)

Test MAE : 0.005624 | Test MAPE  : inf | Test RSME : 0.000054 | Test Theil-U 0.769070


In [20]:
# Element-wise absolute error relative to the target value.
np.absolute(np.divide(y_true - y_pred, y_true))

array([5.97504411e-01, 8.19620566e-01, 7.61992324e-01, 1.27961111e+00,
       1.55741931e+00, 1.06965293e+00, 4.08921429e+01, 1.03007374e+00,
       1.17004060e+00, 7.15264093e-01, 6.79977365e-01, 1.38981662e+00,
       5.70637783e-01, 2.21547445e+00, 1.06259083e+00, 8.27917005e-01,
       1.34271908e+00, 1.28133038e+00, 9.69783017e-01, 6.09123871e-01,
       1.31316356e+00, 1.12362061e+00, 4.03105552e-01, 3.35606941e+00,
       7.98431427e-01, 6.87432206e-01, 7.31159248e-01, 3.80845850e-01,
       1.51305376e+00, 1.29509997e+00, 7.20141230e-01, 3.29434595e+00,
       1.13785257e+00, 1.49065573e+00, 3.72291052e+00, 7.72002016e-01,
       6.24651569e-01, 8.43269646e-01, 5.24755104e-01, 1.34105122e+00,
       1.07121130e+00, 9.88006366e-01, 8.68172943e-01, 1.04517050e+00,
       3.42294065e-01, 4.86738036e-01, 1.53935203e+00, 2.25559896e-01,
       8.57544100e-01, 1.52026479e+00, 1.25801378e+00, 6.77532215e-01,
       1.29784225e+00, 7.91556409e-01, 2.08852883e+00, 1.93345424e+00,
      

In [30]:
# Overwrite exact-zero targets in place with a small constant; presumably so that
# dividing by y_true no longer yields inf -- TODO confirm the chosen value.
zero_targets = y_true == 0
y_true[zero_targets] = 0.000006

In [31]:
# Indices of targets that are still exactly zero (expected to be empty).
np.nonzero(y_true == 0)

(array([], dtype=int64),)

In [15]:
# Display the statistics table again.
# NOTE(review): the values recorded below differ from the earlier display of
# Table4, so the table was presumably regenerated in between -- verify on a clean run.
Table4

Unnamed: 0_level_0,Statistic,MLP,RNN,PSN
ETF,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
SPY,MAE,0.005533,0.005745,0.005533
SPY,MAPE,1.167107,1.459932,1.501336
SPY,RMSE,5.5e-05,6e-05,5.5e-05
SPY,THEIL-U,0.925617,0.819338,0.858615
DIA,MAE,0.00542,0.005653,0.005405
DIA,MAPE,1.971646,2.648959,5.369128
DIA,RMSE,5.3e-05,5.5e-05,5.4e-05
DIA,THEIL-U,0.910827,0.836283,0.855665
QQQ,MAE,0.00666,0.006684,0.006594
QQQ,MAPE,5.020542,8.077758,9.344285
