In [15]:
import os
import NN
import utils
from torch import save as save_model
from torch import load as load_model
import config as cfg

import warnings
# Blanket filter: suppresses every warning category for the rest of the kernel
# session. Comment this line out when debugging unexpected results.
warnings.filterwarnings('ignore')

## Train and Evaluate a single model

### Setup Parameters

In [2]:
# Select the ETF / network-architecture pair trained in this section.
ETF = 'SPY' # 'SPY', 'DIA', 'QQQ'
NNtype = 'MLP' # 'MLP', 'RNN', 'PSN'

# Hyper-parameters for this (ETF, architecture) pair.
params = cfg.train_parameters[ETF][NNtype]

# Resolve the lag table from ETF itself (cfg.SPYfeatures, cfg.DIAfeatures or
# cfg.QQQfeatures) so that editing ETF above cannot leave a stale feature set
# paired with another ETF's data and hyper-parameters.
inputs_lag = getattr(cfg, ETF + "features")[NNtype]

### Load Dataframes

In [3]:
# All three splits for the selected (ETF, NNtype) pair live in the same folder.
split_dir = os.path.join("data", ETF, NNtype)
traindf, validdf, testdf = [
    utils.load_file(os.path.join(split_dir, filename))
    for filename in ("Train.pkl", "Valid.pkl", "Test.pkl")
]

### Initialize DataLoaders

In [4]:
# One "Return_<lag>" column name per lag selected in the configuration.
features_col = ["Return_" + str(lag) for lag in inputs_lag]
target_col   = 'Target'

mu, sigma = None, None

# Wrap each split in a DataLoader; normalisation is switched off for all three.
trainloader = utils.DataFrame2DataLoader(
    traindf, features_col, target_col, batch_size=10, normalize=False
)
validloader = utils.DataFrame2DataLoader(
    validdf, features_col, target_col, batch_size=8, normalize=False
)
testloader = utils.DataFrame2DataLoader(
    testdf, features_col, target_col, batch_size=8, normalize=False
)

### Instantiate Model

In [5]:
# Epoch count is fixed here rather than read from params["epochs"].
n_epochs = 1000

# Positional arguments for setup(), in the order the call expects them.
setup_args = (
    params["input_size"],
    params["hidden_size"],
    params["output_size"],
    n_epochs,
    params["optim"],
    params["lr"],
    params["momentum"],
)

model = NN.Model(NNtype)
model.setup(*setup_args)

Sequential(
  (0): Linear(in_features=7, out_features=6, bias=True)
  (1): Sigmoid()
  (2): Linear(in_features=6, out_features=1, bias=True)
)
SGD (
Parameter Group 0
    dampening: 0
    lr: 0.003
    momentum: 0.004
    nesterov: False
    weight_decay: 0
)


### Training

In [6]:
# Fit the model on the training loader; the validation loader is passed along
# and the log below reports a validation loss next to the training loss.
model.train(trainloader, validloader)

[00:00] Epoch 1/1000 - Train Loss : 0.076996 / Val Loss : 0.028562
[00:05] Epoch 100/1000 - Train Loss : 0.000138 / Val Loss : 0.000048
[00:09] Epoch 200/1000 - Train Loss : 0.000138 / Val Loss : 0.000048
[00:14] Epoch 300/1000 - Train Loss : 0.000138 / Val Loss : 0.000048
[00:19] Epoch 400/1000 - Train Loss : 0.000138 / Val Loss : 0.000048
[00:24] Epoch 500/1000 - Train Loss : 0.000138 / Val Loss : 0.000048
[00:28] Epoch 600/1000 - Train Loss : 0.000138 / Val Loss : 0.000048
[00:33] Epoch 700/1000 - Train Loss : 0.000138 / Val Loss : 0.000048
[00:38] Epoch 800/1000 - Train Loss : 0.000138 / Val Loss : 0.000048
[00:42] Epoch 900/1000 - Train Loss : 0.000138 / Val Loss : 0.000048
[00:47] Epoch 1000/1000 - Train Loss : 0.000138 / Val Loss : 0.000048
Train MAE : 0.0081 | Train MAPE  : inf | Train RSME : 0.0001 | Train Theil-U 0.9260
Valid MAE : 0.0053 | Valid MAPE  : inf | Valid RSME : 0.0000 | Valid Theil-U 0.9112


### Evaluation

In [7]:
# Score the trained model on the test loader (metrics are printed below).
model.evaluate(testloader)

Test MAE : 0.005538 | Test MAPE  : 1.174776 | Test RSME : 0.000055 | Test Theil-U 0.922170


## Train and Evaluate All Models

In [8]:
# Trained models, indexed as models[ETF][NNtype]; every slot starts as None
# and is filled in once the corresponding model has been trained.
models = {
    etf_name: {nn_name: None for nn_name in ('MLP', 'RNN', 'PSN')}
    for etf_name in ('SPY', 'DIA', 'QQQ')
}

for ETF in ['SPY', 'DIA', 'QQQ']:
    for NNtype in ['MLP', 'RNN', 'PSN']:
        print("="*10 + ETF + " - " + NNtype + "="*10)
        params = cfg.train_parameters[ETF][NNtype]

        # Look the lag table up by name (cfg.SPYfeatures, cfg.DIAfeatures,
        # cfg.QQQfeatures). Unlike an if/elif chain with no else branch, an
        # ETF without a matching table raises AttributeError instead of
        # silently reusing the previous iteration's lags.
        inputs_lag = getattr(cfg, ETF + "features")[NNtype]

        # Load the three splits stored for this (ETF, NNtype) pair.
        traindf = utils.load_file(os.path.join("data", ETF, NNtype, "Train.pkl"))
        validdf = utils.load_file(os.path.join("data", ETF, NNtype, "Valid.pkl"))
        testdf  = utils.load_file(os.path.join("data", ETF, NNtype, "Test.pkl"))

        # One "Return_<lag>" feature column per configured lag.
        features_col = ["Return_" + str(lag) for lag in inputs_lag]
        target_col   = 'Target'

        mu, sigma = None, None

        trainloader = utils.DataFrame2DataLoader(traindf, features_col, target_col, batch_size=10, normalize=False)
        validloader = utils.DataFrame2DataLoader(validdf, features_col, target_col, batch_size=8, normalize=False)
        testloader  = utils.DataFrame2DataLoader(testdf, features_col, target_col, batch_size=8, normalize=False)

        model = NN.Model(NNtype)
        model.setup(params["input_size"],
                    params["hidden_size"],
                    params["output_size"],
                    1000,  # epoch count fixed here instead of params["epochs"]
                    params["optim"],
                    params["lr"],
                    params["momentum"]
                   )

        model.train(trainloader, validloader)
        model.evaluate(testloader)

        # Keep the trained model so the "Save Models" cell can persist it.
        models[ETF][NNtype] = model
        print("\n"*2)

Sequential(
  (0): Linear(in_features=7, out_features=6, bias=True)
  (1): Sigmoid()
  (2): Linear(in_features=6, out_features=1, bias=True)
)
SGD (
Parameter Group 0
    dampening: 0
    lr: 0.003
    momentum: 0.004
    nesterov: False
    weight_decay: 0
)
[00:00] Epoch 1/1000 - Train Loss : 0.482743 / Val Loss : 0.202627
[00:04] Epoch 100/1000 - Train Loss : 0.000140 / Val Loss : 0.000049
[00:09] Epoch 200/1000 - Train Loss : 0.000140 / Val Loss : 0.000049
[00:14] Epoch 300/1000 - Train Loss : 0.000140 / Val Loss : 0.000049
[00:18] Epoch 400/1000 - Train Loss : 0.000140 / Val Loss : 0.000049
[00:23] Epoch 500/1000 - Train Loss : 0.000140 / Val Loss : 0.000049
[00:28] Epoch 600/1000 - Train Loss : 0.000140 / Val Loss : 0.000049
[00:33] Epoch 700/1000 - Train Loss : 0.000140 / Val Loss : 0.000049
[00:37] Epoch 800/1000 - Train Loss : 0.000140 / Val Loss : 0.000049
[00:42] Epoch 900/1000 - Train Loss : 0.000140 / Val Loss : 0.000049
[00:47] Epoch 1000/1000 - Train Loss : 0.000140 / Va

### Save Models

In [13]:
# Persist every trained network's weights under models/<ETF>/<NNtype>/ckpt.pth.
for ETF in ["SPY", "DIA", "QQQ"]:
    for NNtype in ["MLP", "RNN", "PSN"]:
        ckpt_dir = os.path.join("models", ETF, NNtype)
        os.makedirs(ckpt_dir, exist_ok=True)

        # Only the state_dict is written, not the wrapper object.
        weights = models[ETF][NNtype].model.state_dict()
        save_model(weights, os.path.join(ckpt_dir, "ckpt.pth"))

### Load Checkpoint

In [17]:
# Choose which saved checkpoint to reload and re-evaluate.
ETF = "DIA"
NNtype = "RNN"
params = cfg.train_parameters[ETF][NNtype]

# Derive the lag table from ETF (cfg.SPYfeatures, cfg.DIAfeatures or
# cfg.QQQfeatures) so the features always match the checkpoint selected above
# instead of relying on a second, hand-edited reference to cfg.DIAfeatures.
inputs_lag = getattr(cfg, ETF + "features")[NNtype]

# One "Return_<lag>" feature column per configured lag.
features_col = ["Return_" + str(lag) for lag in inputs_lag]
target_col   = 'Target'
df  = utils.load_file(os.path.join("data", ETF, NNtype, "Test.pkl"))
dataloader = utils.DataFrame2DataLoader(df, features_col, target_col, batch_size=8, normalize=False)

# The checkpoint file holds only a state_dict, so the network has to be
# rebuilt with the same configuration before the weights can be loaded.
modelloaded = NN.Model(NNtype)
modelloaded.setup(params["input_size"],
            params["hidden_size"],
            params["output_size"],
            1000,  # epoch count fixed here instead of params["epochs"]
            params["optim"],
            params["lr"],
            params["momentum"]
           )
modelloaded.model.load_state_dict(load_model(os.path.join("models", ETF, NNtype, "ckpt.pth")))
modelloaded.evaluate(dataloader)

RNN(
  (rnn): RNN(8, 7)
  (fc): Linear(in_features=7, out_features=1, bias=True)
)
SGD (
Parameter Group 0
    dampening: 0
    lr: 0.005
    momentum: 0.006
    nesterov: False
    weight_decay: 0
)
Test MAE : 0.005908 | Test MAPE  : inf | Test RSME : 0.000061 | Test Theil-U 0.814241
