<a href="https://colab.research.google.com/github/Krankile/npmf/blob/main/notebooks/training_loop.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Setup

## Kernel setup

In [2]:
# Enable IPython's autoreload extension in mode 2, which re-imports modules before
# each cell executes so edits to imported source files are picked up without a
# kernel restart.
%load_ext autoreload
%autoreload 2

In [3]:
%%capture
# Install the extra packages imported further down and fetch the project source.
# NOTE(review): no versions are pinned, so a fresh runtime may resolve newer releases.
!pip install wandb more_itertools
!git clone https://github.com/Krankile/npmf.git

In [4]:
# Authenticate with Weights & Biases. The API key can be copied from
# https://wandb.ai/authorize
!wandb login

[34m[1mwandb[0m: Currently logged in as: [33mankile[0m ([33mkrankile[0m). Use [1m`wandb login --relogin`[0m to force relogin


## General setup

In [5]:
%%capture
# Bring the previously cloned npmf checkout up to date before importing from it.
!cd npmf && git pull

import math
import multiprocessing
import os
import pickle
import random
from collections import Counter, defaultdict
from dataclasses import asdict, dataclass
from datetime import datetime, timedelta
from operator import itemgetter
from typing import Callable, List, Tuple
from functools import partial
from glob import glob


from more_itertools import chunked

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch

from npmf.utils.colors import main, main2, main3
from npmf.utils.dataset import TimeDeltaDataset, EraDataset, EraController
from npmf.utils.dtypes import fundamental_types
from npmf.utils.eikon import column_mapping
from npmf.utils.tests.utils import pickle_df
from npmf.utils.wandb import get_datasets, put_dataset, put_nn_model
from npmf.utils.training import EarlyStop, to_device, TqdmPostFix, loss_fns
from npmf.utils.models import models

from numpy.ma.core import outerproduct
from pandas.tseries.offsets import BDay, Day
from sklearn.preprocessing import MinMaxScaler, minmax_scale
from torch import nn
from torch.utils.data import DataLoader, Dataset, ConcatDataset

import wandb as wb

In [6]:
# Turn every NumPy floating-point warning (divide, overflow, underflow, invalid)
# into an exception so numerical problems surface immediately.
np.seterr(all="raise")

# Figure defaults: (6, 4) is matplotlib's default size and the one used in the paper.
mpl.rcParams["figure.figsize"] = (6, 4)
# Line colours cycle through the project palette, then black.
mpl.rcParams["axes.prop_cycle"] = mpl.cycler(color=[main, main2, main3, "black"])

In [7]:
# Use the GPU whenever PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

print(f"Using {device} device")

Using cuda device


In [8]:
# Print the GPU attached to this runtime (model, driver/CUDA version, memory in use).
!nvidia-smi

Sun Jun  5 16:27:00 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   40C    P0    39W / 250W |      2MiB / 16280MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [9]:
# Location of the pre-processed era datasets; filled in by the artifact download
# cell further down.
pre_proc_data_dir = None

# Seed every random number generator the notebook imports, not only NumPy's:
# torch's generator is a separate RNG from NumPy's global one, so seeding NumPy
# alone does not make torch-side randomness repeatable.
# NumPy is seeded last because np.random.seed returns None, which keeps the cell
# free of output.
SEED = 69
random.seed(SEED)
torch.manual_seed(SEED)
np.random.seed(SEED)


# Get some data

In [10]:
%%capture
# Set to True to force a fresh download even when the frames are already in memory.
reload_data = False

# Download only when asked to, or when the frames are not yet defined in this kernel.
if reload_data or "stock_df" not in vars():
    names = ["stock-data:final", "fundamental-data:final", "meta-data:final", "macro-data:final"]

    stock_df, fundamental_df, meta_df, macro_df = get_datasets(names=names, project="master")

    stock_df = stock_df.drop(columns=["close_price", "currency"]).astype({"market_cap": np.float32})
    fundamental_df = fundamental_df.drop(columns="period_end_date").astype(fundamental_types)
    # Cast every column after the first to float32 by rebinding the frame.
    # Assigning the cast values back through `.iloc[:, 1:] = ...` may write into the
    # existing columns and keep their previous dtype (always the case from
    # pandas 2.0), which silently drops the float32 cast.
    macro_df = macro_df.astype({col: np.float32 for col in macro_df.columns[1:]})

In [11]:
%%capture
# Switches for the artifact download below.
reload_proc_data = True       # download even if a directory is already known
dont_load_proc_data = False   # set to True to skip the download entirely

if not dont_load_proc_data:
    # Fetch when forced, or when no usable directory has been recorded yet.
    if reload_proc_data or "pre_proc_data_dir" not in vars() or pre_proc_data_dir is None:
        with wb.init(job_type="get-data", project="master", entity="krankile") as run:
            art = run.use_artifact("era-datasets:240")
            pre_proc_data_dir = art.download()

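## Define a class to handle information across eras

The era-handling class used below (`EraController`) is imported from `npmf.utils.dataset` in the setup cell rather than defined in this notebook.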

# Run the loop! (Like Odd-Geir Lademo)

In [12]:
# Check if it's necessary to calculate naive loss every epoch
def get_epoch_loss(model, optimizer, dataloader, loss_fn, device, run_type, conf):
    """Run one pass over ``dataloader`` and collect the per-batch losses.

    Args:
        model: Callable invoked as ``model(data, meta_cont, meta_cat)``.
        optimizer: Optimizer that is zeroed and stepped for every batch when
            ``run_type == "train"``; untouched otherwise.
        dataloader: Iterable that, once passed through ``to_device``, yields
            ``(data, meta_cont, meta_cat, target)`` tuples.
        loss_fn: Callable invoked as ``loss_fn(target, prediction)`` that returns
            a scalar tensor.
        device: Device handed to ``to_device`` and used for the naive baseline tensor.
        run_type: ``"train"`` enables backpropagation and optimizer steps; any
            other value only evaluates.
        conf: Configuration object; a truthy ``conf.clamp`` bounds the input data
            to ``[-conf.clamp, conf.clamp]`` before the forward pass.

    Returns:
        ``(model_losses, naive_losses)``: two lists of Python floats with one entry
        per batch. The naive loss scores an all-ones prediction against the target.
    """
    is_train = run_type == "train"
    model_losses = []
    naive_losses = []

    # Track gradients only when they are going to be used. Evaluation passes
    # would otherwise build an autograd graph that is never backpropagated.
    with torch.set_grad_enabled(is_train):
        for data, meta_cont, meta_cat, target in to_device(dataloader, device):
            if is_train:
                optimizer.zero_grad()

            if conf.clamp:
                data = torch.clamp(data, -conf.clamp, conf.clamp)
            y_pred: torch.Tensor = model(data, meta_cont, meta_cat)

            naive_loss = loss_fn(target.clone(), torch.ones(target.shape, device=device))
            loss = loss_fn(target, y_pred)

            model_losses.append(loss.item())
            naive_losses.append(naive_loss.item())

            if is_train:
                loss.backward()
                optimizer.step()

    return model_losses, naive_losses

In [13]:
def eras_ahead_loss(model, data_loaders, optimizer, conf):
    """Evaluate ``model`` on several loaders and pool the per-batch losses.

    Each loader is run through ``get_epoch_loss`` with the ``"mape_2"`` loss in
    ``"inference"`` mode, on the notebook-level ``device`` and with gradient
    tracking disabled.

    Args:
        model: Model to evaluate.
        data_loaders: Iterable of data loaders.
        optimizer: Forwarded unchanged to ``get_epoch_loss``.
        conf: Configuration object forwarded to ``get_epoch_loss``.

    Returns:
        ``(model_losses, naive_losses)`` as two NumPy arrays holding the batch
        losses of all loaders concatenated in iteration order.
    """
    pooled_model = []
    pooled_naive = []

    with torch.no_grad():
        for era_loader in data_loaders:
            batch_model, batch_naive = get_epoch_loss(
                model,
                optimizer,
                era_loader,
                loss_fns["mape_2"],
                device,
                "inference",
                conf,
            )
            pooled_model.extend(batch_model)
            pooled_naive.extend(batch_naive)

    return np.array(pooled_model), np.array(pooled_naive)

In [14]:
def train_one_era(run, model, optimizer, data_train, data_val, stopper, losses, device, conf, pbar):
    """Train ``model`` on one era until the stopper fires or ``conf.max_epochs`` is reached.

    Every epoch runs a training pass followed by a validation pass.

    Args:
        run: Object with a ``log(dict)`` method; receives the mean loss of each pass.
        model: Model to train; switched between train and eval mode per pass.
        optimizer: Optimizer forwarded to ``get_epoch_loss``.
        data_train: Loader used for the training pass.
        data_val: Loader used for the validation pass.
        stopper: Callable given the validation batch losses of the epoch; a truthy
            return value ends training for this era.
        losses: Dict with the list-valued keys ``train``, ``val`` and ``epoch_lens``.
            The mean loss of every pass is appended to ``train``/``val``, and the
            number of epochs run in this era is appended to ``epoch_lens``.
        device: Device forwarded to ``get_epoch_loss``.
        conf: Configuration supporting ``conf.max_epochs`` as well as
            ``conf["train_loss"]`` / ``conf["val_loss"]`` (keys into ``loss_fns``).
        pbar: Progress bar exposing ``update_postfix(dict)``.

    Returns:
        ``(train_batch_losses, val_batch_losses)`` of the last epoch that ran;
        two empty lists if no epoch ran.
    """
    loaders = {"train": data_train, "val": data_val}

    # Bound before the loop so the bookkeeping and return below are well-defined
    # even when conf.max_epochs is 0.
    epoch_losses = dict(train=[], val=[])
    epochs_run = 0

    for epoch in range(conf.max_epochs):
        epoch_losses = dict(train=[], val=[])
        epochs_run = epoch + 1

        pbar.update_postfix({"epoch": epoch})
        for run_type, dataloader in loaders.items():
            model.train(run_type == "train")

            batch_losses, naive_losses = get_epoch_loss(
                model, optimizer, dataloader, loss_fns[conf[f"{run_type}_loss"]], device, run_type, conf
            )
            epoch_losses[run_type].extend(batch_losses)

            epoch_loss = np.mean(epoch_losses[run_type])
            losses[run_type].append(epoch_loss)

            run.log({f"epoch_{run_type}": epoch_loss, "epoch": epoch})

        # `naive_losses` comes from the validation pass, the last one executed above.
        pbar.update_postfix({
            "train_loss": np.mean(epoch_losses["train"]),
            "val_loss": np.mean(epoch_losses["val"]),
            "naive": np.mean(naive_losses),
        })

        # TODO: Implement checkpointing of the best model according to val_loss
        if stopper(epoch_losses["val"]):
            break

    # Record the era length whether training stopped early or ran out of epochs,
    # so `epoch_lens` holds exactly one entry per era.
    losses["epoch_lens"].append(epochs_run)

    return epoch_losses["train"], epoch_losses["val"]

In [16]:
def train(config, project=None, entity=None, enablewb=True) -> Tuple[nn.Module, dict]:
    """Train a model era by era inside a single Weights & Biases run.

    Relies on the notebook-level names ``device``, ``stock_df``, ``fundamental_df``,
    ``meta_df`` and ``macro_df``.

    Args:
        config: Configuration handed to ``wb.init``. The resulting ``run.config``
            supplies every setting read below (``model``, ``learning_rate``,
            ``patience``, ``start_date``, ...).
        project: W&B project name forwarded to ``wb.init``.
        entity: W&B entity forwarded to ``wb.init``.
        enablewb: ``True`` runs W&B in ``"online"`` mode, ``False`` in ``"offline"`` mode.

    Returns:
        ``(model, losses)``: the trained model and a dict with the keys ``train``,
        ``val`` and ``epoch_lens`` that is passed to ``train_one_era`` for every era.
    """
    mode = "online" if enablewb else "offline"
    with wb.init(config=config, project=project, entity=entity, job_type="training", mode=mode) as run:

        conf = run.config
        print(conf)

        # The whole config is splatted into the model constructor.
        model = models[conf.model](**conf).to(device)

        # Try decreasing learning rate underway
        optimizer = torch.optim.Adam(model.parameters(), lr=conf.learning_rate)

        losses = dict(train=[], val=[], epoch_lens=[])

        eras = EraController(start_date=conf.start_date, end_metric_start_date=conf.end_date, queue_length=conf.queue_length, stock_df=stock_df, fundamental_df=fundamental_df, meta_df=meta_df, macro_df=macro_df, conf=conf)
        pbar = TqdmPostFix(eras, total=eras.total)
        stopper = EarlyStop(conf.patience, conf.min_delta, model=(model if conf.checkpoint else None), pbar=pbar)

        for i, (data_train, data_val) in enumerate(pbar):
            # Ask PyTorch to release cached, currently unused GPU memory between eras.
            # NOTE(review): the original author was unsure this helps ("Does this work??").
            torch.cuda.empty_cache()

            pbar.set_description(f"Era {eras.date} [{i+1}/{eras.total}]")

            # The same stopper object is reused; it is reset at the start of every era.
            train_losses, val_losses = train_one_era(
                run=run,
                model=model,
                optimizer=optimizer,
                data_train=data_train,
                data_val=data_val,
                stopper=stopper.reset(),
                losses=losses,
                device=device,
                conf=conf,
                pbar=pbar,
            )

            loaders_infront, loaders_end = eras.validation_loaders()
            model_infront, naive_infront = eras_ahead_loss(model, loaders_infront, optimizer, conf)
            model_end, naive_end = eras_ahead_loss(model, loaders_end, optimizer, conf)

            # Mean relative difference between model loss and naive loss per batch
            # (negative means the model beats the naive prediction), averaged over
            # the two loader groups.
            metric_loss = 0.5*(np.mean(model_infront/naive_infront-1) +  np.mean(model_end/naive_end-1))

            run.log({
                "era_train": np.mean(train_losses),
                "era_val": np.mean(val_losses),
                "model_infront": np.mean(model_infront),
                "naive_infront": np.mean(naive_infront),
                "model_end": np.mean(model_end),
                "naive_end": np.mean(naive_end),
                "metric_loss": metric_loss,
                **eras.loader_to_na_dict[eras.date],
                "time": eras.date.timestamp(),
                "era": i,
            })

        if conf.save_model:
            put_nn_model(model, run)

    return model, losses

In [17]:
def get_params_from_data(stock_df, fundamental_df, meta_df, macro_df, params_human):
    """Derive the model-size parameters that depend on the loaded data frames.

    Args:
        stock_df: Not read by this function.
        fundamental_df: Frame whose columns from ``"revenue"`` onward are counted.
        meta_df: Frame whose columns after the first (except ``"founding_year"``)
            are treated as categorical.
        macro_df: Frame whose columns after the first are counted as features.
        params_human: Mapping providing ``"forecast_w"``.

    Returns:
        Dict with the keys
        ``meta_cont_lens``: the fixed pair ``(1, 1)``;
        ``meta_cat_lens``: one ``(n, ceil(n ** 0.25))`` pair per categorical column,
        where ``n`` is the number of unique values plus one;
        ``out_len``: ``params_human["forecast_w"]``;
        ``input_size``: total number of stock, macro and fundamental features.
    """
    n_meta_cont = 1

    categorical_cols = [name for name in meta_df.iloc[:, 1:] if name != "founding_year"]
    cardinalities = np.array([len(meta_df[name].unique()) for name in categorical_cols]) + 1
    cat_lens = [(size, int(math.ceil(size ** 0.25))) for size in cardinalities]

    n_stock = 1
    n_macro = macro_df.shape[1] - 1
    n_funda = (fundamental_df.loc[:, "revenue":].shape[1] - 1) + 2

    return {
        "meta_cont_lens": (n_meta_cont, 1),
        "meta_cat_lens": cat_lens,
        "out_len": params_human["forecast_w"],
        "input_size": n_stock + n_macro + n_funda,
    }

In [18]:
# Settings fixed by hand for this run.
params_human = {
    "cpus": 1,
    "training_w": 240,
    "forecast_w": 240,
    "start_date": "2000-12-31",
    "end_date": "2018-04-30",
    "save_model": True,
    "batch_size": 512,
    "pre_proc_data_dir": pre_proc_data_dir,
    "clamp": 2,
    "dtype": "float32",
    "queue_length": 6,
    "include_past": True,
    "checkpoint": True,
}

# Training and architecture hyperparameters.
# NOTE(review): presumably the values a W&B sweep overrides — confirm.
# NOTE(review): the run output saved with this notebook reports learning_rate=0.001,
# i.e. it was produced before the value below was changed.
params_wb = {
    "max_epochs": 500,
    "patience": 10,
    "min_delta": 0.0001,
    "learning_rate": 0.0001,
    "hd": 256,
    "dropout": 0.1,
    "num_layers": 5,
    "channels": 256,
    "kernel_size": 5,
    "meta_hd": 16,
    "model": "TcnV2",
    "train_loss": "mse_2",
    "val_loss": "mape_2",
    "activation": "relu",
}

# Sizes that follow from the loaded data frames.
params_from_data = get_params_from_data(stock_df, fundamental_df, meta_df, macro_df, params_human)

# Later groups win if a key appears in more than one of them.
config = {**params_human, **params_wb, **params_from_data}

In [None]:
# 05.06.2022 Test checkpointing and new pbar handling
# Also, long forecast horizon and with past data as well

enablewb = True
sweepid = None  #"krankile/master/q8hau0w8"

if not sweepid:
    # No sweep id given: run a single training job and keep the results.
    model, losses = train(config=config, project="master", entity="krankile", enablewb=enablewb)

else:
    # Sweep id given: let the W&B agent call `train` repeatedly.
    count = 500 # number of runs to execute
    wb.agent(sweepid, partial(train, config=config, enablewb=enablewb), count=count)

{'cpus': 1, 'training_w': 240, 'forecast_w': 240, 'start_date': '2000-12-31', 'end_date': '2018-04-30', 'save_model': True, 'batch_size': 512, 'pre_proc_data_dir': './artifacts/era-datasets:v4', 'clamp': 2, 'dtype': 'float32', 'queue_length': 6, 'include_past': True, 'checkpoint': True, 'max_epochs': 500, 'patience': 10, 'min_delta': 0.0001, 'learning_rate': 0.001, 'hd': 256, 'dropout': 0.1, 'num_layers': 5, 'channels': 256, 'kernel_size': 5, 'meta_hd': 16, 'model': 'TcnV2', 'train_loss': 'mse_2', 'val_loss': 'mape_2', 'activation': 'relu', 'meta_cont_lens': [1, 1], 'meta_cat_lens': [[110, 4], [6, 2], [91, 4], [285, 5], [3, 2], [5, 2], [7, 2], [14, 2], [58, 3]], 'out_len': 240, 'input_size': 37}


Era 2001-08-31 00:00:00 [9/209]:   4%|▍         | 8/209 [07:06<3:03:07, 54.66s/it, epoch=10, train_loss=0.0972, val_loss=0.244, naive=0.229, triggers=10/10, best_loss=0.229]