<a href="https://colab.research.google.com/github/Krankile/ensemble_forecasting/blob/main/notebooks/weight_net/weight_net.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

##Setup

In [35]:
%%capture
!pip install pytorch-forecasting kora wandb

Go here to find wandb API key:

[https://wandb.ai/settings](https://wandb.ai/settings)

In [36]:
import wandb
wandb.login()

from kora import drive
drive.link_nbs()



In [37]:
import os
import copy
import math
import random
from multiprocessing import cpu_count
from pathlib import Path
from collections import namedtuple

import numpy as np
import pandas as pd

from tqdm import tqdm
from sklearn.utils import shuffle
from sklearn.preprocessing import MinMaxScaler , minmax_scale, StandardScaler
import matplotlib.pyplot as plt

import torch
from torch import nn, optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

from pytorch_forecasting.metrics import SMAPE

In [38]:
ROOT = Path("/content/drive/MyDrive/Master, Ankile og Krange")
ROOT

PosixPath('/content/drive/MyDrive/Master, Ankile og Krange')

In [39]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

## Define loss functions

In [40]:
def mase(pred, actual, *args):
    divs, *_ = args
    return torch.div(nn.functional.l1_loss(pred, actual, reduction="none").sum(1), divs).mean()

def smape(pred, actual, *args):
    *_, h = args
    return 200 * torch.div(((pred - actual).abs() / (pred.abs() + actual.abs() + 1e-40)).sum(1), h).mean()

def owa(pred, actual, *args):
    _, n_smape, n_mase, __ = args
    return 0.5*(torch.div(smape(pred, actual, *args), n_smape.mean()) + torch.div(mase(pred, actual, *args), n_mase.mean()))

class NoneScheduler:
    def __init__(self, *args, **kwargs):
        pass
    
    def step(self):
        pass

activations = {
    "relu": nn.ReLU, 
    "elu": nn.ELU, 
    "leaky": nn.LeakyReLU, 
}

optimizers = {
    "adam": torch.optim.Adam,
    "sgd": torch.optim.SGD,
    "adamw": torch.optim.AdamW,
}

loss_functions = {
    "smape": smape,
    "mse": nn.MSELoss().to(device),
    "mase": mase,
    "owa": owa,
}

schedulers = {
    "onecyclelr": optim.lr_scheduler.OneCycleLR,
    None: NoneScheduler
}

scalers = {
    "minmax": MinMaxScaler(feature_range=(-1,1)),
    "standard": StandardScaler(),
}

#Build net

In [41]:
class WeightNet(nn.Module):

    def __init__(self, num_cont, out_size, n_hidden, hidden_dim, dropout, bn, activation, emb_dims):
        super().__init__()

        self.embeddings = nn.ModuleList([nn.Embedding(x, y) for x, y in emb_dims])
        self.num_embs = sum([y for x, y in emb_dims])
        self.num_cont = num_cont

        layers = [nn.Linear(self.num_embs + self.num_cont, hidden_dim)]

        self.first_bn = nn.BatchNorm1d(self.num_cont)

        for i in range(n_hidden):
            layers.extend(
                [nn.Dropout(p=dropout)]
                +[nn.BatchNorm1d(hidden_dim)] if bn else []
                +[activations[activation]()]
            )
            if i == (n_hidden - 1):
                layers.append(nn.Linear(hidden_dim, out_size))
            else:
                layers.append(nn.Linear(hidden_dim, hidden_dim))

        self.fc = nn.Sequential(*layers)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, cat, cont):
        x = [emb(cat[:, i]) for i, emb in enumerate(self.embeddings)]
        x = torch.cat(x, 1)

        cont = self.first_bn(cont)
        
        x = torch.cat([x, cont], 1) 
        x = self.fc(x)
        x = self.softmax(x)
        return x

#Create training loop

## Feature extractor
Load the training dataframe of size 90,000 x 527

In [42]:
 def feature_extractor(df, manual_auto_tp_toggle, normalization, n_models):
     
    batch_size = df.shape[0]
    idxs = df.index.to_numpy()

    #Get forecasts
    forecasts = df.loc[:, "auto_arima_forec_0":"quant_99_reg_47"]

    #Get feature inputs 
    if manual_auto_tp_toggle == "":
        raise Exception("Manual_or_auto_toggle needs to cointain either m or a for input to be non-empty")


    inputs_start = "x_acf1" if "m" in manual_auto_tp_toggle.lower() else "lstm_0"
    inputs_end = "lstm_31" if "a" in manual_auto_tp_toggle.lower() else "series_length"
    
    inputs = df.loc[:, inputs_start:inputs_end]
    
    inputs_cat = df.loc[:, ['type', 'period']].astype("category")
    emb_dims = [(x, min(x // 2, 50)) for x in map(lambda y: len(inputs_cat[y].cat.categories), inputs_cat)]
    
    for col in inputs_cat:
        inputs_cat[col] = inputs_cat[col].cat.codes

    inputs_cat = torch.as_tensor(inputs_cat.to_numpy(), dtype=torch.long)

    scaler = scalers[normalization]
    inputs_normalized = scaler.fit_transform(inputs.to_numpy())

    #Get actuals 
    actuals = df.loc[:, "actual_0":"actual_47"].to_numpy()
    forecasts = forecasts.to_numpy().reshape((batch_size, n_models, 48)).swapaxes(1, 2)

    return (inputs_cat, emb_dims), inputs_normalized, forecasts, actuals

## Dataset

In [43]:
class M4Data(Dataset):
    
    def __init__(self, meta_path, loss_path, manual_or_auto_toggle, n_models, type_of_normalization="standard"):
        meta_df = pd.read_feather(meta_path).set_index("index").replace(np.nan, 0)
        loss_df = pd.read_feather(loss_path).set_index("st").loc[meta_df.index]

        self.h = meta_df["h"].astype(np.int16)
        self.divs = loss_df["mase_divisor"]
        self.n_smape = loss_df["naive2_smape"]
        self.n_mase = loss_df["naive2_mase"]

        self.index = meta_df.index.values
        self.length = meta_df.shape[0]
  
        (self.cats, emb_dims), self.input, self.forecast, self.actuals = feature_extractor(meta_df, manual_or_auto_toggle, type_of_normalization, n_models)

        self.num_cont = self.input.shape[1]
        self.emb_dims = emb_dims

    def __len__(self):
        return self.length
    
    def __getitem__(self, idx):
        return self.cats[idx], self.input[idx], self.forecast[idx], self.actuals[idx], self.divs[idx], self.n_smape[idx], self.n_mase[idx], self.h[idx]

In [44]:
def get_dataloaders(train_path, val_path, loss_train_path, loss_val_path, batch_size, manual_or_auto_toggle, n_models, normalize="standard"):
    
    cpus = cpu_count()
    print(f"CPU count: {cpus}")
    train_data = M4Data(train_path, loss_train_path, manual_or_auto_toggle, n_models, normalize)
    val_data = M4Data(val_path, loss_val_path, manual_or_auto_toggle, n_models, normalize)

    train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True, num_workers=cpus, drop_last=True)
    val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=False, num_workers=cpus)

    return train_loader, val_loader, train_data.emb_dims, train_data.num_cont, train_data.length

## Training loop

In [45]:
def train_model(model, train_loader, val_loader, num_examples, conf):    
    batch_size = conf.batch_size

    optimizer = optimizers[conf.optimizer](model.parameters(), lr=conf.learning_rate, weight_decay=conf.weight_decay)
    scheduler = schedulers[conf.schedule](
        optimizer, conf.learning_rate,
        epochs=conf.epochs,
        steps_per_epoch=math.ceil(num_examples / batch_size),
    )

    loss_func = loss_functions[conf.loss_func]
    train_loss_plot = []
    val_loss_plot = []
    it = tqdm(range(1, conf.epochs+1))
    
    best_loss = float("inf")
    step = 0

    for epoch in it:

        #Each epoch has a training and validation phase
        train_losses = []
        val_losses = []
        for phase in ['train','val']:
            if phase == 'train':
                model.train()  # Set model to training mode
                batches = train_loader
            else:
                model.eval()  # Set model to evaluate mode
                batches = val_loader
            for i, tensors in enumerate(batches):
                cats, inputs, forecasts, actuals, *loss_args = map(lambda x: x.to(device), tensors)
                optimizer.zero_grad()

                y_pred = model(cats, inputs).unsqueeze(2)

                prediction = torch.matmul(forecasts, y_pred).squeeze(2)
                loss = loss_func(prediction, actuals, *loss_args)
                if phase == 'train':
                    train_losses.append(loss.item())
                    loss.backward()

                    optimizer.step()
                    scheduler.step()
                    
                    step += 1
                else:
                    val_losses.append(loss.item())

        train_loss = np.mean(train_losses)
        val_loss = np.mean(val_losses)

        if val_loss < best_loss: 
            best_loss = val_loss
            best_model_wts = copy.deepcopy(model.state_dict())

            filepath = "model.torch"
            torch.save(best_model_wts, filepath)
            wandb.save(filepath)

        train_mean = np.mean(train_loss)
        val_mean = np.mean(val_loss)
        wandb.log({"train_loss": train_loss, "val_loss": val_loss, "epoch": epoch, "best_loss": best_loss, "n_examples":batch_size*step, "lr": optimizer.param_groups[0]["lr"]}, step=step)
        it.set_postfix({"train_loss": train_mean, "val_loss": val_mean, "best_loss": best_loss, "lr": f'{optimizer.param_groups[0]["lr"]:.2e}'})
        
        train_loss_plot.append(train_mean)
        val_loss_plot.append(val_mean)

    model.load_state_dict(best_model_wts)
    return model.eval()

##Begin training

In [46]:
def train(config=None, project=None, entity=None, enablewb=True):
    mode = "online" if enablewb else "disabled"
    config = config if config is not None else defaultconfig
    with wandb.init(config=config, project=project, entity=entity, job_type="training", mode=mode) as run:
        confdict = {**config, **wandb.config}
        conf = namedtuple("Config", confdict.keys())(*confdict.values())
        print(conf)

        train_path = ROOT / 'Data/Meta/m4_meta_am_train_14m.feather'
        val_path = ROOT / 'Data/Meta/m4_meta_am_val_14m.feather'
        loss_train_path = ROOT / 'Data/loss_functions/loss_func_train.feather'
        loss_val_path = ROOT / 'Data/loss_functions/loss_func_val.feather'

        (train_loader,
         val_loader,
         emb_dims,
         num_cont,
         num_examples) = get_dataloaders(train_path,val_path,
                                         loss_train_path, loss_val_path,
                                         conf.batch_size, conf.manual_or_auto_toggle,
                                         conf.num_models, conf.normalize_data)

        model = WeightNet(
            num_cont=num_cont,
            out_size=conf.num_models,
            n_hidden=conf.n_hidden,
            hidden_dim=conf.hidden_dim,
            dropout=conf.dropout,
            bn=conf.bn,
            activation=conf.act,
            emb_dims=emb_dims,
        )

        print(f"Moving model to device: {device}")
        model = model.to(device)

        model = train_model(
            model,
            train_loader,
            val_loader,
            num_examples,
            conf=conf,
        )
    return model
    

## Run config

In [47]:
defaultconfig = dict(
    epochs=500,
    hidden_dim=512,
    learning_rate=2e-3,
    architecture="weight_net_v04",
    batch_size=2048,
    optimizer="adamw",
    loss_func="owa",
    dropout=0.6,
    manual_or_auto_toggle="ma",
    normalize_data="standard",
    weight_decay=0.05,
    act="relu",
    bn=False,
    n_hidden=3,
    num_models=14,
    schedule=None,
    categorical_vars=True,
    meta_vars=True,
)

## Start run

In [48]:
sweep = False

if sweep:
    count = 500 # number of runs to execute
    wandb.agent("krankile/weight-net/qvmej9jx", function=train, count=count)
else:
    train(config=defaultconfig, project="weight-net", entity="Krankile", enablewb=False)

Config(epochs=500, hidden_dim=512, learning_rate=0.002, architecture='weight_net_v04', batch_size=2048, optimizer='adamw', loss_func='owa', dropout=0.6, manual_or_auto_toggle='ma', normalize_data='standard', weight_decay=0.05, act='relu', bn=False, n_hidden=3, num_models=14, schedule=None, categorical_vars=True, meta_vars=True)
CPU count: 2
Moving model to device: cuda


  2%|▏         | 8/500 [00:42<43:14,  5.27s/it, train_loss=0.763, val_loss=0.796, best_loss=0.793, lr=2.00e-03]


KeyboardInterrupt: ignored

#Run a net over test data and get sMAPE, OWA, and MAE loss

Load test data

In [None]:
test_df = pd.read_feather("/content/drive/MyDrive/Master, Ankile og Krange/Data/Meta/m4_meta_am_test.feather")

Load the net from wandb

Make the loss loop 

In [None]:
def test_loss(df, runpath, loss_function, use_meta):
    root = "/content/drive/MyDrive/Master, Ankile og Krange/"

    modelpath = root + runpath + "model.torch"
    configpath = root + runpath + "config.yaml"

    params = dict(
        config=configpath,
        project="lstm-vae",
        entity="krankile",
        job_type="smape-testset",
        mode="disabled",
    )

    inputs, forecasts, actuals, mask = feature_extractor(df, "am", "standard", use_meta=use_meta)

    with wandb.init(**params) as run:
        conf = run.config

        model = Dense_net(
            in_size=inputs.shape[1],
            out_size=9, #len(config.models),
            dropout_1=conf.dropout_1,
            dropout_2=conf.dropout_2,
            hidden_dim1=conf.hidden_dim1,
            hidden_dim2=conf.hidden_dim2,
        )

    print(modelpath)
    model.load_state_dict(torch.load(modelpath))
    model = model.eval()
    print(model)


    model = model.to(device)
    
    inputs = torch.Tensor(inputs).to(device)
    forecasts = torch.Tensor(forecasts).to(device)
    actuals = torch.Tensor(actuals).to(device)
    mask = mask.to(device)

    y_pred = model(inputs).unsqueeze(2)  # Array containing tensors of weighted average for all forecasts

    normalization_weights = actuals[:,0:1]

    normalized_actuals = actuals / normalization_weights

    prediction = torch.matmul(forecasts, y_pred).squeeze(2) / normalization_weights

    loss = loss_function(prediction.masked_select(mask), normalized_actuals.masked_select(mask))

    return loss

In [None]:
runpath = "Models/211109_weightnet_robust_sweep_97/"

loss_function = loss_functions["sm"]

losses = test_loss(test_df, runpath, loss_function, use_meta=True)

In [None]:
losses.mean().item() * 100

robust smape 11.787387728691101
desert smape

#Outdated 

In [None]:
net_untrained = Dense_net(42,9)
plt.rcParams["figure.figsize"] = (16,8)
def plot_some_forecast_and_actuals(net, n_plots):
    
    for i in random.sample(range(5000), k=n_plots): 
      
      inputs, forecasts, actuals, mask = feature_extractor(meta_train.iloc[[i]], "m", "minmax")
      y_pred = net(inputs).unsqueeze(2)
      y_pred_2 = net_untrained(inputs).unsqueeze(2)
      
      #print("trained", y_pred)
      #print("untrained", y_pred_2)

      
      method_forecast_tup = []

      predictions = torch.matmul(forecasts, y_pred).squeeze(2)
      predictions_un = torch.matmul(forecasts, y_pred_2).squeeze(2)

      plt.title(i)
      plt.plot(predictions.detach().numpy()[0], label="prediction")
      plt.plot(predictions_un.detach().numpy()[0], label="Untrained prediction")
      plt.plot(actuals.detach().numpy()[0], label="actual")
      plt.legend()
      plt.show()

plot_some_forecast_and_actuals(net, 3)