<a href="https://colab.research.google.com/github/Krankile/ensemble-forecasting/blob/main/weight_net.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Setup

In [1]:
%%capture
# Install the third-party packages imported further down (pytorch_forecasting, kora, wandb).
# NOTE(review): no versions are pinned, so a fresh runtime may resolve different releases.
!pip install pytorch-forecasting kora wandb

Find your wandb API key here:

[https://wandb.ai/settings](https://wandb.ai/settings)

In [2]:
import wandb
# Authenticate with Weights & Biases; the cell output shows the key being stored in the netrc file.
wandb.login()

from kora import drive
# Presumably mounts Google Drive — the cell output reports "Mounted at /content/drive".
drive.link_nbs()

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Mounted at /content/drive


In [3]:
import os
import copy
import math
import random
from multiprocessing import cpu_count
from pathlib import Path
from collections import namedtuple

import numpy as np
import pandas as pd

from tqdm.notebook import tqdm
from sklearn.utils import shuffle
from sklearn.preprocessing import MinMaxScaler , minmax_scale, StandardScaler
import matplotlib.pyplot as plt

import torch
from torch import nn, optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

from pytorch_forecasting.metrics import SMAPE, MAPE, MASE

In [4]:
# Project folder on the mounted Google Drive; the train/val data paths are built relative to it.
ROOT = Path("/content/drive/MyDrive/12 – Master, Ankile og Krange")
ROOT

PosixPath('/content/drive/MyDrive/12 – Master, Ankile og Krange')

In [5]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

# Build net

In [6]:
class NoneScheduler:
    """No-op stand-in used when no learning-rate schedule is selected.

    The constructor accepts and discards any positional or keyword
    arguments, so it can be created with the same call as a real scheduler,
    and ``step()`` does nothing.
    """

    def __init__(self, *args, **kwargs):
        # Nothing to store: every argument is intentionally ignored.
        return None

    def step(self):
        """Do nothing; exists so the training loop can always call ``step()``."""
        return None

In [7]:
# Lookup tables that translate string-valued config options into objects.

# Activation *classes*; they are instantiated where the network is built.
activations = dict(
    relu=nn.ReLU,
    elu=nn.ELU,
    leaky=nn.LeakyReLU,
)

# Optimizer *classes*; instantiated with the model parameters at training time.
optimizers = dict(
    adam=torch.optim.Adam,
    sgd=torch.optim.SGD,
    adamw=torch.optim.AdamW,
)

# Ready-to-call loss functions (already instantiated / bound).
loss_functions = dict(
    sm=SMAPE(reduction="mean").loss,
    c=nn.CrossEntropyLoss().to(device),
    mse=nn.MSELoss().to(device),
    mase=MASE(reduction="mean").loss,
)

# Scheduler classes; the key None selects the no-op scheduler.
schedulers = {"onecyclelr": optim.lr_scheduler.OneCycleLR, None: NoneScheduler}

# Scaler *instances* (not classes): every lookup returns the same shared object.
scalers = dict(
    minmax=MinMaxScaler(feature_range=(-1, 1)),
    standard=StandardScaler(),
)

In [8]:
class WeightNet(nn.Module):
    """Feed-forward network that turns series features into ensemble weights.

    Categorical inputs are embedded, continuous inputs are batch-normalised,
    both are concatenated and passed through a stack of fully connected
    layers. The output is a softmax over ``out_size`` values, so every row of
    the result is non-negative and sums to one.

    Args:
        num_cont: Number of continuous input features.
        out_size: Number of output weights per example.
        n_hidden: Number of hidden blocks. With ``n_hidden == 0`` only the
            input projection to ``hidden_dim`` is built and ``out_size`` is
            not used.
        hidden_dim: Width of every hidden layer.
        dropout: Dropout probability applied in every hidden block.
        bn: Whether to add ``BatchNorm1d`` inside every hidden block.
        activation: Key into the module-level ``activations`` table.
        emb_dims: Iterable of ``(num_categories, embedding_dim)`` pairs, one
            per categorical column.

    Note:
        Each hidden block is ``Dropout -> [BatchNorm1d] -> activation ->
        Linear``. The previous expression
        ``[dropout] + [bn] if bn else [] + [act]`` was parsed as
        ``([dropout] + [bn]) if bn else ([] + [act])``, which dropped the
        activation when ``bn`` was true and dropped dropout when it was false.
        Fixing this shifts the layer indices inside ``fc``, so state dicts
        saved with the old layout will not load into this class.
    """

    def __init__(self, num_cont, out_size, n_hidden, hidden_dim, dropout, bn, activation, emb_dims):
        super().__init__()

        self.embeddings = nn.ModuleList([nn.Embedding(x, y) for x, y in emb_dims])
        self.num_embs = sum(y for _, y in emb_dims)
        self.num_cont = num_cont

        layers = [nn.Linear(self.num_embs + self.num_cont, hidden_dim)]

        self.first_bn = nn.BatchNorm1d(self.num_cont)

        for i in range(n_hidden):
            # Built step by step so that each component is included
            # independently of the others (see the class docstring).
            layers.append(nn.Dropout(p=dropout))
            if bn:
                layers.append(nn.BatchNorm1d(hidden_dim))
            layers.append(activations[activation]())

            # The last block projects to the output size, the others keep the width.
            is_last = i == (n_hidden - 1)
            layers.append(nn.Linear(hidden_dim, out_size if is_last else hidden_dim))

        self.fc = nn.Sequential(*layers)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, cat, cont):
        """Return softmax weights for a batch.

        Args:
            cat: Integer tensor whose column ``i`` is fed to embedding ``i``.
            cont: Tensor of continuous features with ``num_cont`` columns.

        Returns:
            Tensor of shape ``(batch, out_size)`` whose rows sum to one.
        """
        x = [emb(cat[:, i]) for i, emb in enumerate(self.embeddings)]
        x = torch.cat(x, 1)

        cont = self.first_bn(cont)

        x = torch.cat([x, cont], 1)
        x = self.fc(x)
        x = self.softmax(x)
        return x

# Create training loop

Define the OWA loss function from the M4 competition

In [9]:
def OWA(naive_pred, actual):
    """Build an OWA-style metric relative to a naive forecast.

    The result is the average of two ratios: SMAPE divided by the SMAPE loss
    of ``naive_pred`` against ``actual``, and MAPE divided by the MAPE loss of
    the same pair. According to the original comment, the returned object is
    a new metric exposing ``.loss(y_pred, actual)``.

    NOTE(review): the M4 competition defines OWA with MASE rather than MAPE —
    confirm that MAPE is intended here.
    """
    smape_ratio = SMAPE() / SMAPE().loss(naive_pred, actual)
    mape_ratio = MAPE() / MAPE().loss(naive_pred, actual)
    return 0.5 * (smape_ratio + mape_ratio)

Define the feature extractor that is applied to the training dataframe of size 90,000 x 527

In [10]:
def feature_extractor(df, manual_auto_tp_toggle, normalization):
    """Split a meta-dataset frame into model inputs, forecasts, actuals and mask.

    Args:
        df: Frame with one row per series. It must contain the column ``h``,
            the forecast columns ``auto_arima_forec_0`` .. ``snaive_forec_47``,
            the feature columns selected by the toggle, the categorical
            columns ``type`` and ``period`` and the columns ``actual_0`` ..
            ``actual_47``. Column order matters because label slices are used.
        manual_auto_tp_toggle: String choosing the continuous feature range.
            Containing "m" starts the slice at ``x_acf1`` (otherwise
            ``lstm_0``); containing "a" ends it at ``lstm_31`` (otherwise
            ``series_length``). Matching is case-insensitive.
        normalization: Key into the module-level ``scalers`` table.

    Returns:
        ``((inputs_cat, emb_dims), inputs_normalized, forecasts, actuals, mask)``

        * ``inputs_cat``: ``torch.long`` tensor with the category codes of
          ``type`` and ``period``.
        * ``emb_dims``: list of ``(n_categories, min(n_categories // 2, 50))``.
        * ``inputs_normalized``: scaled continuous features.
        * ``forecasts``: array of shape ``(batch, 48, 9)``.
        * ``actuals``: array built from ``actual_0`` .. ``actual_47``.
        * ``mask``: bool tensor of shape ``(batch, 48)`` that is true for the
          first ``int(h)`` steps of every row.

    Raises:
        ValueError: If the toggle contains neither "m" nor "a".

    Note:
        ``scalers[normalization]`` is a shared instance that is re-fitted on
        every call, so each frame is scaled with its own statistics.
    """
    # TODO (carried over from the original): these sizes are still hard coded.
    n_models, horizon = 9, 48

    toggle = manual_auto_tp_toggle.lower()
    use_manual = "m" in toggle
    use_auto = "a" in toggle
    if not (use_manual or use_auto):
        # Previously only the empty string was rejected; any other toggle
        # without "m"/"a" silently produced the slice "lstm_0":"series_length".
        raise ValueError(
            "Manual_or_auto_toggle needs to cointain either m or a for input to be non-empty")

    batch_size = df.shape[0]

    # Row i is True for the first int(h_i) forecast steps and False afterwards.
    horizons = torch.as_tensor(df["h"].astype(int).to_numpy())
    mask = torch.arange(horizon).unsqueeze(0) < horizons.unsqueeze(1)

    # Get forecasts
    forecasts = df.loc[:, "auto_arima_forec_0":"snaive_forec_47"]

    # Get feature inputs
    inputs_start = "x_acf1" if use_manual else "lstm_0"
    inputs_end = "lstm_31" if use_auto else "series_length"
    inputs = df.loc[:, inputs_start:inputs_end]

    # Categorical inputs: remember the cardinalities, then replace by integer codes.
    inputs_cat = df.loc[:, ['type', 'period']].astype("category")
    cardinalities = [len(inputs_cat[col].cat.categories) for col in inputs_cat]
    emb_dims = [(n_cat, min(n_cat // 2, 50)) for n_cat in cardinalities]

    for col in inputs_cat:
        inputs_cat[col] = inputs_cat[col].cat.codes

    inputs_cat = torch.as_tensor(inputs_cat.to_numpy(), dtype=torch.long)

    scaler = scalers[normalization]
    inputs_normalized = scaler.fit_transform(inputs.to_numpy())

    # Get actuals
    actuals = df.loc[:, "actual_0":"actual_47"].to_numpy()

    # Columns are grouped per model, so reshape to (batch, model, step) and
    # swap the last two axes to obtain (batch, step, model).
    forecasts = forecasts.to_numpy().reshape((batch_size, n_models, horizon)).swapaxes(1, 2)

    return (inputs_cat, emb_dims), inputs_normalized, forecasts, actuals, mask


## Dataset

In [11]:
class M4Data(Dataset):
    """Dataset over one feather file of the meta-dataset.

    The file is read once in the constructor, missing values are replaced by
    zero and the frame is passed through ``feature_extractor``. Indexing
    returns the tuple ``(cats, input, forecast, actuals, mask)`` for one row.
    """

    def __init__(self, path, manual_or_auto_toggle,type_of_normalization="standard"):
        frame = pd.read_feather(path).set_index("index").replace(np.nan, 0)
        self.index = frame.index.values
        self.length = frame.shape[0]

        extracted = feature_extractor(frame, manual_or_auto_toggle, type_of_normalization)
        (self.cats, self.emb_dims), self.input, self.forecast, self.actuals, self.mask = extracted

        # Number of continuous features = number of columns of the scaled inputs.
        self.num_cont = self.input.shape[1]

    def __len__(self):
        """Number of rows in the underlying frame."""
        return self.length

    def __getitem__(self, idx):
        """Return the per-row slices of all five arrays, in a fixed order."""
        fields = (self.cats, self.input, self.forecast, self.actuals, self.mask)
        return tuple(field[idx] for field in fields)

In [28]:
def get_dataloaders(train_path, val_path, batch_size, manual_or_auto_toggle, normalize="standard"):
    """Create the training and validation loaders.

    Args:
        train_path: Feather file with the training rows.
        val_path: Feather file with the validation rows.
        batch_size: Batch size used by both loaders.
        manual_or_auto_toggle: Feature toggle forwarded to ``M4Data``.
        normalize: Scaler key forwarded to ``M4Data``.

    Returns:
        ``(train_loader, val_loader, emb_dims, num_cont, num_train_examples)``
        where the last three values are taken from the training dataset.
        The training loader shuffles and drops the last incomplete batch; the
        validation loader does neither.
    """
    n_workers = cpu_count()
    print(f"CPU count: {n_workers}")

    train_set = M4Data(train_path, manual_or_auto_toggle, normalize)
    val_set = M4Data(val_path, manual_or_auto_toggle, normalize)

    train_loader = DataLoader(
        train_set,
        batch_size=batch_size,
        shuffle=True,
        num_workers=n_workers,
        drop_last=True,
    )
    val_loader = DataLoader(
        val_set,
        batch_size=batch_size,
        shuffle=False,
        num_workers=n_workers,
    )

    return train_loader, val_loader, train_set.emb_dims, train_set.num_cont, train_set.length

## Training loop

In [13]:
def train_model(model, train_loader, val_loader, num_examples, conf):
    """Train ``model`` and return it in eval mode with its best weights loaded.

    Every epoch runs a training pass followed by a validation pass. Whenever
    the mean validation loss improves, the weights are copied, written to
    ``model.torch`` and uploaded with ``wandb.save``. Metrics are logged to
    wandb once per epoch.

    Args:
        model: Network called as ``model(cats, inputs)``; its output is used
            as per-forecast weights.
        train_loader: Iterable of ``(cats, inputs, forecasts, actuals, mask)``.
        val_loader: Same structure as ``train_loader``.
        num_examples: Number of training examples; only used to derive the
            steps per epoch when ``train_loader`` has no length.
        conf: Object with the attributes ``batch_size``, ``optimizer``,
            ``learning_rate``, ``weight_decay``, ``schedule``, ``epochs`` and
            ``loss_func``. ``pct_start``, ``cycle_momentum`` and
            ``three_phase`` are forwarded to the scheduler when present.

    Returns:
        ``model.eval()`` after loading the best weights seen. If no epoch
        improves on the initial loss (for example because the loss is NaN),
        the initial weights are kept.
    """
    batch_size = conf.batch_size
    checkpoint_path = "model.torch"

    optimizer = optimizers[conf.optimizer](
        model.parameters(), lr=conf.learning_rate, weight_decay=conf.weight_decay
    )

    # The schedule must match the number of optimizer steps that really
    # happen. A loader created with drop_last=True yields fewer batches than
    # ceil(num_examples / batch_size), so prefer the loader's own length.
    try:
        steps_per_epoch = len(train_loader)
    except TypeError:
        steps_per_epoch = math.ceil(num_examples / batch_size)

    # pct_start / cycle_momentum / three_phase are part of the run config but
    # were never handed to the scheduler; the fallbacks are OneCycleLR's defaults.
    scheduler = schedulers[conf.schedule](
        optimizer, conf.learning_rate,
        epochs=conf.epochs,
        steps_per_epoch=steps_per_epoch,
        pct_start=getattr(conf, "pct_start", 0.3),
        cycle_momentum=getattr(conf, "cycle_momentum", True),
        three_phase=getattr(conf, "three_phase", False),
    )

    loss_func = loss_functions[conf.loss_func]
    it = tqdm(range(1, conf.epochs+1))

    best_loss = float("inf")
    # Defined up front so the final load_state_dict cannot hit an unbound name.
    best_model_wts = copy.deepcopy(model.state_dict())
    step = 0

    for epoch in it:

        # Each epoch has a training and a validation phase
        train_losses = []
        val_losses = []
        for phase in ('train', 'val'):
            is_train = phase == 'train'
            if is_train:
                model.train()  # Set model to training mode
                batches = train_loader
            else:
                model.eval()  # Set model to evaluate mode
                batches = val_loader

            # No autograd graph is needed (or wanted) during validation.
            with torch.set_grad_enabled(is_train):
                for cats, inputs, forecasts, actuals, mask in batches:
                    cats = cats.to(device)
                    inputs = inputs.to(device)
                    forecasts = forecasts.to(device)
                    actuals = actuals.to(device)
                    mask = mask.to(device)

                    if is_train:
                        optimizer.zero_grad()

                    # Model output is used as a column of weights per example.
                    y_pred = model(cats, inputs).unsqueeze(2)

                    # Scale prediction and target by each row's first actual value.
                    # NOTE(review): a first actual of 0 would divide by zero here.
                    normalization_weights = actuals[:, 0:1]
                    normalized_actuals = actuals / normalization_weights
                    prediction = torch.matmul(forecasts, y_pred).squeeze(2) / normalization_weights

                    # Only the positions selected by the mask contribute to the loss.
                    loss = loss_func(
                        prediction.masked_select(mask),
                        normalized_actuals.masked_select(mask),
                    ).mean()

                    if is_train:
                        train_losses.append(loss.item())
                        loss.backward()

                        optimizer.step()
                        scheduler.step()

                        step += 1
                    else:
                        val_losses.append(loss.item())

        train_loss = np.mean(train_losses)
        val_loss = np.mean(val_losses)

        if val_loss < best_loss:
            best_loss = val_loss
            best_model_wts = copy.deepcopy(model.state_dict())

            torch.save(best_model_wts, checkpoint_path)
            wandb.save(checkpoint_path)

        current_lr = optimizer.param_groups[0]["lr"]
        wandb.log({"train_loss": train_loss, "val_loss": val_loss, "epoch": epoch, "best_loss": best_loss, "n_examples": batch_size*step, "lr": current_lr}, step=step)
        it.set_postfix({"train_loss": train_loss, "val_loss": val_loss, "best_loss": best_loss, "lr": f'{current_lr:.2e}'})

    model.load_state_dict(best_model_wts)
    return model.eval()

## Begin training

In [14]:
def train(config=None, project=None, entity=None, enablewb=True):
    """Run one complete training job inside a wandb run.

    Args:
        config: Mapping with the run configuration; ``defaultconfig`` is used
            when it is ``None``.
        project: Forwarded to ``wandb.init``.
        entity: Forwarded to ``wandb.init``.
        enablewb: ``True`` starts wandb in "online" mode, ``False`` in
            "disabled" mode.

    Returns:
        The model returned by ``train_model``.
    """
    if config is None:
        config = defaultconfig
    wandb_mode = "online" if enablewb else "disabled"

    with wandb.init(config=config, project=project, entity=entity, job_type="training", mode=wandb_mode) as run:
        # Values from wandb.config take precedence over the ones passed in.
        merged = {**config, **wandb.config}
        Config = namedtuple("Config", merged.keys())
        conf = Config(*merged.values())
        print(conf)

        meta_dir = ROOT
        train_path = meta_dir / 'Data/Meta/m4_meta_am_train.feather'
        val_path = meta_dir / 'Data/Meta/m4_meta_am_val.feather'

        loaders = get_dataloaders(
            train_path,
            val_path,
            conf.batch_size,
            conf.manual_or_auto_toggle,
            conf.normalize_data,
        )
        train_loader, val_loader, emb_dims, num_cont, num_examples = loaders

        wandb.log({"meta_variables": True}, step=0)

        net = WeightNet(
            num_cont=num_cont,
            out_size=9,
            n_hidden=conf.n_hidden,
            hidden_dim=conf.hidden_dim,
            dropout=conf.dropout,
            bn=conf.bn,
            activation=conf.act,
            emb_dims=emb_dims,
        )

        print(f"Moving model to device: {device}")
        net = net.to(device)

        net = train_model(net, train_loader, val_loader, num_examples, conf=conf)
    return net
    

## Run config

In [29]:
# Default run configuration; used by train() whenever no config is passed in.
defaultconfig = {
    # Training schedule
    "epochs": 2_000,
    "hidden_dim": 512,
    "learning_rate": 2e-3,
    # Bookkeeping labels
    "dataartifact": "m4_meta_am_train:v1",
    "architecture": "weight_net_v03",
    "batch_size": 2048,
    # Keys into the optimizers / loss_functions tables
    "optimizer": "adamw",
    "loss_func": "sm",
    "dropout": 0.6,
    "early_stop": False,
    # Feature selection toggle and scaler key used when the data is loaded
    "manual_or_auto_toggle": "ma",
    "normalize_data": "standard",
    "weight_decay": 0.05,
    # Network shape
    "act": "relu",
    "bn": True,
    "n_hidden": 3,
    # Key into the schedulers table; the next three names match OneCycleLR arguments
    "schedule": "onecyclelr",
    "pct_start": 0.3,
    "cycle_momentum": True,
    "categorical_vars": True,
    "three_phase": False,
}

## Start run

In [None]:
# Switch between a wandb hyper-parameter sweep and a single run.
sweep = True

if sweep:
    count = 500 # number of runs to execute
    # The agent calls train() without arguments, so train() starts from defaultconfig
    # and the values in wandb.config are merged on top of it.
    wandb.agent("krankile/weight-net/b8i0flha", function=train, count=count)
else:
    # Single run with the default configuration; enablewb=False starts wandb in "disabled" mode.
    train(config=defaultconfig, project="weight-net", entity="Krankile", enablewb=False)

[34m[1mwandb[0m: Agent Starting Run: rfy0srlj with config:
[34m[1mwandb[0m: 	batch_size: 4096
[34m[1mwandb[0m: 	bn: False
[34m[1mwandb[0m: 	categorical_vars: True
[34m[1mwandb[0m: 	cycle_momentum: True
[34m[1mwandb[0m: 	dropout: 0.256588000459846
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.004055140191379214
[34m[1mwandb[0m: 	n_hidden: 4
[34m[1mwandb[0m: 	pct_start: 0.3
[34m[1mwandb[0m: 	three_phase: True
[34m[1mwandb[0m: 	weight_decay: 0.05137334673863219


Config(epochs=2000, hidden_dim=64, learning_rate=0.004055140191379214, dataartifact='m4_meta_am_train:v1', architecture='weight_net_v03', batch_size=4096, optimizer='adamw', loss_func='sm', dropout=0.256588000459846, early_stop=False, manual_or_auto_toggle='ma', normalize_data='standard', weight_decay=0.05137334673863219, act='relu', bn=False, n_hidden=4, schedule='onecyclelr', pct_start=0.3, cycle_momentum=True, categorical_vars=True, three_phase=True)
CPU count: 4
Moving model to device: cuda


  0%|          | 0/2000 [00:00<?, ?it/s]

# Run a net over test data and get sMAPE, OWA, and MAE loss

Load test data

In [None]:
# Read the test split from the same Data/Meta folder under ROOT that the train and
# validation splits are read from. The path used to be hard coded to a Drive folder
# with a different name than ROOT.
test_df = pd.read_feather(ROOT / "Data/Meta/m4_meta_am_test.feather")

Load the net from wandb

Make the loss loop 

In [None]:
def test_loss(df, runpath, loss_function, use_meta):
    """Load a saved model for ``runpath`` and return its loss on ``df``.

    Args:
        df: Frame passed to ``feature_extractor``.
        runpath: Folder (relative to the hard-coded ``root``) that holds
            ``model.torch`` and ``config.yaml``.
        loss_function: Callable applied to the masked predictions and actuals.
        use_meta: Forwarded to ``feature_extractor`` as a keyword argument.

    Returns:
        Whatever ``loss_function`` returns for the masked values.

    NOTE(review): this function looks out of sync with the rest of the
    notebook. ``Dense_net`` is not defined in the visible cells, and the
    ``feature_extractor`` defined above takes no ``use_meta`` argument and
    returns five values, not four. The ``root`` folder also differs from
    ``ROOT``. Confirm before running.
    """
    root = "/content/drive/MyDrive/Master, Ankile og Krange/"

    modelpath = root + runpath + "model.torch"
    configpath = root + runpath + "config.yaml"

    # wandb is started in "disabled" mode; the run is only used to read the config file.
    params = dict(
        config=configpath,
        project="lstm-vae",
        entity="krankile",
        job_type="smape-testset",
        mode="disabled",
    )

    inputs, forecasts, actuals, mask = feature_extractor(df, "am", "standard", use_meta=use_meta)

    with wandb.init(**params) as run:
        conf = run.config

        model = Dense_net(
            in_size=inputs.shape[1],
            out_size=9, #len(config.models),
            dropout_1=conf.dropout_1,
            dropout_2=conf.dropout_2,
            hidden_dim1=conf.hidden_dim1,
            hidden_dim2=conf.hidden_dim2,
        )

    print(modelpath)
    model.load_state_dict(torch.load(modelpath))
    model = model.eval()
    print(model)


    model = model.to(device)
    
    inputs = torch.Tensor(inputs).to(device)
    forecasts = torch.Tensor(forecasts).to(device)
    actuals = torch.Tensor(actuals).to(device)
    mask = mask.to(device)

    # NOTE(review): the forward pass is not wrapped in torch.no_grad().
    y_pred = model(inputs).unsqueeze(2)  # Array containing tensors of weighted average for all forecasts

    # Scale prediction and target by each row's first actual value.
    normalization_weights = actuals[:,0:1]

    normalized_actuals = actuals / normalization_weights

    prediction = torch.matmul(forecasts, y_pred).squeeze(2) / normalization_weights

    # Only the positions selected by the mask enter the loss.
    loss = loss_function(prediction.masked_select(mask), normalized_actuals.masked_select(mask))

    return loss

In [None]:
# Folder of the saved run, relative to the root that is hard coded inside test_loss.
runpath = "Models/211109_weightnet_robust_sweep_97/"

# "sm" selects the SMAPE loss from the loss_functions table.
loss_function = loss_functions["sm"]

losses = test_loss(test_df, runpath, loss_function, use_meta=True)

In [None]:
# Mean loss scaled by 100; shown as the cell result.
losses.mean().item() * 100

robust smape 11.787387728691101
desert smape

# Outdated

In [None]:
# NOTE(review): this cell sits under the "Outdated" heading. Dense_net, meta_train and net
# are not defined in the visible cells, and feature_extractor above returns five values,
# not four — confirm before running.
net_untrained = Dense_net(42,9)
plt.rcParams["figure.figsize"] = (16,8)
def plot_some_forecast_and_actuals(net, n_plots):
    """Plot trained vs. untrained weighted forecasts against the actuals.

    ``n_plots`` row indices are sampled from ``range(5000)``; for each one a
    figure shows the forecast weighted by ``net``, the forecast weighted by
    the module-level ``net_untrained`` and the actual values.
    """
    
    for i in random.sample(range(5000), k=n_plots): 
      
      # Single-row frame (double brackets keep it a DataFrame).
      inputs, forecasts, actuals, mask = feature_extractor(meta_train.iloc[[i]], "m", "minmax")
      y_pred = net(inputs).unsqueeze(2)
      y_pred_2 = net_untrained(inputs).unsqueeze(2)
      
      #print("trained", y_pred)
      #print("untrained", y_pred_2)

      
      # Unused below.
      method_forecast_tup = []

      # Weighted combination of the individual forecasts.
      predictions = torch.matmul(forecasts, y_pred).squeeze(2)
      predictions_un = torch.matmul(forecasts, y_pred_2).squeeze(2)

      plt.title(i)
      plt.plot(predictions.detach().numpy()[0], label="prediction")
      plt.plot(predictions_un.detach().numpy()[0], label="Untrained prediction")
      plt.plot(actuals.detach().numpy()[0], label="actual")
      plt.legend()
      plt.show()

plot_some_forecast_and_actuals(net, 3)