# PyTorch blend model
Each model will have 4 features:
1. torch.cos(3 * (azi - 0.15788))**50
2. xyz

Use `1 - CosineSimilarity` for loss

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import minimize
from sklearn.model_selection import KFold

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

import pytorch_lightning as pl
from transformers import get_cosine_schedule_with_warmup

In [2]:
oofs = [
    "/mnt/storage_dimm2/kaggle_output/icecube-neutrinos-in-deep-ice/20230323-102724/DynEdge/fold_0/oofs.parquet",
    "/mnt/storage_dimm2/kaggle_output/icecube-neutrinos-in-deep-ice/20230409-080525/DynEdge/fold_0/oofs.parquet",
    "/mnt/storage_dimm2/kaggle_output/icecube-neutrinos-in-deep-ice/20230405-063040/GPS/fold_0/oofs.parquet",
]

In [3]:
oof_df = {}

for i, oof in enumerate(oofs):
    df = pd.read_parquet(oof)
    oof_df[f"model_{i}_azi"] = df["azimuth"]
    oof_df[f"model_{i}_zen"] = df["zenith"]
    
oof_df["azimuth_gt"] = df["azimuth_gt"]
oof_df["zenith_gt"] = df["zenith_gt"]

oof_df = pd.DataFrame(oof_df)

In [4]:
class StackDataset(Dataset):
    def __init__(self, df):
        self.df = df
        self.num_models = len(df.columns) // 2
        self.train = False
        
        if "azimuth_gt" in df.columns:
            self.num_models -= 1
            self.train = True
        
    def __len__(self):
        return len(self.df)
    
    def angles_to_xyz(self, azimuth, zenith):
        x = torch.cos(azimuth) * torch.sin(zenith)
        y = torch.sin(azimuth) * torch.sin(zenith)
        z = torch.cos(zenith)
        return torch.cat([x, y, z])
    
    def __getitem__(self, idx):
        obs = self.df.iloc[idx]
        features = []
        
        for i in range(self.num_models):
            azi = torch.tensor([obs[f"model_{i}_azi"]], dtype=torch.float32)
            zen = torch.tensor([obs[f"model_{i}_zen"]], dtype=torch.float32)
            xyz = self.angles_to_xyz(azi, zen)
            periodic = (torch.cos(3 * (azi - 0.15788))**50)
            features.extend([xyz, periodic])
            
        features = torch.cat(features)
        
        if self.train:
            azi_gt = torch.tensor([obs["azimuth_gt"]], dtype=torch.float32)
            zen_gt = torch.tensor([obs["zenith_gt"]], dtype=torch.float32)
            target = self.angles_to_xyz(azi_gt, zen_gt)
        else:
            target = None
            
        return features, target

In [5]:
def create_folds(data, n_splits=5, random_state=48):
    kf = KFold(n_splits=n_splits, shuffle=True, random_state=random_state)
    data["fold"] = -1

    for f, (t_, v_) in enumerate(kf.split(X=data)):
        data.loc[v_, "fold"] = f

    return data


class StackDataModule(pl.LightningDataModule):

    def __init__(self, data, batch_size: int = 32, num_workers: int = 8):
        super().__init__()
        self.batch_size = batch_size
        self.num_workers = num_workers
        self.data = create_folds(data)
        self.train_steps = 0

    def setup(self, stage=None, fold_n: int = 0):

        if stage == "fit" or stage == "predict":
            trn_df = self.data.query(f"fold != {fold_n}")
            val_df = self.data.query(f"fold == {fold_n}")
            del trn_df["fold"]
            del val_df["fold"]

            self.train_ds = StackDataset(trn_df)
            self.valid_ds = StackDataset(val_df)
            self.train_steps = len(self.train_ds) / self.batch_size
            print(len(self.train_ds), "train and", len(self.valid_ds),
                  "valid samples")

    def train_dataloader(self):
        return DataLoader(
            self.train_ds,
            num_workers=self.num_workers,
            batch_size=self.batch_size,
            shuffle=True,
            drop_last=True,
            pin_memory=True,
            # persistent_workers=True,
        )

    def val_dataloader(self):
        return DataLoader(
            self.valid_ds,
            batch_size=self.batch_size,
            num_workers=self.num_workers,
            pin_memory=True,
            # persistent_workers=True,
        )

    def predict_dataloader(self):
        return DataLoader(
            self.valid_ds,
            batch_size=self.batch_size,
            num_workers=self.num_workers,
            pin_memory=True,
        )

In [6]:
def angular_dist_score(y_pred, y_true):
    """
    calculate the MAE of the angular distance between two directions.
    The two vectors are first converted to cartesian unit vectors,
    and then their scalar product is computed, which is equal to
    the cosine of the angle between the two vectors. The inverse
    cosine (arccos) thereof is then the angle between the two input vectors

    # https://www.kaggle.com/code/sohier/mean-angular-error

    Parameters:
    -----------

    y_pred : float (torch.Tensor)
        Prediction array of [N, 2], where the second dim is azimuth & zenith
    y_true : float (torch.Tensor)
        Ground truth array of [N, 2], where the second dim is azimuth & zenith

    Returns:
    --------

    dist : float (torch.Tensor)
        mean over the angular distance(s) in radian
    """

    az_true = y_true[:, 0]
    zen_true = y_true[:, 1]

    az_pred = y_pred[:, 0]
    zen_pred = y_pred[:, 1]

    # pre-compute all sine and cosine values
    sa1 = torch.sin(az_true)
    ca1 = torch.cos(az_true)
    sz1 = torch.sin(zen_true)
    cz1 = torch.cos(zen_true)

    sa2 = torch.sin(az_pred)
    ca2 = torch.cos(az_pred)
    sz2 = torch.sin(zen_pred)
    cz2 = torch.cos(zen_pred)

    # scalar product of the two cartesian vectors (x = sz*ca, y = sz*sa, z = cz)
    scalar_prod = sz1 * sz2 * (ca1 * ca2 + sa1 * sa2) + (cz1 * cz2)

    # scalar product of two unit vectors is always between -1 and 1, this is against nummerical instability
    # that might otherwise occure from the finite precision of the sine and cosine functions
    scalar_prod = torch.clamp(scalar_prod, -1, 1)

    # convert back to an angle (in radian)
    return torch.mean(torch.abs(torch.arccos(scalar_prod)))

In [7]:
def add_weight_decay(
    model,
    weight_decay=1e-5,
    skip_list=("bias", "bn", "LayerNorm.bias", "LayerNorm.weight"),
):
    decay = []
    no_decay = []
    for name, param in model.named_parameters():
        if not param.requires_grad:
            continue  # frozen weights
        if len(param.shape) == 1 or name.endswith(".bias") or name in skip_list:
            no_decay.append(param)
        else:
            decay.append(param)
    return [
        {"params": no_decay, "weight_decay": 0.0},
        {"params": decay, "weight_decay": weight_decay},
    ]

In [8]:
class StackModel(pl.LightningModule):

    def __init__(
        self,
        nb_models: int = 3,
        model_name: str = "StackModel",
        learning_rate: float = 0.0001,
        weight_decay: float = 0.01,
        eps: float = 1e-8,
        warmup: float = 0.0,
        T_max: int = 1000,
        **kwargs,
    ):
        super().__init__()

        self.save_hyperparameters()

        self.loss_fn = nn.CosineSimilarity()

        self.model = nn.Sequential(
            nn.Linear(nb_models * 4, 128),
            nn.ReLU(),
            nn.Linear(128, 128),
            nn.ReLU(),
            nn.Linear(128, 3),
        )

    def forward(self, x):
        return self.model(x)

    def xyz_to_angles(self, xyz):
        x = xyz[:, 0]
        y = xyz[:, 1]
        z = xyz[:, 2]
        r = torch.sqrt(x**2 + y**2 + z**2)

        zen = torch.arccos(z / r)
        azi = torch.arctan2(y, x)

        return torch.stack([azi, zen], dim=1)

    def training_step(self, batch, batch_idx):
        features, target = batch
        pred_xyz = self.forward(features)

        loss = 1 - self.loss_fn(pred_xyz, target).mean()
        return {"loss": loss}

    def validation_step(self, batch, batch_idx):
        features, target = batch
        pred_xyz = self.forward(features)

        loss = 1 - self.loss_fn(pred_xyz, target).mean()

        pred_angles = self.xyz_to_angles(pred_xyz)
        target_angles = self.xyz_to_angles(target)
        metric = angular_dist_score(pred_angles, target_angles)

        self.log(
            "metric",
            metric,
            prog_bar=True,
            sync_dist=True,
            on_epoch=True,
#             batch_size=self.hparams.batch_size,
        )

    def configure_optimizers(self):
        parameters = add_weight_decay(
            self,
            self.hparams.weight_decay,
            skip_list=["bias", "LayerNorm.bias"],  # , "LayerNorm.weight"],
        )

        opt = torch.optim.AdamW(parameters,
                                lr=self.hparams.learning_rate,
                                eps=self.hparams.eps)

        sch = get_cosine_schedule_with_warmup(
            opt,
            num_warmup_steps=int(self.hparams.warmup * self.hparams.T_max),
            num_training_steps=self.hparams.T_max,
            num_cycles=0.5,  # 1,
            last_epoch=-1,
        )

        return {
            "optimizer": opt,
            "lr_scheduler": {
                "scheduler": sch,
                "interval": "step"
            },
        }

# Train model

In [9]:
dm = StackDataModule(oof_df, batch_size=512)
dm.setup(stage="fit", fold_n=0)

epochs = 5
t_max = dm.train_steps * epochs

model = StackModel(len(oofs), T_max=t_max)

trainer = pl.Trainer(max_epochs=epochs)

trainer.fit(model, datamodule=dm)

GPU available: True (cuda), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


1056768 train and 264192 valid samples


  rank_zero_warn(

  | Name    | Type             | Params
---------------------------------------------
0 | loss_fn | CosineSimilarity | 0     
1 | model   | Sequential       | 18.6 K
---------------------------------------------
18.6 K    Trainable params
0         Non-trainable params
18.6 K    Total params
0.074     Total estimated model params size (MB)


1056768 train and 264192 valid samples


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=5` reached.
