In [1]:
from icecube.models import IceCubeModelEncoderMAT
from icecube.dataset import HuggingFaceDatasetGraphV0
from icecube.utils import collate_fn_graphv0
from datasets import  load_from_disk
from torch.utils.data import DataLoader
import torch
from pathlib import Path
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from torch import nn

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
#md = IceCubeModelEncoderMAT().eval()
#ds = HuggingFaceDatasetGraphV0(load_from_disk('/opt/slh/icecube/data/hf_cashe/batch_1.parquet'))
#dl = DataLoader(ds, batch_size=64, shuffle=True, num_workers=4, collate_fn=collate_fn_graphv0)
#with torch.no_grad():
#    for x in dl:
#        out = md(x)
#        break

In [3]:
def angular_dist_score(az_true, zen_true, az_pred, zen_pred):
    '''
    Angular distance between true and predicted directions.

    Every (azimuth, zenith) pair is interpreted as a cartesian unit vector
    (x = sin(zen)*cos(az), y = sin(zen)*sin(az), z = cos(zen)). The dot
    product of the true and predicted unit vectors equals the cosine of
    the angle between them, so its arccos is the angular distance.

    No averaging is performed: the result holds one distance per input
    direction pair.

    Parameters:
    -----------

    az_true : float (or array thereof)
        true azimuth value(s) in radian
    zen_true : float (or array thereof)
        true zenith value(s) in radian
    az_pred : float (or array thereof)
        predicted azimuth value(s) in radian
    zen_pred : float (or array thereof)
        predicted zenith value(s) in radian

    Returns:
    --------

    dist : float (or array thereof)
        element-wise angular distance(s) in radian

    Raises:
    -------

    ValueError
        if any argument contains a non-finite value
    '''

    # reject NaN / inf up front, checking the arguments in signature order
    for angles in (az_true, zen_true, az_pred, zen_pred):
        if not np.all(np.isfinite(angles)):
            raise ValueError("All arguments must be finite")

    # trigonometric terms of the true direction
    sin_az_t, cos_az_t = np.sin(az_true), np.cos(az_true)
    sin_zen_t, cos_zen_t = np.sin(zen_true), np.cos(zen_true)

    # trigonometric terms of the predicted direction
    sin_az_p, cos_az_p = np.sin(az_pred), np.cos(az_pred)
    sin_zen_p, cos_zen_p = np.sin(zen_pred), np.cos(zen_pred)

    # dot product of the two unit vectors, with the x and y terms factored
    cos_az_delta = cos_az_t*cos_az_p + sin_az_t*sin_az_p
    dot = sin_zen_t*sin_zen_p*cos_az_delta + (cos_zen_t*cos_zen_p)

    # rounding in sin/cos can push the product slightly outside [-1, 1],
    # where arccos is undefined, so clamp it first
    dot = np.clip(dot, -1, 1)

    # back to an angle (in radian)
    return np.abs(np.arccos(dot))

In [4]:
# Experiment whose result directory is listed by the shell command below.
exp_name = 'EXP_07'

# IPython shell escape: `{...}` interpolates the Python expression, so this lists RESULTS/<exp_name>.
!ls {Path('RESULTS/')/exp_name}

EXP_07_0.csv  EXP_07_4.csv  EXP_07_8.csv	  EXP_07_OOF_4.csv.csv
EXP_07_0.pth  EXP_07_4.pth  EXP_07_8.pth	  EXP_07_OOF_5.csv.csv
EXP_07_1.csv  EXP_07_5.csv  EXP_07_9.csv	  EXP_07_OOF_6.csv.csv
EXP_07_1.pth  EXP_07_5.pth  EXP_07_9.pth	  EXP_07_OOF_7.csv.csv
EXP_07_2.csv  EXP_07_6.csv  EXP_07_OOF_0.csv.csv  EXP_07_OOF_8.csv.csv
EXP_07_2.pth  EXP_07_6.pth  EXP_07_OOF_1.csv.csv  EXP_07_OOF_9.csv.csv
EXP_07_3.csv  EXP_07_7.csv  EXP_07_OOF_2.csv.csv
EXP_07_3.pth  EXP_07_7.pth  EXP_07_OOF_3.csv.csv


In [5]:
def read_oof_files(exp_name, epoch = 9):
    """
    Load one out-of-fold prediction file of an experiment and score it.

    All files matching ``*OOF*.csv`` under ``RESULTS/<exp_name>`` are ordered
    by the integer that follows the last underscore of the file name.
    ``epoch`` is then used as a position in that ordered list, so it equals
    the number in the file name only when the files are numbered
    contiguously from 0.

    Parameters:
    -----------

    exp_name : str
        name of the experiment sub-directory inside ``RESULTS/``
    epoch : int
        position of the file to read in the numerically sorted list

    Returns:
    --------

    oof : pandas.DataFrame
        content of the selected file plus a ``metric`` column holding the
        per-row result of ``angular_dist_score``
    """
    def _file_number(path):
        # e.g. stem 'EXP_07_OOF_4.csv' -> '4.csv' -> 4
        return int(path.stem.split('_')[-1].split('.')[0])

    result_dir = Path('RESULTS/')/exp_name
    candidates = sorted(result_dir.glob("*OOF*.csv"), key=_file_number)

    oof = pd.read_csv(candidates[epoch])

    # compare ground truth with prediction row by row
    oof['metric'] = angular_dist_score(
        oof['azimuth_gt'],
        oof['zenith_gt'],
        oof['azimuth_pred'],
        oof['zenith_pred'],
    )
    return oof

def read_logs(exp_name):
    """
    Concatenate the non-OOF csv files of an experiment.

    Every ``*.csv`` file under ``RESULTS/<exp_name>`` whose name does not
    contain ``OOF`` is read, in ascending order of the integer that follows
    the last underscore of the file name, and the resulting frames are
    stacked with ``pd.concat``.

    Parameters:
    -----------

    exp_name : str
        name of the experiment sub-directory inside ``RESULTS/``

    Returns:
    --------

    logs : pandas.DataFrame
        the files' rows stacked in numeric file order; each file keeps its
        own original index
    """
    def _file_number(path):
        # e.g. stem 'EXP_07_3' -> '3' -> 3
        return int(path.stem.split('_')[-1].split('.')[0])

    result_dir = Path('RESULTS/')/exp_name
    log_paths = sorted(
        (path for path in result_dir.glob("*.csv") if 'OOF' not in path.name),
        key=_file_number,
    )
    return pd.concat([pd.read_csv(path) for path in log_paths])

In [6]:
#oof = read_oof_files(exp_name, epoch = 9)

In [7]:
import config

In [8]:
# Show the concatenated per-epoch log files of EXP_07 (the name is hard-coded here, not taken from `exp_name`).
read_logs('EXP_07')

Unnamed: 0,epoch,train_loss,valid_loss,metric
0,0,0.600888,0.56277,1.442924
0,1,0.539214,0.529143,1.333392
0,2,0.52612,0.523607,1.32182
0,3,0.521747,0.519293,1.305128
0,4,0.518047,0.51542,1.293718
0,5,0.514774,0.512305,1.296434
0,6,0.512183,0.512563,1.275622
0,7,0.509884,0.509192,1.283002
0,8,0.507596,0.508071,1.275165
0,9,0.505623,0.505269,1.275138


In [9]:
# Show the concatenated per-epoch log files of EXP_08 for comparison with the EXP_07 table.
read_logs('EXP_08')

Unnamed: 0,epoch,train_loss,valid_loss,metric
0,0,0.598318,0.573669,1.463767
0,1,0.551687,0.543076,1.397443
0,2,0.533885,0.523545,1.331168
0,3,0.520183,0.516296,1.303239
0,4,0.515565,0.513794,1.297275


In [31]:
# Display the BASELINE_EMBED_V3 configuration from the `config` module.
# NOTE(review): `get_config_as_dict` is not defined or imported in any cell visible here, and the
# execution count jumps from 9 to 31 — confirm this cell still works after Restart Kernel -> Run All.
get_config_as_dict(config.BASELINE_EMBED_V3)

{'BATCH_SIZE': 1536,
 'COLLAT_FN': <function icecube.utils.collate_fn_v1(batch)>,
 'DATA_CACHE_DIR': PosixPath('data/hf_cashe'),
 'DEVICE': 'cuda:0',
 'EPOCHS': 10,
 'EXP_NAME': 'EXP_09',
 'FIT_FUNC': <function icecube.utils.fit_shuflle(epochs, model, loss_fn, opt, metric, config, folder='models', exp_name='exp_00', device=None, sched=None, save_md=<class 'icecube.utils.SaveModelEpoch'>)>,
 'FOLDER': PosixPath('RESULTS'),
 'LOSS_FUNC': icecube.models.LogCoshLoss,
 'LR': 0.001,
 'METRIC': <function icecube.utils.get_score(y_hat, y)>,
 'MODEL_NAME': icecube.models.IceCubeModelEncoderSensorEmbeddinngV2,
 'NUM_WORKERS': 8,
 'OPT': torch.optim.adamw.AdamW,
 'PRESISTENT_WORKERS': True,
 'SCHEDULER': <function transformers.optimization.get_cosine_schedule_with_warmup(optimizer: torch.optim.optimizer.Optimizer, num_warmup_steps: int, num_training_steps: int, num_cycles: float = 0.5, last_epoch: int = -1)>,
 'TRN_BATCH_RANGE': (1, 100),
 'TRN_DATASET': icecube.dataset.HuggingFaceDatasetV3,
 'VA