In [4]:
# Enable IPython's autoreload extension; mode 2 reloads all imported modules
# before each cell runs, so edits to the project code are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2
import sys
# Extend the module search path with the project checkout (presumably what
# makes the `icecube` package imports in the next cell resolvable -- confirm).
sys.path.append('/opt/slh/icecube/')

import os
# Restrict CUDA to physical GPUs 1 and 0, enumerated in that order. This is set
# in the first cell, ahead of the cell that imports the torch-based modules.
os.environ["CUDA_VISIBLE_DEVICES"]="1,0"
# Alternative settings kept for reference (single GPU / NCCL peer-to-peer off).
#os.environ["CUDA_VISIBLE_DEVICES"]="1"
#os.environ["NCCL_P2P_DISABLE"] = "1"

In [5]:
import polars as pl
import pandas as pd
import gc
import os
import numpy as np
from icecube.fastai_fix import *
from tqdm.notebook import tqdm
from icecube.data_train_v3 import RandomChunkSampler,LenMatchBatchSampler,IceCubeCache, DeviceDataLoader
from icecube.loss import loss, loss_vms
from icecube.models import EncoderWithDirectionReconstructionV18
from fastxtend.vision.all import EMACallback

  warn(f"Failed to load image Python extension: {e}")


graphnet: INFO     2023-03-27 22:32:01 - get_logger - Writing log to logs/graphnet_20230327-223201.log


In [6]:
# Run name: used as the output directory (created below) and as the Learner's `path`.
OUT = 'V18FT2_64'
# Data root handed to every IceCubeCache dataset.
PATH = '../data/'
# Forwarded as `selection=` to IceCubeCache.
SELECTION = 'total'

# NOTE(review): not referenced in the visible cells -- the DataLoaders below
# hard-code num_workers=4 (train) and num_workers=0 (validation).
NUM_WORKERS = 24
# Seed passed to seed_everything().
SEED = 2023
# Batch size (768); used by both LenMatchBatchSamplers and in the
# GradientAccumulation `n_acc` expression.
bs = 1024-256
# Forwarded as `L=` to IceCubeCache; presumably the per-event sequence length
# cap -- TODO confirm.
# NOTE(review): if the star import from icecube.fastai_fix re-exports fastai's
# names, this rebinds fastcore's `L` list class in the notebook namespace.
L = 196

def seed_everything(seed):
    """Seed every random number generator used by this notebook.

    Seeds Python's ``random`` module, NumPy's global RNG and PyTorch (CPU and
    every CUDA device), exports ``PYTHONHASHSEED`` and configures cuDNN for
    reproducible kernel selection.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)
    # The running interpreter fixed its hash seed at start-up; this export only
    # takes effect for Python processes launched afterwards.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Training runs on more than one GPU, so seed all visible devices
    # rather than only the current one.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # benchmark=True lets cuDNN auto-tune and possibly pick a different
    # algorithm on each run, which defeats the deterministic flag above.
    torch.backends.cudnn.benchmark = False

# Seed all RNGs before any dataset, sampler or model is constructed.
seed_everything(SEED)
# Create the run's output directory; exist_ok=True makes re-running this cell safe.
os.makedirs(OUT, exist_ok=True)

In [7]:
def WrapperAdamW(param_groups,**kwargs):
    """Build a fastai-compatible optimizer around ``torch.optim.AdamW``.

    Args:
        param_groups: Parameter groups to optimize, as supplied by the caller
            of the optimizer factory.
        **kwargs: Hyper-parameters for ``torch.optim.AdamW`` (e.g. ``lr``,
            ``eps``). They are forwarded through ``OptimWrapper``; previously
            they were accepted but silently discarded, so a bound ``eps``
            never reached the optimizer.

    Returns:
        The ``OptimWrapper`` instance wrapping the constructed AdamW optimizer.
    """
    return OptimWrapper(param_groups,torch.optim.AdamW,**kwargs)



In [8]:
# Run tag, identical to the output directory name.
fname = OUT

# ---- Training data ---------------------------------------------------------
# The cache is opened twice with the same settings: the full dataset, and a
# mask_only=True variant that is handed to the sampler.
ds_train = IceCubeCache(
    PATH,
    mode='train',
    L=L,
    selection=SELECTION,
    reduce_size=0.125,
)
ds_train_len = IceCubeCache(
    PATH,
    mode='train',
    L=L,
    reduce_size=0.125,
    selection=SELECTION,
    mask_only=True,
)
sampler_train = RandomChunkSampler(ds_train_len, chunks=ds_train.chunks)
len_sampler_train = LenMatchBatchSampler(
    sampler_train,
    batch_size=bs,
    drop_last=True,
)
dl_train = DeviceDataLoader(
    torch.utils.data.DataLoader(
        ds_train,
        batch_sampler=len_sampler_train,
        num_workers=4,
        persistent_workers=True,
    )
)

# ---- Validation data -------------------------------------------------------
# Same layout as above, but iterated sequentially and without dropping the
# final partial batch.
ds_val = IceCubeCache(
    PATH,
    mode='eval',
    L=L,
    selection=SELECTION,
)
ds_val_len = IceCubeCache(
    PATH,
    mode='eval',
    L=L,
    selection=SELECTION,
    mask_only=True,
)
sampler_val = torch.utils.data.SequentialSampler(ds_val_len)
len_sampler_val = LenMatchBatchSampler(
    sampler_val,
    batch_size=bs,
    drop_last=False,
)
dl_val = DeviceDataLoader(
    torch.utils.data.DataLoader(
        ds_val,
        batch_sampler=len_sampler_val,
        num_workers=0,
    )
)

data = DataLoaders(dl_train, dl_val)

# ---- Model -----------------------------------------------------------------
# Start from the weights of an earlier run, then wrap the model for multi-GPU
# execution and move it to the GPU.
model = EncoderWithDirectionReconstructionV18()
model.load_state_dict(
    torch.load('/opt/slh/icecube/hb_training_loop/V18FT2/models/model_0.pth')
)
model = nn.DataParallel(model).cuda()

# ---- Learner ---------------------------------------------------------------
# NOTE(review): stock fastai's GradientAccumulation treats `n_acc` as a number
# of samples rather than a number of batches -- confirm which semantics the
# class in scope here uses, since 4096//bs is a batch count.
learn = Learner(
    data,
    model,
    path=OUT,
    loss_func=loss_vms,
    cbs=[
        GradientClip(3.0),
        CSVLogger(),
        EMACallback(),
        SaveModelCallback(monitor='loss', comp=np.less, every_epoch=True),
        GradientAccumulation(n_acc=4096//bs),
    ],
    metrics=[loss],
    opt_func=partial(WrapperAdamW, eps=1e-7),
).to_fp16()




In [9]:
# Display the first training sample to sanity-check the dataset output.
ds_train[0]

({'sensor_id': tensor([3918, 4157, 3520, 5041, 2948,  860, 2440, 1743, 3609, 5057, 5057, 2977,
          5059, 3496, 3161, 2959, 1397, 1970, 3387, 1583, 1940, 1241,  558,  557,
          1405,  557,  558,  557,  557, 3050,  553,  972,  973, 2261,  975,  560,
           554, 3276, 4831, 4571, 3520, 3699,  300,  613, 3438, 2421, 3609, 3115,
          5057, 4528, 3496, 2448, 3289, 3050, 4904, 1970, 3452,   48, 3267, 3267,
           104,    0,    0,    0]),
  'time': tensor([-0.1357, -0.1295, -0.1169, -0.1112, -0.0649, -0.0625, -0.0572, -0.0507,
          -0.0476, -0.0440, -0.0426, -0.0418, -0.0044, -0.0008,  0.0086,  0.0214,
           0.0283,  0.0324,  0.0341,  0.0362,  0.0406,  0.0472,  0.0734,  0.0735,
           0.0738,  0.0742,  0.0780,  0.0792,  0.0812,  0.0857,  0.0869,  0.0908,
           0.0924,  0.0924,  0.0939,  0.1027,  0.1087,  0.1334,  0.1402,  0.1428,
           0.1508,  0.1533,  0.1736,  0.1781,  0.1867,  0.1878,  0.1884,  0.1974,
           0.1989,  0.2005,  0.2094,  0.2