In [1]:
import sys
import os 
sys.path.insert(1, os.path.realpath(os.path.pardir))

import numpy as np
from pathlib import Path

import wandb
import torch
import torch.nn as nn
import torch.nn.functional as F

import pytorch_lightning as pl
from pytorch_lightning.loggers import WandbLogger
from pytorch_lightning.callbacks import ModelCheckpoint

from utils import data_utils
from utils import losses, hand_visualize

from models import HVATNet_v2


## Data preprocessing

In [2]:
class AttrDict(dict):
    def __init__(self, *args, **kwargs):
        super(AttrDict, self).__init__(*args, **kwargs)
        self.__dict__ = self

In [3]:
config = dict(
    WANDB_NOTES = 'HVATNet v3(simple) Matvey + ds 8',
    # data = '/home/user/common/koval_alvi/myo_to_hand_pose/data/anna_12_08',
    # data = '/home/user/common/koval_alvi/myo_to_hand_pose/data/experiment_v2/study_amp_Andrey_05_11_22',
    # data = '/home/user/common/koval_alvi/myo_to_hand_pose/data/experiment_v2/study_health_Anna_20_12_22',
    data = '/home/user/common/koval_alvi/myo_to_hand_pose/data/experiment_v2/health_right_matvey_26_01_23',


    original_fps = 200,  # TODO describtion
    delay_ms = 0,  # Shift vr vs EMG parameter. ms dealy between emg and VR.
    start_crop_ms = 5*1000,  # bad values in the beginning of recordign in ms to delete.
    window_size = 512,

    max_epochs = 500,
    samples_per_epoch = 100000 ,#160000, 
    train_bs = 256,
    val_bs = 32,
    device = [3],
    optimizer_params = dict(lr=1e-4,
                            wd=1e-5)
)
config = AttrDict(config)

In [4]:
import audiomentations as A
augment = A.Compose([
    # A.Gain(min_gain_in_db=-0.05, max_gain_in_db=0.05, p=0.3),
    # A.GainTransition(
    #     min_gain_in_db = -0.5,
    #     max_gain_in_db =  0.5,
    #     min_duration = 10,
    #     max_duration = 100,
    #     duration_unit = "samples",
    #     p=0.3),
    A.AddGaussianNoise(min_amplitude=0.01, max_amplitude=0.2, p=0.5),
    # data_utils.SpatialRotation(min_angle=1, max_angle=10, p=0.5)
])

x = torch.zeros(8, 512)
augmented_samples = augment(samples=x, sample_rate=200) # sahpe -> [8, time]
print(augmented_samples.shape)

(8, 512)


In [5]:
# Init train and val dataset and dataloaders
all_data_path = Path(config.data)
train_folder = all_data_path / 'train'
val_folder = all_data_path / 'test'

train_dataset = data_utils.create_dataset(data_folder=train_folder,
                                          original_fps=config.original_fps,
                                          delay_ms=config.delay_ms,
                                          start_crop_ms=config.start_crop_ms,
                                          window_size=config.window_size,
                                          random_sampling=True,
                                          samples_per_epoch=config.samples_per_epoch, 
                                          transform = augment, 
                                          down_sample_target=8)

val_dataset = data_utils.create_dataset(data_folder=val_folder,
                                        original_fps=config.original_fps,
                                        delay_ms=config.delay_ms,
                                        start_crop_ms=config.start_crop_ms,
                                        window_size=config.window_size,
                                        random_sampling=False,
                                        samples_per_epoch=None, 
                                        transform=None)



Number of all files with moves: 58


100%|██████████| 58/58 [02:46<00:00,  2.87s/it]


Total len: 100000
Number of all files with moves: 15


100%|██████████| 15/15 [00:42<00:00,  2.86s/it]

Total len: 307





# Init model

In [6]:
class LitHVATNet_v2(pl.LightningModule):
    def __init__(self, model, loss_function, lr, wd):
        """
        Wrapper of model with loss function calculatino and initing optimizer.
        """
        super().__init__()
        self.model = model
        self.lr = lr
        self.wd = wd
        self.loss_function = loss_function # should compare quats. 
    
    def forward(self, x):
        x = self.model(x)
        return x
    
    def configure_optimizers(self):
        optimizer = torch.optim.AdamW(self.parameters(), 
                                      lr=self.lr, 
                                      weight_decay = self.wd)
        return optimizer
    
    def training_step(self, train_batch, batch_idx):
        x, y = train_batch
        multi_scale_quats = self.model(x)
        
        # loss on full size
        full_size_pred = multi_scale_quats[-1]
        loss_dict = self.loss_function(full_size_pred, y)
        
        # average loss on multi scale outputs
        # for interpolation purpose 
        batch, n_bones, n_quats, time = y.shape
        y_3d = y.reshape(batch, -1, time)
        
        ms_losses = []
        for feat in multi_scale_quats[:-1]:  
            
            new_time_size = feat.shape[-1]
            y_ = F.interpolate(y_3d, size=new_time_size, mode='linear', align_corners=False)
            y_ = y_.reshape(batch, n_bones, n_quats, new_time_size)
            
            loss_dict_tmp = self.loss_function(feat, y_)
            ms_losses.append(loss_dict_tmp['total_loss'])
        
        ms_loss = torch.mean(torch.stack(ms_losses))
        loss_dict['total_loss'] += ms_loss
        
        for k, v in loss_dict.items():
            self.log("train_" + str(k), v, on_step=False, on_epoch=True)
        return loss_dict['total_loss']
            
    def validation_step(self, val_batch, batch_idx):
        
        if trainer.global_step == 0: 
            wandb.define_metric('val_angle_degree', summary='min')
        
        x, y = val_batch
        full_size_pred = self.model(x)
        loss_dict = self.loss_function(full_size_pred, y)
                
        for k, v in loss_dict.items():
            self.log("val_" + str(k), v, on_step=False, on_epoch=True)
        return loss_dict['angle_degree']
    
    
    def validation_epoch_end(self, validation_step_outputs):
        val_current_loss = torch.mean(torch.stack(validation_step_outputs))
        self.val_current_loss = val_current_loss
        print(f'current step {self.current_epoch} val_current_loss {self.val_current_loss}')

In [7]:
from pytorch_lightning.callbacks import Callback

class MovementsWandb(Callback):
    def __init__(self):
        self.best_val_loss = 10000000
        
    def on_validation_epoch_end(self, trainer, pl_module):
        
        ### do not calculate epoch in sanity check. 
        if trainer.state.stage == 'sanity_check':
            return

        # check better or not
        if pl_module.val_current_loss < self.best_val_loss: 
            
            print('Best val')
            print('MAKE VIS', pl_module.current_epoch)
            print('new best val score', pl_module.val_current_loss) 
            
            self.best_val_loss = pl_module.val_current_loss
            
            hand_visualize.visualize_val_moves(model = pl_module.model, 
                                               val_exps_data = trainer.val_dataloaders[0].dataset.exps_data, 
                                               epoch = pl_module.current_epoch,
                                               device = pl_module.device)
                

                

## Init model

In [8]:
hvatnet_v2_params =dict(n_electrodes=8, n_channels_out=64,
                        n_res_blocks=3, n_blocks_per_layer=2,
                        n_filters=128, kernel_size=3,
                        strides=(2, 2, 2, 4),
                        dilation=2)

model = HVATNet_v2.HVATNetv2(**hvatnet_v2_params)
model_pl = LitHVATNet_v2(model,
                       loss_function=losses.make_loss_function(),
                       **config.optimizer_params)


### Start to train model.

In [None]:

train_dataloader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=config.train_bs,
                                               shuffle=True,
                                               num_workers=0)

val_dataloader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=config.val_bs,
                                             shuffle=False,
                                             num_workers=0)


    
    
wandb_logger = WandbLogger(entity="koval_alvi",
                           project="ALVI_hvatnet_v2", 
                           log_model = 'all', 
                           save_code=True,
                           notes = config.WANDB_NOTES, 
                           dir = 'lightning_logs')

checkpoint_callback = ModelCheckpoint(monitor='val_angle_degree',
                                  save_top_k=5,
                                  save_last=True,
                                  filename='{epoch:02d}-{val_angle_degree:.3f}',
                                  verbose=True,
                                  mode='min')

trainer = pl.Trainer(max_epochs=config.max_epochs,
                     accelerator='gpu', 
                     devices= config.device,
                     logger=wandb_logger,
                     callbacks=[MovementsWandb(), checkpoint_callback])

trainer.fit(model_pl, train_dataloader, val_dataloader)


Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mkoval_alvi[0m. Use [1m`wandb login --relogin`[0m to force relogin


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]

  | Name  | Type      | Params
------------------------------------
0 | model | HVATNetv2 | 3.6 M 
------------------------------------
3.6 M     Trainable params
0         Non-trainable params
3.6 M     Total params
14.431    Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


current step 0 val_current_loss 125.7862548828125


  rank_zero_warn(


Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

current step 0 val_current_loss 47.0617561340332
Best val
MAKE VIS 0
new best val score tensor(47.0618, device='cuda:3')


Epoch 0, global step 391: 'val_angle_degree' reached 45.89272 (best 45.89272), saving model to 'lightning_logs/ALVI_hvatnet_v2/uuhkw3j4/checkpoints/epoch=00-val_angle_degree=45.893.ckpt' as top 5


Validation: 0it [00:00, ?it/s]

current step 1 val_current_loss 47.03540802001953
Best val
MAKE VIS 1
new best val score tensor(47.0354, device='cuda:3')


Epoch 1, global step 782: 'val_angle_degree' reached 45.89280 (best 45.89272), saving model to 'lightning_logs/ALVI_hvatnet_v2/uuhkw3j4/checkpoints/epoch=01-val_angle_degree=45.893.ckpt' as top 5


Validation: 0it [00:00, ?it/s]

current step 2 val_current_loss 46.92982482910156
Best val
MAKE VIS 2
new best val score tensor(46.9298, device='cuda:3')


Epoch 2, global step 1173: 'val_angle_degree' reached 45.62522 (best 45.62522), saving model to 'lightning_logs/ALVI_hvatnet_v2/uuhkw3j4/checkpoints/epoch=02-val_angle_degree=45.625.ckpt' as top 5


Validation: 0it [00:00, ?it/s]

current step 3 val_current_loss 46.83994674682617
Best val
MAKE VIS 3
new best val score tensor(46.8399, device='cuda:3')


Epoch 3, global step 1564: 'val_angle_degree' reached 45.56966 (best 45.56966), saving model to 'lightning_logs/ALVI_hvatnet_v2/uuhkw3j4/checkpoints/epoch=03-val_angle_degree=45.570.ckpt' as top 5


Validation: 0it [00:00, ?it/s]

current step 4 val_current_loss 45.306861877441406
Best val
MAKE VIS 4
new best val score tensor(45.3069, device='cuda:3')


Epoch 4, global step 1955: 'val_angle_degree' reached 44.32302 (best 44.32302), saving model to 'lightning_logs/ALVI_hvatnet_v2/uuhkw3j4/checkpoints/epoch=04-val_angle_degree=44.323.ckpt' as top 5


Validation: 0it [00:00, ?it/s]

current step 5 val_current_loss 31.459386825561523
Best val
MAKE VIS 5
new best val score tensor(31.4594, device='cuda:3')


Epoch 5, global step 2346: 'val_angle_degree' reached 31.48644 (best 31.48644), saving model to 'lightning_logs/ALVI_hvatnet_v2/uuhkw3j4/checkpoints/epoch=05-val_angle_degree=31.486.ckpt' as top 5


Validation: 0it [00:00, ?it/s]

current step 6 val_current_loss 24.690092086791992
Best val
MAKE VIS 6
new best val score tensor(24.6901, device='cuda:3')


Epoch 6, global step 2737: 'val_angle_degree' reached 24.17962 (best 24.17962), saving model to 'lightning_logs/ALVI_hvatnet_v2/uuhkw3j4/checkpoints/epoch=06-val_angle_degree=24.180.ckpt' as top 5


Validation: 0it [00:00, ?it/s]

current step 7 val_current_loss 22.830915451049805
Best val
MAKE VIS 7
new best val score tensor(22.8309, device='cuda:3')


Epoch 7, global step 3128: 'val_angle_degree' reached 22.19693 (best 22.19693), saving model to 'lightning_logs/ALVI_hvatnet_v2/uuhkw3j4/checkpoints/epoch=07-val_angle_degree=22.197.ckpt' as top 5


Validation: 0it [00:00, ?it/s]

current step 8 val_current_loss 20.74629783630371
Best val
MAKE VIS 8
new best val score tensor(20.7463, device='cuda:3')


Epoch 8, global step 3519: 'val_angle_degree' reached 20.16396 (best 20.16396), saving model to 'lightning_logs/ALVI_hvatnet_v2/uuhkw3j4/checkpoints/epoch=08-val_angle_degree=20.164.ckpt' as top 5


Validation: 0it [00:00, ?it/s]

Epoch 9, global step 3910: 'val_angle_degree' reached 20.38314 (best 20.16396), saving model to 'lightning_logs/ALVI_hvatnet_v2/uuhkw3j4/checkpoints/epoch=09-val_angle_degree=20.383.ckpt' as top 5


current step 9 val_current_loss 20.842117309570312


Validation: 0it [00:00, ?it/s]

current step 10 val_current_loss 19.564634323120117
Best val
MAKE VIS 10
new best val score tensor(19.5646, device='cuda:3')


Epoch 10, global step 4301: 'val_angle_degree' reached 18.98575 (best 18.98575), saving model to 'lightning_logs/ALVI_hvatnet_v2/uuhkw3j4/checkpoints/epoch=10-val_angle_degree=18.986.ckpt' as top 5


Validation: 0it [00:00, ?it/s]

current step 11 val_current_loss 19.126667022705078
Best val
MAKE VIS 11
new best val score tensor(19.1267, device='cuda:3')


Epoch 11, global step 4692: 'val_angle_degree' reached 18.62726 (best 18.62726), saving model to 'lightning_logs/ALVI_hvatnet_v2/uuhkw3j4/checkpoints/epoch=11-val_angle_degree=18.627.ckpt' as top 5


Validation: 0it [00:00, ?it/s]

current step 12 val_current_loss 18.74958038330078
Best val
MAKE VIS 12
new best val score tensor(18.7496, device='cuda:3')


Epoch 12, global step 5083: 'val_angle_degree' reached 18.21168 (best 18.21168), saving model to 'lightning_logs/ALVI_hvatnet_v2/uuhkw3j4/checkpoints/epoch=12-val_angle_degree=18.212.ckpt' as top 5


Validation: 0it [00:00, ?it/s]

current step 13 val_current_loss 18.38480567932129
Best val
MAKE VIS 13
new best val score tensor(18.3848, device='cuda:3')


Epoch 13, global step 5474: 'val_angle_degree' reached 17.88100 (best 17.88100), saving model to 'lightning_logs/ALVI_hvatnet_v2/uuhkw3j4/checkpoints/epoch=13-val_angle_degree=17.881.ckpt' as top 5


Validation: 0it [00:00, ?it/s]

current step 14 val_current_loss 17.658870697021484
Best val
MAKE VIS 14
new best val score tensor(17.6589, device='cuda:3')


Epoch 14, global step 5865: 'val_angle_degree' reached 17.14894 (best 17.14894), saving model to 'lightning_logs/ALVI_hvatnet_v2/uuhkw3j4/checkpoints/epoch=14-val_angle_degree=17.149.ckpt' as top 5


Validation: 0it [00:00, ?it/s]

current step 15 val_current_loss 17.196638107299805
Best val
MAKE VIS 15
new best val score tensor(17.1966, device='cuda:3')


Epoch 15, global step 6256: 'val_angle_degree' reached 16.56062 (best 16.56062), saving model to 'lightning_logs/ALVI_hvatnet_v2/uuhkw3j4/checkpoints/epoch=15-val_angle_degree=16.561.ckpt' as top 5


Validation: 0it [00:00, ?it/s]

current step 16 val_current_loss 16.983551025390625
Best val
MAKE VIS 16
new best val score tensor(16.9836, device='cuda:3')


Epoch 16, global step 6647: 'val_angle_degree' reached 16.49111 (best 16.49111), saving model to 'lightning_logs/ALVI_hvatnet_v2/uuhkw3j4/checkpoints/epoch=16-val_angle_degree=16.491.ckpt' as top 5


Validation: 0it [00:00, ?it/s]

current step 17 val_current_loss 16.612085342407227
Best val
MAKE VIS 17
new best val score tensor(16.6121, device='cuda:3')


Epoch 17, global step 7038: 'val_angle_degree' reached 15.97735 (best 15.97735), saving model to 'lightning_logs/ALVI_hvatnet_v2/uuhkw3j4/checkpoints/epoch=17-val_angle_degree=15.977.ckpt' as top 5


Validation: 0it [00:00, ?it/s]

current step 18 val_current_loss 15.948538780212402
Best val
MAKE VIS 18
new best val score tensor(15.9485, device='cuda:3')


Epoch 18, global step 7429: 'val_angle_degree' reached 15.26906 (best 15.26906), saving model to 'lightning_logs/ALVI_hvatnet_v2/uuhkw3j4/checkpoints/epoch=18-val_angle_degree=15.269.ckpt' as top 5


Validation: 0it [00:00, ?it/s]

Epoch 19, global step 7820: 'val_angle_degree' reached 15.99865 (best 15.26906), saving model to 'lightning_logs/ALVI_hvatnet_v2/uuhkw3j4/checkpoints/epoch=19-val_angle_degree=15.999.ckpt' as top 5


current step 19 val_current_loss 16.254785537719727


Validation: 0it [00:00, ?it/s]

current step 20 val_current_loss 14.823880195617676
Best val
MAKE VIS 20
new best val score tensor(14.8239, device='cuda:3')


Epoch 20, global step 8211: 'val_angle_degree' reached 14.27758 (best 14.27758), saving model to 'lightning_logs/ALVI_hvatnet_v2/uuhkw3j4/checkpoints/epoch=20-val_angle_degree=14.278.ckpt' as top 5


Validation: 0it [00:00, ?it/s]

current step 21 val_current_loss 14.630722045898438
Best val
MAKE VIS 21
new best val score tensor(14.6307, device='cuda:3')


Epoch 21, global step 8602: 'val_angle_degree' reached 14.21766 (best 14.21766), saving model to 'lightning_logs/ALVI_hvatnet_v2/uuhkw3j4/checkpoints/epoch=21-val_angle_degree=14.218.ckpt' as top 5


Validation: 0it [00:00, ?it/s]

current step 22 val_current_loss 13.782177925109863
Best val
MAKE VIS 22
new best val score tensor(13.7822, device='cuda:3')


Epoch 22, global step 8993: 'val_angle_degree' reached 13.38622 (best 13.38622), saving model to 'lightning_logs/ALVI_hvatnet_v2/uuhkw3j4/checkpoints/epoch=22-val_angle_degree=13.386.ckpt' as top 5


Validation: 0it [00:00, ?it/s]

current step 23 val_current_loss 13.325823783874512
Best val
MAKE VIS 23
new best val score tensor(13.3258, device='cuda:3')


Epoch 23, global step 9384: 'val_angle_degree' reached 13.06724 (best 13.06724), saving model to 'lightning_logs/ALVI_hvatnet_v2/uuhkw3j4/checkpoints/epoch=23-val_angle_degree=13.067.ckpt' as top 5


Validation: 0it [00:00, ?it/s]

Epoch 24, global step 9775: 'val_angle_degree' reached 13.53482 (best 13.06724), saving model to 'lightning_logs/ALVI_hvatnet_v2/uuhkw3j4/checkpoints/epoch=24-val_angle_degree=13.535.ckpt' as top 5


current step 24 val_current_loss 13.757010459899902


Validation: 0it [00:00, ?it/s]

Epoch 25, global step 10166: 'val_angle_degree' reached 13.09083 (best 13.06724), saving model to 'lightning_logs/ALVI_hvatnet_v2/uuhkw3j4/checkpoints/epoch=25-val_angle_degree=13.091.ckpt' as top 5


current step 25 val_current_loss 13.352932929992676


Validation: 0it [00:00, ?it/s]

Epoch 26, global step 10557: 'val_angle_degree' reached 13.24848 (best 13.06724), saving model to 'lightning_logs/ALVI_hvatnet_v2/uuhkw3j4/checkpoints/epoch=26-val_angle_degree=13.248.ckpt' as top 5


current step 26 val_current_loss 13.457635879516602


Validation: 0it [00:00, ?it/s]

current step 27 val_current_loss 12.888032913208008
Best val
MAKE VIS 27
new best val score tensor(12.8880, device='cuda:3')


Epoch 27, global step 10948: 'val_angle_degree' reached 12.55699 (best 12.55699), saving model to 'lightning_logs/ALVI_hvatnet_v2/uuhkw3j4/checkpoints/epoch=27-val_angle_degree=12.557.ckpt' as top 5


Validation: 0it [00:00, ?it/s]

Epoch 28, global step 11339: 'val_angle_degree' reached 13.29447 (best 12.55699), saving model to 'lightning_logs/ALVI_hvatnet_v2/uuhkw3j4/checkpoints/epoch=28-val_angle_degree=13.294.ckpt' as top 5


current step 28 val_current_loss 13.4887056350708


Validation: 0it [00:00, ?it/s]

Epoch 29, global step 11730: 'val_angle_degree' was not in top 5


current step 29 val_current_loss 14.03478717803955


Validation: 0it [00:00, ?it/s]

Epoch 30, global step 12121: 'val_angle_degree' reached 12.69326 (best 12.55699), saving model to 'lightning_logs/ALVI_hvatnet_v2/uuhkw3j4/checkpoints/epoch=30-val_angle_degree=12.693.ckpt' as top 5


current step 30 val_current_loss 12.917909622192383


Validation: 0it [00:00, ?it/s]

current step 31 val_current_loss 12.812926292419434
Best val
MAKE VIS 31
new best val score tensor(12.8129, device='cuda:3')


Epoch 31, global step 12512: 'val_angle_degree' reached 12.60738 (best 12.55699), saving model to 'lightning_logs/ALVI_hvatnet_v2/uuhkw3j4/checkpoints/epoch=31-val_angle_degree=12.607.ckpt' as top 5


Validation: 0it [00:00, ?it/s]

current step 32 val_current_loss 12.611587524414062
Best val
MAKE VIS 32
new best val score tensor(12.6116, device='cuda:3')


Epoch 32, global step 12903: 'val_angle_degree' reached 12.45057 (best 12.45057), saving model to 'lightning_logs/ALVI_hvatnet_v2/uuhkw3j4/checkpoints/epoch=32-val_angle_degree=12.451.ckpt' as top 5
IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Validation: 0it [00:00, ?it/s]

Epoch 35, global step 14076: 'val_angle_degree' reached 12.30032 (best 12.23199), saving model to 'lightning_logs/ALVI_hvatnet_v2/uuhkw3j4/checkpoints/epoch=35-val_angle_degree=12.300.ckpt' as top 5


current step 35 val_current_loss 12.460963249206543


IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Validation: 0it [00:00, ?it/s]

current step 38 val_current_loss 11.742871284484863
Best val
MAKE VIS 38
new best val score tensor(11.7429, device='cuda:3')


Epoch 38, global step 15249: 'val_angle_degree' reached 11.55571 (best 11.55571), saving model to 'lightning_logs/ALVI_hvatnet_v2/uuhkw3j4/checkpoints/epoch=38-val_angle_degree=11.556.ckpt' as top 5
IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Validation: 0it [00:00, ?it/s]

current step 41 val_current_loss 11.671786308288574
Best val
MAKE VIS 41
new best val score tensor(11.6718, device='cuda:3')


Epoch 41, global step 16422: 'val_angle_degree' reached 11.48958 (best 11.48958), saving model to 'lightning_logs/ALVI_hvatnet_v2/uuhkw3j4/checkpoints/epoch=41-val_angle_degree=11.490.ckpt' as top 5
IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Validation: 0it [00:00, ?it/s]

current step 44 val_current_loss 11.182710647583008
Best val
MAKE VIS 44
new best val score tensor(11.1827, device='cuda:3')


Epoch 44, global step 17595: 'val_angle_degree' reached 10.99718 (best 10.99718), saving model to 'lightning_logs/ALVI_hvatnet_v2/uuhkw3j4/checkpoints/epoch=44-val_angle_degree=10.997.ckpt' as top 5
IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Validation: 0it [00:00, ?it/s]

Epoch 47, global step 18768: 'val_angle_degree' reached 11.44436 (best 10.99718), saving model to 'lightning_logs/ALVI_hvatnet_v2/uuhkw3j4/checkpoints/epoch=47-val_angle_degree=11.444.ckpt' as top 5


current step 47 val_current_loss 11.57456111907959


IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Validation: 0it [00:00, ?it/s]

Epoch 50, global step 19941: 'val_angle_degree' was not in top 5


current step 50 val_current_loss 11.613268852233887


IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Validation: 0it [00:00, ?it/s]

Epoch 52, global step 20723: 'val_angle_degree' reached 11.20746 (best 10.95915), saving model to 'lightning_logs/ALVI_hvatnet_v2/uuhkw3j4/checkpoints/epoch=52-val_angle_degree=11.207.ckpt' as top 5


current step 52 val_current_loss 11.314867973327637


IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Validation: 0it [00:00, ?it/s]

Epoch 55, global step 21896: 'val_angle_degree' reached 11.10752 (best 10.72174), saving model to 'lightning_logs/ALVI_hvatnet_v2/uuhkw3j4/checkpoints/epoch=55-val_angle_degree=11.108.ckpt' as top 5


current step 55 val_current_loss 11.202975273132324


IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

Epoch 57, global step 22678: 'val_angle_degree' reached 10.78743 (best 10.72174), saving model to 'lightning_logs/ALVI_hvatnet_v2/uuhkw3j4/checkpoints/epoch=57-val_angle_degree=10.787.ckpt' as top 5


current step 57 val_current_loss 10.916472434997559


IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Validation: 0it [00:00, ?it/s]

Epoch 60, global step 23851: 'val_angle_degree' was not in top 5


current step 60 val_current_loss 11.340132713317871


IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Validation: 0it [00:00, ?it/s]

current step 62 val_current_loss 10.54152774810791
Best val
MAKE VIS 62
new best val score tensor(10.5415, device='cuda:3')


Epoch 62, global step 24633: 'val_angle_degree' reached 10.39977 (best 10.39977), saving model to 'lightning_logs/ALVI_hvatnet_v2/uuhkw3j4/checkpoints/epoch=62-val_angle_degree=10.400.ckpt' as top 5
IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Validation: 0it [00:00, ?it/s]

Epoch 66, global step 26197: 'val_angle_degree' was not in top 5


current step 66 val_current_loss 11.331929206848145


Validation: 0it [00:00, ?it/s]

Epoch 67, global step 26588: 'val_angle_degree' was not in top 5


current step 67 val_current_loss 10.906124114990234


IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Validation: 0it [00:00, ?it/s]

Epoch 71, global step 28152: 'val_angle_degree' was not in top 5


current step 71 val_current_loss 10.890891075134277


Validation: 0it [00:00, ?it/s]

Epoch 72, global step 28543: 'val_angle_degree' was not in top 5


current step 72 val_current_loss 11.05428695678711


IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Validation: 0it [00:00, ?it/s]

Epoch 78, global step 30889: 'val_angle_degree' was not in top 5


current step 78 val_current_loss 11.079652786254883


Validation: 0it [00:00, ?it/s]

Epoch 79, global step 31280: 'val_angle_degree' was not in top 5


current step 79 val_current_loss 10.549773216247559


Validation: 0it [00:00, ?it/s]

Epoch 80, global step 31671: 'val_angle_degree' was not in top 5


current step 80 val_current_loss 10.493996620178223


IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Validation: 0it [00:00, ?it/s]

Epoch 86, global step 34017: 'val_angle_degree' reached 10.26670 (best 10.04903), saving model to 'lightning_logs/ALVI_hvatnet_v2/uuhkw3j4/checkpoints/epoch=86-val_angle_degree=10.267.ckpt' as top 5


current step 86 val_current_loss 10.30827522277832


Validation: 0it [00:00, ?it/s]

Epoch 87, global step 34408: 'val_angle_degree' was not in top 5


current step 87 val_current_loss 11.0405855178833


Validation: 0it [00:00, ?it/s]

Epoch 88, global step 34799: 'val_angle_degree' was not in top 5


current step 88 val_current_loss 11.0951509475708


IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Validation: 0it [00:00, ?it/s]

Epoch 94, global step 37145: 'val_angle_degree' was not in top 5


current step 94 val_current_loss 10.61704158782959


Validation: 0it [00:00, ?it/s]

current step 95 val_current_loss 10.034555435180664
Best val
MAKE VIS 95
new best val score tensor(10.0346, device='cuda:3')


Epoch 95, global step 37536: 'val_angle_degree' reached 9.98075 (best 9.98075), saving model to 'lightning_logs/ALVI_hvatnet_v2/uuhkw3j4/checkpoints/epoch=95-val_angle_degree=9.981.ckpt' as top 5


Validation: 0it [00:00, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Validation: 0it [00:00, ?it/s]

Epoch 101, global step 39882: 'val_angle_degree' reached 10.15405 (best 9.98075), saving model to 'lightning_logs/ALVI_hvatnet_v2/uuhkw3j4/checkpoints/epoch=101-val_angle_degree=10.154.ckpt' as top 5


current step 101 val_current_loss 10.19616413116455


Validation: 0it [00:00, ?it/s]

Epoch 102, global step 40273: 'val_angle_degree' was not in top 5


current step 102 val_current_loss 10.7739839553833


IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Validation: 0it [00:00, ?it/s]

Epoch 104, global step 41055: 'val_angle_degree' was not in top 5


current step 104 val_current_loss 10.518534660339355


IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Validation: 0it [00:00, ?it/s]

Epoch 107, global step 42228: 'val_angle_degree' was not in top 5


current step 107 val_current_loss 10.52778148651123


Validation: 0it [00:00, ?it/s]

Epoch 108, global step 42619: 'val_angle_degree' was not in top 5


current step 108 val_current_loss 10.548069953918457


IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Validation: 0it [00:00, ?it/s]

Epoch 110, global step 43401: 'val_angle_degree' was not in top 5


current step 110 val_current_loss 10.554145812988281


IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Validation: 0it [00:00, ?it/s]

Epoch 114, global step 44965: 'val_angle_degree' was not in top 5


current step 114 val_current_loss 10.873306274414062


IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Validation: 0it [00:00, ?it/s]

Epoch 116, global step 45747: 'val_angle_degree' was not in top 5


current step 116 val_current_loss 10.62854290008545


IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Validation: 0it [00:00, ?it/s]

Epoch 120, global step 47311: 'val_angle_degree' was not in top 5


current step 120 val_current_loss 10.808842658996582


IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Validation: 0it [00:00, ?it/s]

Epoch 122, global step 48093: 'val_angle_degree' reached 10.15738 (best 9.98075), saving model to 'lightning_logs/ALVI_hvatnet_v2/uuhkw3j4/checkpoints/epoch=122-val_angle_degree=10.157.ckpt' as top 5


current step 122 val_current_loss 10.202067375183105


IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Validation: 0it [00:00, ?it/s]

Epoch 123, global step 48484: 'val_angle_degree' was not in top 5


current step 123 val_current_loss 10.428929328918457


Validation: 0it [00:00, ?it/s]

Epoch 124, global step 48875: 'val_angle_degree' was not in top 5


current step 124 val_current_loss 10.48351001739502


Validation: 0it [00:00, ?it/s]

Epoch 125, global step 49266: 'val_angle_degree' reached 10.08693 (best 9.98075), saving model to 'lightning_logs/ALVI_hvatnet_v2/uuhkw3j4/checkpoints/epoch=125-val_angle_degree=10.087.ckpt' as top 5


current step 125 val_current_loss 10.153624534606934


Validation: 0it [00:00, ?it/s]

Epoch 126, global step 49657: 'val_angle_degree' was not in top 5


current step 126 val_current_loss 10.518051147460938


Validation: 0it [00:00, ?it/s]

Epoch 127, global step 50048: 'val_angle_degree' was not in top 5


current step 127 val_current_loss 10.512259483337402


Validation: 0it [00:00, ?it/s]

Epoch 128, global step 50439: 'val_angle_degree' reached 10.12889 (best 9.98075), saving model to 'lightning_logs/ALVI_hvatnet_v2/uuhkw3j4/checkpoints/epoch=128-val_angle_degree=10.129.ckpt' as top 5


current step 128 val_current_loss 10.169486045837402


Validation: 0it [00:00, ?it/s]

Epoch 129, global step 50830: 'val_angle_degree' was not in top 5


current step 129 val_current_loss 10.371506690979004


Validation: 0it [00:00, ?it/s]

current step 130 val_current_loss 9.948555946350098
Best val
MAKE VIS 130
new best val score tensor(9.9486, device='cuda:3')


Epoch 130, global step 51221: 'val_angle_degree' reached 9.89810 (best 9.89810), saving model to 'lightning_logs/ALVI_hvatnet_v2/uuhkw3j4/checkpoints/epoch=130-val_angle_degree=9.898.ckpt' as top 5


Validation: 0it [00:00, ?it/s]

Epoch 131, global step 51612: 'val_angle_degree' reached 10.05786 (best 9.89810), saving model to 'lightning_logs/ALVI_hvatnet_v2/uuhkw3j4/checkpoints/epoch=131-val_angle_degree=10.058.ckpt' as top 5


current step 131 val_current_loss 10.083069801330566


Validation: 0it [00:00, ?it/s]

Epoch 132, global step 52003: 'val_angle_degree' was not in top 5


current step 132 val_current_loss 10.68303108215332


Validation: 0it [00:00, ?it/s]

Epoch 133, global step 52394: 'val_angle_degree' reached 10.02060 (best 9.89810), saving model to 'lightning_logs/ALVI_hvatnet_v2/uuhkw3j4/checkpoints/epoch=133-val_angle_degree=10.021.ckpt' as top 5


current step 133 val_current_loss 10.104257583618164


Validation: 0it [00:00, ?it/s]

Epoch 134, global step 52785: 'val_angle_degree' was not in top 5


current step 134 val_current_loss 10.225348472595215


Validation: 0it [00:00, ?it/s]

Epoch 135, global step 53176: 'val_angle_degree' was not in top 5


current step 135 val_current_loss 10.200400352478027


Validation: 0it [00:00, ?it/s]

Epoch 136, global step 53567: 'val_angle_degree' was not in top 5


current step 136 val_current_loss 10.449039459228516


Validation: 0it [00:00, ?it/s]

Epoch 137, global step 53958: 'val_angle_degree' was not in top 5


current step 137 val_current_loss 10.167105674743652


Validation: 0it [00:00, ?it/s]

Epoch 138, global step 54349: 'val_angle_degree' was not in top 5


current step 138 val_current_loss 10.545858383178711


Validation: 0it [00:00, ?it/s]

Epoch 139, global step 54740: 'val_angle_degree' reached 10.03132 (best 9.89810), saving model to 'lightning_logs/ALVI_hvatnet_v2/uuhkw3j4/checkpoints/epoch=139-val_angle_degree=10.031.ckpt' as top 5


current step 139 val_current_loss 10.066678047180176


Validation: 0it [00:00, ?it/s]

Epoch 140, global step 55131: 'val_angle_degree' was not in top 5


current step 140 val_current_loss 10.119349479675293


Validation: 0it [00:00, ?it/s]

Epoch 141, global step 55522: 'val_angle_degree' reached 9.95345 (best 9.89810), saving model to 'lightning_logs/ALVI_hvatnet_v2/uuhkw3j4/checkpoints/epoch=141-val_angle_degree=9.953.ckpt' as top 5


current step 141 val_current_loss 10.026567459106445


Validation: 0it [00:00, ?it/s]

Epoch 142, global step 55913: 'val_angle_degree' reached 10.01596 (best 9.89810), saving model to 'lightning_logs/ALVI_hvatnet_v2/uuhkw3j4/checkpoints/epoch=142-val_angle_degree=10.016.ckpt' as top 5


current step 142 val_current_loss 10.071444511413574


Validation: 0it [00:00, ?it/s]

Epoch 143, global step 56304: 'val_angle_degree' was not in top 5


current step 143 val_current_loss 10.084784507751465


Validation: 0it [00:00, ?it/s]

Epoch 144, global step 56695: 'val_angle_degree' reached 9.90477 (best 9.89810), saving model to 'lightning_logs/ALVI_hvatnet_v2/uuhkw3j4/checkpoints/epoch=144-val_angle_degree=9.905.ckpt' as top 5


current step 144 val_current_loss 9.958037376403809


Validation: 0it [00:00, ?it/s]

Epoch 145, global step 57086: 'val_angle_degree' was not in top 5


current step 145 val_current_loss 10.20355224609375


Validation: 0it [00:00, ?it/s]

Epoch 146, global step 57477: 'val_angle_degree' was not in top 5


current step 146 val_current_loss 10.17936897277832


Validation: 0it [00:00, ?it/s]

Epoch 147, global step 57868: 'val_angle_degree' reached 9.92525 (best 9.89810), saving model to 'lightning_logs/ALVI_hvatnet_v2/uuhkw3j4/checkpoints/epoch=147-val_angle_degree=9.925.ckpt' as top 5


current step 147 val_current_loss 9.980928421020508


Validation: 0it [00:00, ?it/s]

Epoch 148, global step 58259: 'val_angle_degree' reached 9.96722 (best 9.89810), saving model to 'lightning_logs/ALVI_hvatnet_v2/uuhkw3j4/checkpoints/epoch=148-val_angle_degree=9.967.ckpt' as top 5


current step 148 val_current_loss 9.992541313171387


Validation: 0it [00:00, ?it/s]

Epoch 149, global step 58650: 'val_angle_degree' was not in top 5


current step 149 val_current_loss 10.060954093933105


Validation: 0it [00:00, ?it/s]

Epoch 150, global step 59041: 'val_angle_degree' was not in top 5


current step 150 val_current_loss 10.309061050415039


Validation: 0it [00:00, ?it/s]

Epoch 151, global step 59432: 'val_angle_degree' reached 9.96171 (best 9.89810), saving model to 'lightning_logs/ALVI_hvatnet_v2/uuhkw3j4/checkpoints/epoch=151-val_angle_degree=9.962.ckpt' as top 5


current step 151 val_current_loss 10.00363826751709


Validation: 0it [00:00, ?it/s]

current step 152 val_current_loss 9.943392753601074
Best val
MAKE VIS 152
new best val score tensor(9.9434, device='cuda:3')


Epoch 152, global step 59823: 'val_angle_degree' reached 9.87803 (best 9.87803), saving model to 'lightning_logs/ALVI_hvatnet_v2/uuhkw3j4/checkpoints/epoch=152-val_angle_degree=9.878.ckpt' as top 5


Validation: 0it [00:00, ?it/s]

Epoch 153, global step 60214: 'val_angle_degree' was not in top 5


current step 153 val_current_loss 10.22290325164795


Validation: 0it [00:00, ?it/s]

Epoch 154, global step 60605: 'val_angle_degree' was not in top 5


current step 154 val_current_loss 10.319147109985352


Validation: 0it [00:00, ?it/s]

Epoch 155, global step 60996: 'val_angle_degree' was not in top 5


current step 155 val_current_loss 10.37865161895752


Validation: 0it [00:00, ?it/s]

Epoch 156, global step 61387: 'val_angle_degree' reached 9.94604 (best 9.87803), saving model to 'lightning_logs/ALVI_hvatnet_v2/uuhkw3j4/checkpoints/epoch=156-val_angle_degree=9.946.ckpt' as top 5


current step 156 val_current_loss 10.01175594329834


Validation: 0it [00:00, ?it/s]

Epoch 157, global step 61778: 'val_angle_degree' was not in top 5


current step 157 val_current_loss 10.130391120910645


Validation: 0it [00:00, ?it/s]

Epoch 158, global step 62169: 'val_angle_degree' was not in top 5


current step 158 val_current_loss 10.3457670211792


Validation: 0it [00:00, ?it/s]

Epoch 159, global step 62560: 'val_angle_degree' was not in top 5


current step 159 val_current_loss 10.269943237304688


Validation: 0it [00:00, ?it/s]

Epoch 160, global step 62951: 'val_angle_degree' was not in top 5


current step 160 val_current_loss 10.443506240844727


Validation: 0it [00:00, ?it/s]

Epoch 161, global step 63342: 'val_angle_degree' was not in top 5


current step 161 val_current_loss 10.667309761047363


Validation: 0it [00:00, ?it/s]

Epoch 162, global step 63733: 'val_angle_degree' was not in top 5


current step 162 val_current_loss 10.292311668395996


Validation: 0it [00:00, ?it/s]

current step 163 val_current_loss 9.900887489318848
Best val
MAKE VIS 163
new best val score tensor(9.9009, device='cuda:3')


Epoch 163, global step 64124: 'val_angle_degree' reached 9.85081 (best 9.85081), saving model to 'lightning_logs/ALVI_hvatnet_v2/uuhkw3j4/checkpoints/epoch=163-val_angle_degree=9.851.ckpt' as top 5


Validation: 0it [00:00, ?it/s]

Epoch 164, global step 64515: 'val_angle_degree' reached 9.90045 (best 9.85081), saving model to 'lightning_logs/ALVI_hvatnet_v2/uuhkw3j4/checkpoints/epoch=164-val_angle_degree=9.900.ckpt' as top 5


current step 164 val_current_loss 9.966626167297363


Validation: 0it [00:00, ?it/s]

Epoch 165, global step 64906: 'val_angle_degree' was not in top 5


current step 165 val_current_loss 10.403631210327148


Validation: 0it [00:00, ?it/s]

Epoch 166, global step 65297: 'val_angle_degree' was not in top 5


current step 166 val_current_loss 10.3676118850708


Validation: 0it [00:00, ?it/s]

Epoch 167, global step 65688: 'val_angle_degree' was not in top 5


current step 167 val_current_loss 10.336065292358398


Validation: 0it [00:00, ?it/s]

Epoch 168, global step 66079: 'val_angle_degree' was not in top 5


current step 168 val_current_loss 10.239297866821289


Validation: 0it [00:00, ?it/s]

Epoch 169, global step 66470: 'val_angle_degree' was not in top 5


current step 169 val_current_loss 10.043107986450195


Validation: 0it [00:00, ?it/s]

Epoch 170, global step 66861: 'val_angle_degree' reached 9.85638 (best 9.85081), saving model to 'lightning_logs/ALVI_hvatnet_v2/uuhkw3j4/checkpoints/epoch=170-val_angle_degree=9.856.ckpt' as top 5


current step 170 val_current_loss 9.9238862991333


Validation: 0it [00:00, ?it/s]

Epoch 171, global step 67252: 'val_angle_degree' was not in top 5


current step 171 val_current_loss 10.101106643676758


Validation: 0it [00:00, ?it/s]

Epoch 172, global step 67643: 'val_angle_degree' was not in top 5


current step 172 val_current_loss 9.987802505493164


Validation: 0it [00:00, ?it/s]

Epoch 173, global step 68034: 'val_angle_degree' was not in top 5


current step 173 val_current_loss 10.122692108154297


Validation: 0it [00:00, ?it/s]

Epoch 174, global step 68425: 'val_angle_degree' was not in top 5


current step 174 val_current_loss 10.057827949523926


Validation: 0it [00:00, ?it/s]

Epoch 175, global step 68816: 'val_angle_degree' was not in top 5


current step 175 val_current_loss 10.171180725097656


Validation: 0it [00:00, ?it/s]

Epoch 176, global step 69207: 'val_angle_degree' was not in top 5


current step 176 val_current_loss 10.094216346740723


Validation: 0it [00:00, ?it/s]

Epoch 177, global step 69598: 'val_angle_degree' was not in top 5


current step 177 val_current_loss 10.118185997009277


Validation: 0it [00:00, ?it/s]

Epoch 178, global step 69989: 'val_angle_degree' was not in top 5


current step 178 val_current_loss 10.778505325317383


Validation: 0it [00:00, ?it/s]

Epoch 179, global step 70380: 'val_angle_degree' was not in top 5


current step 179 val_current_loss 10.118425369262695


Validation: 0it [00:00, ?it/s]

Epoch 180, global step 70771: 'val_angle_degree' was not in top 5


current step 180 val_current_loss 10.317471504211426


Validation: 0it [00:00, ?it/s]

Epoch 181, global step 71162: 'val_angle_degree' was not in top 5


current step 181 val_current_loss 10.215670585632324


Validation: 0it [00:00, ?it/s]

Epoch 182, global step 71553: 'val_angle_degree' was not in top 5


current step 182 val_current_loss 10.074783325195312


Validation: 0it [00:00, ?it/s]

Epoch 183, global step 71944: 'val_angle_degree' was not in top 5


current step 183 val_current_loss 9.976941108703613


Validation: 0it [00:00, ?it/s]

Epoch 184, global step 72335: 'val_angle_degree' was not in top 5


current step 184 val_current_loss 10.080373764038086


Validation: 0it [00:00, ?it/s]

Epoch 185, global step 72726: 'val_angle_degree' was not in top 5


current step 185 val_current_loss 10.030488014221191


Validation: 0it [00:00, ?it/s]

Epoch 186, global step 73117: 'val_angle_degree' was not in top 5


current step 186 val_current_loss 10.094258308410645


Validation: 0it [00:00, ?it/s]

Epoch 187, global step 73508: 'val_angle_degree' was not in top 5


current step 187 val_current_loss 10.126398086547852


Validation: 0it [00:00, ?it/s]

Epoch 188, global step 73899: 'val_angle_degree' was not in top 5


current step 188 val_current_loss 10.671302795410156


Validation: 0it [00:00, ?it/s]

Epoch 189, global step 74290: 'val_angle_degree' was not in top 5


current step 189 val_current_loss 10.460368156433105


Validation: 0it [00:00, ?it/s]

Epoch 190, global step 74681: 'val_angle_degree' was not in top 5


current step 190 val_current_loss 9.947291374206543


Validation: 0it [00:00, ?it/s]

Epoch 191, global step 75072: 'val_angle_degree' was not in top 5


current step 191 val_current_loss 9.99838924407959


Validation: 0it [00:00, ?it/s]

Epoch 192, global step 75463: 'val_angle_degree' was not in top 5


current step 192 val_current_loss 10.043913841247559


Validation: 0it [00:00, ?it/s]

Epoch 193, global step 75854: 'val_angle_degree' was not in top 5


current step 193 val_current_loss 10.208563804626465


### Inference and model uploading

In [None]:
# reference can be retrieved in artifacts panel
# "VERSION" can be a version (ex: "v2") or an alias ("latest or "best")
checkpoint_reference = "koval_alvi/alvi labs hand prediction/model-1wsrrin6:v19"

# download checkpoint locally (if not already cached)
run = wandb.init(project="alvi labs hand prediction")
artifact = run.use_artifact(checkpoint_reference, type="model")
artifact_dir = artifact.download()

# load checkpoint
model = model.load_state_dict(torch.load(Path(artifact_dir) / "model.ckpt"))

In [None]:
weights = torch.load(Path(artifact_dir) / "model.ckpt")['state_dict']

model_pl.load_state_dict(weights)

In [None]:
model_pl