In [1]:
import torch
from torch import nn,utils
import lightning as L
from transformer.transformer import CARL
import sys
sys.path.append("/home/c1l1mo/projects/VideoAlignment/")
from loss.scl import SCL
import yaml
from easydict import EasyDict as Edict
from dataset import construct_dataloader
from dataset.penn_action import PennAction
import os

# Filesystem locations are resolved once here so the notebook can be pointed at
# another checkout or dataset mount without editing the loading logic below.
# The environment variables are optional overrides; the defaults are the paths
# this notebook was originally written against.
PROJECT_ROOT = os.environ.get("VIDEOALIGNMENT_ROOT", "/home/c1l1mo/projects/VideoAlignment/")
DATASET_ROOT = os.environ.get("VIDEOALIGNMENT_DATASETS", "/home/c1l1mo/datasets")
CONFIG_PATH = os.path.join(PROJECT_ROOT, "result/scl_penn_action/config.yaml")

# Parse the saved experiment configuration and expose it with attribute access.
with open(CONFIG_PATH, 'r') as config_file:
    config_dict = yaml.safe_load(config_file)
cfg = Edict(config_dict)

# The dataset path stored in the YAML is joined onto the local dataset root.
# Note: os.path.join returns the second argument unchanged if it is absolute.
cfg.PATH_TO_DATASET = os.path.join(DATASET_ROOT, cfg.PATH_TO_DATASET)

  from .autonotebook import tqdm as notebook_tqdm


In [9]:
# Build the model and the loss object from the same loaded configuration.
# `carl` is called on the video batch in LitCARL.training_step.
carl = CARL(cfg)
# `scl` supplies `compute_loss`, which LitCARL.training_step calls on the embeddings.
scl = SCL(cfg)


class LitCARL(L.LightningModule):
    """Lightning wrapper that trains a CARL model with the SCL loss.

    Args:
        carl: Model invoked as ``carl(video, video_mask=..., skeleton=...)``
            to produce embeddings.
        scl: Loss object exposing ``compute_loss(embs, seq_len, steps, mask)``.
        config: Optional configuration object. When omitted, the
            notebook-level ``cfg`` is used, which is what the original
            two-argument construction relied on.
    """

    def __init__(self, carl, scl, config=None):
        super().__init__()
        self.carl = carl
        self.scl = scl
        # Keep the configuration on the instance so the optimizer and the
        # dataloader hooks do not reach for a notebook global at call time.
        self.cfg = cfg if config is None else config

    def training_step(self, batch, batch_idx):
        """Compute and log the SCL loss for one batch.

        ``batch`` is an 8-tuple; only the video, sequence lengths, steps, mask
        and skeleton entries are used here. Returns the loss value produced by
        ``scl.compute_loss``.
        """
        _original_video, video, _label, seq_len, steps, mask, _name, skeleton = batch
        # `video` is unpacked as a 6-D tensor; its first two axes (batch and
        # views) are folded into a single leading axis before the forward pass.
        _, _, num_steps, c, h, w = video.shape
        video = video.view(-1, num_steps, c, h, w)
        embs = self.carl(video, video_mask=mask, skeleton=skeleton)
        # Loss inputs are moved onto the embeddings' device before the call.
        target_device = embs.device
        loss = self.scl.compute_loss(
            embs,
            seq_len.to(target_device),
            steps.to(target_device),
            mask.to(target_device),
        )
        # Plain metric key: the previous "train_loss: " carried a trailing
        # colon and space into the logger's metric name.
        self.log("train_loss", loss)
        return loss

    def configure_optimizers(self):
        """Return an AdamW optimizer over all parameters, configured from ``OPTIMIZER``."""
        optimizer = torch.optim.AdamW(
            self.parameters(),
            lr=self.cfg.OPTIMIZER.LR.INITIAL_LR,
            betas=(0.9, 0.999),
            weight_decay=self.cfg.OPTIMIZER.WEIGHT_DECAY,
        )
        return optimizer

    def train_dataloader(self):
        """Return a DataLoader over the PennAction training split.

        The DataLoader is created with its default arguments.
        """
        dataset = PennAction(self.cfg, "train", mode="train", algo="scl")
        data_loader = utils.data.DataLoader(dataset)
        return data_loader
# Wrap the model and loss built above into the Lightning module that is handed to the trainer.
litcarl = LitCARL(carl,scl)

In [11]:
# Short run: at most 100 training batches for a single epoch on GPU devices 0 and 1.
trainer = L.Trainer(limit_train_batches=100,max_epochs=1,accelerator='gpu',devices=[0,1])
# No dataloader is passed explicitly. The name `data_loader` is not defined at
# notebook scope in any visible cell (it is only a local inside
# LitCARL.train_dataloader), so referencing it here fails on a fresh kernel.
# Without `train_dataloaders`, the trainer uses the module's train_dataloader() hook.
trainer.fit(model=litcarl)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA RTX A6000') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6]

  | Name | Type | Params
------------------------------
0 | carl | CARL | 27.4 M
------------------------------
27.4 M    Trainable params
0         Non-trainable params
27.4 M    Total params
109.554   Total estimated model params size (MB)
2023-11-07 15:33:18.151109: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from diff

Epoch 0:   2%|████                                                                                                                                                                                                          | 2/100 [00:13<10:41,  6.55s/it, loss=1.6, v_num=15]

  rank_zero_warn("Detected KeyboardInterrupt, attempting graceful shutdown...")
