In [1]:
from models import get_model, avail_models, norm_vals
from video_dataset import get_data_loader, get_wlasl_info
from configs import get_avail_splits
import torch
import gc
import torch.nn as nn
import torchvision.models as models
from torch.profiler import profile, ProfilerActivity, record_function



In [2]:
# Prefer the GPU, but fall back to the CPU so the notebook still runs
# (Restart & Run All) on machines without a CUDA device.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


### Available models

In [3]:
# Architecture names known to the local `models` module; kept in `avail_m`
# because later cells index into it and iterate over it.
avail_m = avail_models()
print(*avail_m, sep=', ')

S3D, R3D_18, R(2+1)D_18, Swin3D_T, Swin3D_S, Swin3D_B, MViTv2_S, MViTv1_B


### Available splits

In [4]:
# Dataset split names exposed by the local `configs` module; a later cell
# uses the first entry to pick the split.
avail_sp = get_avail_splits()
print(*avail_sp, sep=', ')

asl100, asl300


## Input

In [5]:
# Clip geometry and batch size used for profiling.
# A heavier alternative that was tried: fs = 224, nf = 32, bs = 8.
fs, nf, bs = 224, 16, 2  # frame size, number of frames, batch size

# The normalisation constants of the first listed architecture (S3D in the
# output above) are used; per the original note, the exact values should not
# matter for this profiling run.
nvals = norm_vals(avail_m[0])

# Test portion of the first available split (asl100 in the output above).
wlasl_info = get_wlasl_info(avail_sp[0], 'test')

# Only the loader and the class count are needed; the last two return values
# are discarded.
testloader, ncls, _, _ = get_data_loader(
    set_info=wlasl_info,
    frame_size=fs,
    num_frames=nf,
    batch_size=bs,
    mean=nvals['mean'],
    std=nvals['std'],
)

print("Numclasses: ", ncls)

Numclasses:  100


### Get a single batch of videos

In [6]:
# Pull the first batch from the test loader and move its tensors to `device`.
# `.to()` returns a new tensor, so the result is what gets bound to the name.
dicty = next(iter(testloader))
vid0 = dicty["frames"].to(device)
target = dicty["label_num"].to(device)

print(vid0.shape)
print(f"vid0 device: {vid0.device}")  # sanity check: should match `device`

torch.Size([2, 3, 16, 224, 224])
vid0 device: cuda:0


## Get models

In [7]:
# Number of output classes: taken from the data loader (`ncls`) instead of a
# hard-coded 100, so the model heads stay consistent with the chosen split.
nc = ncls
dropout = 0.0  # no dropout

# One (architecture name, model instance) tuple per available architecture.
all_models = []
for arch in avail_m:
    model = get_model(arch, nc, dropout)
    all_models.append((arch, model))



## From PyTorch Docs:

In [8]:
def profile_it(model, inputs, title, row_limit=200):
    """Profile a single forward pass of ``model`` and print the operator table.

    The pass is profiled on CUDA if available, otherwise on XPU, otherwise on
    the CPU alone. The printed table is sorted by the total time of the
    selected device.

    Args:
        model: ``torch.nn.Module`` to profile. It is moved to the selected
            device in place; its train/eval mode is restored afterwards.
        inputs: Tensor passed as the only positional argument to ``model``.
        title: Label for the profiled region; it appears in the table as
            ``"<title>_inference"``.
        row_limit: Maximum number of rows printed from the averaged table.

    Returns:
        None. The table is written to stdout.
    """
    activities = [ProfilerActivity.CPU]
    # `torch.xpu` does not exist in every PyTorch build, so look it up safely.
    xpu = getattr(torch, "xpu", None)
    if torch.cuda.is_available():
        device = "cuda"
        activities.append(ProfilerActivity.CUDA)
    elif xpu is not None and xpu.is_available():
        device = "xpu"
        activities.append(ProfilerActivity.XPU)
    else:
        # Do not call sys.exit() here: inside a notebook it raises SystemExit
        # in the kernel. Profile on the CPU instead.
        print("Neither CUDA nor XPU devices are available; profiling on CPU only")
        device = "cpu"

    sort_by_keyword = device + "_time_total"

    model = model.to(device)
    inputs = inputs.to(device)

    # Profile a true inference pass: eval mode and no autograd bookkeeping.
    # The previous mode is restored even if the forward pass raises.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            with profile(activities=activities, record_shapes=True, profile_memory=True) as prof:
                with record_function(f"{title}_inference"):
                    model(inputs)
    finally:
        model.train(was_training)

    print(prof.key_averages().table(sort_by=sort_by_keyword, row_limit=row_limit))

In [9]:
# Disabled driver loop. When uncommented it calls profile_it on every
# (arch, model) pair in all_models with the vid0 batch, then prints a line of
# dashes between the tables.
# NOTE(review): the separator width 250-22 is not explained in this notebook;
# the next cell only confirms that a 22-dash string has length 22 — confirm intent.
# for arch, model in all_models:
#     profile_it(model, vid0, arch)
#     print("\n"*2, "-"*(250-22), "\n"*2)

In [10]:
# Scratch check: count the characters in a literal run of dashes.
_dashes = '----------------------'
print(len(_dashes))

22
