In [1]:
from models import get_model, avail_models, norm_vals
from video_dataset import get_data_loader, get_wlasl_info
from configs import get_avail_splits
import torch
import gc
import torch.nn as nn
import torchvision.models as models
from torch.profiler import profile, ProfilerActivity, record_function



In [2]:
device = torch.device('cuda')

### Available models

In [3]:
avail_m = avail_models()
print(', '.join(avail_m))

S3D, R3D_18, R(2+1)D_18, Swin3D_T, Swin3D_S, Swin3D_B, MViTv2_S, MViTv1_B


### Available splits

In [4]:
avail_sp = get_avail_splits()
print(', '.join(avail_sp))

asl100, asl300


### Initial test 

using one model on asl100 test split

In [5]:
s3d = avail_m[0]
asl100 = avail_sp[0]

## Input

In [6]:
fs = 224 #frame size
nf = 32 #num frames
bs = 8 #batch size

nvals = norm_vals(s3d) #normalisation won't make a difference in this case

wlasl_info = get_wlasl_info(asl100, 'test')

testloader, ncls, _, _ = get_data_loader(
    mean=nvals['mean'],
    std=nvals['std'],
    frame_size=fs,
    num_frames=nf,
    set_info=wlasl_info,
    batch_size=bs
)

print("Numclasses: ", ncls)

Numclasses:  100


### Get a single video

In [7]:
dicty = next(iter(testloader))
vid0, target = dicty["frames"], dicty["label_num"]
print(vid0.shape)

torch.Size([8, 3, 32, 224, 224])


## First test on S3D

In [8]:
nc = 100 #num classes
dropout = 0.0 #no dropout
model = get_model(s3d, nc, dropout)

In [9]:
# print(model)

In [10]:
activities = [ProfilerActivity.CPU]
if torch.cuda.is_available():
    device = "cuda"
    activities += [ProfilerActivity.CUDA]
elif torch.xpu.is_available():
    device = "xpu"
    activities += [ProfilerActivity.XPU]
else:
    print(
        "Neither CUDA nor XPU devices are available to demonstrate profiling on acceleration devices"
    )
    import sys

    sys.exit(0)

sort_by_keyword = device + "_time_total"

model = model.to(device)
inputs = vid0.to(device)

with profile(activities=activities, record_shapes=True, profile_memory=True) as prof:
    with record_function("model_inference"):
        model(inputs)

print(prof.key_averages().table(sort_by=sort_by_keyword, row_limit=10))

[W1102 17:57:11.895975124 kineto_shim.cpp:415] Adding profiling metadata requires using torch.profiler with Kineto support (USE_KINETO=1)
[W1102 17:57:11.895995231 kineto_shim.cpp:415] Adding profiling metadata requires using torch.profiler with Kineto support (USE_KINETO=1)


---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                             Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg       CPU Mem  Self CPU Mem      CUDA Mem  Self CUDA Mem    # of Calls  
---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                  model_inference         3.44%       8.727ms       100.00%     253.653ms     253.653ms       1.677ms         0.66%     253.694ms     253.694ms           0 b           0 b           0 b      -7.18 Gb             1  
                     aten::conv3d         0.19%     473.969us        5

# Profiling function