# Experimenting with some pretrained video classification models

In [1]:
import torch
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
import numpy as np
import os
from PIL import Image
import torchvision.models.video as video_models
import torch.nn as nn
import tqdm

In [2]:
# Import the project module under its own name: aliasing it as `Dataset`
# would shadow torch.utils.data.Dataset imported at the top of the notebook.
import video_dataset

# Input locations, relative to the notebook's working directory.
raw_path = '../data/WLASL2000'
instances_path = './preprocessed_labels/asl100/train_instances_fixed_bboxes_short.json'
classes_path = './wlasl_class_list.json'

# Build the training split from the raw videos plus the instance/class files.
train_set = video_dataset.VideoDataset(
  root=raw_path,
  instances_path=instances_path,
  classes_path=classes_path,
  transform=video_dataset.min_transform_rI3d
)

# Sanity check: number of training instances that were picked up.
print(f"Length: {len(train_set)}")

Length: 1442


In [3]:
# Seed PyTorch's global RNG before the shuffling loader is created.
torch.manual_seed(42)

# num_workers=0 is the author's choice so that the manual seed is used.
train_loader = DataLoader(
  dataset=train_set,
  num_workers=0,
  shuffle=True,
  batch_size=2,
)
train_loader

<torch.utils.data.dataloader.DataLoader at 0x7b0d55b396c0>

In [4]:
# Pull a single batch from the loader and inspect the shape of its frame tensor.
batch_iter = iter(train_loader)
frames, label = next(batch_iter)
frames.shape

torch.Size([2, 3, 64, 244, 244])

In [5]:
# Load the pretrained model. The `weights=` enum replaces the deprecated
# `pretrained=True` flag; KINETICS400_V1 is the checkpoint that flag selected.
model = video_models.r3d_18(weights=video_models.R3D_18_Weights.KINETICS400_V1)

# Alter the classifier: swap the final fully connected layer for one sized to
# this task (100 classes, matching the asl100 label file used for training).
num_classes = 100
model.fc = nn.Linear(model.fc.in_features, num_classes)



In [6]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
# Author's note: forcing device = 'cpu' worked.
if torch.cuda.is_available():
  device = 'cuda'
else:
  device = 'cpu'
print(f"Device: {device}")

model = model.to(device)

# Keep every parameter trainable, i.e. fine-tune the whole network.
model.requires_grad_(True)

loss_func = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
  

Device: cuda


In [7]:
def train_model(model, data_loader, optimizer, loss_func, epochs=10, val_loader=None):
  """Train `model` for `epochs` passes over `data_loader`.

  Args:
    model: torch.nn.Module to optimise; batches are moved to its device.
    data_loader: iterable yielding (data, target) batches for training.
    optimizer: optimizer already bound to the model's parameters.
    loss_func: callable taking (output, target) and returning a scalar loss.
    epochs: number of full passes over `data_loader`.
    val_loader: optional iterable of (data, target) batches; when given, the
      average loss on it is computed and printed after every epoch.

  Returns:
    dict with two lists of per-epoch average losses: 'train_loss' and
    'val_loss' (the latter is empty when `val_loader` is None).

  Raises:
    ValueError: if `data_loader` (or a supplied `val_loader`) yields no batches.
  """
  # Follow the model's own device instead of relying on a notebook-level
  # `device` global, so the function works wherever the model was placed.
  first_param = next(model.parameters(), None)
  run_device = first_param.device if first_param is not None else torch.device('cpu')

  history = {'train_loss': [], 'val_loss': []}

  for epoch in tqdm.tqdm(range(epochs), desc="Training R3D"):
    model.train()
    running_loss = 0.0
    num_batches = 0

    for data, target in data_loader:
      data, target = data.to(run_device), target.to(run_device)

      optimizer.zero_grad()
      output = model(data)
      loss = loss_func(output, target)
      loss.backward()
      optimizer.step()

      running_loss += loss.item()
      num_batches += 1

    # Count batches ourselves: avoids a ZeroDivisionError on an empty loader
    # and does not require the loader to implement __len__.
    if num_batches == 0:
      raise ValueError("data_loader yielded no batches")

    train_loss = running_loss / num_batches
    history['train_loss'].append(train_loss)
    print(f'Epoch [{epoch+1}/{epochs}], Average Loss: {train_loss:.4f}')

    if val_loader is not None:
      val_loss = _average_loss(model, val_loader, loss_func, run_device)
      history['val_loss'].append(val_loss)
      print(f'Epoch [{epoch+1}/{epochs}], Validation Loss: {val_loss:.4f}')

  return history


def _average_loss(model, data_loader, loss_func, run_device):
  """Return the mean per-batch loss of `model` over `data_loader` without updating it."""
  was_training = model.training
  model.eval()
  total_loss = 0.0
  num_batches = 0
  try:
    with torch.no_grad():
      for data, target in data_loader:
        data, target = data.to(run_device), target.to(run_device)
        total_loss += loss_func(model(data), target).item()
        num_batches += 1
  finally:
    # Put the model back in whichever mode the caller had it in.
    model.train(was_training)

  if num_batches == 0:
    raise ValueError("val_loader yielded no batches")
  return total_loss / num_batches

In [8]:
import torch

# Environment report: installed PyTorch build and, if a GPU is usable,
# the CUDA toolkit version and the name of device 0.
cuda_ready = torch.cuda.is_available()
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {cuda_ready}")
if cuda_ready:
    print(f"CUDA version: {torch.version.cuda}")
    print(f"GPU device: {torch.cuda.get_device_name(0)}")

PyTorch version: 2.5.1+cu121
CUDA available: True
CUDA version: 12.1
GPU device: NVIDIA GeForce GTX 1080 Ti


In [9]:
# Release unused cached GPU memory held by PyTorch's CUDA allocator.
torch.cuda.empty_cache()

In [11]:
import torch

print(f"PyTorch CUDA version: {torch.version.cuda}")
gpu_visible = torch.cuda.is_available()
print(f"CUDA available: {gpu_visible}")

# Smoke test: multiply two small random matrices on the GPU to confirm that
# basic CUDA kernels run at all.
if gpu_visible:
    x = torch.randn(100, 100).cuda()
    y = torch.randn(100, 100).cuda()
    z = x.mm(y)
    print("Basic GPU operation successful")

PyTorch CUDA version: 12.1
CUDA available: True
Basic GPU operation successful


In [1]:
import torch
import torch.nn as nn

# Isolate the problem: run one standalone 3D convolution forward pass on the GPU.
if torch.cuda.is_available():
    # 3 input channels -> 64 output channels, kernel size 3, padding 1
    conv3d = nn.Conv3d(in_channels=3, out_channels=64, kernel_size=3, padding=1).cuda()

    # Sample input laid out as (batch, channels, depth, height, width)
    x = torch.randn(1, 3, 16, 112, 112).cuda()

    try:
        output = conv3d(x)
    except Exception as e:
        # Report the failure instead of raising so the cell still completes.
        print(f"3D convolution failed: {e}")
    else:
        print("3D convolution test passed")
        print(f"Output shape: {output.shape}")

3D convolution failed: GET was unable to find an engine to execute this computation


In [10]:
# Start fine-tuning with the function's default number of epochs.
train_model(
  model=model,
  data_loader=train_loader,
  optimizer=optimizer,
  loss_func=loss_func,
)

Training R3D:   0%|          | 0/10 [00:00<?, ?it/s]


RuntimeError: GET was unable to find an engine to execute this computation