# Going to try train with more classes as the model was overfitting

In [1]:
import torch
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
import torch.nn.functional as F

from PIL import Image
import torchvision.models.video as video_models
import torch.nn as nn
import tqdm
import matplotlib.pyplot as plt
from torch.utils.tensorboard import SummaryWriter
import json

from train import train_model_4
import video_dataset as Dataset
# from test import test_model


2025-07-20 02:50:02.930283: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-07-20 02:50:02.939423: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-07-20 02:50:02.949610: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-07-20 02:50:02.952745: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-07-20 02:50:02.960596: I tensorflow/core/platform/cpu_feature_guar

In [2]:
train_inst_path = './preprocessed/labels/asl300/train_instances_fixed_frange_bboxes_len.json'
train_clss_path = './preprocessed/labels/asl300/train_classes_fixed_frange_bboxes_len.json'
val_inst_path = './preprocessed/labels/asl300/val_instances_fixed_frange_bboxes_len.json'
val_clss_path = './preprocessed/labels/asl300/val_classes_fixed_frange_bboxes_len.json'
raw_path = '../data/WLASL2000'
transform0 = transforms.Compose([
    transforms.Lambda(lambda x: Dataset.correct_num_frames(x, 16)),  # (T, C, H, W)
    transforms.Lambda(lambda x: x.float() / 255.0),  # Convert to float and normalize to [0,1]
    transforms.Lambda(lambda x: F.interpolate(x, size=(112, 112), mode='bilinear', align_corners=False)),  # Resize after normalization
    transforms.Lambda(lambda x: Dataset.normalise(x,  mean=[0.43216, 0.394666, 0.37645], std=[0.22803, 0.22145, 0.216989])),  # Normalize per channel
    transforms.Lambda(lambda x: x.permute(1, 0, 2, 3)),  # (T, C, H, W) -> (C, T, H, W)
]) #The transform that got the best result
train_set = Dataset.VideoDataset(
    root=raw_path,
    instances_path=train_inst_path,
    classes_path=train_clss_path,
    transform=transform0
)
val_set = Dataset.VideoDataset(
    root=raw_path,
    instances_path=val_inst_path,
    classes_path=val_clss_path,
    transform=transform0
)
print(f"Number of training samples: {len(train_set)}")
print(f"Number of training classes: {len(set(train_set.classes))}")
print(f"Number of validation samples: {len(val_set)}")
print(f"Number of validation classes: {len(set(val_set.classes))}")

Number of training samples: 3549
Number of training classes: 300
Number of validation samples: 901
Number of validation classes: 300


In [3]:
# torch.manual_seed(42) #probably doesnt work because of numworkers
train_loader = DataLoader(
  train_set,
  batch_size=32, 
  shuffle=True,
  num_workers=2, #this was 4 but I previously had issues with the computer crashing (though this was with more data)
  drop_last=True
)

print(f'Train loader:\n{train_loader}')

val_loader = DataLoader(
  val_set,
  batch_size=32,
  shuffle=False,
  drop_last=False,
  num_workers=2
)

print(f"Validation loader:\n{val_loader}")


Train loader:
<torch.utils.data.dataloader.DataLoader object at 0x7a12d8101cc0>
Validation loader:
<torch.utils.data.dataloader.DataLoader object at 0x7a12d8100b20>


In [4]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")


Using device: cuda


In [5]:
model = video_models.r3d_18(pretrained=True)
num_classes = 300
model.fc = nn.Linear(model.fc.in_features, num_classes)

for param in model.parameters():
  param.requires_grad = True
  
for layer_name in ['layer4', 'fc']:
  if hasattr(model, layer_name):
    for param in getattr(model, layer_name).parameters():
      param.requires_grad = True
      
for name, param in model.named_parameters():
  if param.requires_grad:
    print(f"Training parameter: {name}")
  else:
    print(f"Freezing parameter: {name}")
    
for name, module in model.named_modules():
  if isinstance(module, (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d)):
    # Check if this BatchNorm is in a frozen layer
    is_in_frozen_layer = not any(unfreeze_layer in name for unfreeze_layer in ['layer4', 'fc'])
    
    if is_in_frozen_layer:
      module.eval()
      module.track_running_stats = False
      print(f"Set {name} to eval mode (frozen layer)")



Training parameter: stem.0.weight
Training parameter: stem.1.weight
Training parameter: stem.1.bias
Training parameter: layer1.0.conv1.0.weight
Training parameter: layer1.0.conv1.1.weight
Training parameter: layer1.0.conv1.1.bias
Training parameter: layer1.0.conv2.0.weight
Training parameter: layer1.0.conv2.1.weight
Training parameter: layer1.0.conv2.1.bias
Training parameter: layer1.1.conv1.0.weight
Training parameter: layer1.1.conv1.1.weight
Training parameter: layer1.1.conv1.1.bias
Training parameter: layer1.1.conv2.0.weight
Training parameter: layer1.1.conv2.1.weight
Training parameter: layer1.1.conv2.1.bias
Training parameter: layer2.0.conv1.0.weight
Training parameter: layer2.0.conv1.1.weight
Training parameter: layer2.0.conv1.1.bias
Training parameter: layer2.0.conv2.0.weight
Training parameter: layer2.0.conv2.1.weight
Training parameter: layer2.0.conv2.1.bias
Training parameter: layer2.0.downsample.0.weight
Training parameter: layer2.0.downsample.1.weight
Training parameter: la

In [6]:
trainable_params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.Adam(trainable_params, lr=1e-4)  #this learning rate might be too high
#TODO : try this code
# optimizer = torch.optim.Adam([
#     {'params': model.layer4.parameters(), 'lr': 1e-4},
#     {'params': model.fc.parameters(), 'lr': 1e-3}  # Higher LR for new classifier
# ])
print(len(trainable_params), "trainable parameters")
loss_func = nn.CrossEntropyLoss() #TODO : try Contrastive loss

62 trainable parameters


In [7]:
schedular = torch.optim.lr_scheduler.ReduceLROnPlateau(
  optimizer,
  mode='min',
  factor=0.1,
  patience=15,
) #not sure if i should use the schedular, but will try

In [8]:
train_losses, val_losses = train_model_4(
  model=model,
  train_loader=train_loader,
  optimizer=optimizer,
  loss_func=loss_func,
  epochs=100,
  val_loader=val_loader,
  schedular=schedular,
  output='runs/asl300/r3d18_exp0'
)

Output directory set to: runs/asl300/r3d18_exp0
Save directory set to: runs/asl300/r3d18_exp0/checkpoints
Logs directory set to: runs/asl300/r3d18_exp0/logs


Training R3D:   0%|          | 0/100 [00:00<?, ?it/s]

  Epoch 1/100:
  Train Loss: 5.6973, Train Acc: 1.70%
  Val Loss: 5.5110, Val Acc: 1.78%
  Learning Rate: 0.000100


Training R3D:   1%|          | 1/100 [02:08<3:31:12, 128.01s/it]

  Epoch 2/100:
  Train Loss: 4.7047, Train Acc: 19.23%
  Val Loss: 5.0608, Val Acc: 9.10%
  Learning Rate: 0.000100


Training R3D:   2%|▏         | 2/100 [04:16<3:29:57, 128.54s/it]

  Epoch 3/100:
  Train Loss: 3.6868, Train Acc: 55.11%
  Val Loss: 4.7698, Val Acc: 12.87%
  Learning Rate: 0.000100


Training R3D:   3%|▎         | 3/100 [07:45<4:10:55, 155.21s/it]


KeyboardInterrupt: 