# Comparing different built-in models with the available data

In [12]:
import torch
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
import numpy as np
import os
from PIL import Image
import torchvision.models.video as video_models
import torch.nn as nn
import tqdm

### Import custom dataset

In [13]:
# Project-local dataset module. NOTE: this alias rebinds the name `Dataset`,
# hiding the torch.utils.data.Dataset class imported in the first cell.
import video_dataset as Dataset

# Dataset root, training-split instance file and class-list file.
raw_path = '../data/WLASL2000'
instances_path = './preprocessed_labels/asl100/train_instances_fixed_bboxes_short.json'
classes_path = './wlasl_class_list.json'

# Build the training split; all arguments are passed by keyword.
train_set = Dataset.VideoDataset(
  transform=Dataset.min_transform_rI3d,
  classes_path=classes_path,
  instances_path=instances_path,
  root=raw_path,
)

print(f"Length: {len(train_set)}")

Length: 1442


### Setup dataloader

In [14]:
SEED = 42

# Seed the global RNG; this governs anything drawn from it later in the
# notebook (for example the randomly initialised replacement classifier layer).
torch.manual_seed(SEED)

# Give the loader its own seeded generator. Without it the shuffle order is
# drawn from the global RNG each time the loader is iterated, so it would
# depend on every random call made between this cell and the training loop.
shuffle_rng = torch.Generator()
shuffle_rng.manual_seed(SEED)

train_loader = DataLoader(
  train_set,
  batch_size=2,
  shuffle=True,
  num_workers=0,  # single-process loading, kept for seed reproducibility
  generator=shuffle_rng,
)
train_loader

<torch.utils.data.dataloader.DataLoader at 0x7b9a20115c90>

## 3D CNNs

### r3d_18

There may be some issues with memory...

### Initialisation:

In [15]:
# Number of target classes for the replacement classifier head.
num_classes = 100

# Load the pretrained r3d_18 model.
model = video_models.r3d_18(pretrained=True)

# Swap the final fully-connected layer for one that outputs `num_classes`
# values, keeping the input width of the layer it replaces.
fc_in_features = model.fc.in_features
model.fc = nn.Linear(fc_in_features, num_classes)



### Training setup:

In [16]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = 'cpu'
if torch.cuda.is_available():
  device = 'cuda'
print(f"Device: {device}")

model = model.to(device)

# Fine-tune the whole network: enable gradients on every parameter.
model.requires_grad_(True)

loss_func = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
  

Device: cuda


In [17]:
def train_model(model, data_loader, optimizer, loss_func, epochs=10,val_loader=None):
  """Train `model` on `data_loader` and print the average loss of each epoch.

  Batches are moved to the notebook-level `device` before the forward pass.
  The epoch average is weighted by batch size so that a smaller final batch
  does not skew it (this assumes `loss_func` returns the mean over the batch,
  as nn.CrossEntropyLoss does by default).

  Args:
    model: Network to optimise; it is switched to training mode.
    data_loader: Iterable yielding `(data, target)` batches.
    optimizer: Optimiser already bound to the model's parameters.
    loss_func: Callable mapping `(output, target)` to a scalar loss tensor.
    epochs: Number of passes over `data_loader`.
    val_loader: Accepted but not used by this function.

  Raises:
    ValueError: If `data_loader` yields no samples during an epoch.
  """
  model.train()

  for epoch in tqdm.tqdm(range(epochs), desc="Training R3D"):
    loss_sum = 0.0
    n_samples = 0

    for data, target in data_loader:
      # expected data shape: (batch_size, 3, frames, height, width)
      data, target = data.to(device), target.to(device)

      optimizer.zero_grad()
      output = model(data)
      loss = loss_func(output, target)
      loss.backward()
      optimizer.step()

      batch_size = data.size(0)
      loss_sum += loss.item() * batch_size  # undo the per-batch mean
      n_samples += batch_size

    if n_samples == 0:
      # Fail with a clear message instead of a bare ZeroDivisionError.
      raise ValueError('data_loader yielded no samples')

    print(f'Epoch [{epoch+1}/{epochs}], Average Loss: {loss_sum/n_samples:.4f}')

In [18]:
# Run the basic training loop with its default number of epochs.
train_model(
  model=model,
  data_loader=train_loader,
  optimizer=optimizer,
  loss_func=loss_func,
)

Training R3D:  10%|█         | 1/10 [17:22<2:36:25, 1042.82s/it]

Epoch [1/10], Average Loss: 4.8476


Training R3D:  20%|██        | 2/10 [34:35<2:18:15, 1036.90s/it]

Epoch [2/10], Average Loss: 4.6423


Training R3D:  30%|███       | 3/10 [51:47<2:00:40, 1034.42s/it]

Epoch [3/10], Average Loss: 4.6112


Training R3D:  40%|████      | 4/10 [1:08:59<1:43:20, 1033.45s/it]

Epoch [4/10], Average Loss: 4.6109


Training R3D:  50%|█████     | 5/10 [1:26:10<1:26:04, 1032.90s/it]

Epoch [5/10], Average Loss: 4.6518


Training R3D:  60%|██████    | 6/10 [1:43:23<1:08:50, 1032.73s/it]

Epoch [6/10], Average Loss: 4.6090


Training R3D:  70%|███████   | 7/10 [2:00:35<51:37, 1032.44s/it]  

Epoch [7/10], Average Loss: 4.6163


Training R3D:  80%|████████  | 8/10 [2:17:54<34:29, 1034.78s/it]

Epoch [8/10], Average Loss: 4.6055


Training R3D:  90%|█████████ | 9/10 [2:35:18<17:17, 1037.43s/it]

Epoch [9/10], Average Loss: 4.6015


Training R3D: 100%|██████████| 10/10 [2:52:28<00:00, 1034.89s/it]

Epoch [10/10], Average Loss: 4.6005





### Output from nvidia-smi:

NVIDIA-SMI 555.42.06              Driver Version: 555.42.06      CUDA Version: 12.5     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|=========================================+========================+======================|
|   0  NVIDIA GeForce GTX 1080 Ti     Off |   00000000:01:00.0  On |                  N/A |
| 53%   72C    P2            288W /  300W |    4448MiB /  11264MiB |    100%      Default |
|                                         |                        |                  N/A |

can probably stand to increase batch size


### Increased batch size to 2:

 NVIDIA-SMI 555.42.06              Driver Version: 555.42.06      CUDA Version: 12.5     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|=========================================+========================+======================|
|   0  NVIDIA GeForce GTX 1080 Ti     Off |   00000000:01:00.0  On |                  N/A |
| 42%   67C    P2            303W /  300W |   10054MiB /  11264MiB |    100%      Default |
|                                         |                        |                  N/A |


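### The loss seems to be decreasing, but let's see if we can do better with plotting

Note: the average loss levels off around 4.60, which is about ln(100) ≈ 4.605, the cross-entropy of a uniform guess over 100 classes. The model is therefore only marginally better than chance at this point.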

In [24]:
# !pip install tensorboard
from torch.utils.tensorboard import SummaryWriter
import os

In [None]:

def _next_free_run_dir(output):
  """Return `output`, or the first numbered sibling path that does not exist.

  Only paths ending in digits are enumerated. The whole trailing number is
  incremented (so 'exp_19' becomes 'exp_20', keeping any zero padding) until
  an unused path is found. A path without trailing digits is returned as is,
  even when it already exists.
  """
  stem = output.rstrip('0123456789')
  digits = output[len(stem):]
  if not digits:
    return output

  index = int(digits)
  candidate = output
  while os.path.exists(candidate):
    index += 1
    candidate = f'{stem}{index:0{len(digits)}d}'
  return candidate


def _mean_loss_over_loader(model, loader, loss_func, optimizer=None):
  """Return the sample-weighted mean loss over `loader`, or None if it is empty.

  When `optimizer` is given, each batch also performs a backward pass and an
  optimiser step; otherwise only the forward pass is run. The caller is
  responsible for train/eval mode and for disabling gradients.
  """
  loss_sum = 0.0
  n_samples = 0

  for data, target in loader:
    data, target = data.to(device), target.to(device)

    if optimizer is not None:
      optimizer.zero_grad()
    model_output = model(data)
    loss = loss_func(model_output, target)
    if optimizer is not None:
      loss.backward()
      optimizer.step()

    batch_size = data.size(0)
    loss_sum += loss.item() * batch_size  # weight by batch size
    n_samples += batch_size

  if n_samples == 0:
    return None
  return loss_sum / n_samples


def train_model_2(model, train_loader, optimizer, loss_func, epochs=10, val_loader=None,
                  output='runs/exp_0', logs='logs', save='checkpoints', save_every=1):
  """Train `model`, logging losses to TensorBoard and optionally checkpointing.

  Batches are moved to the notebook-level `device`. Per-epoch losses are
  averaged over samples rather than over batches.

  Args:
    model: Network to optimise.
    train_loader: Iterable yielding `(data, target)` training batches.
    optimizer: Optimiser already bound to the model's parameters.
    loss_func: Callable mapping `(output, target)` to a scalar loss tensor.
    epochs: Number of passes over `train_loader`.
    val_loader: Optional iterable of validation batches. When it is None or
      yields no samples, validation is skipped for that epoch.
    output: Run directory. If it ends in a number and already exists, that
      number is incremented until an unused directory name is found.
    logs: Sub-directory of `output` for the TensorBoard event files.
    save: Sub-directory of `output` for checkpoints; a falsy value disables
      all saving.
    save_every: Write a full checkpoint every this many epochs; a falsy
      value (for example 0) disables periodic checkpoints.

  Returns:
    `(train_losses, val_losses)`: lists of per-epoch average losses.

  Raises:
    ValueError: If `train_loader` yields no samples during an epoch.
  """
  output = _next_free_run_dir(output)

  save_path = None
  if save:
    save_path = os.path.join(output, save)
    os.makedirs(save_path, exist_ok=True)

  writer = SummaryWriter(os.path.join(output, logs))  # watching loss
  train_losses = []
  val_losses = []
  best_val_loss = float('inf')

  try:
    model.train()
    for epoch in tqdm.tqdm(range(epochs), desc="Training R3D"):
      # Training phase
      avg_train_loss = _mean_loss_over_loader(model, train_loader, loss_func, optimizer)
      if avg_train_loss is None:
        raise ValueError('train_loader yielded no samples')
      train_losses.append(avg_train_loss)
      writer.add_scalar('Loss/Train', avg_train_loss, epoch)

      # Validation phase. None (not a falsy check) marks "no validation", so
      # a genuine loss of 0.0 is still recorded as a number.
      avg_val_loss = None
      if val_loader is not None:
        model.eval()
        with torch.no_grad():
          avg_val_loss = _mean_loss_over_loader(model, val_loader, loss_func)
        model.train()  # return back to train

      if avg_val_loss is not None:
        val_losses.append(avg_val_loss)
        writer.add_scalar('Loss/Val', avg_val_loss, epoch)

        if save and avg_val_loss < best_val_loss:
          best_val_loss = avg_val_loss
          torch.save(model.state_dict(),
                     os.path.join(save_path, 'best.pth'))

        print(f"Epoch [{epoch+1}/{epochs}], Train Loss: {avg_train_loss:.4f}, Val Loss: {avg_val_loss:.4f}")
      else:
        print(f'Epoch [{epoch+1}/{epochs}], Average Loss: {avg_train_loss:.4f}')

      # Periodic full checkpoint (model + optimiser + loss history).
      if save and save_every and epoch % save_every == 0:
        torch.save({
          'epoch': epoch,
          'model_state_dict': model.state_dict(),
          'optimizer_state_dict': optimizer.state_dict(),
          'train loss': avg_train_loss,
          'val loss': avg_val_loss if avg_val_loss is not None else 'N/A',
          'train losses': train_losses,
          'val losses': val_losses
          }, os.path.join(save_path, f'checkpoint_{epoch}.pth'))
  finally:
    # Flush and release the event file even if training is interrupted.
    writer.close()

  return train_losses, val_losses

### Set up the validation loader

In [38]:
# Validation split: same root, class list and transform as the training set,
# only the instance file differs.
# NOTE: this rebinds `instances_path`, which pointed at the training split.
instances_path = './preprocessed_labels/asl100/val_instances_fixed_bboxes_short.json'
val_set = Dataset.VideoDataset(
  root=raw_path,
  instances_path=instances_path,
  classes_path=classes_path,
  transform=Dataset.min_transform_rI3d
)

# Validation only averages the loss over all samples, so the visiting order
# does not matter. Leaving shuffling off also stops each validation pass from
# drawing on the global RNG between training epochs.
val_loader = DataLoader(
  val_set,
  batch_size=2,
  shuffle=False,
  num_workers=0
)
val_loader

<torch.utils.data.dataloader.DataLoader at 0x7b9a20317be0>

In [40]:
# Longer run with validation, TensorBoard logging and checkpoints.
# Arguments are passed by keyword, so their order here is not significant.
train_losses, val_losses = train_model_2(
  model=model,
  optimizer=optimizer,
  loss_func=loss_func,
  train_loader=train_loader,
  val_loader=val_loader,
  epochs=100,
  output='runs/exp0_r3d18',
)

AttributeError: module 'posixpath' has no attribute 'makedirs'