# ASLive Sign2Text Model

This notebook implements the sign language to text model following the architecture:
- **Vision Layer (CNN)**: Extracts spatial features from each frame
- **Positional Encoding (PE)**: Adds temporal position information
- **Attention Layer (LSTM)**: Processes temporal sequence with attention
- **FC Layer**: Final classification layer


In [1]:
!pip install kagglehub torchcodec torchvision
!pip install git+https://github.com/facebookresearch/pytorchvideo

Collecting torchcodec
  Downloading torchcodec-0.9.0-cp312-cp312-manylinux_2_28_x86_64.whl.metadata (11 kB)
Downloading torchcodec-0.9.0-cp312-cp312-manylinux_2_28_x86_64.whl (2.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m54.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: torchcodec
Successfully installed torchcodec-0.9.0
Collecting git+https://github.com/facebookresearch/pytorchvideo
  Cloning https://github.com/facebookresearch/pytorchvideo to /tmp/pip-req-build-jdslzkjl
  Running command git clone --filter=blob:none --quiet https://github.com/facebookresearch/pytorchvideo /tmp/pip-req-build-jdslzkjl
  Resolved https://github.com/facebookresearch/pytorchvideo to commit 0f9a5e102e4d84972b829fd30e3c3f78c7c7fd1a
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting fvcore (from pytorchvideo==0.1.5)
  Downloading fvcore-0.1.5.post20221221.tar.gz (50 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [

In [2]:
# Before running, add everything from SQ_dataloader.ipynb into the cell below and run

## 1. Data Loading (from SQ_dataloader)


In [3]:
#add SQ_dataloader code here
from torch.utils.data import Dataset
from torchvision import transforms
from torchcodec.decoders import VideoDecoder
import kagglehub
import os
import json

import torch # Assuming torch is imported elsewhere
from torch.utils.data import Dataset
from torchvision import transforms
from torchcodec.decoders import VideoDecoder
import kagglehub
import os
import json
from PIL import Image # Needed for cropping if working with PIL images

class WLASLTorchCodec(Dataset):
  download_path = None

  def __init__(self, json_path=None, video_dir=None, download=True, max_classes=None, split="train", num_frames=32, transform=None):
    print("Will download:", download)
    if (json_path is None or video_dir is None) and download == False:
      raise ValueError("json_path and video_dir must be provided with download false")
    if download:
      if WLASLTorchCodec.download_path is None:
        path = kagglehub.dataset_download("sttaseen/wlasl2000-resized")
        WLASLTorchCodec.download_path = path
      else:
        path = WLASLTorchCodec.download_path
      print("Downloaded at path: ", path)

      self.video_dir = os.path.join(path, "wlasl-complete", "videos")
      json_path = os.path.join(path, "wlasl-complete","WLASL_v0.3.json")
      downloaded = True
    else:
      self.video_dir = video_dir
    self.num_frames = num_frames
    self.transform = transform

    # Read json
    with open(json_path, "r") as f:
      data = json.load(f)
    if max_classes is not None:
        if isinstance(max_classes, int):
            # Keep only the first N entries (Usually the most frequent in WLASL)
            data = data[:max_classes]
            print(f"Limiting dataset to top {max_classes} classes.")
        elif isinstance(max_classes, list):
            # Keep only entries that match specific glosses
            data = [entry for entry in data if entry['gloss'] in max_classes]
            print(f"Limiting dataset to {len(data)} specific classes.")
    self.samples = []
    self.label_map = {}
    label_id = 0

    for entry in data:
      gloss = entry["gloss"]

      if gloss not in self.label_map:
        self.label_map[gloss] = label_id
        label_id += 1

      label = self.label_map[gloss]

      for inst in entry["instances"]:
        if inst["split"] != split:
          continue

        video_id = inst["video_id"]
        file_path = os.path.join(self.video_dir, f"{video_id}.mp4")

        # 1. Modification in __init__: Extract and store frame/bbox info
        frame_start = inst.get("frame_start", 1) # Default to 1 if missing
        frame_end = inst.get("frame_end", -1)   # Default to -1 if missing
        bbox = inst.get("bbox", [0, 0, 1.0, 1.0]) # Default to normalized full frame if missing

        if os.path.isfile(file_path):
          # Store a tuple of (file_path, label, frame_start, frame_end, bbox)
          self.samples.append((file_path, label, frame_start, frame_end, bbox))
        self.num_classes = label_id
  def __len__(self):
    return len(self.samples)

  def __getitem__(self, idx):
    # 2. Modification in __getitem__: Unpack all instance info
    video_path, label, frame_start, frame_end, bbox = self.samples[idx]

    # Convert WLASL 1-based indices (inclusive start, exclusive end) to
    # torchcodec's 0-based indices (inclusive start, inclusive end).

    decoder = VideoDecoder(video_path)
    video_length = decoder.metadata.num_frames
    end_frame = frame_end - 1 if frame_end > 0 else video_length
    start_frame = 0
    if end_frame > video_length:
      end_frame = video_length
    else:
      end_frame = frame_end - 2 if frame_end > 0 else None
    if frame_start > video_length:
      start_frame = 0
    else:
      start_frame = frame_start - 1
    frames = decoder[start_frame:end_frame]
    if self.transform:
      # Transform should handle T x C x H x W input
      frames = self.transform(frames)
    return frames, torch.tensor(label) # Ensure label is a tensor

In [4]:
import pytorchvideo.transforms as ptv_transforms
from torchvision.transforms import Compose, Lambda


mean = [0.45, 0.45, 0.45]
std = [0.225, 0.225, 0.225]

# Test out dataset
train_transform = Compose(
    [
        # 1. Spatial Resize: Scale the shortest edge to SIDE_SIZE
        ptv_transforms.UniformTemporalSubsample(num_samples=24, temporal_dim=0),
        ptv_transforms.ConvertUint8ToFloat(),
        Lambda(lambda x: x.permute(1, 0, 2, 3)),
        ptv_transforms.Normalize(mean, std),
        Lambda(lambda x: x.permute(1, 0, 2, 3)),
        ptv_transforms.ShortSideScale(size=224),
        # ptv_transforms.RandAugment(magnitude=6, num_layers=2),
        # ptv_transforms.AugMix(magnitude=3),
    ]
)

def show_frame(video, frame_idx):
  import matplotlib.pyplot as plt
  import numpy as np
  single_frame = video[frame_idx]
  frame_np = single_frame.detach().cpu().numpy()

  frame_np = np.transpose(frame_np, (1, 2, 0))
  plt.figure(figsize=(5, 5))
  plt.imshow(frame_np)
  plt.title(f'Frame {frame_idx} from Video Batch')
  plt.axis('off') # Hide axis ticks and labels
  plt.show()

# clip = WLASLTorchCodec(max_classes=1, transform=train_transform)

# for video, label in clip:
#   show_frame(video, 0)

In [5]:
import os
import json
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from torchcodec.decoders import VideoDecoder
import numpy as np
from tqdm import tqdm

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")


Using device: cuda


## 2. Vision Layer (CNN Backbone)

The Vision Layer extracts spatial features from each video frame using a CNN. We use a pretrained ResNet-18 as the backbone and remove the final classification layer to get feature embeddings.


In [6]:
class VisionLayer(nn.Module):
    """CNN backbone for extracting spatial features from video frames.

    Uses pretrained ResNet-18 as feature extractor.
    Input: (batch, T, C, H, W) - batch of T frames
    Output: (batch, T, feature_dim) - feature vectors for each frame
    """

    def __init__(self, feature_dim=512, pretrained=True, freeze_backbone=False):
        super(VisionLayer, self).__init__()

        # Load pretrained ResNet-18
        resnet = models.resnet18(weights='IMAGENET1K_V1' if pretrained else None)

        # Remove the final FC layer
        self.backbone = nn.Sequential(*list(resnet.children())[:-1])

        # ResNet-18 outputs 512-dim features
        self.resnet_feature_dim = 512

        # Optional projection layer to adjust feature dimension
        if feature_dim != self.resnet_feature_dim:
            self.projection = nn.Linear(self.resnet_feature_dim, feature_dim)
        else:
            self.projection = None

        self.feature_dim = feature_dim

        # Freeze backbone if specified
        self.set_freeze_backbone(freeze_backbone)

    def set_freeze_backbone(self, is_frozen):
      for param in self.backbone.parameters():
          param.requires_grad = not is_frozen

    def forward(self, x):
        """
        Args:
            x: Input tensor of shape (batch, T, C, H, W)
        Returns:
            Feature tensor of shape (batch, T, feature_dim)
        """
        batch_size, T, C, H, W = x.shape

        # Reshape to process all frames together: (batch * T, C, H, W)
        x = x.view(batch_size * T, C, H, W)

        # Extract features: (batch * T, 512, 1, 1)
        features = self.backbone(x)

        # Flatten: (batch * T, 512)
        features = features.view(batch_size * T, -1)

        # Project features if needed
        if self.projection is not None:
            features = self.projection(features)

        # Reshape back: (batch, T, feature_dim)
        features = features.view(batch_size, T, self.feature_dim)

        return features


## 3. Positional Encoding (PE)

Sinusoidal positional encoding adds temporal position information to the frame features before feeding them to the LSTM.


In [7]:
class PositionalEncoding(nn.Module):
    """Sinusoidal positional encoding for temporal sequences.

    Adds position information to help the model understand the order of frames.
    """

    def __init__(self, d_model, max_len=500, dropout=0.1):
        super(PositionalEncoding, self).__init__()

        # self.dropout = nn.Dropout(p=dropout)

        # Create positional encoding matrix
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))

        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)

        # Add batch dimension: (1, max_len, d_model)
        pe = pe.unsqueeze(0)

        # Register as buffer (not a parameter, but should be saved/loaded)
        self.register_buffer('pe', pe)

    def forward(self, x):
        """
        Args:
            x: Input tensor of shape (batch, T, d_model)
        Returns:
            Tensor with positional encoding added: (batch, T, d_model)
        """
        x = x + self.pe[:, :x.size(1), :]
        return x


## 4. Attention Layer (LSTM with Attention)

Bidirectional LSTM processes the sequence of frame features, followed by an attention mechanism to weight the importance of different time steps.


In [8]:

class Attention(nn.Module):
    """Attention mechanism for weighting LSTM outputs.

    Computes attention weights over the sequence and returns a weighted sum.
    """

    def __init__(self, hidden_dim):
        super(Attention, self).__init__()

        self.attention = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim // 2),
            nn.Tanh(),
            nn.Linear(hidden_dim // 2, 1)
        )

    def forward(self, lstm_output):
        """
        Args:
            lstm_output: LSTM outputs of shape (batch, T, hidden_dim)
        Returns:
            context: Weighted sum of shape (batch, hidden_dim)
            attention_weights: Attention weights of shape (batch, T)
        """
        # Compute attention scores: (batch, T, 1)
        scores = self.attention(lstm_output)

        # Apply softmax over time dimension: (batch, T, 1)
        attention_weights = F.softmax(scores, dim=1)

        # Compute weighted sum: (batch, hidden_dim)
        context = torch.sum(attention_weights * lstm_output, dim=1)

        return context, attention_weights.squeeze(-1)


class AttentionLSTM(nn.Module):
    """Bidirectional LSTM with attention mechanism.

    Processes temporal sequence of frame features and outputs a fixed-size representation.
    """

    def __init__(self, input_dim, hidden_dim=256, num_layers=2, dropout=0.3, bidirectional=True):
        super(AttentionLSTM, self).__init__()

        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.bidirectional = bidirectional
        self.num_directions = 2 if bidirectional else 1

        # LSTM layer
        self.lstm = nn.LSTM(
            input_size=input_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True,
            dropout=dropout if num_layers > 1 else 0,
            bidirectional=bidirectional
        )

        # Attention mechanism
        self.attention = Attention(hidden_dim * self.num_directions)

        # Output dimension
        self.output_dim = hidden_dim * self.num_directions

    def forward(self, x):
        """
        Args:
            x: Input tensor of shape (batch, T, input_dim)
        Returns:
            output: Context vector of shape (batch, hidden_dim * num_directions)
            attention_weights: Attention weights of shape (batch, T)
        """
        # LSTM forward pass: (batch, T, hidden_dim * num_directions)
        lstm_output, (hidden, cell) = self.lstm(x)

        # Apply attention
        context, attention_weights = self.attention(lstm_output)

        return context, attention_weights

## 5. Complete Sign2Text Model

Combines all components: Vision Layer → Positional Encoding → Attention LSTM → FC Layer → Classification


In [9]:
class Sign2TextModel(nn.Module):
    """Complete Sign Language to Text model.

    Architecture:
    1. Vision Layer (CNN): Extract spatial features from each frame
    2. Positional Encoding: Add temporal position information
    3. Attention LSTM: Process temporal sequence with attention
    4. FC Layer: Final classification
    """

    def __init__(self, num_classes, feature_dim=512, hidden_dim=256,
                 num_lstm_layers=2, dropout=0.3, pretrained_cnn=True,
                 classification_layers=(256,),
                 freeze_cnn=False, max_frames=100):
        super(Sign2TextModel, self).__init__()

        # Vision Layer (CNN)
        self.vision_layer = VisionLayer(
            feature_dim=feature_dim,
            pretrained=pretrained_cnn,
            freeze_backbone=freeze_cnn
        )

        # Positional Encoding
        self.positional_encoding = PositionalEncoding(
            d_model=feature_dim,
            max_len=100,
            dropout=dropout
        )

        # Attention Layer (LSTM)
        self.attention_lstm = AttentionLSTM(
            input_dim=feature_dim,
            hidden_dim=hidden_dim,
            num_layers=num_lstm_layers,
            dropout=dropout,
            bidirectional=True
        )

        layers = []
        self.lstm_ln = nn.LayerNorm(hidden_dim * 2)
        input_dim = self.attention_lstm.output_dim
        for i, dim in enumerate(classification_layers):
            layers.append(nn.Linear(input_dim, dim))
            layers.append(nn.BatchNorm1d(dim))
            layers.append(nn.ReLU())
            layers.append(nn.Dropout(dropout))
            input_dim = dim

        layers.append(nn.Linear(input_dim, num_classes))
        # FC Layer (Classification)
        self.fc_layer = nn.Sequential(
            *layers
        )

        self.num_classes = num_classes

        self._init_weights()

    def set_freeze(self, is_frozen):
        self.vision_layer.set_freeze_backbone(is_frozen)

    def forward(self, x, return_attention=False):
        """
        Args:
            x: Input video frames of shape (batch, T, C, H, W)
            return_attention: If True, also return attention weights
        Returns:
            logits: Classification logits of shape (batch, num_classes)
            attention_weights (optional): Attention weights of shape (batch, T)
        """
        # Vision Layer: (batch, T, C, H, W) → (batch, T, feature_dim)
        features = self.vision_layer(x)

        # Positional Encoding: (batch, T, feature_dim) → (batch, T, feature_dim)
        features = self.positional_encoding(features)

        # Attention LSTM: (batch, T, feature_dim) → (batch, hidden_dim * 2)
        context, attention_weights = self.attention_lstm(features)

        # FC Layer: (batch, hidden_dim * 2) → (batch, num_classes)
        logits = self.fc_layer(context)

        if return_attention:
            return logits, attention_weights
        return logits

    def _init_weights(self):
        """Applies Xavier initialization to Linear layers and LSTM weights."""

        # Initialize LSTM weights
        # For LSTMs, orthogonal initialization for recurrent weights and Xavier for input weights is common.
        # However, nn.init.xavier_uniform_ is a good general starting point.
        for name, param in self.attention_lstm.named_parameters():
            if 'weight' in name:
                # Apply Xavier/Glorot for weights
                nn.init.xavier_uniform_(param)
            elif 'bias' in name:
                # Initialize biases to zero (or use a specific trick like setting forget gate bias to 1)
                nn.init.constant_(param, 0.0)

        # Initialize Classification FC Layers
        for m in self.fc_layer.modules():
            if isinstance(m, nn.Linear):
                # Use Xavier/Glorot for weights
                nn.init.xavier_uniform_(m.weight)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0.0)

## 6. Training Utilities


In [10]:
from torch.cuda.amp import autocast, GradScaler
scaler = GradScaler()

def train_epoch(model, dataloader, criterion, optimizer, device):
    """Train the model for one epoch."""
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    progress_bar = tqdm(dataloader, desc="Training")
    for frames, labels in progress_bar:
        frames = frames.to(device)
        labels = labels.to(device)
        # print(frames.shape)
        # Zero gradients
        optimizer.zero_grad()

        # Forward pass
        with autocast():
          outputs = model(frames)
          loss = criterion(outputs, labels)
        # Backward pass

        scaler.scale(loss).backward()

        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

        scaler.step(optimizer)
        scaler.update()
        # loss.backward()
        # optimizer.step()

        # Statistics
        running_loss += loss.item() * frames.size(0)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

        progress_bar.set_postfix({
            'loss': loss.item(),
            'acc': 100 * correct / total
        })

    epoch_loss = running_loss / total
    epoch_acc = 100 * correct / total

    return epoch_loss, epoch_acc


def evaluate(model, dataloader, criterion, device):
    """Evaluate the model on a dataset."""
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        for frames, labels in tqdm(dataloader, desc="Evaluating"):
            frames = frames.to(device)
            labels = labels.to(device)

            with autocast():
                outputs = model(frames)
                loss = criterion(outputs, labels)

            running_loss += loss.item() * frames.size(0)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    epoch_loss = running_loss / total
    epoch_acc = 100 * correct / total

    return epoch_loss, epoch_acc


  scaler = GradScaler()


## 7. Configuration and Setup


In [11]:
# ============================================
# CONFIGURATION - Modify these paths and hyperparameters
# ============================================

# Data paths
JSON_PATH = "/content/drive/MyDrive/wlasl_resized/wlasl-complete/WLASL_v0.3.json"  # Path to WLASL JSON
VIDEO_DIR = "/content/drive/MyDrive/wlasl_resized/wlasl-complete/videos"  # Path to video directory

# Model hyperparameters
NUM_FRAMES = 24           # Number of frames to sample from each video
FEATURE_DIM = 512        # CNN feature dimension
HIDDEN_DIM = 256         # LSTM hidden dimension
NUM_LSTM_LAYERS = 2      # Number of LSTM layers
DROPOUT = 0.3            # Dropout rate

# Training hyperparameters
BATCH_SIZE = 18           # Batch size (adjust based on GPU memory)
LEARNING_RATE = 5e-4     # Learning rate
NUM_EPOCHS = 200          # Number of training epochs
WEIGHT_DECAY = 1e-3     # L2 regularization
IMG_SIZE=224
# Options
FREEZE_CNN = True       # Whether to freeze CNN backbone
PRETRAINED_CNN = True    # Use pretrained CNN weights
WORKERS = 10
EPOCHS_UNTIL_UNFREEZE = 50
CLASSES_COUNT = 50
PREFETCH = 4

In [12]:
from torchvision.transforms import v2
# Data transforms for training and validation
# train_transform = transforms.Compose([
#     v2.Resize((IMG_SIZE, IMG_SIZE)),
#     # v2.RandomHorizontalFlip(),
#     # v2.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
#     v2.Compose([v2.ToImage(), v2.ToDtype(torch.float32, scale=True)]),
# ])

# val_transform = transforms.Compose([
#     v2.Resize((IMG_SIZE, IMG_SIZE)),
#     v2.Compose([v2.ToImage(), v2.ToDtype(torch.float32, scale=True)]),
# ])

from torchvision.transforms import Compose
import pytorchvideo.transforms as ptv_transforms
from pytorchvideo.transforms import functional as ptv_functional

import torch
import torch.nn as nn
import torch.nn.functional as F

# Note: The transforms below expect the video tensor to be in the range [0.0, 1.0]
# and of shape (T, C, H, W). The `WLASLTorchCodec` implementation already ensures
# the shape is (T, C, H, W), but you must ensure the pixel values are converted
# to float and normalized to [0, 1] before applying the standard normalization.



mean = [0.45, 0.45, 0.45]
std = [0.225, 0.225, 0.225]

# Test out dataset
train_transform = Compose(
    [
        # 1. Spatial Resize: Scale the shortest edge to SIDE_SIZE
        ptv_transforms.UniformTemporalSubsample(num_samples=NUM_FRAMES, temporal_dim=0),
        ptv_transforms.ConvertUint8ToFloat(),
        Lambda(lambda x: x.permute(1, 0, 2, 3)),
        ptv_transforms.Normalize(mean, std),
        Lambda(lambda x: x.permute(1, 0, 2, 3)),
        ptv_transforms.ShortSideScale(size=IMG_SIZE),
        ptv_transforms.RandAugment(magnitude=9, num_layers=2, prob=0.6),
        # ptv_transforms.AugMix(magnitude=3),
    ]
)

# train_transform = Compose(
#     [
#         # 1. Spatial Resize: Scale the shortest edge to SIDE_SIZE
#         ptv_transforms.UniformTemporalSubsample(num_samples=NUM_FRAMES, temporal_dim=0),
#         ptv_transforms.ConvertUint8ToFloat(),
#         ptv_transforms.ShortSideScale(size=IMG_SIZE),
#         ptv_transforms.RandAugment(magnitude=15, num_layers=2),
#         ptv_transforms.AugMix(magnitude=3),
#     ]
# )

test_transform = Compose(
    [
        ptv_transforms.UniformTemporalSubsample(num_samples=NUM_FRAMES, temporal_dim=0),
        ptv_transforms.ConvertUint8ToFloat(),
        Lambda(lambda x: x.permute(1, 0, 2, 3)),
        ptv_transforms.Normalize(mean, std),
        Lambda(lambda x: x.permute(1, 0, 2, 3)),
        ptv_transforms.ShortSideScale(size=IMG_SIZE),
    ]
)
val_transform =test_transform

In [13]:
# Create datasets
train_dataset = WLASLTorchCodec(
    download=True,
    split="train",
    max_classes=CLASSES_COUNT,
    num_frames=NUM_FRAMES,
    transform=train_transform
)

val_dataset = WLASLTorchCodec(
    download=True,
    split="val",
    max_classes=CLASSES_COUNT,
    num_frames=NUM_FRAMES,
    transform=val_transform
)

test_dataset = WLASLTorchCodec(
    download=True,
    split="test",
    max_classes=CLASSES_COUNT,
    num_frames=NUM_FRAMES,
    transform=val_transform
)

# Get number of classes from dataset
NUM_CLASSES = train_dataset.num_classes

print(f"Number of training samples: {len(train_dataset)}")
print(f"Number of validation samples: {len(val_dataset)}")
print(f"Number of test samples: {len(test_dataset)}")
print(f"Number of classes: {NUM_CLASSES}")

# Create data loaders
train_loader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=WORKERS,           # Start high. The optimal value is often 4 to 12.
    pin_memory=True,         # Accelerates the transfer of data from CPU to GPU VRAM.
    persistent_workers=True, # Recommended for PyTorch multi-process workers to save epoch setup time.
    prefetch_factor=PREFETCH
)

val_loader = DataLoader(
    val_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=WORKERS,           # Start high. The optimal value is often 4 to 12.
                             # Since video decoding is CPU-heavy, 8 is a good starting point.
    pin_memory=True,         # Accelerates the transfer of data from CPU to GPU VRAM.
    persistent_workers=True, # Recommended for PyTorch multi-process workers to save epoch setup time.
    prefetch_factor=PREFETCH
)

test_loader = DataLoader(
    test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=WORKERS,           # Start high. The optimal value is often 4 to 12.
                             # Since video decoding is CPU-heavy, 8 is a good starting point.
    pin_memory=True,         # Accelerates the transfer of data from CPU to GPU VRAM.
    persistent_workers=True, # Recommended for PyTorch multi-process workers to save epoch setup time.
    prefetch_factor=PREFETCH
)

Will download: True
Downloading from https://www.kaggle.com/api/v1/datasets/download/sttaseen/wlasl2000-resized?dataset_version_number=1...


100%|██████████| 1.87G/1.87G [00:48<00:00, 41.6MB/s]

Extracting files...





Downloaded at path:  /root/.cache/kagglehub/datasets/sttaseen/wlasl2000-resized/versions/1
Limiting dataset to top 50 classes.
Will download: True
Downloaded at path:  /root/.cache/kagglehub/datasets/sttaseen/wlasl2000-resized/versions/1
Limiting dataset to top 50 classes.
Will download: True
Downloaded at path:  /root/.cache/kagglehub/datasets/sttaseen/wlasl2000-resized/versions/1
Limiting dataset to top 50 classes.
Number of training samples: 785
Number of validation samples: 183
Number of test samples: 143
Number of classes: 50




In [14]:
# Initialize model
model = Sign2TextModel(
    num_classes=NUM_CLASSES,
    feature_dim=FEATURE_DIM,
    hidden_dim=HIDDEN_DIM,
    num_lstm_layers=NUM_LSTM_LAYERS,
    dropout=DROPOUT,
    pretrained_cnn=PRETRAINED_CNN,
    freeze_cnn=FREEZE_CNN,
    max_frames=NUM_FRAMES
).to(device)

# model = SlowFast(num_classes=NUM_CLASSES, dropout=DROPOUT).to(device)

# model = SignTimeSformer(
#     num_classes=NUM_CLASSES,
#     img_size=IMG_SIZE,
#     num_frames=NUM_FRAMES,
#     heads=12,
#     L=5,
#     dropout=DROPOUT
# ).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW( model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY )
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau( optimizer, mode='min', factor=0.3, patience=5 )

# Print model summary
# print(model)
print(f"\nTotal parameters: {sum(p.numel() for p in model.parameters()):,}")
print(f"Trainable parameters: {sum(p.numel() for p in model.parameters() if p.requires_grad):,}")


Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth


100%|██████████| 44.7M/44.7M [00:00<00:00, 235MB/s]



Total parameters: 14,607,731
Trainable parameters: 3,431,219


## 8. Training Loop


In [None]:
from typing import *
import os

# Training loop
best_val_acc = 0.0
history = {
    'train_loss': [], 'train_acc': [],
    'val_loss': [], 'val_acc': []
}

from torch.utils.flop_counter import FlopCounterMode

def get_flops(model, inp: Union[torch.Tensor, Tuple], with_backward=False):

    istrain = model.training
    model.eval()

    inp = inp if isinstance(inp, torch.Tensor) else torch.randn(inp)

    flop_counter = FlopCounterMode(mods=model, display=False, depth=None)
    with flop_counter:
        if with_backward:
            model(inp).sum().backward()
        else:
            model(inp)
    total_flops =  flop_counter.get_total_flops()
    if istrain:
        model.train()
    return total_flops

import torch
torch.cuda.empty_cache()

FOLDER_PATH = "/content/drive/MyDrive/Intro_2_DL"
MODEL_NAME = f"2d_cnn_lstm_wlasl{CLASSES_COUNT}.pth"
MODEL_PATH = os.path.join(FOLDER_PATH, MODEL_NAME)
CONTINUE = True
start_epoch = 0
"""
  'epoch': epoch,
  'model_state_dict': model.state_dict(),
  'optimizer_state_dict': optimizer.state_dict(),
  'val_acc': val_acc,
  'label_map': train_dataset.label_map
"""

try:
  if CONTINUE:
      checkpoint = torch.load(MODEL_PATH)
      optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
      start_epoch = checkpoint['epoch'] + 1
      model.load_state_dict(checkpoint['model_state_dict'])
      # val_acc.extend(checkpoint['val_acc'])
except:
  pass


for epoch in range(start_epoch, NUM_EPOCHS):
    print(f"\nEpoch {epoch + 1}/{NUM_EPOCHS}")
    print("-" * 40)
    if epoch > EPOCHS_UNTIL_UNFREEZE and FREEZE_CNN:
        model.set_freeze(False)
    # Train

    train_loss, train_acc = train_epoch(model, train_loader, criterion, optimizer, device)
    # train_flops = get_flops(model, )
    # Validate
    val_loss, val_acc = evaluate(model, val_loader, criterion, device)

    # Update scheduler
    scheduler.step(val_loss)

    # Save history
    history['train_loss'].append(train_loss)
    history['train_acc'].append(train_acc)
    history['val_loss'].append(val_loss)
    history['val_acc'].append(val_acc)

    print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%")
    print(f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.2f}%")

    # Save best model
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'val_acc': val_acc,
            'label_map': train_dataset.label_map
        }, MODEL_PATH)
        print(f"✓ Saved new best model with Val Acc: {val_acc:.2f}%")

print(f"\nTraining complete! Best Val Acc: {best_val_acc:.2f}%")



Epoch 38/200
----------------------------------------


  with autocast():
Training: 100%|██████████| 44/44 [00:20<00:00,  2.19it/s, loss=3.21, acc=20.8]
  with autocast():
Evaluating: 100%|██████████| 11/11 [00:04<00:00,  2.41it/s]


Train Loss: 3.1076, Train Acc: 20.76%
Val Loss: 3.5291, Val Acc: 17.49%
✓ Saved new best model with Val Acc: 17.49%

Epoch 39/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:20<00:00,  2.16it/s, loss=2.76, acc=21]
Evaluating: 100%|██████████| 11/11 [00:04<00:00,  2.30it/s]


Train Loss: 3.0185, Train Acc: 21.02%
Val Loss: 3.5188, Val Acc: 18.58%
✓ Saved new best model with Val Acc: 18.58%

Epoch 40/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:20<00:00,  2.18it/s, loss=3.1, acc=21.7]
Evaluating: 100%|██████████| 11/11 [00:05<00:00,  2.07it/s]


Train Loss: 3.0474, Train Acc: 21.66%
Val Loss: 3.5220, Val Acc: 18.03%

Epoch 41/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:21<00:00,  2.06it/s, loss=2.9, acc=21.9]
Evaluating: 100%|██████████| 11/11 [00:04<00:00,  2.34it/s]


Train Loss: 3.0127, Train Acc: 21.91%
Val Loss: 3.5168, Val Acc: 15.85%

Epoch 42/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:20<00:00,  2.12it/s, loss=3.13, acc=21.7]
Evaluating: 100%|██████████| 11/11 [00:04<00:00,  2.24it/s]


Train Loss: 3.0927, Train Acc: 21.66%
Val Loss: 3.5528, Val Acc: 15.85%

Epoch 43/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:20<00:00,  2.10it/s, loss=3.03, acc=21.4]
Evaluating: 100%|██████████| 11/11 [00:04<00:00,  2.35it/s]


Train Loss: 3.0497, Train Acc: 21.40%
Val Loss: 3.5059, Val Acc: 18.58%

Epoch 44/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:20<00:00,  2.12it/s, loss=2.9, acc=19.5]
Evaluating: 100%|██████████| 11/11 [00:04<00:00,  2.29it/s]


Train Loss: 3.0173, Train Acc: 19.49%
Val Loss: 3.5195, Val Acc: 18.58%

Epoch 45/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:21<00:00,  2.08it/s, loss=2.76, acc=19.5]
Evaluating: 100%|██████████| 11/11 [00:04<00:00,  2.40it/s]


Train Loss: 3.1113, Train Acc: 19.49%
Val Loss: 3.5413, Val Acc: 17.49%

Epoch 46/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:20<00:00,  2.13it/s, loss=3.64, acc=22]
Evaluating: 100%|██████████| 11/11 [00:04<00:00,  2.37it/s]


Train Loss: 3.0314, Train Acc: 22.04%
Val Loss: 3.5126, Val Acc: 20.22%
✓ Saved new best model with Val Acc: 20.22%

Epoch 47/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:20<00:00,  2.10it/s, loss=2.49, acc=22.4]
Evaluating: 100%|██████████| 11/11 [00:04<00:00,  2.39it/s]


Train Loss: 2.9654, Train Acc: 22.42%
Val Loss: 3.4984, Val Acc: 16.94%

Epoch 48/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:20<00:00,  2.15it/s, loss=3.38, acc=22.7]
Evaluating: 100%|██████████| 11/11 [00:04<00:00,  2.26it/s]


Train Loss: 2.9767, Train Acc: 22.68%
Val Loss: 3.4954, Val Acc: 17.49%

Epoch 49/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:20<00:00,  2.11it/s, loss=2.24, acc=25.4]
Evaluating: 100%|██████████| 11/11 [00:04<00:00,  2.32it/s]


Train Loss: 2.9118, Train Acc: 25.35%
Val Loss: 3.4478, Val Acc: 17.49%

Epoch 50/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:19<00:00,  2.23it/s, loss=2.76, acc=25.4]
Evaluating: 100%|██████████| 11/11 [00:05<00:00,  2.14it/s]


Train Loss: 2.8917, Train Acc: 25.35%
Val Loss: 3.4577, Val Acc: 16.94%

Epoch 51/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:20<00:00,  2.19it/s, loss=2.97, acc=23.1]
Evaluating: 100%|██████████| 11/11 [00:04<00:00,  2.20it/s]


Train Loss: 2.9840, Train Acc: 23.06%
Val Loss: 3.4688, Val Acc: 15.85%

Epoch 52/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:30<00:00,  1.45it/s, loss=2.49, acc=22.7]
Evaluating: 100%|██████████| 11/11 [00:05<00:00,  2.20it/s]


Train Loss: 2.9488, Train Acc: 22.68%
Val Loss: 3.1983, Val Acc: 21.86%
✓ Saved new best model with Val Acc: 21.86%

Epoch 53/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:30<00:00,  1.45it/s, loss=3.29, acc=28]
Evaluating: 100%|██████████| 11/11 [00:04<00:00,  2.25it/s]


Train Loss: 2.7614, Train Acc: 28.03%
Val Loss: 2.8605, Val Acc: 26.23%
✓ Saved new best model with Val Acc: 26.23%

Epoch 54/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:29<00:00,  1.49it/s, loss=2.59, acc=30.6]
Evaluating: 100%|██████████| 11/11 [00:04<00:00,  2.23it/s]


Train Loss: 2.5405, Train Acc: 30.57%
Val Loss: 2.7945, Val Acc: 26.78%
✓ Saved new best model with Val Acc: 26.78%

Epoch 55/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:30<00:00,  1.46it/s, loss=2.17, acc=37.3]
Evaluating: 100%|██████████| 11/11 [00:04<00:00,  2.29it/s]


Train Loss: 2.3892, Train Acc: 37.32%
Val Loss: 2.7126, Val Acc: 24.04%

Epoch 56/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:29<00:00,  1.49it/s, loss=1.95, acc=40.4]
Evaluating: 100%|██████████| 11/11 [00:04<00:00,  2.31it/s]


Train Loss: 2.2867, Train Acc: 40.38%
Val Loss: 2.5571, Val Acc: 32.24%
✓ Saved new best model with Val Acc: 32.24%

Epoch 57/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:30<00:00,  1.45it/s, loss=2.57, acc=44.2]
Evaluating: 100%|██████████| 11/11 [00:04<00:00,  2.25it/s]


Train Loss: 2.1739, Train Acc: 44.20%
Val Loss: 2.4685, Val Acc: 34.43%
✓ Saved new best model with Val Acc: 34.43%

Epoch 58/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:29<00:00,  1.49it/s, loss=1.97, acc=47.4]
Evaluating: 100%|██████████| 11/11 [00:04<00:00,  2.30it/s]


Train Loss: 2.0618, Train Acc: 47.39%
Val Loss: 2.4989, Val Acc: 36.61%
✓ Saved new best model with Val Acc: 36.61%

Epoch 59/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:30<00:00,  1.44it/s, loss=1.74, acc=49.6]
Evaluating: 100%|██████████| 11/11 [00:05<00:00,  2.10it/s]


Train Loss: 1.9237, Train Acc: 49.55%
Val Loss: 2.3463, Val Acc: 38.25%
✓ Saved new best model with Val Acc: 38.25%

Epoch 60/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:29<00:00,  1.48it/s, loss=2.2, acc=54.5]
Evaluating: 100%|██████████| 11/11 [00:04<00:00,  2.41it/s]


Train Loss: 1.8288, Train Acc: 54.52%
Val Loss: 2.2851, Val Acc: 38.25%

Epoch 61/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:30<00:00,  1.42it/s, loss=2.34, acc=59.5]
Evaluating: 100%|██████████| 11/11 [00:04<00:00,  2.27it/s]


Train Loss: 1.6606, Train Acc: 59.49%
Val Loss: 2.2530, Val Acc: 40.98%
✓ Saved new best model with Val Acc: 40.98%

Epoch 62/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:29<00:00,  1.51it/s, loss=1.8, acc=59.7]
Evaluating: 100%|██████████| 11/11 [00:04<00:00,  2.30it/s]


Train Loss: 1.6403, Train Acc: 59.75%
Val Loss: 2.2764, Val Acc: 37.70%

Epoch 63/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:29<00:00,  1.47it/s, loss=1.9, acc=63.6]
Evaluating: 100%|██████████| 11/11 [00:05<00:00,  2.17it/s]


Train Loss: 1.5365, Train Acc: 63.57%
Val Loss: 2.1051, Val Acc: 42.62%
✓ Saved new best model with Val Acc: 42.62%

Epoch 64/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:30<00:00,  1.43it/s, loss=1.13, acc=66.6]
Evaluating: 100%|██████████| 11/11 [00:04<00:00,  2.22it/s]


Train Loss: 1.4352, Train Acc: 66.62%
Val Loss: 2.0866, Val Acc: 45.90%
✓ Saved new best model with Val Acc: 45.90%

Epoch 65/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:30<00:00,  1.46it/s, loss=1.14, acc=66.9]
Evaluating: 100%|██████████| 11/11 [00:05<00:00,  2.16it/s]


Train Loss: 1.3950, Train Acc: 66.88%
Val Loss: 2.0875, Val Acc: 42.62%

Epoch 66/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:30<00:00,  1.46it/s, loss=1.3, acc=70.8]
Evaluating: 100%|██████████| 11/11 [00:04<00:00,  2.27it/s]


Train Loss: 1.2451, Train Acc: 70.83%
Val Loss: 2.0766, Val Acc: 42.08%

Epoch 67/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:29<00:00,  1.47it/s, loss=0.735, acc=69.7]
Evaluating: 100%|██████████| 11/11 [00:05<00:00,  2.19it/s]


Train Loss: 1.2816, Train Acc: 69.68%
Val Loss: 2.0692, Val Acc: 46.45%
✓ Saved new best model with Val Acc: 46.45%

Epoch 68/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:30<00:00,  1.45it/s, loss=1.4, acc=70.7]
Evaluating: 100%|██████████| 11/11 [00:04<00:00,  2.38it/s]


Train Loss: 1.2723, Train Acc: 70.70%
Val Loss: 2.0615, Val Acc: 46.45%

Epoch 69/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:30<00:00,  1.45it/s, loss=1.3, acc=74.6]
Evaluating: 100%|██████████| 11/11 [00:04<00:00,  2.36it/s]


Train Loss: 1.0988, Train Acc: 74.65%
Val Loss: 1.9641, Val Acc: 49.18%
✓ Saved new best model with Val Acc: 49.18%

Epoch 70/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:30<00:00,  1.46it/s, loss=1.45, acc=76.8]
Evaluating: 100%|██████████| 11/11 [00:04<00:00,  2.28it/s]


Train Loss: 1.0302, Train Acc: 76.82%
Val Loss: 1.9666, Val Acc: 46.45%

Epoch 71/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:29<00:00,  1.48it/s, loss=1.4, acc=77.6]
Evaluating: 100%|██████████| 11/11 [00:04<00:00,  2.33it/s]


Train Loss: 1.0405, Train Acc: 77.58%
Val Loss: 1.9288, Val Acc: 48.63%

Epoch 72/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:29<00:00,  1.48it/s, loss=0.701, acc=78.1]
Evaluating: 100%|██████████| 11/11 [00:04<00:00,  2.27it/s]


Train Loss: 0.9564, Train Acc: 78.09%
Val Loss: 1.8915, Val Acc: 47.54%

Epoch 73/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:29<00:00,  1.50it/s, loss=0.993, acc=81]
Evaluating: 100%|██████████| 11/11 [00:04<00:00,  2.43it/s]


Train Loss: 0.8819, Train Acc: 81.02%
Val Loss: 1.8541, Val Acc: 47.54%

Epoch 74/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:31<00:00,  1.40it/s, loss=0.695, acc=80.8]
Evaluating: 100%|██████████| 11/11 [00:04<00:00,  2.22it/s]


Train Loss: 0.9185, Train Acc: 80.76%
Val Loss: 1.8650, Val Acc: 51.91%
✓ Saved new best model with Val Acc: 51.91%

Epoch 75/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:29<00:00,  1.49it/s, loss=1.52, acc=81.9]
Evaluating: 100%|██████████| 11/11 [00:04<00:00,  2.22it/s]


Train Loss: 0.8089, Train Acc: 81.91%
Val Loss: 1.8708, Val Acc: 49.18%

Epoch 76/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:29<00:00,  1.49it/s, loss=0.836, acc=81.3]
Evaluating: 100%|██████████| 11/11 [00:04<00:00,  2.30it/s]


Train Loss: 0.8558, Train Acc: 81.27%
Val Loss: 1.9113, Val Acc: 49.73%

Epoch 77/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:30<00:00,  1.45it/s, loss=0.913, acc=84.8]
Evaluating: 100%|██████████| 11/11 [00:04<00:00,  2.27it/s]


Train Loss: 0.7863, Train Acc: 84.84%
Val Loss: 1.8274, Val Acc: 53.01%
✓ Saved new best model with Val Acc: 53.01%

Epoch 78/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:29<00:00,  1.50it/s, loss=0.805, acc=84.6]
Evaluating: 100%|██████████| 11/11 [00:04<00:00,  2.34it/s]


Train Loss: 0.7505, Train Acc: 84.59%
Val Loss: 1.8134, Val Acc: 50.27%

Epoch 79/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:31<00:00,  1.41it/s, loss=0.989, acc=86.1]
Evaluating: 100%|██████████| 11/11 [00:04<00:00,  2.35it/s]


Train Loss: 0.6739, Train Acc: 86.11%
Val Loss: 1.8501, Val Acc: 50.27%

Epoch 80/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:29<00:00,  1.49it/s, loss=0.17, acc=86.2]
Evaluating: 100%|██████████| 11/11 [00:04<00:00,  2.21it/s]


Train Loss: 0.6474, Train Acc: 86.24%
Val Loss: 1.8430, Val Acc: 50.82%

Epoch 81/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:29<00:00,  1.49it/s, loss=0.997, acc=84.8]
Evaluating: 100%|██████████| 11/11 [00:04<00:00,  2.34it/s]


Train Loss: 0.6732, Train Acc: 84.84%
Val Loss: 1.7903, Val Acc: 52.46%

Epoch 82/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:29<00:00,  1.49it/s, loss=0.763, acc=85.4]
Evaluating: 100%|██████████| 11/11 [00:04<00:00,  2.26it/s]


Train Loss: 0.6703, Train Acc: 85.35%
Val Loss: 1.8184, Val Acc: 53.55%
✓ Saved new best model with Val Acc: 53.55%

Epoch 83/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:30<00:00,  1.43it/s, loss=1.18, acc=89.7]
Evaluating: 100%|██████████| 11/11 [00:04<00:00,  2.28it/s]


Train Loss: 0.5584, Train Acc: 89.68%
Val Loss: 1.7497, Val Acc: 53.55%

Epoch 84/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:30<00:00,  1.44it/s, loss=0.683, acc=90.6]
Evaluating: 100%|██████████| 11/11 [00:04<00:00,  2.36it/s]


Train Loss: 0.4948, Train Acc: 90.57%
Val Loss: 1.7738, Val Acc: 55.19%
✓ Saved new best model with Val Acc: 55.19%

Epoch 85/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:30<00:00,  1.44it/s, loss=0.326, acc=89.8]
Evaluating: 100%|██████████| 11/11 [00:05<00:00,  2.18it/s]


Train Loss: 0.5239, Train Acc: 89.81%
Val Loss: 1.7679, Val Acc: 51.37%

Epoch 86/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:30<00:00,  1.47it/s, loss=0.423, acc=91]
Evaluating: 100%|██████████| 11/11 [00:04<00:00,  2.36it/s]


Train Loss: 0.4754, Train Acc: 90.96%
Val Loss: 1.7090, Val Acc: 55.19%

Epoch 87/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:31<00:00,  1.40it/s, loss=0.257, acc=90.4]
Evaluating: 100%|██████████| 11/11 [00:04<00:00,  2.37it/s]


Train Loss: 0.4622, Train Acc: 90.45%
Val Loss: 1.7328, Val Acc: 54.64%

Epoch 88/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:30<00:00,  1.44it/s, loss=0.407, acc=90.4]
Evaluating: 100%|██████████| 11/11 [00:04<00:00,  2.27it/s]


Train Loss: 0.4772, Train Acc: 90.45%
Val Loss: 1.7308, Val Acc: 53.55%

Epoch 89/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:29<00:00,  1.47it/s, loss=0.584, acc=91]
Evaluating: 100%|██████████| 11/11 [00:04<00:00,  2.40it/s]


Train Loss: 0.4783, Train Acc: 90.96%
Val Loss: 1.7608, Val Acc: 55.74%
✓ Saved new best model with Val Acc: 55.74%

Epoch 90/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:29<00:00,  1.49it/s, loss=1.09, acc=89]
Evaluating: 100%|██████████| 11/11 [00:04<00:00,  2.24it/s]


Train Loss: 0.5174, Train Acc: 89.04%
Val Loss: 1.7226, Val Acc: 51.37%

Epoch 91/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:29<00:00,  1.47it/s, loss=0.894, acc=91.8]
Evaluating: 100%|██████████| 11/11 [00:04<00:00,  2.51it/s]


Train Loss: 0.4139, Train Acc: 91.85%
Val Loss: 1.7062, Val Acc: 53.55%

Epoch 92/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:30<00:00,  1.45it/s, loss=0.261, acc=91.7]
Evaluating: 100%|██████████| 11/11 [00:04<00:00,  2.27it/s]


Train Loss: 0.3984, Train Acc: 91.72%
Val Loss: 1.7254, Val Acc: 54.10%

Epoch 93/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:29<00:00,  1.47it/s, loss=0.424, acc=90.2]
Evaluating: 100%|██████████| 11/11 [00:04<00:00,  2.22it/s]


Train Loss: 0.4722, Train Acc: 90.19%
Val Loss: 1.7339, Val Acc: 54.10%

Epoch 94/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:30<00:00,  1.44it/s, loss=0.571, acc=90.2]
Evaluating: 100%|██████████| 11/11 [00:04<00:00,  2.23it/s]


Train Loss: 0.4596, Train Acc: 90.19%
Val Loss: 1.7555, Val Acc: 52.46%

Epoch 95/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:30<00:00,  1.42it/s, loss=0.63, acc=90.6]
Evaluating: 100%|██████████| 11/11 [00:05<00:00,  2.07it/s]


Train Loss: 0.4253, Train Acc: 90.57%
Val Loss: 1.7104, Val Acc: 54.10%

Epoch 96/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:29<00:00,  1.51it/s, loss=0.0747, acc=91.7]
Evaluating: 100%|██████████| 11/11 [00:04<00:00,  2.27it/s]


Train Loss: 0.4077, Train Acc: 91.72%
Val Loss: 1.6896, Val Acc: 55.19%

Epoch 97/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:29<00:00,  1.48it/s, loss=0.412, acc=91.5]
Evaluating: 100%|██████████| 11/11 [00:04<00:00,  2.38it/s]


Train Loss: 0.4046, Train Acc: 91.46%
Val Loss: 1.7616, Val Acc: 54.10%

Epoch 98/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:30<00:00,  1.45it/s, loss=0.244, acc=91.5]
Evaluating: 100%|██████████| 11/11 [00:04<00:00,  2.37it/s]


Train Loss: 0.4290, Train Acc: 91.46%
Val Loss: 1.7854, Val Acc: 53.01%

Epoch 99/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:30<00:00,  1.45it/s, loss=1.04, acc=92.6]
Evaluating: 100%|██████████| 11/11 [00:04<00:00,  2.37it/s]


Train Loss: 0.3816, Train Acc: 92.61%
Val Loss: 1.7215, Val Acc: 55.74%

Epoch 100/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:31<00:00,  1.39it/s, loss=0.211, acc=93.8]
Evaluating: 100%|██████████| 11/11 [00:04<00:00,  2.28it/s]


Train Loss: 0.3316, Train Acc: 93.76%
Val Loss: 1.7602, Val Acc: 53.55%

Epoch 101/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:30<00:00,  1.46it/s, loss=0.711, acc=92.1]
Evaluating: 100%|██████████| 11/11 [00:04<00:00,  2.34it/s]


Train Loss: 0.3643, Train Acc: 92.10%
Val Loss: 1.7617, Val Acc: 55.19%

Epoch 102/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:30<00:00,  1.44it/s, loss=0.802, acc=92.6]
Evaluating: 100%|██████████| 11/11 [00:04<00:00,  2.41it/s]


Train Loss: 0.3320, Train Acc: 92.61%
Val Loss: 1.7925, Val Acc: 50.82%

Epoch 103/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:30<00:00,  1.44it/s, loss=0.506, acc=91.8]
Evaluating: 100%|██████████| 11/11 [00:04<00:00,  2.33it/s]


Train Loss: 0.4037, Train Acc: 91.85%
Val Loss: 1.7164, Val Acc: 54.10%

Epoch 104/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:29<00:00,  1.50it/s, loss=0.137, acc=93.9]
Evaluating: 100%|██████████| 11/11 [00:05<00:00,  2.14it/s]


Train Loss: 0.3089, Train Acc: 93.89%
Val Loss: 1.7336, Val Acc: 53.01%

Epoch 105/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:29<00:00,  1.47it/s, loss=0.946, acc=91.5]
Evaluating: 100%|██████████| 11/11 [00:04<00:00,  2.27it/s]


Train Loss: 0.3900, Train Acc: 91.46%
Val Loss: 1.7368, Val Acc: 51.91%

Epoch 106/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:29<00:00,  1.48it/s, loss=0.183, acc=93]
Evaluating: 100%|██████████| 11/11 [00:05<00:00,  2.19it/s]


Train Loss: 0.3554, Train Acc: 92.99%
Val Loss: 1.7262, Val Acc: 54.10%

Epoch 107/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:30<00:00,  1.45it/s, loss=0.193, acc=94]
Evaluating: 100%|██████████| 11/11 [00:04<00:00,  2.31it/s]


Train Loss: 0.2817, Train Acc: 94.01%
Val Loss: 1.6892, Val Acc: 55.19%

Epoch 108/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:30<00:00,  1.46it/s, loss=0.406, acc=94.4]
Evaluating: 100%|██████████| 11/11 [00:04<00:00,  2.31it/s]


Train Loss: 0.3107, Train Acc: 94.39%
Val Loss: 1.6984, Val Acc: 52.46%

Epoch 109/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:29<00:00,  1.49it/s, loss=0.356, acc=94.5]
Evaluating: 100%|██████████| 11/11 [00:05<00:00,  2.16it/s]


Train Loss: 0.3048, Train Acc: 94.52%
Val Loss: 1.6884, Val Acc: 54.64%

Epoch 110/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:30<00:00,  1.45it/s, loss=0.784, acc=93.2]
Evaluating: 100%|██████████| 11/11 [00:04<00:00,  2.35it/s]


Train Loss: 0.3159, Train Acc: 93.25%
Val Loss: 1.6884, Val Acc: 54.64%

Epoch 111/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:29<00:00,  1.49it/s, loss=0.0953, acc=93.8]
Evaluating: 100%|██████████| 11/11 [00:04<00:00,  2.40it/s]


Train Loss: 0.3032, Train Acc: 93.76%
Val Loss: 1.6841, Val Acc: 53.55%

Epoch 112/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:29<00:00,  1.49it/s, loss=0.336, acc=94]
Evaluating: 100%|██████████| 11/11 [00:05<00:00,  2.19it/s]


Train Loss: 0.2702, Train Acc: 94.01%
Val Loss: 1.6735, Val Acc: 54.10%

Epoch 113/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:29<00:00,  1.48it/s, loss=0.249, acc=93.4]
Evaluating: 100%|██████████| 11/11 [00:04<00:00,  2.21it/s]


Train Loss: 0.3278, Train Acc: 93.38%
Val Loss: 1.6831, Val Acc: 55.74%

Epoch 114/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:30<00:00,  1.45it/s, loss=0.635, acc=94.3]
Evaluating: 100%|██████████| 11/11 [00:04<00:00,  2.23it/s]


Train Loss: 0.2801, Train Acc: 94.27%
Val Loss: 1.6931, Val Acc: 55.19%

Epoch 115/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:29<00:00,  1.50it/s, loss=0.172, acc=93.9]
Evaluating: 100%|██████████| 11/11 [00:05<00:00,  2.11it/s]


Train Loss: 0.2822, Train Acc: 93.89%
Val Loss: 1.6724, Val Acc: 55.74%

Epoch 116/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:29<00:00,  1.51it/s, loss=0.527, acc=93.2]
Evaluating: 100%|██████████| 11/11 [00:04<00:00,  2.35it/s]


Train Loss: 0.3165, Train Acc: 93.25%
Val Loss: 1.6748, Val Acc: 53.55%

Epoch 117/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:30<00:00,  1.44it/s, loss=0.65, acc=94.4]
Evaluating: 100%|██████████| 11/11 [00:04<00:00,  2.25it/s]


Train Loss: 0.2865, Train Acc: 94.39%
Val Loss: 1.6580, Val Acc: 57.38%
✓ Saved new best model with Val Acc: 57.38%

Epoch 118/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:29<00:00,  1.47it/s, loss=0.675, acc=93.8]
Evaluating: 100%|██████████| 11/11 [00:04<00:00,  2.33it/s]


Train Loss: 0.2829, Train Acc: 93.76%
Val Loss: 1.6722, Val Acc: 54.10%

Epoch 119/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:29<00:00,  1.47it/s, loss=0.118, acc=93.5]
Evaluating: 100%|██████████| 11/11 [00:04<00:00,  2.26it/s]


Train Loss: 0.3150, Train Acc: 93.50%
Val Loss: 1.6784, Val Acc: 55.19%

Epoch 120/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:30<00:00,  1.43it/s, loss=0.0634, acc=95.2]
Evaluating: 100%|██████████| 11/11 [00:05<00:00,  2.08it/s]


Train Loss: 0.2414, Train Acc: 95.16%
Val Loss: 1.6678, Val Acc: 54.10%

Epoch 121/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:29<00:00,  1.47it/s, loss=0.454, acc=92.6]
Evaluating: 100%|██████████| 11/11 [00:04<00:00,  2.27it/s]


Train Loss: 0.3226, Train Acc: 92.61%
Val Loss: 1.6910, Val Acc: 54.64%

Epoch 122/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:30<00:00,  1.44it/s, loss=0.291, acc=94.4]
Evaluating: 100%|██████████| 11/11 [00:04<00:00,  2.32it/s]


Train Loss: 0.2494, Train Acc: 94.39%
Val Loss: 1.6588, Val Acc: 56.28%

Epoch 123/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:29<00:00,  1.50it/s, loss=0.304, acc=94.5]
Evaluating: 100%|██████████| 11/11 [00:04<00:00,  2.23it/s]


Train Loss: 0.2734, Train Acc: 94.52%
Val Loss: 1.6505, Val Acc: 55.19%

Epoch 124/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:31<00:00,  1.41it/s, loss=0.243, acc=93.9]
Evaluating: 100%|██████████| 11/11 [00:05<00:00,  2.19it/s]


Train Loss: 0.2836, Train Acc: 93.89%
Val Loss: 1.7059, Val Acc: 54.10%

Epoch 125/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:31<00:00,  1.40it/s, loss=0.0527, acc=94.9]
Evaluating: 100%|██████████| 11/11 [00:04<00:00,  2.23it/s]


Train Loss: 0.2574, Train Acc: 94.90%
Val Loss: 1.6534, Val Acc: 54.10%

Epoch 126/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:30<00:00,  1.42it/s, loss=0.197, acc=93.9]
Evaluating: 100%|██████████| 11/11 [00:05<00:00,  2.03it/s]


Train Loss: 0.2825, Train Acc: 93.89%
Val Loss: 1.6166, Val Acc: 55.19%

Epoch 127/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:30<00:00,  1.46it/s, loss=0.0642, acc=94.1]
Evaluating: 100%|██████████| 11/11 [00:04<00:00,  2.36it/s]


Train Loss: 0.2999, Train Acc: 94.14%
Val Loss: 1.6903, Val Acc: 54.64%

Epoch 128/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:29<00:00,  1.48it/s, loss=0.175, acc=93.9]
Evaluating: 100%|██████████| 11/11 [00:05<00:00,  2.02it/s]


Train Loss: 0.2777, Train Acc: 93.89%
Val Loss: 1.6649, Val Acc: 55.74%

Epoch 129/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:30<00:00,  1.45it/s, loss=0.443, acc=93.8]
Evaluating: 100%|██████████| 11/11 [00:04<00:00,  2.28it/s]


Train Loss: 0.2831, Train Acc: 93.76%
Val Loss: 1.6473, Val Acc: 56.28%

Epoch 130/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:30<00:00,  1.46it/s, loss=0.158, acc=95.7]
Evaluating: 100%|██████████| 11/11 [00:04<00:00,  2.33it/s]


Train Loss: 0.2290, Train Acc: 95.67%
Val Loss: 1.6302, Val Acc: 55.74%

Epoch 131/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:30<00:00,  1.45it/s, loss=0.224, acc=96.4]
Evaluating: 100%|██████████| 11/11 [00:04<00:00,  2.29it/s]


Train Loss: 0.2159, Train Acc: 96.43%
Val Loss: 1.6559, Val Acc: 57.38%

Epoch 132/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:30<00:00,  1.46it/s, loss=0.265, acc=94.1]
Evaluating: 100%|██████████| 11/11 [00:04<00:00,  2.29it/s]


Train Loss: 0.2903, Train Acc: 94.14%
Val Loss: 1.6621, Val Acc: 58.47%
✓ Saved new best model with Val Acc: 58.47%

Epoch 133/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:30<00:00,  1.44it/s, loss=0.195, acc=94.5]
Evaluating: 100%|██████████| 11/11 [00:05<00:00,  2.19it/s]


Train Loss: 0.2458, Train Acc: 94.52%
Val Loss: 1.6695, Val Acc: 54.64%

Epoch 134/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:29<00:00,  1.48it/s, loss=0.0653, acc=94.5]
Evaluating: 100%|██████████| 11/11 [00:04<00:00,  2.36it/s]


Train Loss: 0.2787, Train Acc: 94.52%
Val Loss: 1.6354, Val Acc: 57.38%

Epoch 135/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:30<00:00,  1.43it/s, loss=0.467, acc=94.1]
Evaluating: 100%|██████████| 11/11 [00:04<00:00,  2.25it/s]


Train Loss: 0.2834, Train Acc: 94.14%
Val Loss: 1.6566, Val Acc: 57.38%

Epoch 136/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:30<00:00,  1.43it/s, loss=0.0918, acc=95.5]
Evaluating: 100%|██████████| 11/11 [00:04<00:00,  2.29it/s]


Train Loss: 0.2315, Train Acc: 95.54%
Val Loss: 1.6327, Val Acc: 55.74%

Epoch 137/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:29<00:00,  1.48it/s, loss=0.153, acc=94.6]
Evaluating: 100%|██████████| 11/11 [00:05<00:00,  2.19it/s]


Train Loss: 0.2749, Train Acc: 94.65%
Val Loss: 1.6379, Val Acc: 56.83%

Epoch 138/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:31<00:00,  1.39it/s, loss=0.134, acc=95.8]
Evaluating: 100%|██████████| 11/11 [00:04<00:00,  2.26it/s]


Train Loss: 0.2026, Train Acc: 95.80%
Val Loss: 1.6463, Val Acc: 57.38%

Epoch 139/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:30<00:00,  1.42it/s, loss=0.14, acc=95.3]
Evaluating: 100%|██████████| 11/11 [00:04<00:00,  2.23it/s]


Train Loss: 0.2155, Train Acc: 95.29%
Val Loss: 1.6353, Val Acc: 54.64%

Epoch 140/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:28<00:00,  1.52it/s, loss=0.36, acc=94.5]
Evaluating: 100%|██████████| 11/11 [00:04<00:00,  2.20it/s]


Train Loss: 0.2683, Train Acc: 94.52%
Val Loss: 1.6567, Val Acc: 56.28%

Epoch 141/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:30<00:00,  1.42it/s, loss=0.884, acc=94.4]
Evaluating: 100%|██████████| 11/11 [00:05<00:00,  2.13it/s]


Train Loss: 0.2595, Train Acc: 94.39%
Val Loss: 1.6358, Val Acc: 56.28%

Epoch 142/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:29<00:00,  1.49it/s, loss=0.473, acc=94.8]
Evaluating: 100%|██████████| 11/11 [00:05<00:00,  2.17it/s]


Train Loss: 0.2567, Train Acc: 94.78%
Val Loss: 1.6560, Val Acc: 56.83%

Epoch 143/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:29<00:00,  1.47it/s, loss=0.0554, acc=95.3]
Evaluating: 100%|██████████| 11/11 [00:04<00:00,  2.44it/s]


Train Loss: 0.2376, Train Acc: 95.29%
Val Loss: 1.6515, Val Acc: 57.38%

Epoch 144/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:31<00:00,  1.41it/s, loss=0.426, acc=93.8]
Evaluating: 100%|██████████| 11/11 [00:04<00:00,  2.20it/s]


Train Loss: 0.2848, Train Acc: 93.76%
Val Loss: 1.6648, Val Acc: 56.28%

Epoch 145/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:29<00:00,  1.50it/s, loss=0.147, acc=94.4]
Evaluating: 100%|██████████| 11/11 [00:04<00:00,  2.45it/s]


Train Loss: 0.2762, Train Acc: 94.39%
Val Loss: 1.6509, Val Acc: 57.38%

Epoch 146/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:29<00:00,  1.48it/s, loss=0.476, acc=94.1]
Evaluating: 100%|██████████| 11/11 [00:05<00:00,  2.05it/s]


Train Loss: 0.2670, Train Acc: 94.14%
Val Loss: 1.6548, Val Acc: 57.92%

Epoch 147/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:29<00:00,  1.50it/s, loss=0.631, acc=95.8]
Evaluating: 100%|██████████| 11/11 [00:04<00:00,  2.49it/s]


Train Loss: 0.2089, Train Acc: 95.80%
Val Loss: 1.6590, Val Acc: 57.38%

Epoch 148/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:30<00:00,  1.45it/s, loss=0.868, acc=93.6]
Evaluating: 100%|██████████| 11/11 [00:05<00:00,  2.14it/s]


Train Loss: 0.2904, Train Acc: 93.63%
Val Loss: 1.6593, Val Acc: 56.83%

Epoch 149/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:30<00:00,  1.44it/s, loss=0.0798, acc=95.8]
Evaluating: 100%|██████████| 11/11 [00:04<00:00,  2.21it/s]


Train Loss: 0.2078, Train Acc: 95.80%
Val Loss: 1.6359, Val Acc: 54.10%

Epoch 150/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:29<00:00,  1.50it/s, loss=0.0907, acc=95]
Evaluating: 100%|██████████| 11/11 [00:04<00:00,  2.24it/s]


Train Loss: 0.2468, Train Acc: 95.03%
Val Loss: 1.6560, Val Acc: 56.83%

Epoch 151/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:30<00:00,  1.42it/s, loss=0.1, acc=94.5]
Evaluating: 100%|██████████| 11/11 [00:04<00:00,  2.24it/s]


Train Loss: 0.2659, Train Acc: 94.52%
Val Loss: 1.6476, Val Acc: 56.28%

Epoch 152/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:29<00:00,  1.48it/s, loss=0.239, acc=93.2]
Evaluating: 100%|██████████| 11/11 [00:04<00:00,  2.27it/s]


Train Loss: 0.2967, Train Acc: 93.25%
Val Loss: 1.6473, Val Acc: 55.74%

Epoch 153/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:29<00:00,  1.49it/s, loss=0.665, acc=95.5]
Evaluating: 100%|██████████| 11/11 [00:05<00:00,  2.15it/s]


Train Loss: 0.2110, Train Acc: 95.54%
Val Loss: 1.6519, Val Acc: 56.28%

Epoch 154/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:30<00:00,  1.44it/s, loss=0.164, acc=95.3]
Evaluating: 100%|██████████| 11/11 [00:05<00:00,  2.05it/s]


Train Loss: 0.2351, Train Acc: 95.29%
Val Loss: 1.6607, Val Acc: 56.83%

Epoch 155/200
----------------------------------------


Training: 100%|██████████| 44/44 [00:29<00:00,  1.51it/s, loss=0.145, acc=95]
Evaluating: 100%|██████████| 11/11 [00:04<00:00,  2.35it/s]


Train Loss: 0.2415, Train Acc: 95.03%
Val Loss: 1.6646, Val Acc: 57.38%

Epoch 156/200
----------------------------------------


Training:  66%|██████▌   | 29/44 [00:21<00:08,  1.73it/s, loss=0.17, acc=92.1]

In [16]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## 9. Evaluation and Visualization


In [None]:
import matplotlib.pyplot as plt

# Plot training history
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# Loss plot
axes[0].plot(history['train_loss'], label='Train Loss', marker='o')
axes[0].plot(history['val_loss'], label='Val Loss', marker='s')
axes[0].set_xlabel('Epoch')
axes[0].set_ylabel('Loss')
axes[0].set_title('Training and Validation Loss')
axes[0].legend()
axes[0].grid(True)

# Accuracy plot
axes[1].plot(history['train_acc'], label='Train Acc', marker='o')
axes[1].plot(history['val_acc'], label='Val Acc', marker='s')
axes[1].set_xlabel('Epoch')
axes[1].set_ylabel('Accuracy (%)')
axes[1].set_title('Training and Validation Accuracy')
axes[1].legend()
axes[1].grid(True)

plt.tight_layout()
plt.savefig('training_history.png', dpi=150)
plt.show()


In [97]:
# Load best model and evaluate on test set
checkpoint = torch.load('/content/drive/MyDrive/Intro_2_DL/2d_cnn_lstm_wlasl10.pth')
model.load_state_dict(checkpoint['model_state_dict'])

test_loss, test_acc = evaluate(model, test_loader, criterion, device)
print(f"\nTest Results:")
print(f"Test Loss: {test_loss:.4f}")
print(f"Test Accuracy: {test_acc:.2f}%")

print(test_loader.dataset.label_map)


  with autocast():
Evaluating: 100%|██████████| 2/2 [00:01<00:00,  1.06it/s]


Test Results:
Test Loss: 1.1794
Test Accuracy: 77.14%
{'book': 0, 'drink': 1, 'computer': 2, 'before': 3, 'chair': 4, 'go': 5, 'clothes': 6, 'who': 7, 'candy': 8, 'cousin': 9}





In [None]:
from IPython.display import Video

Video("/content/test.mp4", embed=True)

## 10. Attention Visualization

Visualize which frames the model attends to most when making predictions.


In [None]:
import matplotlib.pyplot as plt


def visualize_attention(model, frames, true_label, label_map, device):
    """Visualize attention weights over video frames."""
    model.eval()

    # Get reverse label map
    idx_to_label = {v: k for k, v in label_map.items()}

    with torch.no_grad():
        # Add batch dimension
        frames_batch = frames.unsqueeze(0).to(device)

        # Get predictions and attention weights
        logits, attention_weights = model(frames_batch, return_attention=True)
        pred_label = torch.argmax(logits, dim=1).item()
        attention = attention_weights[0].cpu().numpy()

    # Create visualization
    num_frames = frames.shape[0]
    fig, axes = plt.subplots(2, num_frames, figsize=(2 * num_frames, 6))

    # Denormalize frames for visualization
    mean = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1)
    std = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1)

    for i in range(num_frames):
        frame = frames[i].cpu()
        frame = frame * std + mean
        frame = frame.clamp(0, 1).permute(1, 2, 0).numpy()

        # Frame image
        axes[0, i].imshow(frame)
        axes[0, i].set_title(f"Frame {i+1}")
        axes[0, i].axis('off')

        # Attention weight bar
        axes[1, i].bar([0], [attention[i]], color='blue', alpha=0.7)
        axes[1, i].set_ylim(0, max(attention) * 1.2)
        axes[1, i].set_title(f"{attention[i]:.3f}")
        axes[1, i].axis('off')

    plt.suptitle(
        f"True: {idx_to_label.get(true_label, true_label)} | "
        f"Predicted: {idx_to_label.get(pred_label, pred_label)}",
        fontsize=14
    )
    plt.tight_layout()
    plt.show()

    return pred_label, attention


In [None]:
# Visualize attention for a sample from the test set
sample_idx = 0
frames, label = test_dataset[sample_idx]
pred, attn = visualize_attention(model, frames, label, train_dataset.label_map, device)


## 11. Inference Function


In [123]:
def predict_video(model, video_path, transform, num_frames, label_map, device):
    """Predict the sign language class for a video file."""
    model.eval()

    # Get reverse label map
    idx_to_label = {v: k for k, v in label_map.items()}

    # Decode video
    decoder = VideoDecoder(video_path)
    frames = transform(decoder[:])

    # Predict
    with torch.no_grad():
        frames_batch = frames.unsqueeze(0).to(device)
        logits, attention = model(frames_batch, return_attention=True)
        probabilities = F.softmax(logits, dim=1)
        pred_idx = torch.argmax(logits, dim=1).item()
        confidence = probabilities[0, pred_idx].item()
        # print(probabilities[0])
        confidence_map = {k:probabilities[0][v] for k, v in label_map.items()}

    predicted_label = idx_to_label.get(pred_idx, f"Unknown ({pred_idx})")

    return {
        'prediction': predicted_label,
        'confidence': confidence,
        'attention_weights': attention[0].cpu().numpy(),
        'all_probabilities': probabilities[0].cpu().numpy(),
        'confidence_map': confidence_map
    }


In [126]:
# Example inference (uncomment and modify path to use)
result = predict_video(
    model=model,
    video_path="/content/test3.mp4",
    transform=val_transform,
    num_frames=NUM_FRAMES,
    label_map=train_dataset.label_map,
    device=device
)
result

{'prediction': 'clothes',
 'confidence': 0.38274869322776794,
 'attention_weights': array([0.0020458 , 0.00242777, 0.00301321, 0.00372175, 0.00546827,
        0.00813784, 0.01599127, 0.03130766, 0.05722787, 0.09245452,
        0.13012624, 0.17093936, 0.16195637, 0.13383292, 0.08268953,
        0.04849194, 0.02445068, 0.01156051, 0.00566186, 0.00293349,
        0.00176526, 0.00131966, 0.00119694, 0.00127926], dtype=float32),
 'all_probabilities': array([0.1414117 , 0.00289593, 0.09375774, 0.09477092, 0.02518048,
        0.04935614, 0.3827487 , 0.05513893, 0.14449586, 0.01024358],
       dtype=float32),
 'confidence_map': {'book': tensor(0.1414, device='cuda:0'),
  'drink': tensor(0.0029, device='cuda:0'),
  'computer': tensor(0.0938, device='cuda:0'),
  'before': tensor(0.0948, device='cuda:0'),
  'chair': tensor(0.0252, device='cuda:0'),
  'go': tensor(0.0494, device='cuda:0'),
  'clothes': tensor(0.3827, device='cuda:0'),
  'who': tensor(0.0551, device='cuda:0'),
  'candy': tensor(0.1

## 12. Save Final Model


In [None]:
# Save complete model for deployment
torch.save({
    'model_state_dict': model.state_dict(),
    'label_map': train_dataset.label_map,
    'config': {
        'num_classes': NUM_CLASSES,
        'feature_dim': FEATURE_DIM,
        'hidden_dim': HIDDEN_DIM,
        'num_lstm_layers': NUM_LSTM_LAYERS,
        'num_frames': NUM_FRAMES,
        'dropout': DROPOUT
    }
}, 'sign2text_model_final.pth')

print("Model saved to sign2text_model_final.pth")


---
**Note:** The cells above contain the complete implementation. Make sure to run them in order from top to bottom.


In [None]:
# PositionalEncoding class is defined below cell 7 - this cell can be ignored
# The model requires running cells in sequential order
