In [1]:
import math

import torch
import torch.nn as nn
import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler

from video_datasets import HuggingFaceSSV2Dataset
# from models.r2plus1d import R2Plus1DClassifier
from models.r2plus1d_attn import R2Plus1DClassifier

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import tqdm
class Trainer:
    """
    Trainer for video classification models with tqdm progress bars.

    Wraps a model together with a cross-entropy loss, an Adam optimizer and a
    ``ReduceLROnPlateau`` scheduler that is stepped on the validation loss.

    Args:
        model: ``nn.Module`` mapping a batch of videos to per-class logits
            (class axis is dim 1, as required by the ``argmax(dim=1)`` below).
        train_loader: iterable yielding ``(videos, labels)`` batches.
        val_loader: iterable yielding ``(videos, labels)`` batches.
        device: device (or device string) the model and batches are moved to.
        lr: Adam learning rate. Defaults to the previously hard-coded ``1e-4``.
        patience: ``ReduceLROnPlateau`` patience. Defaults to the previously
            hard-coded ``3``.

    Attributes:
        history: list with one dict per completed epoch of ``fit`` holding
            ``epoch``, ``train_loss``, ``train_acc``, ``val_loss``, ``val_acc``.
    """
    def __init__(self, model, train_loader, val_loader, device, lr=1e-4, patience=3):
        self.model = model.to(device)
        self.train_loader = train_loader
        self.val_loader = val_loader
        self.device = device
        self.criterion = nn.CrossEntropyLoss()
        self.optimizer = optim.Adam(self.model.parameters(), lr=lr)
        self.scheduler = lr_scheduler.ReduceLROnPlateau(self.optimizer, mode='min', patience=patience)
        self.history = []

    def _run_epoch(self, loader, desc, train):
        """Run one pass over ``loader``; update the weights only when ``train`` is true.

        Returns:
            ``(avg_loss, accuracy)`` weighted by batch size, or ``(nan, nan)``
            when ``loader`` yields no samples (the metrics are undefined then).
        """
        # train(True) == train(), train(False) == eval()
        self.model.train(train)
        total_loss, correct, total = 0.0, 0, 0
        pbar = tqdm.tqdm(loader, desc=desc, unit="batch", leave=False)
        # Gradients are tracked only for training passes.
        with torch.set_grad_enabled(train):
            for videos, labels in pbar:
                videos = videos.to(self.device)
                labels = labels.to(self.device)
                if train:
                    self.optimizer.zero_grad()
                logits = self.model(videos)
                loss = self.criterion(logits, labels)
                if train:
                    loss.backward()
                    self.optimizer.step()

                # The criterion returns a batch mean; re-weight by batch size so
                # a smaller final batch does not skew the epoch average.
                batch_size = videos.size(0)
                total_loss += loss.item() * batch_size
                preds = logits.argmax(dim=1)
                correct += (preds == labels).sum().item()
                total += batch_size

                pbar.set_postfix(loss=f"{total_loss / total:.4f}", acc=f"{correct / total:.4f}")

        if total == 0:
            # Empty loader: report "undefined" instead of dividing by zero.
            return float("nan"), float("nan")
        return total_loss / total, correct / total

    def train_epoch(self, epoch):
        """Train for one epoch over ``train_loader``; returns ``(avg_loss, accuracy)``."""
        return self._run_epoch(self.train_loader, f"Epoch {epoch} [Train]", train=True)

    def eval_epoch(self, epoch):
        """Evaluate over ``val_loader`` without gradients; returns ``(avg_loss, accuracy)``."""
        return self._run_epoch(self.val_loader, f"Epoch {epoch} [Val]  ", train=False)

    def fit(self, epochs):
        """Run ``epochs`` train/validate rounds, printing and recording the metrics.

        Epoch numbering always starts at 1, also when ``fit`` is called again on
        the same instance; model, optimizer and scheduler state carry over.
        """
        for epoch in range(1, epochs + 1):
            train_loss, train_acc = self.train_epoch(epoch)
            val_loss, val_acc = self.eval_epoch(epoch)
            # NaN means there were no validation samples; skipping the step keeps
            # the plateau scheduler from counting that as a non-improving epoch.
            if not math.isnan(val_loss):
                self.scheduler.step(val_loss)
            self.history.append({
                "epoch": epoch,
                "train_loss": train_loss,
                "train_acc": train_acc,
                "val_loss": val_loss,
                "val_acc": val_acc,
            })
            print(f"Epoch {epoch}:")
            print(f"  Train loss: {train_loss:.4f}, acc: {train_acc:.4f}")
            print(f"  Val   loss: {val_loss:.4f}, acc: {val_acc:.4f}")

In [3]:
# Run on the CUDA device when one is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Classifier with 174 output classes, requesting pretrained weights.
# NOTE(review): 174 is a literal here; a later cell derives `num_cls` from the
# dataset but never feeds it back -- confirm the two agree.
# NOTE(review): the load log printed by this cell reports some keys as
# "not expected", so the pretrained weights may be only partially applied.
model = R2Plus1DClassifier(pretrained=True, num_classes=174)



stem.0.weight is matching!
stem.1.weight is matching!
stem.1.bias is matching!
stem.1.running_mean is matching!
stem.1.running_var is matching!
stem.1.num_batches_tracked is matching!
stem.3.weight is matching!
stem.4.weight is matching!
stem.4.bias is matching!
stem.4.running_mean is matching!
stem.4.running_var is matching!
stem.4.num_batches_tracked is matching!
layer1.0.conv1.0.0.weight is matching!
layer1.0.conv1.0.1.weight is matching!
layer1.0.conv1.0.1.bias is matching!
layer1.0.conv1.0.1.running_mean is matching!
layer1.0.conv1.0.1.running_var is matching!
layer1.0.conv1.0.1.num_batches_tracked is matching!
layer1.0.conv1.0.3.weight is matching!
layer1.0.conv1.1.weight is matching!
layer1.0.conv1.1.bias is matching!
layer1.0.conv1.1.running_mean is matching!
layer1.0.conv1.1.running_var is matching!
layer1.0.conv1.1.num_batches_tracked is matching!
layer1.0.conv2.0.0.weight is not expected
layer1.0.conv2.0.1.weight is not expected
layer1.0.conv2.0.1.bias is not expected
layer1

In [4]:
# NOTE(review): `Compose` is not used in any cell visible here -- either wire a
# transform pipeline into the datasets or drop this import.
from torchvision.transforms import Compose

# Relative path to the local Something-Something-V2 data directory.
data_root = "./data/something-something-v2"
# No split argument: the cell output shows this loads the "train" dataset files.
train_set = HuggingFaceSSV2Dataset(data_root)
# Same root, explicitly requesting the validation split.
val_set = HuggingFaceSSV2Dataset(data_root, data_split='validation')

# Number of entries in the training set's `idx2templates` mapping.
# NOTE(review): presumably the class count; the model in the earlier cell was
# built with the literal 174 instead of this value -- confirm they match.
num_cls = len(train_set.idx2templates)

loading train dataset files, it may take a while...
train dataset loaded
loading validation dataset files, it may take a while...
validation dataset loaded


In [5]:
from torch.utils.data import DataLoader

# Settings shared by both loaders; only the shuffling differs between them.
_loader_opts = {"batch_size": 16, "num_workers": 4}

# Training batches are reshuffled; validation batches keep dataset order.
train_loader = DataLoader(train_set, shuffle=True, **_loader_opts)
val_loader = DataLoader(val_set, shuffle=False, **_loader_opts)

# Bundle model, loaders and device, then train for ten epochs.
trainer = Trainer(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    device=device,
)
trainer.fit(epochs=10)

                                                                                                  

KeyboardInterrupt: 

In [6]:
# Second call on the same `trainer`: the previous cell's run ended in a
# KeyboardInterrupt, so the model/optimizer state reached there carries over,
# while the epoch counter printed by `fit` starts again at 1.
trainer.fit(epochs=10)

                                                                                                    

Epoch 1:
  Train loss: 1.3462, acc: 0.6181
  Val   loss: 4.7826, acc: 0.1700


                                                                                                    

Epoch 2:
  Train loss: 0.6608, acc: 0.8137
  Val   loss: 4.9276, acc: 0.1736


                                                                                                    

Epoch 3:
  Train loss: 0.4568, acc: 0.8751
  Val   loss: 5.0893, acc: 0.1728


                                                                                                   

KeyboardInterrupt: 