# Training a new model pipeline example

This notebook shows how to train a new model from an HDF5 file (read with `h5py`) that contains all the preprocessed video tensors, together with a labels folder that contains the generated label files.

## Defining loading and training functions

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader,Dataset
import numpy as np
from tqdm import tqdm
import json
import os
import h5py


class H5VideoDataset(Dataset):
    """Dataset of preprocessed video clips stored in the ``"data"`` group of an HDF5 file.

    Only keys of ``labels_dict`` that exist in the ``"data"`` group and whose
    stored shape is exactly ``(num_frames, *frame_size, 3)`` are kept; all other
    entries are skipped, and a summary count is printed.

    Args:
        h5_path: Path to the HDF5 file.
        labels_dict: Mapping from clip key to integer class index.
        transform: Optional callable applied to the normalized float32 array
            (channel axis first). It may return a NumPy array or a tensor.
        num_frames: Required size of the first axis of every stored clip.
        frame_size: Required sizes of the second and third axes of every
            stored clip (presumably the spatial dimensions), in stored order.
    """

    # Per-channel statistics, shaped to broadcast over a channel-first clip.
    # The original code describes them as what the pre-trained models expect.
    _MEAN = np.array([0.45, 0.45, 0.45], dtype=np.float32).reshape(3, 1, 1, 1)
    _STD = np.array([0.225, 0.225, 0.225], dtype=np.float32).reshape(3, 1, 1, 1)

    def __init__(self, h5_path, labels_dict, transform=None, num_frames=13,
                 frame_size=(252, 252)):
        self.h5_path = h5_path
        self.labels_dict = {}
        self.keys = []
        self.transform = transform
        self.num_frames = num_frames
        self.frame_size = tuple(frame_size)

        expected_shape = (num_frames, *self.frame_size, 3)
        # Keep only the clips that exist in the file and have the expected shape
        with h5py.File(self.h5_path, "r") as f:
            group = f["data"]
            for key, label in labels_dict.items():
                if key in group and group[key].shape == expected_shape:
                    self.keys.append(key)
                    self.labels_dict[key] = label

        print(f"{len(self.keys)} vídeos válidos encontrados de {len(labels_dict)}")

    def __len__(self):
        """Return the number of valid clips."""
        return len(self.keys)

    def __getitem__(self, idx):
        """Return ``(video, label)`` for the clip at position ``idx``.

        ``video`` is a float32 tensor with the channel axis first and
        ``label`` is a 0-dim int64 tensor.
        """
        key = self.keys[idx]
        # The file is reopened on every access, so no open handle is ever
        # stored on the dataset object.
        with h5py.File(self.h5_path, "r") as f:
            data = np.array(f["data"][key], copy=True, dtype=np.float32)

        # Move the last (channel) axis first, scale to [0, 1], then normalize
        video = np.transpose(data, (3, 0, 1, 2)) / 255.0
        video = (video - self._MEAN) / self._STD

        # `is not None` so that a callable which happens to be falsy
        # (e.g. an empty container of transforms) is still applied
        if self.transform is not None:
            video = self.transform(video)

        label = self.labels_dict[key]

        # as_tensor accepts both arrays and tensors, so a transform may return either
        return torch.as_tensor(video, dtype=torch.float32), torch.tensor(label, dtype=torch.long)
def train_one_epoch(model, loader, criterion, optimizer, device):
    """Train ``model`` for one pass over ``loader`` and return the mean per-sample loss.

    Each batch loss is weighted by the batch size, which assumes ``criterion``
    returns the batch mean. The total is divided by the number of samples
    actually processed, so the result stays correct when the loader does not
    visit the whole dataset (e.g. ``drop_last=True`` or a subset sampler).

    Args:
        model: Module to optimize; put into training mode.
        loader: Iterable yielding ``(videos, labels)`` batches.
        criterion: Loss callable taking ``(outputs, labels)``.
        optimizer: Optimizer stepping the parameters of ``model``.
        device: Device the batches are moved to.

    Returns:
        Mean loss over the processed samples, or NaN if ``loader`` yielded nothing.
    """
    model.train()
    running_loss = 0.0
    samples_seen = 0
    for videos, labels in tqdm(loader, desc="Treinando", leave=False):
        videos, labels = videos.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(videos)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        batch_size = videos.size(0)
        running_loss += loss.item() * batch_size
        samples_seen += batch_size

    if samples_seen == 0:
        # Nothing was processed: the mean loss is undefined
        return float("nan")
    return running_loss / samples_seen

def eval(model, loader, device, num_classes):
    """Evaluate ``model`` on ``loader`` and return per-class and overall accuracy.

    Note: this function shadows the builtin ``eval`` in the notebook namespace;
    the name is kept because other cells call it.

    Args:
        model: Module to evaluate; put into evaluation mode.
        loader: Iterable yielding ``(videos, labels)`` batches.
        device: Device the batches are moved to.
        num_classes: Number of classes; labels outside ``[0, num_classes)``
            are not counted.

    Returns:
        Tuple ``(mean_acc, acc_per_class, precision)``:
          * ``mean_acc`` - mean of the per-class accuracies over the classes
            that have at least one sample (0.0 if there are none).
          * ``acc_per_class`` - float64 array of length ``num_classes``;
            classes without samples get 0.0.
          * ``precision`` - despite its name, the overall accuracy: correctly
            classified samples divided by counted samples (0.0 if none).
    """
    model.eval()
    # Accumulated on `device` so the loop does not sync with the host per class
    correct_counts = torch.zeros(num_classes, dtype=torch.long, device=device)
    total_counts = torch.zeros(num_classes, dtype=torch.long, device=device)

    with torch.no_grad():
        for videos, labels in tqdm(loader, desc="Validando", leave=False):
            videos, labels = videos.to(device), labels.to(device)
            outputs = model(videos)
            _, predicted = torch.max(outputs, 1)

            # bincount needs non-negative values and would grow past
            # num_classes, so drop out-of-range labels first
            in_range = (labels >= 0) & (labels < num_classes)
            labels = labels[in_range]
            predicted = predicted[in_range]

            total_counts += torch.bincount(labels, minlength=num_classes)
            correct_counts += torch.bincount(labels[predicted == labels], minlength=num_classes)

    correct_per_class = correct_counts.cpu().numpy().astype(np.float64)
    total_per_class = total_counts.cpu().numpy().astype(np.float64)

    has_samples = total_per_class != 0
    acc_per_class = np.divide(
        correct_per_class,
        total_per_class,
        out=np.zeros_like(correct_per_class),
        where=has_samples,
    )

    total_samples = total_per_class.sum()
    if total_samples > 0:
        precision = correct_per_class.sum() / total_samples
    else:
        precision = np.float64(0.0)

    # Average only over classes that occur, so absent classes do not pull the mean down
    if has_samples.any():
        mean_acc = acc_per_class[has_samples].mean()
    else:
        mean_acc = np.float64(0.0)
    return mean_acc, acc_per_class, precision

## Loading Dataset and pre-trained model

In [None]:
# Folder containing train_labels.json, val_labels.json and the class-index mappings
labels_path = "../Common/Labels/Exp_1"

# HDF5 file with the preprocessed tensors
h5_path = "../Common/Tensors/tensors_strategy_1.h5"

# Folder where training checkpoints are saved
output_path = "Checkpoints/exp1/"

os.makedirs(output_path, exist_ok=True)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# === Reproducibility ===
# Seed before creating the loaders and the new head so shuffling and
# weight initialization are repeatable across runs
seed = 42
torch.manual_seed(seed)
np.random.seed(seed)

# === Hyperparams ===
num_epochs = 20
lr = 1e-4
patience = 2
batch_size = 8 # Adjust based on your system capabilities
num_workers = 4 # Adjust based on your system capabilities


def _load_json(filename):
    """Read and parse one JSON file located in ``labels_path``."""
    with open(os.path.join(labels_path, filename), "r") as f:
        return json.load(f)


# Load labels
train_labels = _load_json("train_labels.json")
val_labels = _load_json("val_labels.json")
event_to_idx = _load_json("event_to_idx.json")
idx_to_event = _load_json("idx_to_event.json")

num_classes = len(set(train_labels.values()))

# Counting distinct labels is only valid if they are exactly 0..num_classes-1.
# Fail early with a clear message instead of an out-of-bounds error in the loss.
if set(train_labels.values()) != set(range(num_classes)):
    raise ValueError(
        f"Training labels must be the contiguous indices 0..{num_classes - 1}, "
        f"got {sorted(set(train_labels.values()), key=repr)}"
    )

unknown_val_labels = set(val_labels.values()) - set(range(num_classes))
if unknown_val_labels:
    print(
        "Warning: validation labels absent from the training set: "
        f"{sorted(unknown_val_labels, key=repr)}"
    )

# === Datasets and Loaders ===
train_dataset = H5VideoDataset(h5_path, train_labels)
val_dataset = H5VideoDataset(h5_path, val_labels)


train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)

# === Load pre-trained X3D-S and replace its final projection layer ===
# Only model.blocks[5].proj is swapped for a freshly initialized linear layer
# with num_classes outputs; every other layer keeps its pre-trained weights.
model = torch.hub.load('facebookresearch/pytorchvideo', 'x3d_s', pretrained=True)
in_features = model.blocks[5].proj.in_features
model.blocks[5].proj = nn.Linear(in_features, num_classes)
model = model.to(device)

# === Loss and Optimizer ===
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=lr)

Using device: cuda
55140 v√≠deos v√°lidos encontrados de 55141
18961 v√≠deos v√°lidos encontrados de 19053


Using cache found in /home/gabriel/.cache/torch/hub/facebookresearch_pytorchvideo_main


## Training Model

In [None]:
# Best validation mean accuracy so far and the (1-based) epoch that reached it
best_acc = 0.0
best_epoch = 0
# Consecutive epochs without improvement, used for early stopping
epochs_no_improve = 0

for epoch in range(1, num_epochs + 1):

    print(f"\n===== Epoch {epoch}/{num_epochs} =====")

    train_loss = train_one_epoch(model, train_loader, criterion, optimizer, device)
    mean_acc, acc_per_class, precision = eval(model, val_loader, device, num_classes)

    print(f"📉 train loss: {train_loss:.4f}")
    print(f"🎯 Validation mean accuracy: {mean_acc:.4f}")
    print(f"General Precision: {precision:.4f}")
    for i, acc in enumerate(acc_per_class):
        print(f"  Class {idx_to_event[str(i)]}: {acc:.4f}")

    # === Save checkpoint ===
    # `epoch` is already 1-based, so the file name matches both the log line
    # above and the 'epoch' value stored inside the checkpoint.
    checkpoint_path = os.path.join(output_path, f"checkpoint_epoch{epoch}.pth")
    torch.save({
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        # Plain Python floats instead of NumPy scalars, so the file can be
        # read back with torch.load(..., weights_only=True)
        'mean_acc': float(mean_acc),
        'acc_per_class': acc_per_class.tolist(),
        'train_loss': train_loss,
        'precision': float(precision),
    }, checkpoint_path)
    print(f"💾 Checkpoint saved on: {checkpoint_path}")

    # === Early Stopping ===
    if mean_acc > best_acc:
        best_acc = mean_acc
        epochs_no_improve = 0
        best_epoch = epoch
    else:
        epochs_no_improve += 1
        if epochs_no_improve >= patience:
            print(f"⏹️ Early stopping: accuracy did not improve in {patience} epochs.")
            break

print(f"\n🏁 Training step ended. Best mean accuracy: {best_acc:.4f}. Best epoch: {best_epoch}")
