# Test Model

In [1]:
import os
from pathlib import Path
import sys
sys.path.append(str(Path(os.getcwd()).parent))

from settings.global_settings import GlobalSettings

config = GlobalSettings.get_config(
    config_file = "../config.ini",
    secrets_file = "../secrets.ini"
)
from dataset.video_loader import VideoDataLoader
from dataset.video_dataset import VideoDataset, default_augmentation_pipeline
from model.training_loop import train, EarlyStoppingParams
from model.multimodal_har_model import MultiModalHARModel

Loading config...
Loading secrets...


2025-11-24 13:40:37,452 - INFO - Sentry DSN set to: https://f4f21cc936b3ba9f5dbc1464b7a40ea4@o4504168838070272.ingest.us.sentry.io/4506464560414720
2025-11-24 13:40:37,453 - INFO - Sentry initialized with environment: development


## Initializing Training

**Creating Dataloaders**

In [2]:
TRAIN_DIR = os.path.join(
    config.model_settings.video_data_dir,
    "train"
)
TEST_DIR = os.path.join(
    config.model_settings.video_data_dir,
    "validation"
)
VALIDATION_DIR = os.path.join(
    config.model_settings.video_data_dir,
    "test"
)

train_video_data_loader = VideoDataLoader(
    path=TRAIN_DIR,
)
test_video_data_loader = VideoDataLoader(
    path=TEST_DIR,
)
validation_video_data_loader = VideoDataLoader(
    path=VALIDATION_DIR,
)

train_dataset = VideoDataset(
    video_data_loader=train_video_data_loader,
    normalization_type="across_frames",
    # transform=default_augmentation_pipeline(target_len=16, noise_std=0.02),
)
test_dataset = VideoDataset(
    video_data_loader=test_video_data_loader,
    normalization_type="across_frames",
)
validation_dataset = VideoDataset(
    video_data_loader=validation_video_data_loader,
    normalization_type="across_frames",
)

len(train_dataset)
for _ in train_dataset:
    pass
len(train_dataset.labels_map)

len(test_dataset)
for _ in test_dataset:
    pass

len(validation_dataset)
for _ in validation_dataset:
    pass


display(len(test_dataset.labels_map))
display(len(validation_dataset.labels_map))
display(len(train_dataset.labels_map))

2025-11-24 13:40:39,409 - INFO - [VideoDataLoader] Loding action videos for action: a01
2025-11-24 13:40:39,613 - INFO - [VideoDataLoader] Loding action videos for action: a02
2025-11-24 13:40:39,914 - INFO - [VideoDataLoader] Loding action videos for action: a03
2025-11-24 13:40:40,217 - INFO - [VideoDataLoader] Loding action videos for action: a04
2025-11-24 13:40:40,575 - INFO - [VideoDataLoader] Loding action videos for action: a05
2025-11-24 13:40:40,765 - INFO - [VideoDataLoader] Loding action videos for action: a06
2025-11-24 13:40:40,937 - INFO - [VideoDataLoader] Loding action videos for action: a08
2025-11-24 13:40:41,485 - INFO - [VideoDataLoader] Loding action videos for action: a09
2025-11-24 13:40:41,972 - INFO - [VideoDataLoader] Loding action videos for action: a11
2025-11-24 13:40:42,163 - INFO - [VideoDataLoader] Loding action videos for action: a12
2025-11-24 13:40:46,169 - INFO - [VideoDataLoader] Loding action videos for action: a01
2025-11-24 13:40:46,201 - INFO -

10

10

10

**Splitting Train and Test Datasets**

In [3]:
# num_total = len(train_dataset)
# num_train = int(0.8 * num_total)
# num_test = num_total - num_train
# train_dataset, test_dataset = random_split(train_dataset, [num_train, num_test])

**Creating Model**

In [4]:

attn_heads = 4
hidden_size = 192

assert hidden_size % attn_heads == 0, "Hidden size must be divisible by number of attention heads."

har_model = MultiModalHARModel(
    obj_in=train_dataset[0].graphs_objects[0].x.shape[1],
    joint_in=train_dataset[0].graphs_joints[0].x.shape[1],
    gat_hidden=hidden_size,
    gat_out=hidden_size,
    temporal_hidden=hidden_size,
    num_classes=len(train_dataset.labels_map), 
    dropout=0.1,
    temporal_pooling="attn_pool",
    attention_pooling_heads=attn_heads,
    temporal_transformer_heads=attn_heads,
    use_layer_norm=True,
    use_object_branch=True,
)

2025-11-24 13:40:47,495 - INFO - Model configuration: {'obj_in': 5, 'joint_in': 3, 'gat_hidden': 192, 'gat_out': 192, 'temporal_hidden': 192, 'num_classes': 10, 'dropout': 0.1, 'temporal_pooling': 'attn_pool', 'use_layer_norm': True, 'attention_pooling_heads': 4, 'temporal_transformer_heads': 4, 'use_object_branch': True, 'device': 'cpu'}


**Create Evaluate Function For Early Stopping**

In [5]:
def evaluate(model, dataset):
    import torch
    device = 'cpu'
    model.eval()
    correct = 0
    total = 0

    with torch.no_grad():
        for i in range(len(dataset)):
            sample = dataset[i]
            label = sample.label.to(device)

            # Move all graph tensors to device
            graphs_objects = [g.to(device) for g in sample.graphs_objects]
            graphs_joints = [g.to(device) for g in sample.graphs_joints]

            # Forward pass
            output = model(graphs_objects, graphs_joints)

            # Compute prediction
            if output.dim() == 1:
                predicted = torch.argmax(output).unsqueeze(0)
            else:
                _, predicted = torch.max(output, dim=1)

            correct += (predicted == label).sum().item()
            total += 1

    accuracy = 100 * correct / total
    return accuracy


In [6]:
early_stopping_params = EarlyStoppingParams(
    patience=20,
    min_delta=1e-4,
    mode='max',
    evaluation_function=evaluate,
    evaluation_dataset=validation_dataset,
)

In [7]:
from model.training_loop import WarmupSchedulerParams


train_history = train(
    model=har_model,
    video_dataset=train_dataset,
    device='cpu',
    # epochs=120,
    epochs=90,
    lr=1e-4,
    weight_decay=1e-4,
    # early_stopping=early_stopping_params,
    warmup_scheduler_params=WarmupSchedulerParams(True, 400),
    cross_entropy_label_smoothing=0.1,
)

2025-11-24 13:40:47,564 - INFO - Starting training loop...
2025-11-24 13:40:47,566 - INFO - Using weight decay: 0.0001
2025-11-24 13:40:47,567 - INFO - Using Label Smoothing Cross Entropy with smoothing=0.1
Epoch 1/90: 100%|██████████| 1176/1176 [01:12<00:00, 16.19it/s]
2025-11-24 13:42:00,237 - INFO - Epoch 1/90, Loss: 2.1602
Epoch 2/90: 100%|██████████| 1176/1176 [01:11<00:00, 16.35it/s]
2025-11-24 13:43:12,174 - INFO - Epoch 2/90, Loss: 1.6646
Epoch 3/90: 100%|██████████| 1176/1176 [01:12<00:00, 16.12it/s]
2025-11-24 13:44:25,125 - INFO - Epoch 3/90, Loss: 1.4250
Epoch 4/90: 100%|██████████| 1176/1176 [01:12<00:00, 16.24it/s]
2025-11-24 13:45:37,523 - INFO - Epoch 4/90, Loss: 1.3638
Epoch 5/90: 100%|██████████| 1176/1176 [01:12<00:00, 16.16it/s]
2025-11-24 13:46:50,291 - INFO - Epoch 5/90, Loss: 1.2842
Epoch 6/90: 100%|██████████| 1176/1176 [01:14<00:00, 15.78it/s]
2025-11-24 13:48:04,814 - INFO - Epoch 6/90, Loss: 1.2160
Epoch 7/90: 100%|██████████| 1176/1176 [01:12<00:00, 16.25it/

## Training Model

**Early Stopping**

**Training**

**Saving Model**

In [8]:
har_model.save(
    training_history=train_history
)

2025-11-24 15:32:24,910 - INFO - Saving model to /Volumes/KODAK/masters/model/validation_datasets/NW-UCLA/model/har_model_v1.0.0_nw_ucla_20251124_153224.pht...
2025-11-24 15:32:25,302 - INFO - Model saved successfully.


## Running tests

**Accuracy**

In [9]:
accuracy_evaluation = evaluate(har_model, test_dataset)
print(f"Test Accuracy: {accuracy_evaluation:.2f}%")

Test Accuracy: 80.26%


## Debugging

In [10]:
import torch, torch.nn.functional as F
device = 'cpu'   # match training device
har_model.to(device)
har_model.eval()

sample = train_dataset[0]
graphs_objects = [g.to(device) for g in sample.graphs_objects]
graphs_joints = [g.to(device) for g in sample.graphs_joints]
label = sample.label.unsqueeze(0).to(device)

with torch.no_grad():
    out = har_model(graphs_objects, graphs_joints)   # expect [1, num_classes]
    probs = F.softmax(out, dim=-1)
    ent = -(probs * probs.log()).sum(dim=-1)     # entropy
    pred = torch.argmax(probs, dim=-1)

print("out.shape", out.shape)
print("out device", out.device, "label device", label.device)
print("logits:", out.cpu().numpy())
print("probs:", probs.cpu().numpy())
print("entropy:", ent.item())
print("pred:", pred.item(), "label:", label.item())


out.shape torch.Size([1, 10])
out device cpu label device cpu
logits: [[ 2.0842776 -2.352966  -2.471104  -2.5631952 -2.5137284 -2.408984
  -2.4595914 -2.5207999 -2.384496  -2.4309044]]
probs: [[0.9122282  0.01079029 0.00958796 0.00874443 0.00918787 0.01020246
  0.00969898 0.00912313 0.01045538 0.00998125]]
entropy: 0.4900222718715668
pred: 0 label: 0


**Mapping Consistency - Distribution**

In [11]:
from collections import Counter
def inspect_dataset(dataset, name):
    labels = [int(s.label) for s in dataset]
    print(f"=== {name} ===")
    print("len:", len(labels))
    print("unique labels:", sorted(set(labels)))
    print("counts:", Counter(labels))
    print("labels_map (sample):", getattr(dataset, "labels_map", None))
    print()

inspect_dataset(train_dataset, "TRAIN")
inspect_dataset(validation_dataset, "VAL")
inspect_dataset(test_dataset, "TEST")


=== TRAIN ===
len: 1176
unique labels: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
counts: Counter({3: 138, 5: 119, 0: 118, 4: 118, 1: 116, 8: 116, 6: 113, 7: 113, 9: 113, 2: 112})
labels_map (sample): {'a01': 0, 'a02': 1, 'a03': 2, 'a04': 3, 'a05': 4, 'a06': 5, 'a08': 6, 'a09': 7, 'a11': 8, 'a12': 9}

=== VAL ===
len: 147
unique labels: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
counts: Counter({3: 17, 0: 15, 1: 15, 4: 15, 5: 15, 2: 14, 6: 14, 7: 14, 8: 14, 9: 14})
labels_map (sample): {'a01': 0, 'a02': 1, 'a03': 2, 'a04': 3, 'a05': 4, 'a06': 5, 'a08': 6, 'a09': 7, 'a11': 8, 'a12': 9}

=== TEST ===
len: 152
unique labels: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
counts: Counter({3: 18, 0: 15, 1: 15, 4: 15, 5: 15, 6: 15, 7: 15, 8: 15, 9: 15, 2: 14})
labels_map (sample): {'a01': 0, 'a02': 1, 'a03': 2, 'a04': 3, 'a05': 4, 'a06': 5, 'a08': 6, 'a09': 7, 'a11': 8, 'a12': 9}



**Prediction Distribution - Predicts only few classes**

In [12]:
from collections import Counter
import torch, torch.nn.functional as F

def pred_distribution(model, dataset, device='cpu'):
    model.eval()
    preds = []
    truths = []
    with torch.no_grad():
        for s in dataset:
            graphs_objects = [g.to(device) for g in s.graphs_objects]
            graphs_joints  = [g.to(device) for g in s.graphs_joints]
            out = model(graphs_objects, graphs_joints)  # [1, C]
            preds.append(int(torch.argmax(out, dim=-1)))
            truths.append(int(s.label))
    print("pred distribution:", Counter(preds))
    print("true distribution :", Counter(truths))
    return preds, truths

preds, truths = pred_distribution(har_model, validation_dataset, device='cpu')


pred distribution: Counter({9: 23, 1: 17, 5: 17, 0: 15, 2: 15, 6: 15, 8: 14, 3: 12, 4: 12, 7: 7})
true distribution : Counter({3: 17, 0: 15, 1: 15, 4: 15, 5: 15, 2: 14, 6: 14, 7: 14, 8: 14, 9: 14})


**Confusion Matrix - Per Class Prediction**

In [13]:
from sklearn.metrics import confusion_matrix, classification_report
cm = confusion_matrix(truths, preds)
print("Confusion matrix:\n", cm)
print(classification_report(truths, preds, digits=4))


Confusion matrix:
 [[13  1  1  0  0  0  0  0  0  0]
 [ 1 14  0  0  0  0  0  0  0  0]
 [ 1  0  8  1  0  0  0  1  1  2]
 [ 0  0  5  9  0  0  0  0  1  2]
 [ 0  1  0  0 12  2  0  0  0  0]
 [ 0  0  0  0  0 15  0  0  0  0]
 [ 0  1  0  0  0  0 12  0  1  0]
 [ 0  0  0  2  0  0  3  6  1  2]
 [ 0  0  1  0  0  0  0  0 10  3]
 [ 0  0  0  0  0  0  0  0  0 14]]
              precision    recall  f1-score   support

           0     0.8667    0.8667    0.8667        15
           1     0.8235    0.9333    0.8750        15
           2     0.5333    0.5714    0.5517        14
           3     0.7500    0.5294    0.6207        17
           4     1.0000    0.8000    0.8889        15
           5     0.8824    1.0000    0.9375        15
           6     0.8000    0.8571    0.8276        14
           7     0.8571    0.4286    0.5714        14
           8     0.7143    0.7143    0.7143        14
           9     0.6087    1.0000    0.7568        14

    accuracy                         0.7687       147
