# Test Model

In [1]:
import os
from pathlib import Path
import sys
sys.path.append(str(Path(os.getcwd()).parent))

from settings.global_settings import GlobalSettings

config = GlobalSettings.get_config(
    config_file = "../config.ini",
    secrets_file = "../secrets.ini"
)
from dataset.video_loader import VideoDataLoader
from dataset.video_dataset import VideoDataset, default_augmentation_pipeline
from model.training_loop import train, EarlyStoppingParams
from model.multimodal_har_model import MultiModalHARModel

Loading config...
Loading secrets...


2025-11-27 12:51:23,874 - INFO - Sentry DSN set to: https://f4f21cc936b3ba9f5dbc1464b7a40ea4@o4504168838070272.ingest.us.sentry.io/4506464560414720
2025-11-27 12:51:23,875 - INFO - Sentry initialized with environment: development


In [2]:
OBSERVATION_RATIO = 20
EAR_RATIO = OBSERVATION_RATIO / 100
WITH_OBJECT_BRANCH = False

## Initializing Training

**Creating Dataloaders**

In [3]:
TRAIN_DIR = os.path.join(
    config.model_settings.video_data_dir,
    "train"
)
TEST_DIR = os.path.join(
    config.model_settings.video_data_dir,
    "validation"
)
VALIDATION_DIR = os.path.join(
    config.model_settings.video_data_dir,
    "test"
)

train_video_data_loader = VideoDataLoader(
    path=TRAIN_DIR,
)
test_video_data_loader = VideoDataLoader(
    path=TEST_DIR,
)
validation_video_data_loader = VideoDataLoader(
    path=VALIDATION_DIR,
)

train_dataset = VideoDataset(
    video_data_loader=train_video_data_loader,
    normalization_type="across_frames",
    EAR_ratio=EAR_RATIO,
    # transform=default_augmentation_pipeline(target_len=16, noise_std=0.02),
)
test_dataset = VideoDataset(
    video_data_loader=test_video_data_loader,
    normalization_type="across_frames",
    EAR_ratio=EAR_RATIO,
)
validation_dataset = VideoDataset(
    video_data_loader=validation_video_data_loader,
    normalization_type="across_frames",
    EAR_ratio=EAR_RATIO,
)

len(train_dataset)
for _ in train_dataset:
    pass
len(train_dataset.labels_map)

len(test_dataset)
for _ in test_dataset:
    pass

len(validation_dataset)
for _ in validation_dataset:
    pass


display(len(test_dataset.labels_map))
display(len(validation_dataset.labels_map))
display(len(train_dataset.labels_map))

2025-11-27 12:51:25,945 - INFO - [VideoDataLoader] Loding action videos for action: a01
2025-11-27 12:51:26,263 - INFO - [VideoDataLoader] Loding action videos for action: a02
2025-11-27 12:51:26,605 - INFO - [VideoDataLoader] Loding action videos for action: a03
2025-11-27 12:51:26,931 - INFO - [VideoDataLoader] Loding action videos for action: a04
2025-11-27 12:51:27,326 - INFO - [VideoDataLoader] Loding action videos for action: a05
2025-11-27 12:51:27,547 - INFO - [VideoDataLoader] Loding action videos for action: a06
2025-11-27 12:51:27,738 - INFO - [VideoDataLoader] Loding action videos for action: a08
2025-11-27 12:51:28,338 - INFO - [VideoDataLoader] Loding action videos for action: a09
2025-11-27 12:51:28,811 - INFO - [VideoDataLoader] Loding action videos for action: a11
2025-11-27 12:51:29,019 - INFO - [VideoDataLoader] Loding action videos for action: a12
2025-11-27 12:51:30,066 - INFO - [VideoDataLoader] Loding action videos for action: a01
2025-11-27 12:51:30,092 - INFO -

10

10

10

**Splitting Train and Test Datasets**

In [4]:
# num_total = len(train_dataset)
# num_train = int(0.8 * num_total)
# num_test = num_total - num_train
# train_dataset, test_dataset = random_split(train_dataset, [num_train, num_test])

**Creating Model**

In [5]:

attn_heads = 4
hidden_size = 192

assert hidden_size % attn_heads == 0, "Hidden size must be divisible by number of attention heads."

har_model = MultiModalHARModel(
    obj_in=train_dataset[0].graphs_objects[0].x.shape[1],
    joint_in=train_dataset[0].graphs_joints[0].x.shape[1],
    gat_hidden=hidden_size,
    gat_out=hidden_size,
    temporal_hidden=hidden_size,
    num_classes=len(train_dataset.labels_map), 
    dropout=0.1,
    temporal_pooling="attn_pool",
    attention_pooling_heads=attn_heads,
    temporal_transformer_heads=attn_heads,
    use_layer_norm=True,
    use_object_branch=WITH_OBJECT_BRANCH, # Testing without object branch
)

2025-11-27 12:51:30,876 - INFO - Model configuration: {'obj_in': 5, 'joint_in': 3, 'gat_hidden': 192, 'gat_out': 192, 'temporal_hidden': 192, 'num_classes': 10, 'dropout': 0.1, 'temporal_pooling': 'attn_pool', 'use_layer_norm': True, 'attention_pooling_heads': 4, 'temporal_transformer_heads': 4, 'use_object_branch': False, 'device': 'cpu'}


**Create Evaluate Function For Early Stopping**

In [6]:
def evaluate(model, dataset):
    import torch
    device = 'cpu'
    model.eval()
    correct = 0
    total = 0

    with torch.no_grad():
        for i in range(len(dataset)):
            sample = dataset[i]
            label = sample.label.to(device)

            # Move all graph tensors to device
            graphs_objects = [g.to(device) for g in sample.graphs_objects]
            graphs_joints = [g.to(device) for g in sample.graphs_joints]

            # Forward pass
            output = model(graphs_objects, graphs_joints)

            # Compute prediction
            if output.dim() == 1:
                predicted = torch.argmax(output).unsqueeze(0)
            else:
                _, predicted = torch.max(output, dim=1)

            correct += (predicted == label).sum().item()
            total += 1

    accuracy = 100 * correct / total
    return accuracy


In [7]:
early_stopping_params = EarlyStoppingParams(
    patience=20,
    min_delta=1e-4,
    mode='max',
    evaluation_function=evaluate,
    evaluation_dataset=validation_dataset,
)

In [8]:
from model.training_loop import WarmupSchedulerParams


train_history = train(
    model=har_model,
    video_dataset=train_dataset,
    device='cpu',
    # epochs=120,
    epochs=90,
    lr=1e-4,
    weight_decay=1e-4,
    # early_stopping=early_stopping_params,
    warmup_scheduler_params=WarmupSchedulerParams(True, 400),
    cross_entropy_label_smoothing=0.1,
)

2025-11-27 12:51:30,968 - INFO - Starting training loop...
2025-11-27 12:51:30,971 - INFO - Using weight decay: 0.0001
2025-11-27 12:51:30,972 - INFO - Using Label Smoothing Cross Entropy with smoothing=0.1
Epoch 1/90: 100%|██████████| 1176/1176 [00:27<00:00, 42.97it/s]
2025-11-27 12:51:58,359 - INFO - Epoch 1/90, Loss: 2.1754
Epoch 2/90: 100%|██████████| 1176/1176 [00:21<00:00, 54.34it/s]
2025-11-27 12:52:20,002 - INFO - Epoch 2/90, Loss: 2.0585
Epoch 3/90: 100%|██████████| 1176/1176 [00:26<00:00, 44.98it/s]
2025-11-27 12:52:46,151 - INFO - Epoch 3/90, Loss: 2.0229
Epoch 4/90: 100%|██████████| 1176/1176 [00:21<00:00, 53.47it/s]
2025-11-27 12:53:08,149 - INFO - Epoch 4/90, Loss: 1.9890
Epoch 5/90: 100%|██████████| 1176/1176 [00:21<00:00, 53.63it/s]
2025-11-27 12:53:30,080 - INFO - Epoch 5/90, Loss: 1.9740
Epoch 6/90: 100%|██████████| 1176/1176 [00:26<00:00, 44.78it/s]
2025-11-27 12:53:56,344 - INFO - Epoch 6/90, Loss: 1.9523
Epoch 7/90: 100%|██████████| 1176/1176 [00:21<00:00, 53.93it/

## Training Model

**Early Stopping**

**Training**

**Saving Model**

In [9]:
har_model.save(
    training_history=train_history,
    EAR_ratio=EAR_RATIO,
    with_object_branch=WITH_OBJECT_BRANCH,
)

2025-11-27 13:28:14,497 - INFO - Saving model to /Volumes/KODAK/masters/model/validation_datasets/NW-UCLA/model/har_model_v1.0.1_nw_ucla_20_no_obj_20251127_132814.pht...
2025-11-27 13:28:14,792 - INFO - Model saved successfully.


## Running tests

**Accuracy**

In [10]:
accuracy_evaluation = evaluate(har_model, test_dataset)
print(f"Test Accuracy: {accuracy_evaluation:.2f}%")

Test Accuracy: 30.26%


## Debugging

In [11]:
import torch, torch.nn.functional as F
device = 'cpu'   # match training device
har_model.to(device)
har_model.eval()

sample = train_dataset[0]
graphs_objects = [g.to(device) for g in sample.graphs_objects]
graphs_joints = [g.to(device) for g in sample.graphs_joints]
label = sample.label.unsqueeze(0).to(device)

with torch.no_grad():
    out = har_model(graphs_objects, graphs_joints)   # expect [1, num_classes]
    probs = F.softmax(out, dim=-1)
    ent = -(probs * probs.log()).sum(dim=-1)     # entropy
    pred = torch.argmax(probs, dim=-1)

print("out.shape", out.shape)
print("out device", out.device, "label device", label.device)
print("logits:", out.cpu().numpy())
print("probs:", probs.cpu().numpy())
print("entropy:", ent.item())
print("pred:", pred.item(), "label:", label.item())


out.shape torch.Size([1, 10])
out device cpu label device cpu
logits: [[ 2.138848   -1.4557503   0.18490607 -3.3747542  -0.63881165 -2.5294802
  -2.743491   -2.8771312  -3.1752052  -4.603438  ]]
probs: [[0.7904503  0.02171502 0.11201812 0.00318675 0.04915326 0.00742071
  0.00599103 0.00524158 0.00389054 0.00093269]]
entropy: 0.8033349514007568
pred: 0 label: 0


**Mapping Consistency - Distribution**

In [12]:
from collections import Counter
def inspect_dataset(dataset, name):
    labels = [int(s.label) for s in dataset]
    print(f"=== {name} ===")
    print("len:", len(labels))
    print("unique labels:", sorted(set(labels)))
    print("counts:", Counter(labels))
    print("labels_map (sample):", getattr(dataset, "labels_map", None))
    print()

inspect_dataset(train_dataset, "TRAIN")
inspect_dataset(validation_dataset, "VAL")
inspect_dataset(test_dataset, "TEST")


=== TRAIN ===
len: 1176
unique labels: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
counts: Counter({3: 138, 5: 119, 0: 118, 4: 118, 1: 116, 8: 116, 6: 113, 7: 113, 9: 113, 2: 112})
labels_map (sample): {'a01': 0, 'a02': 1, 'a03': 2, 'a04': 3, 'a05': 4, 'a06': 5, 'a08': 6, 'a09': 7, 'a11': 8, 'a12': 9}

=== VAL ===
len: 147
unique labels: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
counts: Counter({3: 17, 0: 15, 1: 15, 4: 15, 5: 15, 2: 14, 6: 14, 7: 14, 8: 14, 9: 14})
labels_map (sample): {'a01': 0, 'a02': 1, 'a03': 2, 'a04': 3, 'a05': 4, 'a06': 5, 'a08': 6, 'a09': 7, 'a11': 8, 'a12': 9}

=== TEST ===
len: 152
unique labels: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
counts: Counter({3: 18, 0: 15, 1: 15, 4: 15, 5: 15, 6: 15, 7: 15, 8: 15, 9: 15, 2: 14})
labels_map (sample): {'a01': 0, 'a02': 1, 'a03': 2, 'a04': 3, 'a05': 4, 'a06': 5, 'a08': 6, 'a09': 7, 'a11': 8, 'a12': 9}



**Prediction Distribution - Predicts only few classes**

In [13]:
from collections import Counter
import torch, torch.nn.functional as F

def pred_distribution(model, dataset, device='cpu'):
    model.eval()
    preds = []
    truths = []
    with torch.no_grad():
        for s in dataset:
            graphs_objects = [g.to(device) for g in s.graphs_objects]
            graphs_joints  = [g.to(device) for g in s.graphs_joints]
            out = model(graphs_objects, graphs_joints)  # [1, C]
            preds.append(int(torch.argmax(out, dim=-1)))
            truths.append(int(s.label))
    print("pred distribution:", Counter(preds))
    print("true distribution :", Counter(truths))
    return preds, truths

preds, truths = pred_distribution(har_model, validation_dataset, device='cpu')


pred distribution: Counter({9: 22, 0: 17, 4: 17, 5: 17, 3: 14, 1: 14, 7: 13, 8: 13, 2: 11, 6: 9})
true distribution : Counter({3: 17, 0: 15, 1: 15, 4: 15, 5: 15, 2: 14, 6: 14, 7: 14, 8: 14, 9: 14})


**Confusion Matrix - Per Class Prediction**

In [14]:
from sklearn.metrics import confusion_matrix, classification_report
cm = confusion_matrix(truths, preds)
print("Confusion matrix:\n", cm)
print(classification_report(truths, preds, digits=4))


Confusion matrix:
 [[4 4 2 2 2 0 0 1 0 0]
 [5 4 0 0 2 1 0 1 1 1]
 [1 0 0 3 1 3 0 0 3 3]
 [1 0 0 3 0 6 0 0 1 6]
 [1 3 4 1 2 0 0 0 0 4]
 [1 0 1 1 5 5 0 0 2 0]
 [1 1 0 0 0 0 7 4 0 1]
 [0 0 0 3 0 0 2 7 2 0]
 [2 1 2 0 5 0 0 0 0 4]
 [1 1 2 1 0 2 0 0 4 3]]
              precision    recall  f1-score   support

           0     0.2353    0.2667    0.2500        15
           1     0.2857    0.2667    0.2759        15
           2     0.0000    0.0000    0.0000        14
           3     0.2143    0.1765    0.1935        17
           4     0.1176    0.1333    0.1250        15
           5     0.2941    0.3333    0.3125        15
           6     0.7778    0.5000    0.6087        14
           7     0.5385    0.5000    0.5185        14
           8     0.0000    0.0000    0.0000        14
           9     0.1364    0.2143    0.1667        14

    accuracy                         0.2381       147
   macro avg     0.2600    0.2391    0.2451       147
weighted avg     0.2583    0.2381    0.2439   