# Test Model

In [1]:
import os
from pathlib import Path
import sys
sys.path.append(str(Path(os.getcwd()).parent))

from settings.global_settings import GlobalSettings

config = GlobalSettings.get_config(
    config_file = "../config.ini",
    secrets_file = "../secrets.ini"
)
from dataset.video_loader import VideoDataLoader
from dataset.video_dataset import VideoDataset, default_augmentation_pipeline
from model.training_loop import train, EarlyStoppingParams
from model.multimodal_har_model import MultiModalHARModel

Loading config...
Loading secrets...


2025-11-16 12:40:49,518 - INFO - Sentry DSN set to: https://f4f21cc936b3ba9f5dbc1464b7a40ea4@o4504168838070272.ingest.us.sentry.io/4506464560414720
2025-11-16 12:40:49,519 - INFO - Sentry initialized with environment: development


## Initializing Training

**Creating Dataloaders**

In [2]:
TRAIN_DIR = os.path.join(
    config.model_settings.video_data_dir,
    "train"
)
TEST_DIR = os.path.join(
    config.model_settings.video_data_dir,
    "validation"
)
VALIDATION_DIR = os.path.join(
    config.model_settings.video_data_dir,
    "test"
)

train_video_data_loader = VideoDataLoader(
    path=TRAIN_DIR,
)
test_video_data_loader = VideoDataLoader(
    path=TEST_DIR,
)
validation_video_data_loader = VideoDataLoader(
    path=VALIDATION_DIR,
)

train_dataset = VideoDataset(
    video_data_loader=train_video_data_loader,
    normalization_type="across_frames",
    # transform=default_augmentation_pipeline(target_len=16, noise_std=0.02),
)
test_dataset = VideoDataset(
    video_data_loader=test_video_data_loader,
    normalization_type="across_frames",
)
validation_dataset = VideoDataset(
    video_data_loader=validation_video_data_loader,
    normalization_type="across_frames",
)

len(train_dataset)
for _ in train_dataset:
    pass
len(train_dataset.labels_map)

len(test_dataset)
for _ in test_dataset:
    pass

len(validation_dataset)
for _ in validation_dataset:
    pass


display(len(test_dataset.labels_map))
display(len(validation_dataset.labels_map))
display(len(train_dataset.labels_map))

2025-11-16 12:40:51,690 - INFO - [VideoDataLoader] Loding action videos for action: a01
2025-11-16 12:40:51,960 - INFO - [VideoDataLoader] Loding action videos for action: a02
2025-11-16 12:40:52,327 - INFO - [VideoDataLoader] Loding action videos for action: a03
2025-11-16 12:40:52,655 - INFO - [VideoDataLoader] Loding action videos for action: a04
2025-11-16 12:40:53,099 - INFO - [VideoDataLoader] Loding action videos for action: a05
2025-11-16 12:40:53,360 - INFO - [VideoDataLoader] Loding action videos for action: a06
2025-11-16 12:40:53,598 - INFO - [VideoDataLoader] Loding action videos for action: a08
2025-11-16 12:40:54,204 - INFO - [VideoDataLoader] Loding action videos for action: a09
2025-11-16 12:40:54,740 - INFO - [VideoDataLoader] Loding action videos for action: a11
2025-11-16 12:40:54,988 - INFO - [VideoDataLoader] Loding action videos for action: a12
2025-11-16 12:40:59,650 - INFO - [VideoDataLoader] Loding action videos for action: a01
2025-11-16 12:40:59,690 - INFO -

10

10

10

**Splitting Train and Test Datasets**

In [3]:
# num_total = len(train_dataset)
# num_train = int(0.8 * num_total)
# num_test = num_total - num_train
# train_dataset, test_dataset = random_split(train_dataset, [num_train, num_test])

**Creating Model**

In [4]:

har_model = MultiModalHARModel(
    obj_in=train_dataset[0].graphs_objects[0].x.shape[1],
    joint_in=train_dataset[0].graphs_joints[0].x.shape[1],
    gat_hidden=128,
    gat_out=128,
    temporal_hidden=128,
    num_classes=len(train_dataset.labels_map), 
    dropout=0.1,
    temporal_pooling="attn_pool"
)

2025-11-16 12:41:01,344 - INFO - Model configuration: {'obj_in': 5, 'joint_in': 3, 'gat_hidden': 128, 'gat_out': 128, 'temporal_hidden': 128, 'num_classes': 10, 'dropout': 0.1, 'temporal_pooling': 'attn_pool'}


**Create Evaluate Function For Early Stopping**

In [5]:
def evaluate(model, dataset):
    import torch
    device = 'cpu'
    model.eval()
    correct = 0
    total = 0

    with torch.no_grad():
        for i in range(len(dataset)):
            sample = dataset[i]
            label = sample.label.to(device)

            # Move all graph tensors to device
            graphs_objects = [g.to(device) for g in sample.graphs_objects]
            graphs_joints = [g.to(device) for g in sample.graphs_joints]

            # Forward pass
            output = model(graphs_objects, graphs_joints)

            # Compute prediction
            if output.dim() == 1:
                predicted = torch.argmax(output).unsqueeze(0)
            else:
                _, predicted = torch.max(output, dim=1)

            correct += (predicted == label).sum().item()
            total += 1

    accuracy = 100 * correct / total
    return accuracy


In [6]:
early_stopping_params = EarlyStoppingParams(
    patience=20,
    min_delta=1e-3,
    mode='max',
    evaluation_function=evaluate,
    evaluation_dataset=validation_dataset,
)

In [7]:
from model.training_loop import WarmupSchedulerParams


train_history = train(
    model=har_model,
    video_dataset=train_dataset,
    device='cpu',
    epochs=70,
    lr=1e-4,
    early_stopping=early_stopping_params,
    warmup_scheduler_params=WarmupSchedulerParams(True, 800)
)

2025-11-16 12:41:01,427 - INFO - Starting training loop...
2025-11-16 12:41:01,430 - INFO - Using early stopping
Epoch 1/70: 100%|██████████| 1176/1176 [01:09<00:00, 16.99it/s]
2025-11-16 12:42:10,682 - INFO - Evaluating for early stopping...
2025-11-16 12:42:13,268 - INFO - Epoch 1/70, Loss: 2.0259
Epoch 2/70: 100%|██████████| 1176/1176 [01:08<00:00, 17.10it/s]
2025-11-16 12:43:22,055 - INFO - Evaluating for early stopping...
2025-11-16 12:43:24,553 - INFO - Epoch 2/70, Loss: 1.2793
Epoch 3/70: 100%|██████████| 1176/1176 [01:08<00:00, 17.21it/s]
2025-11-16 12:44:32,883 - INFO - Evaluating for early stopping...
2025-11-16 12:44:35,368 - INFO - Epoch 3/70, Loss: 1.0712
Epoch 4/70: 100%|██████████| 1176/1176 [01:09<00:00, 16.81it/s]
2025-11-16 12:45:45,321 - INFO - Evaluating for early stopping...
2025-11-16 12:45:47,807 - INFO - Epoch 4/70, Loss: 0.8856
Epoch 5/70: 100%|██████████| 1176/1176 [01:08<00:00, 17.24it/s]
2025-11-16 12:46:56,039 - INFO - Evaluating for early stopping...
2025-

## Training Model

**Early Stopping**

**Training**

**Saving Model**

In [8]:
har_model.save(
    training_history=train_history
)

2025-11-16 14:55:04,723 - INFO - Saving model to /Volumes/KODAK/masters/model/validation_datasets/NW-UCLA/model/har_model_v1.0.0_nw_ucla_20251116_145504.pht...
2025-11-16 14:55:05,644 - INFO - Model saved successfully.


## Running tests

**Accuracy**

In [9]:
accuracy_evaluation = evaluate(har_model, test_dataset)
print(f"Test Accuracy: {accuracy_evaluation:.2f}%")

Test Accuracy: 80.26%


## Debugging

In [10]:
import torch, torch.nn.functional as F
device = 'cpu'   # match training device
har_model.to(device)
har_model.eval()

sample = train_dataset[0]
graphs_objects = [g.to(device) for g in sample.graphs_objects]
graphs_joints = [g.to(device) for g in sample.graphs_joints]
label = sample.label.unsqueeze(0).to(device)

with torch.no_grad():
    out = har_model(graphs_objects, graphs_joints)   # expect [1, num_classes]
    probs = F.softmax(out, dim=-1)
    ent = -(probs * probs.log()).sum(dim=-1)     # entropy
    pred = torch.argmax(probs, dim=-1)

print("out.shape", out.shape)
print("out device", out.device, "label device", label.device)
print("logits:", out.cpu().numpy())
print("probs:", probs.cpu().numpy())
print("entropy:", ent.item())
print("pred:", pred.item(), "label:", label.item())


out.shape torch.Size([1, 10])
out device cpu label device cpu
logits: [[  9.104175    2.1412585  -0.6019546  -7.237774   -4.6213875  -0.2401329
  -17.461979  -14.522748   -9.532532  -13.011941 ]]
probs: [[9.9890530e-01 9.4529678e-04 6.0842354e-05 7.9855688e-08 1.0929202e-06
  8.7366097e-05 2.8972768e-12 5.4762365e-11 8.0483185e-09 2.4809463e-10]]
entropy: 0.00910070352256298
pred: 0 label: 0


**Mapping Consistency - Distribution**

In [11]:
from collections import Counter
def inspect_dataset(dataset, name):
    labels = [int(s.label) for s in dataset]
    print(f"=== {name} ===")
    print("len:", len(labels))
    print("unique labels:", sorted(set(labels)))
    print("counts:", Counter(labels))
    print("labels_map (sample):", getattr(dataset, "labels_map", None))
    print()

inspect_dataset(train_dataset, "TRAIN")
inspect_dataset(validation_dataset, "VAL")
inspect_dataset(test_dataset, "TEST")


=== TRAIN ===
len: 1176
unique labels: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
counts: Counter({3: 138, 5: 119, 0: 118, 4: 118, 1: 116, 8: 116, 6: 113, 7: 113, 9: 113, 2: 112})
labels_map (sample): {'a01': 0, 'a02': 1, 'a03': 2, 'a04': 3, 'a05': 4, 'a06': 5, 'a08': 6, 'a09': 7, 'a11': 8, 'a12': 9}

=== VAL ===
len: 147
unique labels: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
counts: Counter({3: 17, 0: 15, 1: 15, 4: 15, 5: 15, 2: 14, 6: 14, 7: 14, 8: 14, 9: 14})
labels_map (sample): {'a01': 0, 'a02': 1, 'a03': 2, 'a04': 3, 'a05': 4, 'a06': 5, 'a08': 6, 'a09': 7, 'a11': 8, 'a12': 9}

=== TEST ===
len: 152
unique labels: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
counts: Counter({3: 18, 0: 15, 1: 15, 4: 15, 5: 15, 6: 15, 7: 15, 8: 15, 9: 15, 2: 14})
labels_map (sample): {'a01': 0, 'a02': 1, 'a03': 2, 'a04': 3, 'a05': 4, 'a06': 5, 'a08': 6, 'a09': 7, 'a11': 8, 'a12': 9}



**Prediction Distribution - Predicts only few classes**

In [12]:
from collections import Counter
import torch, torch.nn.functional as F

def pred_distribution(model, dataset, device='cpu'):
    model.eval()
    preds = []
    truths = []
    with torch.no_grad():
        for s in dataset:
            graphs_objects = [g.to(device) for g in s.graphs_objects]
            graphs_joints  = [g.to(device) for g in s.graphs_joints]
            out = model(graphs_objects, graphs_joints)  # [1, C]
            preds.append(int(torch.argmax(out, dim=-1)))
            truths.append(int(s.label))
    print("pred distribution:", Counter(preds))
    print("true distribution :", Counter(truths))
    return preds, truths

preds, truths = pred_distribution(har_model, validation_dataset, device='cpu')


pred distribution: Counter({8: 19, 7: 18, 1: 16, 3: 16, 0: 15, 5: 15, 9: 14, 2: 13, 4: 13, 6: 8})
true distribution : Counter({3: 17, 0: 15, 1: 15, 4: 15, 5: 15, 2: 14, 6: 14, 7: 14, 8: 14, 9: 14})


**Confusion Matrix - Per Class Prediction**

In [13]:
from sklearn.metrics import confusion_matrix, classification_report
cm = confusion_matrix(truths, preds)
print("Confusion matrix:\n", cm)
print(classification_report(truths, preds, digits=4))


Confusion matrix:
 [[14  0  1  0  0  0  0  0  0  0]
 [ 1 14  0  0  0  0  0  0  0  0]
 [ 0  0  9  2  0  0  0  0  2  1]
 [ 0  0  2 12  0  0  0  0  2  1]
 [ 0  1  0  0 13  0  1  0  0  0]
 [ 0  0  0  0  0 15  0  0  0  0]
 [ 0  1  0  0  0  0  7  5  1  0]
 [ 0  0  0  1  0  0  0 11  2  0]
 [ 0  0  1  0  0  0  0  2  9  2]
 [ 0  0  0  1  0  0  0  0  3 10]]
              precision    recall  f1-score   support

           0     0.9333    0.9333    0.9333        15
           1     0.8750    0.9333    0.9032        15
           2     0.6923    0.6429    0.6667        14
           3     0.7500    0.7059    0.7273        17
           4     1.0000    0.8667    0.9286        15
           5     1.0000    1.0000    1.0000        15
           6     0.8750    0.5000    0.6364        14
           7     0.6111    0.7857    0.6875        14
           8     0.4737    0.6429    0.5455        14
           9     0.7143    0.7143    0.7143        14

    accuracy                         0.7755       147
