# Test Model

In [1]:
import os
from pathlib import Path
import sys
sys.path.append(str(Path(os.getcwd()).parent))

from settings.global_settings import GlobalSettings

config = GlobalSettings.get_config(
    config_file = "../config.ini",
    secrets_file = "../secrets.ini"
)
from dataset.video_loader import VideoDataLoader
from dataset.video_dataset import VideoDataset
from model.training_loop import train, EarlyStoppingParams
from model.multimodal_har_model import MultiModalHARModel

Loading config...
Loading secrets...


2025-11-01 19:47:32,575 - INFO - Sentry DSN set to: https://f4f21cc936b3ba9f5dbc1464b7a40ea4@o4504168838070272.ingest.us.sentry.io/4506464560414720
2025-11-01 19:47:32,576 - INFO - Sentry initialized with environment: development


## Initializing Training

**Creating Dataloaders**

In [2]:
TRAIN_DIR = os.path.join(
    config.model_settings.video_data_dir,
    "train"
)
TEST_DIR = os.path.join(
    config.model_settings.video_data_dir,
    "validation"
)
VALIDATION_DIR = os.path.join(
    config.model_settings.video_data_dir,
    "test"
)

train_video_data_loader = VideoDataLoader(
    path=TRAIN_DIR,
)
test_video_data_loader = VideoDataLoader(
    path=TEST_DIR,
)
validation_video_data_loader = VideoDataLoader(
    path=VALIDATION_DIR,
)

train_dataset = VideoDataset(
    video_data_loader=train_video_data_loader,
    normalization_type="across_frames",
)
test_dataset = VideoDataset(
    video_data_loader=test_video_data_loader,
    normalization_type="across_frames",
)
validation_dataset = VideoDataset(
    video_data_loader=validation_video_data_loader,
    normalization_type="across_frames",
)

len(train_dataset)
for _ in train_dataset:
    pass
len(train_dataset.labels_map)

len(test_dataset)
for _ in test_dataset:
    pass

len(validation_dataset)
for _ in validation_dataset:
    pass


display(len(test_dataset.labels_map))
display(len(validation_dataset.labels_map))
display(len(train_dataset.labels_map))

2025-11-01 19:47:34,544 - INFO - [VideoDataLoader] Loding action videos for action: a01
2025-11-01 19:47:34,893 - INFO - [VideoDataLoader] Loding action videos for action: a02
2025-11-01 19:47:35,333 - INFO - [VideoDataLoader] Loding action videos for action: a03
2025-11-01 19:47:35,820 - INFO - [VideoDataLoader] Loding action videos for action: a04
2025-11-01 19:47:36,618 - INFO - [VideoDataLoader] Loding action videos for action: a05
2025-11-01 19:47:36,983 - INFO - [VideoDataLoader] Loding action videos for action: a06
2025-11-01 19:47:37,317 - INFO - [VideoDataLoader] Loding action videos for action: a08
2025-11-01 19:47:38,128 - INFO - [VideoDataLoader] Loding action videos for action: a09
2025-11-01 19:47:38,855 - INFO - [VideoDataLoader] Loding action videos for action: a11
2025-11-01 19:47:39,222 - INFO - [VideoDataLoader] Loding action videos for action: a12
2025-11-01 19:47:43,984 - INFO - [VideoDataLoader] Loding action videos for action: a01
2025-11-01 19:47:44,010 - INFO -

10

10

10

**Splitting Train and Test Datasets**

In [3]:
# num_total = len(train_dataset)
# num_train = int(0.8 * num_total)
# num_test = num_total - num_train
# train_dataset, test_dataset = random_split(train_dataset, [num_train, num_test])

**Creating Model**

In [4]:

har_model = MultiModalHARModel(
    obj_in=train_dataset[0].graphs_objects[0].x.shape[1],
    joint_in=train_dataset[0].graphs_joints[0].x.shape[1],
    gat_hidden=128,
    gat_out=128,
    temporal_hidden=128,
    num_classes=len(train_dataset.labels_map), 
    dropout=0.1,
)

**Create Evaluate Function For Early Stopping**

In [5]:
def evaluate(model, dataset):
    import torch
    device = 'cpu'
    model.eval()
    correct = 0
    total = 0

    with torch.no_grad():
        for i in range(len(dataset)):
            sample = dataset[i]
            label = sample.label.to(device)

            # Move all graph tensors to device
            graphs_objects = [g.to(device) for g in sample.graphs_objects]
            graphs_joints = [g.to(device) for g in sample.graphs_joints]

            # Forward pass
            output = model(graphs_objects, graphs_joints)

            # Compute prediction
            if output.dim() == 1:
                predicted = torch.argmax(output).unsqueeze(0)
            else:
                _, predicted = torch.max(output, dim=1)

            correct += (predicted == label).sum().item()
            total += 1

    accuracy = 100 * correct / total
    return accuracy


In [6]:
early_stopping_params = EarlyStoppingParams(
    patience=6,
    min_delta=1e-3,
    mode='max',
    evaluation_function=evaluate,
    evaluation_dataset=validation_dataset,
)

In [7]:
train_history = train(
    model=har_model,
    video_dataset=train_dataset,
    device='cpu',
    epochs=28,
    lr=5e-4,
    early_stopping=early_stopping_params,
)

2025-11-01 19:47:46,024 - INFO - Starting training loop...
2025-11-01 19:47:46,032 - INFO - Using early stopping
Epoch 1/28: 100%|██████████| 1176/1176 [01:04<00:00, 18.20it/s]
2025-11-01 19:48:50,687 - INFO - Evaluating for early stopping...
2025-11-01 19:48:53,094 - INFO - Epoch 1/28, Loss: 1.8430
Epoch 2/28: 100%|██████████| 1176/1176 [01:09<00:00, 16.82it/s]
2025-11-01 19:50:03,012 - INFO - Evaluating for early stopping...
2025-11-01 19:50:05,575 - INFO - Epoch 2/28, Loss: 1.2957
Epoch 3/28: 100%|██████████| 1176/1176 [01:11<00:00, 16.45it/s]
2025-11-01 19:51:17,087 - INFO - Evaluating for early stopping...
2025-11-01 19:51:19,771 - INFO - Epoch 3/28, Loss: 1.1165
Epoch 4/28: 100%|██████████| 1176/1176 [01:11<00:00, 16.41it/s]
2025-11-01 19:52:31,418 - INFO - Evaluating for early stopping...
2025-11-01 19:52:34,066 - INFO - Epoch 4/28, Loss: 0.9706
Epoch 5/28: 100%|██████████| 1176/1176 [01:10<00:00, 16.67it/s]
2025-11-01 19:53:44,594 - INFO - Evaluating for early stopping...
2025-

## Training Model

**Early Stopping**

**Training**

**Saving Model**

In [8]:
har_model.save(
    training_history=train_history
)

2025-11-01 20:24:58,735 - INFO - Saving model to /Volumes/KODAK/masters/model/validation_datasets/NW-UCLA/model/har_model_v1.0.0_nw_ucla_2025-11-01 20:24:58.735120.pht...
2025-11-01 20:24:58,807 - INFO - Model saved successfully.


## Running tests

**Accuracy**

In [9]:
accuracy_evaluation = evaluate(har_model, test_dataset)
print(f"Test Accuracy: {accuracy_evaluation:.2f}%")

Test Accuracy: 82.24%


## Debugging

In [10]:
import torch, torch.nn.functional as F
device = 'cpu'   # match training device
har_model.to(device)
har_model.eval()

sample = train_dataset[0]
graphs_objects = [g.to(device) for g in sample.graphs_objects]
graphs_joints = [g.to(device) for g in sample.graphs_joints]
label = sample.label.unsqueeze(0).to(device)

with torch.no_grad():
    out = har_model(graphs_objects, graphs_joints)   # expect [1, num_classes]
    probs = F.softmax(out, dim=-1)
    ent = -(probs * probs.log()).sum(dim=-1)     # entropy
    pred = torch.argmax(probs, dim=-1)

print("out.shape", out.shape)
print("out device", out.device, "label device", label.device)
print("logits:", out.cpu().numpy())
print("probs:", probs.cpu().numpy())
print("entropy:", ent.item())
print("pred:", pred.item(), "label:", label.item())


out.shape torch.Size([1, 10])
out device cpu label device cpu
logits: [[  4.810031    1.1734328   1.0486519  -4.461392   -8.576686   -8.094407
  -12.136481   -6.935664   -9.964235  -10.174203 ]]
probs: [[9.5265299e-01 2.5094599e-02 2.2150759e-02 8.9620371e-05 1.4627075e-06
  2.3692371e-06 4.1606192e-08 7.5482130e-06 3.6521820e-07 2.9604973e-07]]
entropy: 0.22406119108200073
pred: 0 label: 0


**Mapping Consistency - Distribution**

In [11]:
from collections import Counter
def inspect_dataset(dataset, name):
    labels = [int(s.label) for s in dataset]
    print(f"=== {name} ===")
    print("len:", len(labels))
    print("unique labels:", sorted(set(labels)))
    print("counts:", Counter(labels))
    print("labels_map (sample):", getattr(dataset, "labels_map", None))
    print()

inspect_dataset(train_dataset, "TRAIN")
inspect_dataset(validation_dataset, "VAL")
inspect_dataset(test_dataset, "TEST")


=== TRAIN ===
len: 1176
unique labels: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
counts: Counter({3: 138, 5: 119, 0: 118, 4: 118, 1: 116, 8: 116, 6: 113, 7: 113, 9: 113, 2: 112})
labels_map (sample): {'a01': 0, 'a02': 1, 'a03': 2, 'a04': 3, 'a05': 4, 'a06': 5, 'a08': 6, 'a09': 7, 'a11': 8, 'a12': 9}

=== VAL ===
len: 147
unique labels: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
counts: Counter({3: 17, 0: 15, 1: 15, 4: 15, 5: 15, 2: 14, 6: 14, 7: 14, 8: 14, 9: 14})
labels_map (sample): {'a01': 0, 'a02': 1, 'a03': 2, 'a04': 3, 'a05': 4, 'a06': 5, 'a08': 6, 'a09': 7, 'a11': 8, 'a12': 9}

=== TEST ===
len: 152
unique labels: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
counts: Counter({3: 18, 0: 15, 1: 15, 4: 15, 5: 15, 6: 15, 7: 15, 8: 15, 9: 15, 2: 14})
labels_map (sample): {'a01': 0, 'a02': 1, 'a03': 2, 'a04': 3, 'a05': 4, 'a06': 5, 'a08': 6, 'a09': 7, 'a11': 8, 'a12': 9}



**Prediction Distribution - Predicts only few classes**

In [12]:
from collections import Counter
import torch, torch.nn.functional as F

def pred_distribution(model, dataset, device='cpu'):
    model.eval()
    preds = []
    truths = []
    with torch.no_grad():
        for s in dataset:
            graphs_objects = [g.to(device) for g in s.graphs_objects]
            graphs_joints  = [g.to(device) for g in s.graphs_joints]
            out = model(graphs_objects, graphs_joints)  # [1, C]
            preds.append(int(torch.argmax(out, dim=-1)))
            truths.append(int(s.label))
    print("pred distribution:", Counter(preds))
    print("true distribution :", Counter(truths))
    return preds, truths

preds, truths = pred_distribution(har_model, validation_dataset, device='cpu')


pred distribution: Counter({0: 19, 2: 16, 1: 15, 3: 15, 8: 15, 4: 15, 5: 14, 9: 14, 6: 13, 7: 11})
true distribution : Counter({3: 17, 0: 15, 1: 15, 4: 15, 5: 15, 2: 14, 6: 14, 7: 14, 8: 14, 9: 14})


**Confusion Matrix - Per Class Prediction**

In [13]:
from sklearn.metrics import confusion_matrix, classification_report
cm = confusion_matrix(truths, preds)
print("Confusion matrix:\n", cm)
print(classification_report(truths, preds, digits=4))


Confusion matrix:
 [[15  0  0  0  0  0  0  0  0  0]
 [ 1 14  0  0  0  0  0  0  0  0]
 [ 2  0  8  3  0  0  0  1  0  0]
 [ 0  0  6 10  0  0  0  0  1  0]
 [ 0  0  0  0 15  0  0  0  0  0]
 [ 1  0  0  0  0 14  0  0  0  0]
 [ 0  1  0  0  0  0 11  0  2  0]
 [ 0  0  0  2  0  0  2  9  1  0]
 [ 0  0  2  0  0  0  0  1 10  1]
 [ 0  0  0  0  0  0  0  0  1 13]]
              precision    recall  f1-score   support

           0     0.7895    1.0000    0.8824        15
           1     0.9333    0.9333    0.9333        15
           2     0.5000    0.5714    0.5333        14
           3     0.6667    0.5882    0.6250        17
           4     1.0000    1.0000    1.0000        15
           5     1.0000    0.9333    0.9655        15
           6     0.8462    0.7857    0.8148        14
           7     0.8182    0.6429    0.7200        14
           8     0.6667    0.7143    0.6897        14
           9     0.9286    0.9286    0.9286        14

    accuracy                         0.8095       147
