In [1]:
import os
from pathlib import Path
from typing import Dict
import sys
# Make the parent of the current working directory importable so that the
# project-local packages imported in this cell (`settings`, `dataset`) can be
# resolved. The membership check keeps the cell idempotent: re-running it does
# not append a duplicate entry to sys.path.
_parent_dir = str(Path(os.getcwd()).parent)
if _parent_dir not in sys.path:
    sys.path.append(_parent_dir)

from settings.global_settings import GlobalSettings

# Both file paths are relative; presumably they are resolved against the
# current working directory, so the notebook should be run from its own
# folder — confirm against GlobalSettings.get_config.
config = GlobalSettings.get_config(
    config_file="../config.ini",
    secrets_file="../secrets.ini",
)
from dataset.video_loader import VideoDataLoader
from dataset.video_dataset import VideoDataset

2025-10-24 22:42:30,113 - INFO - Sentry DSN set to: https://f4f21cc936b3ba9f5dbc1464b7a40ea4@o4504168838070272.ingest.us.sentry.io/4506464560414720
2025-10-24 22:42:30,114 - INFO - Sentry initialized with environment: development


Loading config...
Loading secrets...


## Loading VideoDataLoader and VideoDataset

In [2]:
# Sub-directory names of the three dataset splits inside
# `config.model_settings.video_data_dir`.
TRAIN_DIR, TEST_DIR, VALIDATION_DIR = "train", "test", "validation"

# One VideoDataLoader per split, each pointed at <video_data_dir>/<split dir>.
# The generator is consumed in order, so the loaders are still constructed
# train -> test -> validation.
train_video_data_loader, test_video_data_loader, validation_video_data_loader = (
    VideoDataLoader(
        path=os.path.join(config.model_settings.video_data_dir, split_dir)
    )
    for split_dir in (TRAIN_DIR, TEST_DIR, VALIDATION_DIR)
)

# Wrap every loader in its own VideoDataset, in the same split order.
train_video_dataset, test_video_dataset, validation_video_dataset = (
    VideoDataset(video_data_loader=split_loader)
    for split_loader in (
        train_video_data_loader,
        test_video_data_loader,
        validation_video_data_loader,
    )
)

**Testing video_dataset**

In [3]:
# Total number of items across the train, test and validation datasets.
print(
    sum(
        len(split_dataset)
        for split_dataset in (
            train_video_dataset,
            test_video_dataset,
            validation_video_dataset,
        )
    )
)

2025-10-24 22:42:48,090 - INFO - [VideoDataLoader] Loding action videos for action: a01
2025-10-24 22:42:48,632 - INFO - [VideoDataLoader] Loding action videos for action: a02
2025-10-24 22:42:49,236 - INFO - [VideoDataLoader] Loding action videos for action: a03
2025-10-24 22:42:49,672 - INFO - [VideoDataLoader] Loding action videos for action: a04
2025-10-24 22:42:50,370 - INFO - [VideoDataLoader] Loding action videos for action: a05
2025-10-24 22:42:50,903 - INFO - [VideoDataLoader] Loding action videos for action: a06
2025-10-24 22:42:51,320 - INFO - [VideoDataLoader] Loding action videos for action: a08
2025-10-24 22:42:52,291 - INFO - [VideoDataLoader] Loding action videos for action: a09
2025-10-24 22:42:53,174 - INFO - [VideoDataLoader] Loding action videos for action: a11
2025-10-24 22:42:53,505 - INFO - [VideoDataLoader] Loding action videos for action: a12
2025-10-24 22:42:53,927 - INFO - [VideoDataLoader] Loding action videos for action: a01
2025-10-24 22:42:53,992 - INFO -

1475


In [None]:
# Sanity check: print the `graphs_objects` attribute of the first item in the
# train dataset for visual verification.
print(train_video_dataset[0].graphs_objects)

In [None]:
# Inspect the `edge_index` of the first entry in `graphs_objects` for the train
# dataset item at index 2.
print(train_video_dataset[2].graphs_objects[0].edge_index)

In [None]:
# Inspect the `edge_index` of the first entry in `graphs_objects` for the train
# dataset item at index 2.
# NOTE(review): this statement is identical to the one in the preceding cell.
# Possibly it was meant to inspect `graphs_joints[0].edge_index` instead —
# confirm the intent, or remove the duplicate cell.
print(train_video_dataset[2].graphs_objects[0].edge_index)

**Testing batching through a PyTorch DataLoader:**

In [None]:
from torch.utils.data import DataLoader

# Smoke-test batching over the train dataset. The identity collate_fn returns
# the list of samples unchanged instead of running DataLoader's default
# collation.
loader = DataLoader(
    dataset=train_video_dataset,
    batch_size=2,
    shuffle=True,
    collate_fn=lambda samples: samples,
)

# A single batch is enough for a visual check, so stop after the first one.
for batch in loader:
    print(batch)
    break

**Verifying Labels**

In [None]:
# Iterate over the whole train dataset once without using the items.
# NOTE(review): presumably `labels_map` is filled in as items are loaded, which
# would be why a full pass is done before printing it — confirm in VideoDataset.
for video_data in train_video_dataset:
    pass
# Number of entries in the label mapping, followed by the mapping itself.
print(len(train_video_dataset.labels_map))
print(train_video_dataset.labels_map)

**Verifying Feature Dimension**

In [None]:
# Print the size of the second axis of `x` for the first object graph and the
# first joint graph of the first train item.
# NOTE(review): presumably `x` is laid out as (num_nodes, num_features), so
# this is the per-node feature dimension — confirm.
print(train_video_dataset[0].graphs_objects[0].x.shape[1])
print(train_video_dataset[0].graphs_joints[0].x.shape[1])

## Analyzing Video Category Distribution

In [None]:
def get_videos_distribution(video_dataset: VideoDataset) -> Dict[str, int]:
    """Count how many videos of each label name a dataset contains.

    Args:
        video_dataset: Dataset to iterate over. Every yielded item must expose
            a ``label`` attribute, which is translated to a label name through
            ``video_dataset.get_label_name_from_label_value``.

    Returns:
        A dict mapping each label name to the number of items carrying it,
        with keys in order of first appearance during iteration.
    """
    counts_by_label: Dict[str, int] = {}
    for item in video_dataset:
        label_name = video_dataset.get_label_name_from_label_value(item.label)
        # Start unseen labels at zero, then count the current item.
        counts_by_label[label_name] = counts_by_label.get(label_name, 0) + 1
    return counts_by_label

In [None]:
# Per-label video counts for the train split, reported as a header line, the
# overall total and the raw mapping (one per line).
train_videos_distribution = get_videos_distribution(train_video_dataset)
print(
    "Train Videos Distribution:",
    f"Total: {sum(train_videos_distribution.values())}",
    train_videos_distribution,
    sep="\n",
)

# Same report for the test split.
test_videos_distribution = get_videos_distribution(test_video_dataset)
print(
    "Test Videos Distribution:",
    f"Total: {sum(test_videos_distribution.values())}",
    test_videos_distribution,
    sep="\n",
)