In [None]:
import torch

# Load the HAR training split. The checkpoint is indexed below with the keys
# 'samples' and 'labels', so it is expected to be a dict-like object.
# NOTE(review): torch.load unpickles the file; only load checkpoints you trust.
train_data = torch.load('data/HAR/train.pt')

# Number of leading samples kept for this quick demo. Set to None to use the
# whole split. A single constant keeps samples and labels sliced consistently.
N_SAMPLES = 10

X_train = train_data['samples'][:N_SAMPLES, :, :]  # [N, C, L]
Y_train = train_data['labels'][:N_SAMPLES]         # [N]

# On the full HAR split this would be e.g. N=7352, C=9, L=128; with the
# N_SAMPLES cap above, N is at most N_SAMPLES.
N, C, L = X_train.shape
assert len(Y_train) == N, f"samples/labels length mismatch: {N} vs {len(Y_train)}"

# Flatten (sample, channel) into one axis so every channel becomes its own
# univariate series: row n * C + c holds channel c of sample n.
X_univariate = X_train.reshape(N * C, L).float()  # [N×C, L]

In [None]:
# Shape sanity check: X_univariate should be [N * C, L] (one row per
# sample/channel pair) and Y_train should hold one label per original sample.
(X_univariate.shape, Y_train.shape)

In [None]:
from hierarchical_event_labeling import CompleteHierarchicalEventDataset

# Switches passed to the labeling pipeline. Their exact semantics live in
# hierarchical_event_labeling (not visible here); only the chaotic option
# is turned off.
event_detector_flags = dict(
    use_spectral=True,
    use_wavelets=True,
    use_wavelet_peaks=True,
    use_changepoint=True,
    use_chaotic=False,
)

# Input is the flattened [N * C, L] tensor, i.e. one univariate series per
# (sample, channel) pair.
event_dataset = CompleteHierarchicalEventDataset(
    X_univariate,
    verbose=True,
    **event_detector_flags,
)
# Presumably yields one univariate annotation per input row (N * C in total;
# 66168 only when the full 7352-sample split is loaded) -- TODO confirm.

In [None]:
from data_loader import create_har_dataloader

# Build the training DataLoader from the multi-channel data plus the
# per-channel annotations. The channel count is taken from X_train's shape (C)
# instead of a hard-coded 9, so it cannot drift from the data actually loaded.
train_loader = create_har_dataloader(
    original_data=X_train,               # [N, C, L] - original multi-channel
    hierarchical_dataset=event_dataset,  # built from the [N * C, L] univariate rows
    activity_labels=Y_train,             # [N] - HAR labels
    batch_size=8,
    n_channels=C,                        # 9 for the HAR data
    shuffle=True,
    verbose=True
)


In [None]:
from hierarchical_event_labeling import VOCAB

# Fetch a single example (dataset index 1) straight from the underlying dataset
sample = train_loader.dataset[1]
channel_0_intervals = sample['intervals'][0]  # intervals of the first channel

# Each interval is a (start, end, label_id) triple; print one row per interval
# with the label id resolved to its vocabulary name.
for interval in channel_0_intervals:
    start, end, label_id = interval
    label_name = VOCAB.id_to_label(label_id)
    duration = end - start + 1  # +1: end is counted as part of the interval
    print(f"[{start:3d}, {end:3d}] {label_name:<30} (duration: {duration:3d})")


In [None]:

# Inspect the fields of `sample`. Jupyter renders only the final expression of
# a cell, so the earlier fields are printed explicitly; otherwise they would be
# evaluated and silently discarded.

# 2. Annotations for each channel (9 lists)
print(sample['intervals'])  # List of 9 interval lists
# Illustrative structure (example values, not real output); each entry is a
# (start, end, label_id) triple:
# [
#   [(0, 15, 30), (16, 25, 20), ...],  # Channel 0 intervals
#   [(0, 12, 42), (13, 28, 31), ...],  # Channel 1 intervals
#   ...
#   [(0, 18, 25), (19, 30, 40), ...]   # Channel 8 intervals
# ]

# 3. HAR activity label (shared across all channels)
print(sample['har_label'])  # e.g. 0 (WALKING)

# 4. Pix2Seq target sequence (all channels combined)
sample['target_sequence']  # [T] tokens; T is the token count, presumably unrelated to the window length L -- TODO confirm
# Illustrative layout (example values, not real output):
# [100,  # HAR class
#  230, 1000, 1117,  # Channel 0, interval 1
#  220, 1125, 1195,  # Channel 0, interval 2
#  ...
#  242, 1000, 1093,  # Channel 1, interval 1
#  ...
#  1]  # EOS

In [None]:


# NOTE(review): this cell repeats items 3-4 of the previous cell; consider
# keeping only one copy.
# Jupyter renders only the final expression of a cell, so the label is printed
# explicitly; otherwise it would be evaluated and silently discarded.

# 3. HAR activity label (shared across all channels)
print(sample['har_label'])  # e.g. 0 (WALKING)

# 4. Pix2Seq target sequence (all channels combined)
sample['target_sequence']  # [T] tokens; T is the token count, presumably unrelated to the window length L -- TODO confirm
# Illustrative layout (example values, not real output):
# [100,  # HAR class
#  230, 1000, 1117,  # Channel 0, interval 1
#  220, 1125, 1195,  # Channel 0, interval 2
#  ...
#  242, 1000, 1093,  # Channel 1, interval 1
#  ...
#  1]  # EOS