In [26]:
import torch

# Load HAR data
train_data = torch.load('data/HAR/train.pt')
X_train = train_data['samples'][:10, :, :]  # [N, 9, 128]
Y_train = train_data['labels'][:10]   # [N]

N, C, L = X_train.shape  # e.g., N=7352, C=9, L=128
X_univariate = X_train.reshape(N * C, L).float()  # [N×9, 128] = [66168, 128]

In [27]:
X_univariate.shape, Y_train.shape

(torch.Size([90, 128]), torch.Size([10]))

In [28]:
from hierarchical_event_labeling import CompleteHierarchicalEventDataset

event_dataset = CompleteHierarchicalEventDataset(
    X_univariate,  # [66168, 128] - univariate
    use_spectral=True,
    use_wavelets=True,
    use_wavelet_peaks=True,
    use_changepoint=True,
    use_chaotic=False,
    verbose=True
)
# Creates 66168 univariate annotations (one per channel)


INITIALIZING COMPLETE HIERARCHICAL EVENT DATASET
Sequences: 90
Length: 128
Spectral features: ✓
Entropy features: ✓
Wavelet features: ✓
Wavelet-based peaks: ✓
Change point detection: ✓
Chaotic segment detection: ✗

[1/4] Extracting enhanced multi-scale features...
      ✓ Computed 59 feature types
[2/4] Encoding step-wise labels...
      ✓ Encoded 11520 timesteps
[3/4] Detecting events with ALL detectors...
      Processing sequence 0/90...
      Processing sequence 50/90...
[4/4] Computing statistics...
      Total events: 3559
      Avg per sequence: 39.5
      Unique labels used: 17/64
      Top 10 labels:
        LOCAL_TROUGH..................    584
        LOCAL_PEAK....................    557
        NORMAL_VOLATILITY.............    336
        SHARP_TROUGH..................    312
        SHARP_PEAK....................    310
        DOWNTREND_SHORT...............    299
        UPTREND_SHORT.................    272
        LOW_VOLATILITY................    203
        MEAN_S

In [29]:
from data_loader import create_shuffled_dataloader

# Step 4: Create shuffled dataloader
train_loader = create_shuffled_dataloader(
    original_data=X_train,
    hierarchical_dataset=event_dataset,
    batch_size=10,
    shuffle_samples=True,
    verbose=True
)



CREATING SHUFFLED INTERVAL PIX2SEQ DATASET
Samples: 10
Channels: 9
Sequence length: 128
Annotations: 90
Vocab size: 195

Extracting intervals from annotations...
  Processing sample 0/10...

✓ DATASET READY
Samples: 10
Total intervals: 3559
Avg intervals per sample: 355.9
Avg intervals per channel: 39.5
Estimated avg sequence length: 1069.7 tokens

Performing initial shuffle...

✓ SHUFFLED DATALOADER READY
Batches per epoch: 1
Intervals shuffled: Per epoch, per channel


In [30]:
from hierarchical_event_labeling import VOCAB

# Get one sample
sample = train_loader.dataset[1]
channel_0_intervals = sample['intervals'][0]  # First channel


# Print with label names
for start, end, label_id in channel_0_intervals:
    label_name = VOCAB.id_to_label(label_id)
    duration = end - start + 1
    print(f"[{start:3d}, {end:3d}] {label_name:<30} (duration: {duration:3d})")


[ 56,  56] SHARP_TROUGH                   (duration:   1)
[119, 119] LOCAL_TROUGH                   (duration:   1)
[ 23,  37] DOWNTREND_SHORT                (duration:  15)
[108, 118] LOW_VOLATILITY                 (duration:  11)
[ 11,  11] MEAN_SHIFT_DOWN                (duration:   1)
[ 22,  22] MEAN_SHIFT_DOWN                (duration:   1)
[119, 119] SHARP_TROUGH                   (duration:   1)
[ 75,  83] NORMAL_VOLATILITY              (duration:   9)
[ 38,  51] UPTREND_SHORT                  (duration:  14)
[114, 114] MEAN_SHIFT_DOWN                (duration:   1)
[ 23,  23] LOCAL_PEAK                     (duration:   1)
[ 56,  56] LOCAL_TROUGH                   (duration:   1)
[ 73,  73] LOCAL_PEAK                     (duration:   1)
[ 37,  37] MEAN_SHIFT_UP                  (duration:   1)
[ 84,  98] DOWNTREND_SHORT                (duration:  15)
[  0,  19] FLAT_SEGMENT                   (duration:  20)
[ 12,  12] SHARP_PEAK                     (duration:   1)
[ 26,  26] LOC

In [35]:
# Step 5: Training (intervals shuffled each epoch!)
for epoch in range(2):
    print(f"\n=== Epoch {epoch} ===")
    
    for batch in train_loader:
        timeseries = batch['timeseries']        # [32, 9, 128] - FIXED
        target_seq = batch['target_sequence']   # [32, max_len] - SHUFFLED
        print("Timeseries shape:", timeseries)
        print(target_seq)


=== Epoch 0 ===
Timeseries shape: tensor([[[-2.8233e-01, -2.2980e-01, -1.0485e-01,  ..., -1.4684e-01,
          -6.6278e-02,  6.7344e-02],
         [ 1.1392e-01,  9.3244e-02,  9.3909e-02,  ...,  3.3790e-01,
           3.6800e-01,  3.7839e-01],
         [ 3.8689e-02, -1.8090e-02, -3.6902e-02,  ...,  2.6060e-02,
           5.4099e-02,  7.7434e-02],
         ...,
         [ 6.1260e-01,  6.6497e-01,  7.8970e-01,  ...,  7.4186e-01,
           8.2214e-01,  9.5540e-01],
         [-2.8010e-01, -3.0221e-01, -3.0298e-01,  ..., -6.1126e-02,
          -3.2162e-02, -2.2901e-02],
         [-2.6934e-01, -3.2591e-01, -3.4449e-01,  ..., -2.7576e-01,
          -2.4760e-01, -2.2416e-01]],

        [[-2.7014e-03,  8.9883e-04,  3.5960e-03,  ..., -2.4880e-03,
          -1.1989e-03, -6.1293e-04],
         [ 1.2653e-02,  1.0769e-02,  7.5920e-03,  ..., -8.0178e-03,
           1.4468e-03,  7.4841e-03],
         [ 3.8694e-03,  3.8150e-03,  4.0854e-03,  ..., -2.2938e-02,
          -2.1546e-02, -2.1118e-02],
    

In [39]:
target_seq[2]

tensor([  1, 295, 300,  ...,   0,   0,   0])