In [1]:

import numpy as np

def generate_windows(feature_length, base_length, scales):
    """
    Generate temporal anchor windows centered on each feature position.

    For every position ``0 .. feature_length - 1`` and every scale ``s`` two
    windows are emitted, in this order: one of length ``base_length * sqrt(s)``
    and one of length ``base_length / sqrt(s)``. For ``s == 1`` both lengths
    coincide, so the two rows are duplicates; they are kept so that every
    position always contributes exactly ``2 * len(scales)`` windows.

    Args:
        feature_length (int): Length of backbone feature sequence (T)
        base_length (float): Base window length (n)
        scales (list): List of scale factors (e.g., [1, 2, 4])

    Returns:
        windows (np.ndarray): float array of shape (num_windows, 2),
                              each row = [center, length].
                              The shape is (0, 2) when there are no positions
                              or no scales.
    """
    # The lengths do not depend on the position, so compute them once.
    lengths = []
    for s in scales:
        root = np.sqrt(s)
        lengths.append(base_length * root)
        lengths.append(base_length / root)

    windows = [
        [center, length]
        for center in range(feature_length)
        for length in lengths
    ]

    # Explicit reshape keeps the documented 2-D layout even for an empty list
    # (np.array([]) alone would come back with shape (0,)).
    return np.array(windows, dtype=float).reshape(len(windows), 2)


In [None]:
# Smoke test: feed one random batch through `backbone` and print the shape
# of what comes out.
# NOTE(review): `tf`, `backbone`, `input_length` and `C_in` are not defined in
# any cell above this one in the visible notebook (`tf` is only imported in a
# later cell) — confirm they are in scope on a fresh kernel before running.
dummy = tf.random.normal((1, input_length, C_in))
out = backbone(dummy)
print(out.shape)


In [None]:
import tensorflow.keras as keras
from tensorflow.keras import layers
import numpy as np
import os
import tensorflow as tf

os.environ['CUDA_VISIBLE_DEVICES'] = '0'



# ──────────────────────────────────────────────
# 2. Data loading
# ──────────────────────────────────────────────
print('==> Preparing data..')

# Location of the pre-extracted UCI-HAR .npy files. The original absolute path
# only exists on the author's machine, so it can be overridden through the
# UCI_HAR_DIR environment variable; the old path remains the default.
DATA_DIR = os.environ.get(
    'UCI_HAR_DIR',
    '/home/gaowenbing/desktop/dd/Torch_Har_cbam/HAR_Dataset/uci_har/',
)

# Inputs are loaded as 3-D arrays and get a trailing singleton channel axis
# so they match the channels-last layout expected by Conv2D: (N, H, W, C).
train_x = np.load(os.path.join(DATA_DIR, 'np_train_x.npy')).astype(np.float32)
train_x = train_x.reshape(train_x.shape[0], train_x.shape[1], train_x.shape[2], 1)   # (N, H, W, C)  – channels-last

train_y = np.load(os.path.join(DATA_DIR, 'np_train_y.npy')).astype(np.float32)

test_x = np.load(os.path.join(DATA_DIR, 'np_test_x.npy')).astype(np.float32)
test_x = test_x.reshape(test_x.shape[0], test_x.shape[1], test_x.shape[2], 1)

test_y = np.load(os.path.join(DATA_DIR, 'np_test_y.npy')).astype(np.float32)

print('train_x:', train_x.shape, '  train_y:', train_y.shape)
print('test_x: ', test_x.shape,  '  test_y: ', test_y.shape)

# tf.data pipelines
BATCH_SIZE = 256
# NOTE(review): 2947 looks like the full test-set size (one evaluation batch)
# — confirm against the actual length of test_x.
TEST_BATCH  = 2947

# Training data is reshuffled every epoch; the buffer is larger than the
# number of training windows reported by the recorded run (7352).
train_dataset = (
    tf.data.Dataset.from_tensor_slices((train_x, train_y))
    .shuffle(buffer_size=10000)
    .batch(BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)

# Test data keeps its original order.
test_dataset = (
    tf.data.Dataset.from_tensor_slices((test_x, test_y))
    .batch(TEST_BATCH)
    .prefetch(tf.data.AUTOTUNE)
)

# ──────────────────────────────────────────────
# 3. SKConv layer
#    PyTorch layout: (N, C, H, W)  →  TF layout: (N, H, W, C)
#    PyTorch kernel=(3,1), padding=(1+i, 1), dilation=(1+i, 1)
#    → TF Conv2D with kernel_size=(3,1), padding='same' per branch
# ──────────────────────────────────────────────

class SKConv(layers.Layer):
    """Selective-Kernel Convolution block (TensorFlow/Keras, channels-last).

    M parallel (3, 1) grouped convolutions with dilation rates 1..M along the
    first spatial axis are fused by summation, squeezed to a compact channel
    descriptor, and re-weighted with a per-channel softmax over the branches.

    Args:
        features: Number of output channels of every branch.
        M: Number of parallel branches.
        G: Number of convolution groups (input channels and ``features`` must
            both be divisible by it).
        r: Reduction ratio used to size the compact descriptor.
        stride: Stride along the first spatial axis. Keras documents strides
            != 1 as incompatible with dilation_rate != 1, so values other than
            1 only work together with M == 1.
        L: Lower bound for the compact descriptor width.
    """

    def __init__(self, features, M=3, G=32, r=32, stride=1, L=32, **kwargs):
        super().__init__(**kwargs)
        # Keep every constructor argument so get_config() can round-trip them.
        self.features = features
        self.M = M
        self.G = G
        self.r = r
        self.stride = stride
        self.L = L

        # Width of the compact descriptor z.
        d = max(int(features / r), L)

        # M parallel branches with different dilation rates
        self.branches = []
        for i in range(M):
            branch = keras.Sequential([
                # PyTorch groups=G  ≈  TF Conv2D with groups=G (TF 2.x supports it)
                layers.Conv2D(
                    filters=features,
                    kernel_size=(3, 1),
                    strides=(stride, 1),
                    padding='same',
                    dilation_rate=(1 + i, 1),
                    groups=G,
                    use_bias=False,
                ),
                layers.BatchNormalization(),
                layers.ReLU(),
            ], name=f'branch_{i}')
            self.branches.append(branch)

        # Global Average Pool + FC (implemented as Conv2D(1,1) to keep 4-D tensor)
        self.gap = layers.GlobalAveragePooling2D(keepdims=True)   # → (N, 1, 1, C)
        self.fc = keras.Sequential([
            layers.Conv2D(d, kernel_size=1, use_bias=False),
            layers.BatchNormalization(),
            layers.ReLU(),
        ], name='fc_z')

        # One 1×1 conv per branch to produce attention logits
        self.fcs = [
            layers.Conv2D(features, kernel_size=1, name=f'fc_attn_{i}')
            for i in range(M)
        ]
        self.softmax = layers.Softmax(axis=1)   # softmax across branch dimension

    def call(self, x, training=False):
        """Apply the block to ``x`` of shape (N, H, W, C_in); returns (N, H', W, features)."""
        # Each branch output: (N, H, W, features)
        branch_feats = [branch(x, training=training) for branch in self.branches]

        # Stack → (N, M, H, W, features)
        feats = tf.stack(branch_feats, axis=1)

        # Fuse: element-wise sum across branches → (N, H, W, features)
        feats_U = tf.reduce_sum(feats, axis=1)

        # Channel descriptor via GAP → (N, 1, 1, features)
        feats_S = self.gap(feats_U)

        # Compact feature → (N, 1, 1, d)
        feats_Z = self.fc(feats_S, training=training)

        # Per-branch attention vectors → list of (N, 1, 1, features)
        attn_vectors = [fc(feats_Z) for fc in self.fcs]

        # Stack → (N, M, 1, 1, features), then softmax over branch dim
        attn_vectors = tf.stack(attn_vectors, axis=1)
        attn_vectors = self.softmax(attn_vectors)           # (N, M, 1, 1, features)

        # Weighted sum: (N, M, H, W, features) * (N, M, 1, 1, features)
        feats_V = tf.reduce_sum(feats * attn_vectors, axis=1)  # (N, H, W, features)

        return feats_V

    def get_config(self):
        """Return the full constructor configuration for serialization."""
        config = super().get_config()
        config.update(dict(
            features=self.features,
            M=self.M,
            G=self.G,
            r=self.r,
            stride=self.stride,
            L=self.L,
        ))
        return config


# ──────────────────────────────────────────────
# 4. SKNet model
# ──────────────────────────────────────────────

def build_sknet(M=3, G=32, r=32, stride=1, L=32, num_classes=6,
                input_shape=(128, 9, 1)):
    """Build the (uncompiled) Keras port of the PyTorch SKNet.

    Args:
        M, G, r, stride, L: Forwarded unchanged to both SKConv blocks.
        num_classes: Width of the final Dense layer (number of logits).
        input_shape: Per-sample input shape (H, W, C). H and W must be known
            statically because the network ends in Flatten → Dense, and Dense
            cannot be built on an undefined feature dimension. The default
            matches the (128, 9, 1) windows used elsewhere in this notebook.

    Returns:
        keras.Model mapping (N, H, W, 1) inputs to (N, num_classes)
        layer-normalised logits. The caller is responsible for compile().
    """
    inputs = keras.Input(shape=input_shape, name='input')   # (H, W, 1)

    # conv1: PyTorch Conv2d(1, 64, (5,1), stride=(3,1), padding=(1,0))
    # channels-last TF port. NOTE(review): padding='same' is not an exact match
    # for PyTorch padding=(1,0) — for H=128 it yields 43 rows where the PyTorch
    # layer would yield 42. Confirm if checkpoint parity matters.
    x = layers.Conv2D(64, kernel_size=(5, 1), strides=(3, 1), padding='same', use_bias=True)(inputs)
    x = layers.BatchNormalization()(x)
    x = layers.ReLU()(x)

    # NOTE: PyTorch conv2_sk expects 128 input features but conv1 outputs 64.
    # The original code passes 64-channel output to SKConv(128, ...) which would
    # error in PyTorch too unless conv1 produces 128. We mirror the code as-is
    # and set SKConv to match actual channel count (64 → 64, then 64 → 128).
    # Adjust these numbers to match your actual checkpoint / intent.
    x = SKConv(64,  M=M, G=G, r=r, stride=stride, L=L, name='skconv1')(x)
    x = SKConv(128, M=M, G=G, r=r, stride=stride, L=L, name='skconv2')(x)

    # Flatten
    x = layers.Flatten()(x)

    # FC: num_classes-way output
    # PyTorch uses nn.LayerNorm on the logits; replicate with LayerNormalization
    x = layers.Dense(num_classes, name='fc')(x)
    x = layers.LayerNormalization(name='layer_norm')(x)

    model = keras.Model(inputs, x, name='SKNet')
    return model


# ──────────────────────────────────────────────
# 5. Compile
# ──────────────────────────────────────────────
print('==> Building model..')

model = build_sknet()
model.summary()

WD = 1e-4
optimizer = keras.optimizers.Adam(learning_rate=0.001, weight_decay=WD)

# The labels are loaded as a 1-D vector of class ids, not one-hot rows; the
# output recorded from the data-loading cell was:
#   train_x: (7352, 128, 9)   train_y: (7352,)
#   test_x:  (2947, 128, 9)   test_y:  (2947,)
# so the sparse variant of the cross-entropy is the one that matches.
# NOTE(review): this assumes class ids run 0 .. num_classes-1 — confirm the
# .npy labels are not 1-based.
loss_fn = keras.losses.SparseCategoricalCrossentropy(from_logits=True)

model.compile(
    optimizer=optimizer,
    loss=loss_fn,
    metrics=['accuracy'],
)

# Learning-rate scheduler: StepLR(step_size=50, gamma=0.1)
# In Keras this is done via a LearningRateScheduler callback
def step_lr_schedule(epoch, lr):
    """Scale the LR by 0.1 at every positive multiple of 50 epochs.

    Receives the epoch index and the current learning rate and returns the
    learning rate to use for that epoch (unchanged outside the boundaries).
    """
    at_boundary = epoch > 0 and epoch % 50 == 0
    return lr * 0.1 if at_boundary else lr

lr_callback = keras.callbacks.LearningRateScheduler(step_lr_schedule, verbose=1)

# ──────────────────────────────────────────────
# 6. Training loop
# ──────────────────────────────────────────────
EPOCHS = 500

start_epoch = 0
best_acc    = 0.0   # initialised here so the cell runs on a fresh kernel

epoch_list  = []
error_list  = []

# Custom training loop to replicate per-epoch logging
for epoch in range(start_epoch, start_epoch + EPOCHS):
    print(f'\nEpoch: {epoch}')

    # --- Train ---
    # Run exactly one epoch, but tell Keras which global epoch it is:
    # with a plain `epochs=1` every call restarts at epoch 0, so the
    # LearningRateScheduler would never see a multiple of 50 and the
    # learning rate would never decay.
    train_results = model.fit(
        train_dataset,
        initial_epoch=epoch,
        epochs=epoch + 1,
        verbose=1,
        callbacks=[lr_callback],
    )

    # --- Test ---
    # evaluate() returns [loss, accuracy] for the metrics given to compile().
    test_results = model.evaluate(test_dataset, verbose=0)
    test_loss    = test_results[0]
    test_acc     = test_results[1]
    test_error   = 1.0 - test_acc

    print(f'test: {test_acc:.4f} || {test_error:.4f}')

    epoch_list.append(epoch)
    error_list.append(test_error)

    if test_acc > best_acc:
        best_acc = test_acc
        # Same `.weights.h5` suffix as the other save_weights call in this
        # notebook; recent Keras versions reject other file names.
        model.save_weights('best_sknet.weights.h5')
        print(f'  >> New best accuracy: {best_acc:.4f}  (weights saved)')

# Reported once, after the last epoch (it used to sit inside the `if` above
# and was printed on every improvement).
print(f'\nTraining complete. Best test accuracy: {best_acc:.4f}')

# ──────────────────────────────────────────────
# 7. Model statistics  (replaces torchstat)
#    Print param count and a summary for input (128, 9, 1).
# ──────────────────────────────────────────────
# Fresh model instance used only for reporting its size.
stat_model = build_sknet()
stat_model.build(input_shape=(None, 128, 9, 1))
stat_model.summary()

# Total count comes from Keras; the trainable count is summed by hand from
# the element count of each trainable weight tensor.
total_params     = stat_model.count_params()
trainable_params = sum(
    [tf.size(weight).numpy() for weight in stat_model.trainable_weights]
)
print(f'Total parameters:     {total_params:,}')
print(f'Trainable parameters: {trainable_params:,}')

# NOTE(review): the statements below read like the tail of a model-builder
# function that was pasted at module level: they need `x`, `num_classes` and
# `n_anchors_per_pos` to be in scope — confirm where this fragment belongs.

# Average over axis 2. The reduction is wrapped in a Lambda layer so that it
# can be applied to `x` like any other layer (the bare
# `tf.reduce_mean(t, ...)(x)` form used an unbound `t` and tried to call the
# resulting tensor).
x = layers.Lambda(lambda t: tf.reduce_mean(t, axis=2), name='reduce_sensors')(x)
# x shape: (B, T_feat, 256)

# ── Recognition & Segmentation heads (Conv1D, Table I) ───────────────────
# Class head: predicts (num_classes+1) logits per anchor per time step
cls = layers.Conv1D(n_anchors_per_pos * (num_classes + 1),
                    kernel_size=3, padding='same',
                    name='cls_conv')(x)         # (B, T_feat, A*(K+1))
# Loc head: predicts 2 offsets per anchor per time step
loc = layers.Conv1D(n_anchors_per_pos * 2,
                    kernel_size=3, padding='same',
                    name='loc_conv')(x)         # (B, T_feat, A*2)

# Reshape to (B, total_anchors, K+1) and (B, total_anchors, 2)
cls_out = layers.Reshape((-1, num_classes + 1), name='cls_out')(cls)
loc_out = layers.Reshape((-1, 2),               name='loc_out')(loc)
# cls_out shape: (B, total_anchors, K+1)
# loc_out shape: (B, total_anchors, 2)


2026-02-18 15:34:06.294487: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2026-02-18 15:34:18.088076: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2026-02-18 15:34:39.413079: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.


==> Preparing data..


FileNotFoundError: [Errno 2] No such file or directory: '/home/gaowenbing/desktop/dd/Torch_Har_cbam/HAR_Dataset/uci_har/np_train_x.npy'

In [None]:
# ──────────────────────────────────────────────
# 1. GPU configuration
# ──────────────────────────────────────────────
# Ask TensorFlow to grow GPU memory on demand for every visible GPU
# (nothing happens when no GPU is listed).
# NOTE(review): this cell is numbered "1." but sits below the training cell
# in the visible notebook — confirm the intended execution order.
gpus = tf.config.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

# Bookkeeping defaults read by the training loop.
start_epoch = 0
best_acc = 0.0

In [None]:
# ─────────────────────────────────────────────────────────────────────────────
# 5.  MULTI-TASK LOSS  (Section III-E, equations 5-8)
# ─────────────────────────────────────────────────────────────────────────────

def smooth_l1(x):
    """Element-wise Smooth-L1 (Huber, delta = 1) penalty (eq. 6).

    Quadratic (0.5 * x^2) where |x| < 1 and linear (|x| - 0.5) elsewhere.
    """
    magnitude = tf.abs(x)
    quadratic = 0.5 * tf.square(x)
    linear = magnitude - 0.5
    return tf.where(magnitude < 1.0, quadratic, linear)


def mthars_loss(cls_pred, loc_pred, cls_true, loc_true, pos_mask,
                alpha=ALPHA, beta=BETA, neg_pos_ratio=NEG_POS_RATIO):
    """
    Combined multi-task loss (eq. 8).

    Parameters
    ----------
    cls_pred  : (B, N, K+1)  – raw logits
    loc_pred  : (B, N, 2)
    cls_true  : (B, N)       – int class labels (0 = background)
    loc_true  : (B, N, 2)
    pos_mask  : (B, N)       – bool, True for positive anchors
    alpha, beta   : weights of the confidence and localisation terms
    neg_pos_ratio : number of hard negatives kept per positive anchor

    Returns
    -------
    total     : (alpha * conf_loss + beta * loc_loss) / batch size
    conf_loss : positive + hard-negative classification loss
    loc_loss  : Smooth-L1 localisation loss over positive anchors
    """
    batch = tf.shape(cls_pred)[0]

    # ── Localisation loss (positive anchors only) ─────────────────────────────
    loc_diff  = loc_pred - loc_true                             # (B, N, 2)
    loc_loss_ = tf.reduce_sum(smooth_l1(loc_diff), axis=-1)    # (B, N)
    pos_float = tf.cast(pos_mask, tf.float32)
    # Clamp to 1 so batches without positives do not divide by zero.
    n_pos     = tf.maximum(tf.reduce_sum(pos_float), 1.0)
    loc_loss  = tf.reduce_sum(loc_loss_ * pos_float) / n_pos

    # ── Classification loss (hard-negative mining) ────────────────────────────
    cls_true_oh  = tf.one_hot(cls_true, tf.shape(cls_pred)[-1])
    xentropy     = tf.nn.softmax_cross_entropy_with_logits(
                       labels=cls_true_oh, logits=cls_pred)     # (B, N)

    # Positive loss
    pos_cls_loss = tf.reduce_sum(xentropy * pos_float) / n_pos

    # Hard-negative mining: pick top neg_pos_ratio * n_pos negatives
    neg_mask     = tf.logical_not(pos_mask)
    neg_float    = tf.cast(neg_mask, tf.float32)
    neg_loss_all = xentropy * neg_float                        # zero out positives

    # Number of negatives to keep, capped at the number that actually exist.
    n_neg_keep   = tf.cast(n_pos * neg_pos_ratio, tf.int32)
    n_neg_keep   = tf.minimum(n_neg_keep, tf.reduce_sum(tf.cast(neg_mask, tf.int32)))

    # Rank across the whole batch. Positives are scored -1 (cross-entropy is
    # never negative) so they always sort behind every negative; scoring them
    # 0 let them tie with zero-loss negatives and be picked by top_k.
    rank_score      = tf.where(neg_mask, xentropy, -tf.ones_like(xentropy))
    rank_flat       = tf.reshape(rank_score, [-1])             # (B*N,)
    _, top_idx      = tf.math.top_k(rank_flat, k=n_neg_keep)
    hard_neg_mask   = tf.tensor_scatter_nd_update(
                          tf.zeros_like(rank_flat),
                          tf.expand_dims(top_idx, 1),
                          tf.ones_like(top_idx, dtype=tf.float32))
    hard_neg_mask   = tf.reshape(hard_neg_mask, tf.shape(neg_loss_all))
    # Sum the negative-only losses so a positive can never contribute here.
    # The "+ 1" keeps the division defined when no negatives are kept.
    neg_cls_loss    = tf.reduce_sum(neg_loss_all * hard_neg_mask) / tf.cast(n_neg_keep + 1, tf.float32)

    conf_loss = pos_cls_loss + neg_cls_loss
    total     = (alpha * conf_loss + beta * loc_loss) / tf.cast(batch, tf.float32)
    return total, conf_loss, loc_loss










In [None]:
# ─────────────────────────────────────────────────────────────────────────────
# 6.  NON-MAXIMUM SUPPRESSION  (Section III-B)
# ─────────────────────────────────────────────────────────────────────────────

def nms_1d(windows, scores, iou_threshold=0.4):
    """
    Greedy 1-D non-maximum suppression.

    Repeatedly keeps the highest-scoring remaining window and drops every
    other remaining window whose IoU with it exceeds ``iou_threshold``.

    Parameters
    ----------
    windows : (n, 2)  [centre, length]
    scores  : (n,)    class probability for the predicted class
    iou_threshold : windows with IoU above this value are suppressed

    Returns
    -------
    keep : list of indices into ``windows`` to retain, best score first
    """
    remaining = np.argsort(scores)[::-1]      # best score first
    keep = []
    while remaining.size > 0:
        best, rest = remaining[0], remaining[1:]
        keep.append(best)
        overlaps = np.array([
            compute_iou_1d(windows[best, 0], windows[best, 1],
                           windows[other, 0], windows[other, 1])
            for other in rest
        ])
        remaining = rest[overlaps <= iou_threshold]
    return keep


In [None]:
# ─────────────────────────────────────────────────────────────────────────────
# 7.  CONCATENATION / PREDICTION  (Algorithm 1)
# ─────────────────────────────────────────────────────────────────────────────

def concatenate_segments(predicted_boundaries):
    """
    Algorithm 1: collapse runs of same-class windows into activity spans.

    The segments are ordered by ``start`` here (the input need not be
    pre-sorted). Consecutive segments with the same ``activity`` are folded
    into one span whose ``end`` is the largest ``end`` of the run. The input
    dictionaries are copied, never modified.

    Parameters
    ----------
    predicted_boundaries : list of {'activity', 'start', 'end'}

    Returns
    -------
    list of {'activity', 'start', 'end'}, ordered by start
    """
    if not predicted_boundaries:
        return []

    spans = []
    for segment in sorted(predicted_boundaries, key=lambda seg: seg['start']):
        if spans and spans[-1]['activity'] == segment['activity']:
            # Same class as the open span: extend it.
            open_span = spans[-1]
            open_span['end'] = max(open_span['end'], segment['end'])
        else:
            # Class changed (or first segment): start a new span.
            spans.append(segment.copy())
    return spans


In [None]:
# ─────────────────────────────────────────────────────────────────────────────
# 8.  EVALUATION METRICS  (Section IV-B)
# ─────────────────────────────────────────────────────────────────────────────

def levenshtein_distance(s1, s2):
    """Edit distance between two label sequences.

    Row-by-row dynamic programme that keeps only the previous and the
    current row. The result is returned as a NumPy float.
    """
    width = len(s2)
    current = np.arange(width + 1, dtype=float)     # distances from the empty prefix of s1
    for i, left in enumerate(s1, start=1):
        previous = current.copy()
        current[0] = i
        for j, right in enumerate(s2, start=1):
            substitution = previous[j - 1] + (0 if left == right else 1)
            deletion = previous[j] + 1
            insertion = current[j - 1] + 1
            current[j] = min(deletion, insertion, substitution)
    return current[width]


def normalized_edit_distance(pred_seq, true_seq):
    """NED (eq. 9-10 in paper). Lower is better.

    Edit distance between the two sequences divided by the length of the
    ground-truth sequence (a length of 0 is treated as 1).
    """
    distance = levenshtein_distance(pred_seq, true_seq)
    reference_len = max(len(true_seq), 1)
    return distance / reference_len


def weighted_f1(y_true, y_pred):
    """Weighted F1 score (eq. 11).

    Delegates to ``f1_score`` with ``average='weighted'`` and
    ``zero_division=0``.
    """
    score = f1_score(y_true, y_pred, average='weighted', zero_division=0)
    return score




In [None]:
# ─────────────────────────────────────────────────────────────────────────────
# 9.  TRAINING
# ─────────────────────────────────────────────────────────────────────────────

@tf.function
def train_step(model, optimizer, X_batch, cls_batch, loc_batch, pos_batch):
    """Run one optimisation step on a batch.

    Returns the (total, confidence, localisation) losses from mthars_loss.
    """
    with tf.GradientTape() as tape:
        cls_logits, loc_offsets = model(X_batch, training=True)
        losses = mthars_loss(
            cls_logits, loc_offsets, cls_batch, loc_batch, pos_batch,
            alpha=ALPHA, beta=BETA)
        total, conf_term, loc_term = losses
    # Differentiate the total loss only; the two components are for reporting.
    variables = model.trainable_variables
    gradients = tape.gradient(total, variables)
    optimizer.apply_gradients(zip(gradients, variables))
    return total, conf_term, loc_term


@tf.function
def val_step(model, X_batch, cls_batch, loc_batch, pos_batch):
    """Compute the losses for one batch in inference mode (no weight update).

    Returns the (total, confidence, localisation) losses from mthars_loss.
    """
    cls_logits, loc_offsets = model(X_batch, training=False)
    total, conf_term, loc_term = mthars_loss(
        cls_logits, loc_offsets, cls_batch, loc_batch, pos_batch,
        alpha=ALPHA, beta=BETA)
    return total, conf_term, loc_term


def train_model(model, optimizer, train_ds, val_ds, epochs, scheduler=None):
    """Train for ``epochs`` epochs and log the mean losses of each epoch.

    Every epoch runs train_step over ``train_ds`` and val_step over ``val_ds``,
    calls ``scheduler.step()`` when a scheduler is given, and prints one
    summary line.

    Returns a dict with one list per key ('train_loss', 'val_loss',
    'val_conf', 'val_loc'), each holding one mean value per epoch.
    """
    history = {'train_loss': [], 'val_loss': [], 'val_conf': [], 'val_loc': []}

    for epoch_idx in range(epochs):
        # ── Training pass: only the total loss is recorded ───────────────────
        train_totals = []
        for X_b, cls_b, loc_b, pos_b in train_ds:
            total, _conf, _loc = train_step(model, optimizer, X_b, cls_b, loc_b, pos_b)
            train_totals.append(total.numpy())

        # ── Validation pass: total plus both components ──────────────────────
        val_terms = {'val_loss': [], 'val_conf': [], 'val_loc': []}
        for X_b, cls_b, loc_b, pos_b in val_ds:
            total, conf_term, loc_term = val_step(model, X_b, cls_b, loc_b, pos_b)
            val_terms['val_loss'].append(total.numpy())
            val_terms['val_conf'].append(conf_term.numpy())
            val_terms['val_loc'].append(loc_term.numpy())

        if scheduler is not None:
            scheduler.step()

        history['train_loss'].append(np.mean(train_totals))
        for key, values in val_terms.items():
            history[key].append(np.mean(values))

        latest = {key: series[-1] for key, series in history.items()}
        print(f'Epoch {epoch_idx + 1:3d}/{epochs}  '
              f'train_loss={latest["train_loss"]:.4f}  '
              f'val_loss={latest["val_loss"]:.4f}  '
              f'(conf={latest["val_conf"]:.4f}, '
              f'loc={latest["val_loc"]:.4f})')

    return history


In [None]:
# ─────────────────────────────────────────────────────────────────────────────
# 10. INFERENCE PIPELINE (end-to-end segmentation + recognition)
# ─────────────────────────────────────────────────────────────────────────────

def predict_stream(model, stream, feature_seq_len, scales,
                   fixed_window_samples=600, step=300,
                   nms_thresh=NMS_IOU_THRESHOLD,
                   score_thresh=0.3):
    """
    Run the full MTHARS inference pipeline on a continuous stream.

    The stream is cut into chunks of ``fixed_window_samples`` samples taken
    every ``step`` samples; a trailing remainder shorter than one chunk is
    not processed. For each chunk the model output is filtered (background
    class 0 and scores below ``score_thresh`` are dropped), decoded against
    the anchor windows, reduced with per-class NMS and shifted to stream
    coordinates. All surviving segments are finally merged with
    concatenate_segments.

    Returns
    -------
    all_segments : list of {'activity', 'start', 'end'}
    """
    # NOTE(review): generate_windows is called with two arguments here, while
    # the definition visible in this notebook takes three
    # (feature_length, base_length, scales) — confirm which version is in scope.
    anchors = generate_windows(feature_seq_len, scales)
    # Anchors live on the feature axis; rescale them to sample units.
    to_samples = fixed_window_samples / feature_seq_len
    anchors = anchors.copy()
    anchors[:, 0] *= to_samples
    anchors[:, 1] *= to_samples

    T = len(stream)
    raw_preds = []

    for offset in range(0, T - fixed_window_samples + 1, step):
        chunk = stream[offset : offset + fixed_window_samples]
        # Add batch and channel axes around the 2-D chunk.
        batch = chunk[np.newaxis, :, :, np.newaxis].astype(np.float32)
        cls_out, loc_out = model(batch, training=False)
        cls_logits = cls_out.numpy()[0]     # (n_anchors, K+1)
        offsets    = loc_out.numpy()[0]     # (n_anchors, 2)

        probs  = tf.nn.softmax(cls_logits, axis=-1).numpy()    # (n_anchors, K+1)
        labels = np.argmax(probs, axis=-1)                     # (n_anchors,)
        confidences = probs[np.arange(len(labels)), labels]

        # Keep confident foreground anchors only (class 0 is background)
        foreground = (labels > 0) & (confidences >= score_thresh)
        if not np.any(foreground):
            continue

        fg_wins = anchors[foreground]
        fg_cls  = labels[foreground]
        fg_scr  = confidences[foreground]
        fg_loc  = offsets[foreground]

        # Decode offsets → predicted boundaries (chunk-local coordinates)
        decoded = []
        for win, off, label, score in zip(fg_wins, fg_loc, fg_cls, fg_scr):
            tx, tl = decode_offsets(off[0], off[1], win[0], win[1])
            decoded.append({'tx': tx, 'tl': tl,
                            'activity': label, 'score': score})

        # Per-class NMS
        for act_id in np.unique(fg_cls):
            members = [d for d in decoded if d['activity'] == act_id]
            w_arr = np.array([[d['tx'], d['tl']] for d in members])
            s_arr = np.array([d['score'] for d in members])
            for k in nms_1d(w_arr, s_arr, nms_thresh):
                d = members[k]
                half = d['tl'] / 2
                # Shift to stream coordinates and clamp to the stream extent.
                seg_start = max(0, int(d['tx'] - half) + offset)
                seg_end   = min(T - 1, int(d['tx'] + half) + offset)
                raw_preds.append({'activity': int(d['activity']),
                                  'start': seg_start, 'end': seg_end})

    # Concatenate adjacent same-class segments (Algorithm 1)
    return concatenate_segments(raw_preds)




In [None]:
# ─────────────────────────────────────────────────────────────────────────────
# 11. EVALUATION AGAINST GROUND TRUTH
# ─────────────────────────────────────────────────────────────────────────────

def evaluate(model, reconstructed_test, feature_seq_len, scales,
             fixed_window_samples=600, step=300):
    """
    Compute NED (segmentation) and weighted-F1 (recognition) on the test set.

    Parameters
    ----------
    reconstructed_test : mapping of subject id → dict with keys
        'signal' (the stream), 'boundaries' (ground-truth segments with
        'activity'/'start') and 'labels' (per-sample labels).
    feature_seq_len, scales, fixed_window_samples, step :
        forwarded to predict_stream.

    Returns
    -------
    (mean_ned, f1) : NED averaged over subjects, and the weighted F1 over
        all per-sample labels.
    """
    all_true_cls, all_pred_cls = [], []
    ned_scores = []

    for sub, data in reconstructed_test.items():
        stream     = data['signal']
        gt_bounds  = data['boundaries']
        true_seq   = [b['activity'] for b in sorted(gt_bounds, key=lambda b: b['start'])]

        pred_segs  = predict_stream(model, stream, feature_seq_len, scales,
                                    fixed_window_samples, step)
        pred_seq   = [s['activity'] for s in pred_segs]

        ned = normalized_edit_distance(pred_seq, true_seq)
        ned_scores.append(ned)

        # Per-sample classification labels for F1.
        # Samples not covered by any predicted segment stay 0.
        T = len(stream)
        true_lbl = data['labels']                     # (T,)
        pred_lbl = np.zeros(T, dtype=np.int32)
        for seg in pred_segs:
            pred_lbl[seg['start']:seg['end'] + 1] = seg['activity']

        all_true_cls.extend(true_lbl.tolist())
        all_pred_cls.extend(pred_lbl.tolist())

    mean_ned = np.mean(ned_scores)
    f1       = weighted_f1(all_true_cls, all_pred_cls)

    print(f'\n── Evaluation Results ───────────────────────────')
    print(f'  Mean NED (↓ better) : {mean_ned:.4f}')
    print(f'  Weighted F1 (↑)     : {f1:.4f}')
    print(f'─────────────────────────────────────────────────')
    # Pass the label ids explicitly, paired with their names. Without
    # `labels`, the report raises as soon as the data contains a class id that
    # has no entry in ACTIVITY_NAMES (e.g. the 0 used above for uncovered
    # samples), because the number of classes no longer matches target_names.
    report_labels = list(ACTIVITY_NAMES.keys())
    print(classification_report(all_true_cls, all_pred_cls,
                                 labels=report_labels,
                                 target_names=[ACTIVITY_NAMES[k] for k in report_labels],
                                 zero_division=0))
    return mean_ned, f1




In [None]:
# ─────────────────────────────────────────────────────────────────────────────
# 12. PLOTTING
# ─────────────────────────────────────────────────────────────────────────────

def plot_training_history(history, save_path=None):
    """Plot the loss curves stored in ``history`` side by side.

    Left panel: train and validation total loss. Right panel: validation
    confidence and localisation losses. The figure is written to
    ``save_path`` when one is given, then shown.
    """
    fig, (total_ax, parts_ax) = plt.subplots(1, 2, figsize=(12, 4))

    total_ax.plot(history['train_loss'], label='Train')
    total_ax.plot(history['val_loss'],   label='Val')
    total_ax.set_title('Total Loss')
    total_ax.legend()
    total_ax.set_xlabel('Epoch')

    parts_ax.plot(history['val_conf'], label='Conf loss (val)')
    parts_ax.plot(history['val_loc'],  label='Loc loss  (val)')
    parts_ax.set_title('Val Loss Breakdown')
    parts_ax.legend()
    parts_ax.set_xlabel('Epoch')

    plt.tight_layout()
    if save_path:
        plt.savefig(save_path, dpi=150)
    plt.show()


def visualize_segmentation(stream, gt_bounds, pred_segs, channel=0,
                            title='Segmentation Result', save_path=None):
    """Plot one signal channel with ground-truth spans and predicted boundaries.

    Ground-truth segments are drawn as shaded, labelled spans; every predicted
    segment contributes a red dashed line at its start and an orange dashed
    line at its end. The figure is written to ``save_path`` when one is
    given, then shown.
    """
    fig, ax = plt.subplots(figsize=(16, 4))
    ax.plot(stream[:, channel], color='steelblue', lw=0.6, label='Signal')

    palette = plt.cm.tab10.colors
    for bound in gt_bounds:
        activity = bound['activity']
        left, right = bound['start'], bound['end']
        ax.axvspan(left, right, alpha=0.15,
                   color=palette[(activity - 1) % 10])
        # Label sits at 90 % of the current upper y-limit, centred on the span.
        label_y = ax.get_ylim()[1] * 0.9
        ax.text((left + right) / 2, label_y,
                ACTIVITY_NAMES.get(activity, str(activity)),
                ha='center', fontsize=6, color='black')

    for seg in pred_segs:
        ax.axvline(seg['start'], color='red',   lw=1.2, ls='--', alpha=0.7)
        ax.axvline(seg['end'],   color='orange', lw=1.2, ls='--', alpha=0.7)

    ax.set_title(title)
    ax.set_xlabel('Sample index')
    ax.legend()
    plt.tight_layout()
    if save_path:
        plt.savefig(save_path, dpi=150)
    plt.show()




In [None]:
 # ── Build datasets ────────────────────────────────────────────────────────
# ─────────────────────────────────────────────────────────────────────────────
# 13. MAIN
# ─────────────────────────────────────────────────────────────────────────────

def main():
    """End-to-end MTHARS run: load data, train, save weights, evaluate, plot.

    Reads the raw UCI-HAR inertial signals under DATA_ROOT, rebuilds one
    continuous stream per subject, derives anchor-labelled training chunks,
    trains the model, stores the final weights and reports NED / weighted F1
    on the test subjects together with two figures.
    """
    # ── GPU setup ─────────────────────────────────────────────────────────────
    for gpu in tf.config.list_physical_devices('GPU'):
        tf.config.experimental.set_memory_growth(gpu, True)

    # ── Load raw windows ──────────────────────────────────────────────────────
    print('==> Loading raw inertial signals …')
    train_sig_dir = os.path.join(DATA_ROOT, 'train', 'Inertial Signals')
    test_sig_dir  = os.path.join(DATA_ROOT, 'test',  'Inertial Signals')

    X_train_w = load_raw_signals(train_sig_dir, SIGNAL_NAMES_TRAIN)  # (7352,128,9)
    X_test_w  = load_raw_signals(test_sig_dir,  SIGNAL_NAMES_TEST)   # (2947,128,9)

    y_train_w = (np.loadtxt(
        os.path.join(DATA_ROOT, 'train', 'y_train.txt'))).astype(np.int32)
    y_test_w  = (np.loadtxt(
        os.path.join(DATA_ROOT, 'test',  'y_test.txt'))).astype(np.int32)

    sub_train = np.loadtxt(
        os.path.join(DATA_ROOT, 'train', 'subject_train.txt')).astype(np.int32)
    sub_test  = np.loadtxt(
        os.path.join(DATA_ROOT, 'test',  'subject_test.txt')).astype(np.int32)

    print(f'  X_train_w: {X_train_w.shape}  y_train_w: {y_train_w.shape}')
    print(f'  X_test_w:  {X_test_w.shape}   y_test_w:  {y_test_w.shape}')

    # ── Reconstruct continuous streams ────────────────────────────────────────
    print('==> Reconstructing continuous streams per subject …')
    rec_train = reconstruct_continuous_stream(X_train_w, y_train_w, sub_train)
    rec_test  = reconstruct_continuous_stream(X_test_w,  y_test_w,  sub_test)
    print(f'  Train subjects: {len(rec_train)}  '
          f'Test subjects: {len(rec_test)}')

    # ── Derive feature-sequence length from backbone output ───────────────────
    # A chunk of 600 samples → conv1 with stride 3 → ~200 time steps
    FIXED_WINDOW  = 600
    STEP          = 300
    FEAT_SEQ_LEN  = FIXED_WINDOW // 3   # ≈ 200

    n_anchors_per_pos = len(SCALES) * 2
    n_anchors_total   = FEAT_SEQ_LEN * n_anchors_per_pos

    # ── Build datasets ────────────────────────────────────────────────────────
    print('==> Building anchor-labelled datasets …')
    X_tr, cls_tr, loc_tr, pos_tr = build_dataset(
        rec_train, FEAT_SEQ_LEN, SCALES, NUM_CLASSES, FIXED_WINDOW, STEP)
    X_te, cls_te, loc_te, pos_te = build_dataset(
        rec_test,  FEAT_SEQ_LEN, SCALES, NUM_CLASSES, FIXED_WINDOW, STEP)

    print(f'  Train chunks: {len(X_tr)}  anchors/chunk: {n_anchors_total}')
    print(f'  Test  chunks: {len(X_te)}')

    # Normalise signal across training set (statistics per index of axis 2)
    mu  = X_tr.mean(axis=(0, 1, 3), keepdims=True)
    std = X_tr.std(axis=(0, 1, 3), keepdims=True) + 1e-6
    X_tr = (X_tr - mu) / std
    X_te = (X_te - mu) / std

    # The model is trained on normalised chunks, so the continuous streams
    # used at inference time must go through the same transform; feeding the
    # raw streams to predict_stream would not match the training inputs.
    # mu/std are (1, 1, W, 1); dropping the first and last axes gives (1, W),
    # which broadcasts over a (T, W) stream.
    # NOTE(review): assumes build_dataset does not normalise on its own — confirm.
    stream_mu, stream_std = mu[0, :, :, 0], std[0, :, :, 0]
    rec_test_norm = {
        sub: dict(data, signal=(data['signal'] - stream_mu) / stream_std)
        for sub, data in rec_test.items()
    }

    # tf.data pipelines
    def make_ds(X, cls, loc, pos, shuffle=False):
        """Batch (signal, class, offset, positive-mask) tuples into a tf.data pipeline."""
        ds = tf.data.Dataset.from_tensor_slices(
            (X,
             tf.cast(cls, tf.int32),
             tf.cast(loc, tf.float32),
             tf.cast(pos, tf.bool))
        )
        if shuffle:
            ds = ds.shuffle(buffer_size=1024)
        return ds.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

    train_ds = make_ds(X_tr, cls_tr, loc_tr, pos_tr, shuffle=True)
    val_ds   = make_ds(X_te, cls_te, loc_te, pos_te)

    # ── Build model ───────────────────────────────────────────────────────────
    print('==> Building MTHARS model …')
    INPUT_SHAPE = (FIXED_WINDOW, 9, 1)
    model = build_mthars(INPUT_SHAPE, NUM_CLASSES, n_anchors_per_pos,
                          M=3, G=32, r=32, L=32)
    model.summary()

    # ── Optimizer + StepLR scheduler ─────────────────────────────────────────
    # StepLR(step_size=50, gamma=0.1) approximated with a piecewise-constant
    # schedule indexed by optimizer step: ×0.1 after epoch 50 and again after
    # epoch 100 (no further decay beyond that).
    # The dataset keeps its final partial batch, so steps per epoch is the
    # ceiling of len / batch size; the floor made both boundaries fire early.
    steps_per_epoch = -(-len(X_tr) // BATCH_SIZE)
    lr_schedule = keras.optimizers.schedules.PiecewiseConstantDecay(
        boundaries=[50 * steps_per_epoch,
                    100 * steps_per_epoch],
        values=[LR, LR * 0.1, LR * 0.01]
    )
    optimizer = keras.optimizers.Adam(learning_rate=lr_schedule,
                                       weight_decay=WD)

    # ── Train ─────────────────────────────────────────────────────────────────
    print('==> Training …')
    history = train_model(model, optimizer, train_ds, val_ds, EPOCHS)

    # ── Save weights ──────────────────────────────────────────────────────────
    # These are the weights after the final epoch (no best-epoch tracking here).
    model.save_weights('mthars_best.weights.h5')
    print('Weights saved → mthars_best.weights.h5')

    # ── Evaluate ─────────────────────────────────────────────────────────────
    print('==> Evaluating on test set …')
    mean_ned, f1 = evaluate(model, rec_test_norm, FEAT_SEQ_LEN, SCALES,
                             FIXED_WINDOW, STEP)

    # ── Plots ─────────────────────────────────────────────────────────────────
    plot_training_history(history, save_path='mthars_training.png')

    # Visualise one test subject: predict on the normalised stream, but draw
    # the raw signal so the plot stays in the original units.
    sub_id = list(rec_test.keys())[0]
    stream = rec_test[sub_id]['signal']
    preds  = predict_stream(model, rec_test_norm[sub_id]['signal'],
                            FEAT_SEQ_LEN, SCALES, FIXED_WINDOW, STEP)
    visualize_segmentation(stream, rec_test[sub_id]['boundaries'], preds,
                            title=f'Subject {sub_id} – Predicted vs GT',
                            save_path='mthars_segmentation.png')


if __name__ == '__main__':
    main()
