In [None]:
import torch
from hierarchical_event_labeling import EnhancedHierarchicalEventDataset

# Load the HAR splits. Each .pt file is read as a dict with a 'samples' entry;
# per the original notes the samples are laid out as [N, 9, 128].
HAR_DIR = 'data/HAR'
SEQ_LEN = 128  # length of one univariate row after flattening the channels

train_data = torch.load(f'{HAR_DIR}/train.pt')
val_data = torch.load(f'{HAR_DIR}/val.pt')
test_data = torch.load(f'{HAR_DIR}/test.pt')
X_train = train_data['samples']  # [N, 9, 128] # N roughly 59,000 (original note)
X_val = val_data['samples']      # [N, 9, 128] # N roughly 14,000 (original note)
X_test = test_data['samples']    # [N, 9, 128] # N roughly 14,000 (original note)


def to_univariate(samples, seq_len=SEQ_LEN):
    """Flatten multichannel windows into float32 rows of length ``seq_len``.

    Every channel of every window becomes its own univariate sequence.
    ``torch.as_tensor`` lets this accept a NumPy array as well as a tensor
    (an existing tensor is passed through without copying).
    """
    return torch.as_tensor(samples).reshape(-1, seq_len).float()


def build_and_save_annotations(univariate, out_path):
    """Build the annotated dataset for one split, report its size, save it.

    Returns the dataset so the caller can keep working with it.
    """
    dataset = EnhancedHierarchicalEventDataset(univariate)
    print(f"Dataset size: {len(dataset)} samples")
    # NOTE: this serialises the whole dataset object, so loading it back needs
    # the hierarchical_event_labeling module to be importable.
    torch.save(dataset, out_path)
    return dataset


# Reshape: treat each channel as a separate univariate sequence.
# (The test split is loaded above but not annotated in this cell.)
X_train_univariate = to_univariate(X_train)
X_val_univariate = to_univariate(X_val)

train_ann_dataset = build_and_save_annotations(
    X_train_univariate, f'{HAR_DIR}/har_ann_train_dataset.pt'
)
val_ann_dataset = build_and_save_annotations(
    X_val_univariate, f'{HAR_DIR}/har_ann_val_dataset.pt'
)

In [None]:
# check_checkpoint.py
import tensorflow as tf
import sys

import re  # only needed for the layer-index scan below

checkpoint_path = '/projects/pix2seqdata/tmp/ts_model'

# One pattern for both naming schemes; captures the numeric index in names
# such as "layer_3" or "block_12".
layer_index_re = re.compile(r'(?:layer|block)_(\d+)')

# Get latest checkpoint (falsy when nothing is found, handled by the else branch)
ckpt = tf.train.latest_checkpoint(checkpoint_path)
print(f"Latest checkpoint: {ckpt}")

if ckpt:
    # Load checkpoint and fetch the {variable name: shape} map
    reader = tf.train.load_checkpoint(ckpt)
    var_names = reader.get_variable_to_shape_map()

    print(f"\nCheckpoint contains {len(var_names)} variables")
    print("\nSample encoder variables:")
    # NOTE: despite the heading, this lists the first 20 names (alphabetically)
    # of ALL variables; it is not filtered to the encoder.
    for name in sorted(var_names)[:20]:
        print(f"  {name}: {var_names[name]}")

    # Check critical architecture indicators
    encoder_vars = [k for k in var_names if 'encoder' in k.lower()]
    print(f"\nEncoder variables: {len(encoder_vars)}")

    # Try to infer architecture
    print("\nArchitecture hints:")

    # Collect the distinct layer/block indices that appear in encoder names.
    layer_nums = set()
    for var in encoder_vars:
        match = layer_index_re.search(var)
        if match:
            layer_nums.add(int(match.group(1)))

    if layer_nums:
        print(f"  Detected encoder layers: {sorted(layer_nums)}")
        print(f"  → num_encoder_layers = {len(layer_nums)}")
    else:
        # Names like "layer_norm" contain "layer_" but carry no index; report
        # that nothing could be inferred instead of claiming zero layers.
        print("  Could not infer encoder layer count from variable names")

    # Check attention dimensions using the first matching kernel variable
    attn_vars = [k for k in var_names if 'attention' in k.lower() and 'kernel' in k]
    if attn_vars:
        sample_var = attn_vars[0]
        shape = var_names[sample_var]
        print(f"  Sample attention shape: {shape}")
        print(f"  → dim_att might be {shape[-1] if len(shape) > 0 else 'unknown'}")

else:
    print("No checkpoint found!")

In [None]:
# check_config.py
import sys
sys.path.insert(0, '/home/yourusername/ts2seq')

from configs import config_det_finetune

# Build the config object from the fine-tuning config module.
config = config_det_finetune.get_config()

# Header banner: rule, title, rule (one item per line).
rule = "=" * 60
print(rule, "CURRENT CONFIG (before CLI overrides)", rule, sep="\n")

print("\nModel architecture:")
print(f"  name: {config.model.name}")
print(f"  image_size: {config.model.get('image_size', 'NOT SET')}")

# Architecture hyper-parameters to report; a key that is absent from
# config.model is shown as 'NOT SET'.
model_keys = [
    'num_encoder_layers',
    'num_decoder_layers',
    'dim_att',
    'dim_att_dec',
    'dim_mlp',
    'dim_mlp_dec',
    'num_heads',
    'num_heads_dec',
    'encoder_variant',
    'patch_size',
]

for key in model_keys:
    print(f"  {key}: {config.model.get(key, 'NOT SET')}")

# Closing rule, preceded by a blank line.
print("\n" + rule)

In [None]:
# compute_val_loss.py

import tensorflow as tf
import sys
sys.path.insert(0, '.')

from configs import config_ts_224
from data import coco as coco_data
from models import model as model_lib
from tasks import task as task_lib

def compute_validation_loss(model_dir, config, max_batches=100):
    """Estimate the token-weighted cross-entropy loss on validation batches.

    Builds the trainer, validation dataset and task from ``config`` inside a
    ``tf.distribute.MirroredStrategy`` scope, runs the model on up to
    ``max_batches`` batches and averages the per-batch losses.

    Args:
        model_dir: Directory passed to the trainer constructor as ``model_dir``.
        config: Config object; this function reads ``config.model.name``,
            ``config.task.name`` and ``config.eval.batch_size`` and passes the
            whole object to the trainer, dataset and task constructors.
        max_batches: Upper bound on the number of batches sampled
            (defaults to 100, the value that used to be hard-coded).

    Returns:
        The mean of the per-batch losses as a float, or ``float('nan')`` when
        no batch could be evaluated (empty dataset, or every batch skipped).
    """
    strategy = tf.distribute.MirroredStrategy()

    with strategy.scope():
        # NOTE(review): no explicit checkpoint restore happens in this
        # function; confirm the trainer constructor loads weights from
        # model_dir, otherwise the loss is computed on untrained weights.
        trainer = model_lib.TrainerRegistry.lookup(config.model.name)(
            config, model_dir=model_dir
        )

        # Validation data, batched with the evaluation batch size.
        val_dataset = coco_data.CocoObjectDetectionTFRecordDataset(config)
        val_dataset = val_dataset.load_dataset(training=False)
        val_dataset = val_dataset.batch(config.eval.batch_size)

        task = task_lib.TaskRegistry.lookup(config.task.name)(config)

        total_loss = 0.0
        num_batches = 0
        num_skipped = 0

        for batch in val_dataset.take(max_batches):
            preprocessed = task.preprocess_batched(batch, training=False)

            # Only the 4-tuple layout
            # (images, input_seq, target_seq, token_weights) is handled.
            if len(preprocessed) != 4:
                num_skipped += 1
                continue

            images, input_seq, target_seq, token_weights = preprocessed

            # Forward pass
            logits = trainer.model(images, input_seq, training=False)

            # Per-token cross-entropy, averaged with the token weights.
            loss_per_token = tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=target_seq,
                logits=logits
            )
            loss = tf.reduce_sum(loss_per_token * token_weights) / tf.reduce_sum(token_weights)

            total_loss += loss.numpy()
            num_batches += 1

        if num_skipped:
            print(f"Skipped {num_skipped} batches with an unexpected preprocessed layout")

        # Without this guard an empty or fully skipped dataset would raise
        # ZeroDivisionError in the average below.
        if num_batches == 0:
            print("Validation Loss: n/a (no batches were evaluated)")
            return float('nan')

        avg_loss = total_loss / num_batches
        print(f"Validation Loss: {avg_loss:.4f}")
        return avg_loss

if __name__ == "__main__":
    # Directory handed to the trainer, and the ts_224 configuration to evaluate.
    model_dir = "/projects/pix2seqdata/tmp/ts_model"
    config = config_ts_224.get_config()

    # Keep the result around so it can be inspected after the run.
    val_loss = compute_validation_loss(model_dir=model_dir, config=config)

In [None]:
"""
DIAGNOSE ANNOTATION ISSUES
"""

import json
import numpy as np
import matplotlib.pyplot as plt
from collections import Counter

def _plot_annotation_diagnostics(categories, widths, category_counts, ann_counts, output_path):
    """Draw the 2x2 diagnostics figure and save it to ``output_path``."""
    fig, axes = plt.subplots(2, 2, figsize=(12, 10))

    # Top-left: histogram of box widths with the size thresholds marked.
    width_ax = axes[0, 0]
    width_ax.hist(widths, bins=50, edgecolor='black')
    width_ax.set_xlabel('Box Width')
    width_ax.set_ylabel('Count')
    width_ax.set_title('Box Width Distribution')
    width_ax.axvline(32, color='r', linestyle='--', label='Small/Medium threshold')
    width_ax.axvline(96, color='orange', linestyle='--', label='Medium/Large threshold')
    width_ax.legend()

    # Top-right: annotation count per category, ordered by category id.
    # Names are cut to 15 characters so the tick labels stay readable.
    ordered = sorted(categories, key=lambda x: x['id'])
    cat_names = [c['name'][:15] for c in ordered]
    cat_counts = [category_counts[c['id']] for c in ordered]
    cat_ax = axes[0, 1]
    cat_ax.bar(range(len(cat_names)), cat_counts)
    cat_ax.set_xticks(range(len(cat_names)))
    cat_ax.set_xticklabels(cat_names, rotation=45, ha='right')
    cat_ax.set_ylabel('Count')
    cat_ax.set_title('Category Distribution')

    # Bottom-left: histogram of annotations per image.
    per_image_ax = axes[1, 0]
    per_image_ax.hist(ann_counts, bins=30, edgecolor='black')
    per_image_ax.set_xlabel('Annotations per Image')
    per_image_ax.set_ylabel('Count')
    per_image_ax.set_title('Annotations per Image Distribution')

    # Bottom-right: empirical cumulative distribution of box widths.
    sorted_widths = np.sort(widths)
    cumulative = np.arange(1, len(sorted_widths) + 1) / len(sorted_widths)
    cdf_ax = axes[1, 1]
    cdf_ax.plot(sorted_widths, cumulative)
    cdf_ax.set_xlabel('Box Width')
    cdf_ax.set_ylabel('Cumulative Proportion')
    cdf_ax.set_title('Cumulative Width Distribution')
    cdf_ax.axvline(32, color='r', linestyle='--', alpha=0.5)
    cdf_ax.axvline(96, color='orange', linestyle='--', alpha=0.5)
    cdf_ax.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.savefig(output_path, dpi=150, bbox_inches='tight')


def diagnose_annotations(annotation_file, output_path='annotation_diagnostics.png'):
    """Report statistics for a COCO-style annotation JSON file.

    Prints dataset, box-width, category and per-image statistics, saves a
    2x2 figure of the distributions and returns a summary dict.

    Args:
        annotation_file: Path to a JSON file holding 'images', 'annotations'
            and 'categories' lists. Each annotation must provide 'bbox',
            'category_id' and 'image_id'. The box width is read from
            bbox[2], i.e. a [x, y, width, height] layout.
        output_path: Where the figure is written. Defaults to
            'annotation_diagnostics.png' in the working directory.

    Returns:
        dict with keys 'num_annotations', 'num_images', 'avg_per_image',
        'zero_width', 'small_ratio' and 'category_counts' (a Counter keyed
        by category id). If the file has no annotations, the width
        statistics and the figure are skipped and 'zero_width' and
        'small_ratio' are reported as 0.
    """
    with open(annotation_file, 'r') as f:
        data = json.load(f)

    print("="*80)
    print("ANNOTATION DIAGNOSTICS")
    print("="*80)

    # Basic stats
    num_images = len(data['images'])
    num_annotations = len(data['annotations'])
    num_categories = len(data['categories'])
    # Guard the ratio so a file without images does not divide by zero.
    avg_per_image = num_annotations / num_images if num_images else 0.0

    print(f"\nDataset Statistics:")
    print(f"  Images: {num_images}")
    print(f"  Annotations:  {num_annotations}")
    print(f"  Categories: {num_categories}")
    print(f"  Avg annotations per image: {avg_per_image:.2f}")

    # Single pass over the annotations: widths plus per-category/per-image tallies.
    widths = []
    category_counts = Counter()
    annotations_per_image = Counter()
    for ann in data['annotations']:
        widths.append(ann['bbox'][2])  # bbox = [x, y, width, height]
        category_counts[ann['category_id']] += 1
        annotations_per_image[ann['image_id']] += 1

    if num_annotations == 0:
        # min/max/percentile are undefined on an empty array, so stop here
        # with an all-zero summary instead of raising.
        print("\n  ⚠️ WARNING: no annotations found; skipping width statistics and plots.")
        return {
            'num_annotations': num_annotations,
            'num_images': num_images,
            'avg_per_image': avg_per_image,
            'zero_width': 0,
            'small_ratio': 0.0,
            'category_counts': category_counts
        }

    widths = np.array(widths)
    num_boxes = len(widths)

    # Width statistics
    print(f"\nBox Width Statistics:")
    print(f"  Min: {widths.min()}")
    print(f"  Max: {widths.max()}")
    print(f"  Mean: {widths.mean():.2f}")
    print(f"  Median: {np.median(widths):.2f}")
    print(f"  Std: {widths.std():.2f}")

    # Width distribution
    print(f"\n  Width percentiles:")
    for p in [10, 25, 50, 75, 90, 95, 99]:
        val = np.percentile(widths, p)
        print(f"    {p}th:  {val:.1f}")

    # Zero-width boxes
    zero_width = (widths == 0).sum()
    if zero_width > 0:
        print(f"\n  ⚠️ WARNING: {zero_width} ({zero_width/num_boxes*100:.1f}%) boxes have ZERO width!")

    # Size buckets: COCO uses area < 32^2 for "small"; the 1D analogue used
    # here is width < 32, with 96 as the medium/large boundary.
    small_boxes = (widths < 32).sum()
    medium_boxes = ((widths >= 32) & (widths < 96)).sum()
    large_boxes = (widths >= 96).sum()

    print(f"\n  Box size distribution (COCO thresholds):")
    print(f"    Small (width < 32): {small_boxes} ({small_boxes/num_boxes*100:.1f}%)")
    print(f"    Medium (32 ≤ width < 96): {medium_boxes} ({medium_boxes/num_boxes*100:.1f}%)")
    print(f"    Large (width ≥ 96): {large_boxes} ({large_boxes/num_boxes*100:.1f}%)")

    # Category distribution. Build the id -> name map once; iterating in
    # reverse keeps the first entry when an id is listed more than once.
    category_names = {c['id']: c['name'] for c in reversed(data['categories'])}
    print(f"\nCategory Distribution:")
    for cat_id, count in category_counts.most_common():
        cat_name = category_names.get(cat_id, 'Unknown')
        print(f"  {cat_name} (ID {cat_id}): {count} ({count/num_annotations*100:.1f}%)")

    # Annotations per image. Only images that appear in at least one
    # annotation are counted, so the minimum reported here is never 0.
    ann_counts = list(annotations_per_image.values())
    print(f"\nAnnotations per Image:")
    print(f"  Min: {min(ann_counts)}")
    print(f"  Max: {max(ann_counts)}")
    print(f"  Mean:  {np.mean(ann_counts):.2f}")
    print(f"  Median: {np.median(ann_counts):.2f}")

    # Plot distributions
    _plot_annotation_diagnostics(
        data['categories'], widths, category_counts, ann_counts, output_path
    )
    print(f"\n✓ Saved diagnostics plot to {output_path}")

    return {
        'num_annotations': num_annotations,
        'num_images': num_images,
        'avg_per_image': avg_per_image,
        'zero_width': zero_width,
        'small_ratio': small_boxes / num_boxes,
        'category_counts': category_counts
    }

# Run the diagnostics on the validation-split annotation file. The call prints
# its report as it goes; `stats` keeps the returned summary dict so the numbers
# can be inspected afterwards.
stats = diagnose_annotations('/projects/pix2seqdata/tmp/ts_coco/annotations/instances_val.json')