## Package Installation

In [None]:
import os
import sys
import types

# Install core dependencies
!pip install easy-tpp lightning pytorch-lightning hydra-core omegaconf torchmetrics stribor -q

print("✓ Core dependencies installed")

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.9/44.9 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m99.2/99.2 kB[0m [31m5.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m846.0/846.0 kB[0m [31m23.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m849.5/849.5 kB[0m [31m64.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m154.5/154.5 kB[0m [31m16.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m983.2/983.2 kB[0m [31m66.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m57.7/57.7 kB[0m [31m5.8 MB/s[0m eta [36m0:00:00[0m
[?25h✓ Core dependencies installed


In [None]:
# Import required libraries
import os

# Set PyTorch memory allocation config BEFORE importing torch
# This helps prevent OOM errors with large models like AttNHP
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'

import yaml
import pandas as pd
import numpy as np
from datetime import datetime
from easy_tpp.config_factory import Config
from easy_tpp.runner import Runner
from google.colab import drive
import json
from IPython.display import display, clear_output
import time

In [None]:
# ==============================================================================
# FULLYNN GRADIENT FIX - Monkey Patch
# ==============================================================================
# FullyNN uses torch.autograd.grad() to compute intensity derivatives, but during
# validation/evaluation, EasyTPP runs under torch.no_grad() context which disables
# gradient tracking. This patch wraps the computation in torch.enable_grad().
# ==============================================================================

import torch
from easy_tpp.model.torch_model import torch_fullynn

def patched_compute_intensities_at_sample_times(self, time_seqs, time_delta_seqs, type_seqs, sample_dtimes, **kwargs):
    """Compute FullyNN intensities at sampled times with autograd forced on.

    The intensity layer is called on a gradient-tracking copy of
    ``sample_dtimes`` inside ``torch.enable_grad()``, so the call also works
    when the caller has disabled gradients (e.g. under ``torch.no_grad()``).

    Args:
        time_seqs, time_delta_seqs, type_seqs: forwarded unchanged to ``self.forward``.
        sample_dtimes: tensor of sampled time offsets; its last dimension is the
            number of samples per step.
        **kwargs: only ``compute_last_step_only`` (default ``False``) is read.

    Returns:
        The second output of ``self.layer_intensity.forward``, detached from the
        graph. When ``compute_last_step_only`` is true, only the last index of
        dimension 1 is kept (that dimension is preserved with size 1).
    """
    last_step_only = kwargs.get('compute_last_step_only', False)

    # Turn autograd back on locally, regardless of the caller's grad mode
    with torch.enable_grad():
        encoded = self.forward(
            time_seqs=time_seqs,
            time_delta_seqs=time_delta_seqs,
            type_seqs=type_seqs,
        )

        n_samples = sample_dtimes.size()[-1]
        n_batch, n_steps, n_hidden = encoded.shape

        # Insert a sample axis and broadcast the hidden state across it
        encoded_per_sample = encoded.unsqueeze(-2).expand(n_batch, n_steps, n_samples, n_hidden)

        # Fresh leaf tensor: the caller's tensor is left untouched and the
        # intensity layer can differentiate with respect to this copy
        dtimes_leaf = sample_dtimes.clone().detach().requires_grad_(True)

        _, lambdas = self.layer_intensity.forward(
            hidden_states=encoded_per_sample,
            time_delta_seqs=dtimes_leaf,
        )

    # Nothing downstream needs to back-propagate through the result
    lambdas = lambdas.detach()

    return lambdas[:, -1:, :, :] if last_step_only else lambdas

# Install the patched function on the FullyNN class so it replaces the library method
setattr(
    torch_fullynn.FullyNN,
    'compute_intensities_at_sample_times',
    patched_compute_intensities_at_sample_times,
)
for _patch_message in (
    "✓ FullyNN patched for gradient computation during evaluation",
    "  This fixes the 'element 0 of tensors does not require grad' error",
):
    print(_patch_message)

✓ FullyNN patched for gradient computation during evaluation
  This fixes the 'element 0 of tensors does not require grad' error


In [None]:
# Make Google Drive available under /content/drive
drive.mount('/content/drive')

# Project root on Drive; every other location is derived from it
BASE_DIR = '/content/drive/MyDrive/Colab Notebooks/MilestoneFall2025'
DATASET_DIR, CHECKPOINT_DIR, RESULTS_DIR = (
    os.path.join(BASE_DIR, _subdir)
    for _subdir in ('Datasets', 'checkpoints', 'results/Task3')
)

# Only the results directory is created here
os.makedirs(RESULTS_DIR, exist_ok=True)

print(f"✓ Base Directory: {BASE_DIR}")
print(f"✓ Dataset Directory: {DATASET_DIR}")
print(f"✓ Checkpoint Directory: {CHECKPOINT_DIR}")
print(f"✓ Results Directory: {RESULTS_DIR}")

Mounted at /content/drive
✓ Base Directory: /content/drive/MyDrive/Colab Notebooks/MilestoneFall2025
✓ Dataset Directory: /content/drive/MyDrive/Colab Notebooks/MilestoneFall2025/Datasets
✓ Checkpoint Directory: /content/drive/MyDrive/Colab Notebooks/MilestoneFall2025/checkpoints
✓ Results Directory: /content/drive/MyDrive/Colab Notebooks/MilestoneFall2025/results/Task3


## Configuration Dictionary

### Data Configuration

In [None]:
# Data specifications for each dataset.
# `data_spec_dict` maps a dataset id to its file locations and data settings;
# every entry is later run against every model.
dataset_name = 'nonstationary_hawkes'
data_spec_dict = {
    dataset_name: {
        "data_format": "pkl",
        "train_dir": os.path.join(DATASET_DIR, dataset_name, "train.pkl"),
        "valid_dir": os.path.join(DATASET_DIR, dataset_name, "dev.pkl"),
        "test_dir": os.path.join(DATASET_DIR, dataset_name, "test.pkl"),
        "data_specs": {
            "num_event_types": 2,
            "pad_token_id": 2,
            "padding_side": "right",
            "max_seq_len": 20,
            "strict_pad_leng": True
        }
    }
}

# Report the datasets actually configured, so the message cannot drift from the
# dict (it previously announced a differently named dataset).
print(f"✓ Data specifications loaded for {', '.join(data_spec_dict)}")

✓ Data specifications loaded for Hawkes Stationary


### Model Configuration

In [None]:
# Model specifications
def _thinning_spec(**extra):
    """Return a fresh `thinning` sub-config; `extra` keys are appended after the shared ones."""
    spec = {
        "num_seq": 10,
        "num_sample": 1,
        "num_exp": 500,
        "look_ahead_time": 10,
        "patience_counter": 5,
        "over_sample_rate": 5,
        "num_samples_boundary": 5,
        "dtime_max": 5,
    }
    spec.update(extra)
    return spec


# One entry per model id. Each model gets its own `thinning` dict so that
# editing one entry never affects another.
model_spec_dict = {
    "RMTPP": {
        "model_id": "RMTPP",
        "hidden_size": 32,
        "time_embed_size": 16,
        "num_layers": 2,
        "dropout": 0.0,
        "use_ln": False,
        "loss_integral_num_sample_per_step": 20,
        "mc_num_sample_per_step": 20,
        "num_heads": 2,
        "seed": 2019,
        "thinning": _thinning_spec(),
    },
    "NHP": {
        "model_id": "NHP",
        "hidden_size": 32,
        "time_embed_size": 16,
        "num_layers": 2,
        "dropout": 0.0,
        "use_ln": False,
        "loss_integral_num_sample_per_step": 20,
        "mc_num_sample_per_step": 20,
        "num_heads": 2,
        "seed": 2019,
        "thinning": _thinning_spec(),
    },
    "FullyNN": {
        "model_id": "FullyNN",
        "hidden_size": 32,
        "time_embed_size": 16,
        "num_layers": 2,
        "dropout": 0.0,
        "use_ln": False,
        "num_heads": 2,
        "seed": 2019,
        "model_specs": {"num_mlp_layers": 3, "proper_marked_intensities": True},
        "thinning": _thinning_spec(),
    },
    "SAHP": {
        "model_id": "SAHP",
        "hidden_size": 32,
        "time_embed_size": 16,
        "num_layers": 2,
        "num_heads": 2,
        "mc_num_sample_per_step": 20,
        "sharing_param_layer": False,
        "loss_integral_num_sample_per_step": 20,
        "dropout": 0.0,
        "use_ln": False,
        "seed": 2019,
        "thinning": _thinning_spec(),
    },
    "THP": {
        "model_id": "THP",
        "hidden_size": 32,
        "time_embed_size": 16,
        "num_layers": 2,
        "num_heads": 2,
        "mc_num_sample_per_step": 20,
        "sharing_param_layer": False,
        "loss_integral_num_sample_per_step": 20,
        "dropout": 0.0,
        "use_ln": False,
        "seed": 2019,
        "thinning": _thinning_spec(),
    },
    "IntensityFree": {
        "model_id": "IntensityFree",
        "hidden_size": 32,
        "time_embed_size": 16,
        "num_layers": 2,
        "dropout": 0.0,
        "use_ln": False,
        "num_heads": 2,
        "seed": 2019,
        "sharing_param_layer": False,
        "loss_integral_num_sample_per_step": 20,
        "mc_num_sample_per_step": 20,
        "num_mix_components": 3,
        "model_specs": {"num_mix_components": 3},
        # The only entry whose thinning block carries an extra key
        "thinning": _thinning_spec(num_step_gen=10),
    },
    "AttNHP": {
        "model_id": "AttNHP",
        "hidden_size": 32,
        "time_embed_size": 16,
        "num_layers": 2,
        "num_heads": 4,
        "mc_num_sample_per_step": 20,
        "sharing_param_layer": False,
        "loss_integral_num_sample_per_step": 20,
        "dropout": 0.0,
        "use_ln": False,
        "seed": 2019,
        "thinning": _thinning_spec(),
    },
}

print("✓ Model specifications loaded for 7 models")

✓ Model specifications loaded for 7 models


### Training Configuration

In [None]:
# Training settings applied unchanged to every model/dataset experiment
trainer_config = dict(
    batch_size=256,
    max_epoch=200,
    shuffle=False,
    optimizer="adam",
    learning_rate=1e-3,
    valid_freq=1,
    use_tfb=False,
    metrics=["acc", "rmse"],
    seed=2019,
    gpu=0,
)

print("✓ Trainer configuration loaded")
print(f"  - Batch size: {trainer_config['batch_size']}")
print(f"  - Max epochs: {trainer_config['max_epoch']}")
print(f"  - Learning rate: {trainer_config['learning_rate']}")

✓ Trainer configuration loaded
  - Batch size: 256
  - Max epochs: 200
  - Learning rate: 0.001


## Helper Functions

In [None]:
def create_experiment_config(model_id, data_id, data_spec, model_spec, trainer_cfg):
    """Assemble the config dict for training one model on one dataset.

    Args:
        model_id: Model identifier; also written into the base config.
        data_id: Dataset identifier; used as the key of the ``data`` section.
        data_spec: Dataset specification stored under ``data[data_id]``.
        model_spec: Stored as-is under the experiment's ``model_config``.
        trainer_cfg: Stored as-is under the experiment's ``trainer_config``.

    Returns:
        ``(config, experiment_id)`` where ``experiment_id`` is
        ``"{model_id}_{data_id}_train"`` and is also the key of the experiment
        section inside ``config``. Checkpoints are directed to the module-level
        ``CHECKPOINT_DIR``.
    """
    experiment_id = f"{model_id}_{data_id}_train"

    base_section = {
        "stage": "train",
        "backend": "torch",
        "dataset_id": data_id,
        "runner_id": "std_tpp",
        "model_id": model_id,
        "base_dir": CHECKPOINT_DIR,
    }

    config = {
        "pipeline_config_id": "runner_config",
        "data": {data_id: data_spec},
    }
    # The experiment section is keyed by the experiment id itself
    config[experiment_id] = {
        "base_config": base_section,
        "trainer_config": trainer_cfg,
        "model_config": model_spec,
    }

    return config, experiment_id

def extract_results_from_logs(log_path):
    """Extract the final test-set metrics from a training log.

    The log is scanned for lines containing ``test loglike is``. The metrics of
    the LAST such line that can be parsed are returned, i.e. the most recent
    test evaluation written to the log (not necessarily the best epoch).
    All three values always come from the same line, so results from different
    epochs are never mixed.

    Args:
        log_path: Path to a log file, or to a directory containing ``*.log``
            files. For a directory, the most recently modified ``.log`` file
            is read.

    Returns:
        dict with keys ``log_likelihood``, ``accuracy`` and ``rmse`` (the last
        two are ``None`` when the line does not report them), or ``None`` when
        the path does not exist, no log file or test line is found, or the file
        cannot be read.
    """
    import re

    # A float literal as written by Python's formatting; matching it explicitly
    # ignores anything glued to the number, e.g. a trailing ANSI colour reset.
    number = r'([-+]?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][-+]?\d+)?|[-+]?(?:nan|inf))'
    loglike_re = re.compile(r'test loglike is\s*' + number)
    acc_re = re.compile(r'acc is\s*' + number)
    rmse_re = re.compile(r'rmse is\s*' + number)

    try:
        # Determine if log_path is a file or directory
        if os.path.isfile(log_path):
            log_file = log_path
        elif os.path.isdir(log_path):
            candidates = [
                os.path.join(log_path, name)
                for name in os.listdir(log_path)
                if name.endswith('.log')
            ]
            if not candidates:
                return None
            # os.listdir order is arbitrary, so pick the newest log explicitly
            log_file = max(candidates, key=os.path.getmtime)
        else:
            print(f"  Warning: Log path does not exist: {log_path}")
            return None

        final_metrics = None
        with open(log_file, 'r') as f:
            for line in f:
                if 'test loglike is' not in line:
                    continue

                ll_match = loglike_re.search(line)
                if ll_match is None:
                    print("  Warning: Could not parse line: no numeric test loglike value")
                    continue

                acc_match = acc_re.search(line)
                rmse_match = rmse_re.search(line)

                # Replace the whole record at once so a line without acc/rmse
                # cannot inherit stale values from an earlier epoch.
                final_metrics = {
                    'log_likelihood': float(ll_match.group(1)),
                    'accuracy': float(acc_match.group(1)) if acc_match else None,
                    'rmse': float(rmse_match.group(1)) if rmse_match else None
                }

        return final_metrics
    except Exception as e:
        # Best effort: a missing or unreadable log must not abort the experiment loop
        print(f"  Warning: Could not extract results from logs: {e}")
        return None
def find_latest_checkpoint_dir(base_dir, model_id, data_id):
    """Locate the newest run directory belonging to a model/dataset pair.

    A sub-directory of ``base_dir`` qualifies when it contains a file named
    ``{model_id}_{data_id}_train_output.yaml``. Among the qualifying
    directories the one with the latest modification time is returned.

    Returns:
        The path of that directory, or ``None`` when nothing qualifies or when
        ``base_dir`` cannot be inspected (a warning is printed in that case).
    """
    try:
        # Run directories are recognised by the output yaml they contain,
        # not by their own name
        marker_name = f"{model_id}_{data_id}_train_output.yaml"

        candidates = [
            candidate
            for candidate in (os.path.join(base_dir, entry) for entry in os.listdir(base_dir))
            if os.path.isdir(candidate)
            and os.path.exists(os.path.join(candidate, marker_name))
        ]

        if len(candidates) == 0:
            return None

        return max(candidates, key=os.path.getmtime)
    except Exception as e:
        print(f"  Warning: Error finding checkpoint dir: {e}")
        return None
print("✓ Helper functions loaded")

✓ Helper functions loaded


In [None]:
def run_model_on_all_datasets(
    model_id,
    model_number,
    total_models,
    aggressive_gpu_clearing=False
):
    """
    Run a single TPP model on all configured datasets.

    For every entry of the global ``data_spec_dict`` this function:
    - creates the experiment config and saves it as YAML
    - builds the runner and runs it
    - locates the checkpoint directory and extracts metrics from its log
    - records a result row (also on failure) and saves intermediate results
    - optionally frees GPU memory

    Parameters
    ----------
    model_id : str
        The model identifier (e.g., 'RMTPP', 'NHP', 'AttNHP').
        Must exist as a key in model_spec_dict.
    model_number : int
        Position of this model in the overall run; used only in progress output.
    total_models : int
        Total number of models; used only in progress output.
    aggressive_gpu_clearing : bool, default=False
        If True, drops the runner/config references, runs the garbage collector
        and empties the CUDA cache after EACH dataset.
        If False, the CUDA cache is only emptied once all datasets are done.

    Returns
    -------
    model_results : list of dict
        One dict per dataset with the keys: model, dataset, log_likelihood,
        accuracy, rmse, status and time_seconds (plus ``error`` when the run
        raised). ``status`` is one of 'success', 'completed_no_metrics',
        'no_checkpoint' or 'failed'. ``time_seconds`` is the wall-clock time
        spent on that dataset alone.

    Side Effects
    ------------
    - Appends results to the global results_list
    - Saves intermediate results to 'intermediate_results.csv' after each dataset
    - Saves model-specific results to '{model_id}_results.csv'
    - Creates experiment config files in ``BASE_DIR/configs``
    - Clears GPU memory (timing depends on aggressive_gpu_clearing flag)

    Notes
    -----
    This function relies on several global variables and functions:
    - data_spec_dict: Dictionary of dataset configurations
    - model_spec_dict: Dictionary of model configurations
    - trainer_config: Training configuration
    - results_list: Global list to store all results
    - total_experiments: Total number of experiments
    - BASE_DIR, CHECKPOINT_DIR, RESULTS_DIR: Directory paths
    - create_experiment_config(): Function to create config dicts
    - find_latest_checkpoint_dir(): Function to locate checkpoints
    - extract_results_from_logs(): Function to parse training logs

    Example
    -------
    >>> # Run RMTPP on all datasets (standard memory management)
    >>> rmtpp_results = run_model_on_all_datasets('RMTPP', 1, 7)

    >>> # Run AttNHP with aggressive memory clearing
    >>> attnhp_results = run_model_on_all_datasets('AttNHP', 7, 7,
    ...                                             aggressive_gpu_clearing=True)
    """
    import time
    import os
    import yaml
    import pandas as pd
    import torch
    import gc
    from easy_tpp.config_factory import Config
    from easy_tpp.runner import Runner

    def _build_result(data_id, status, started_at, metrics=None, error=None):
        """Assemble one result row; key order determines the CSV column order."""
        metrics = metrics or {}
        row = {
            'model': model_id,
            'dataset': data_id,
            'log_likelihood': metrics.get('log_likelihood'),
            'accuracy': metrics.get('accuracy'),
            'rmse': metrics.get('rmse'),
            'status': status,
        }
        if error is not None:
            row['error'] = error
        row['time_seconds'] = time.time() - started_at
        return row

    print("\n" + "=" * 70)
    print(f"[MODEL {model_number}/{total_models}] Running {model_id} on all datasets")
    print("=" * 70)

    model_start_time = time.time()
    model_results = []

    # Pre-bound so the cleanup below never touches an undefined name, even when
    # an experiment fails before the runner is built.
    runner = None
    config = None

    for data_id, data_spec in data_spec_dict.items():
        experiment_name = f"{model_id}_{data_id}"
        experiment_num = len(results_list) + 1
        # Timed per dataset, so time_seconds is not cumulative across datasets
        experiment_start_time = time.time()

        print(f"\n[{experiment_num}/{total_experiments}] Running {model_id} on {data_id} dataset")
        print("=" * 70)

        try:
            # Create config
            config_dict, experiment_id = create_experiment_config(
                model_id=model_id,
                data_id=data_id,
                data_spec=data_spec,
                model_spec=model_spec_dict[model_id],
                trainer_cfg=trainer_config
            )

            # Save config
            CONFIG_DIR = os.path.join(BASE_DIR, 'configs')
            os.makedirs(CONFIG_DIR, exist_ok=True)
            config_path = os.path.join(CONFIG_DIR, f"{experiment_name}.yaml")
            with open(config_path, 'w') as f:
                yaml.dump(config_dict, f)

            # Build and run
            config = Config.build_from_yaml_file(config_path, experiment_id=experiment_id)
            runner = Runner.build_from_config(config)
            runner.run()

            # Find checkpoint directory.
            # NOTE(review): this returns the newest matching directory under
            # CHECKPOINT_DIR, which could stem from an earlier session if this
            # run wrote none — confirm that is acceptable.
            checkpoint_dir = find_latest_checkpoint_dir(CHECKPOINT_DIR, model_id, data_id)

            # Extract results from logs
            metrics = None
            if checkpoint_dir:
                metrics = extract_results_from_logs(os.path.join(checkpoint_dir, 'log'))

            if metrics:
                result = _build_result(data_id, 'success', experiment_start_time, metrics=metrics)
                print(f"✓ {model_id} on {data_id} completed successfully!")
                print(f"  - Log-Likelihood: {metrics['log_likelihood']:.4f}")
                if metrics['accuracy'] and metrics['accuracy'] > 0:
                    print(f"  - Accuracy: {metrics['accuracy']:.4f}")
                if metrics['rmse'] and metrics['rmse'] > 0:
                    print(f"  - RMSE: {metrics['rmse']:.4f}")
            elif checkpoint_dir:
                result = _build_result(data_id, 'completed_no_metrics', experiment_start_time)
                print(f"⚠ {model_id} on {data_id} completed but no metrics found")
            else:
                result = _build_result(data_id, 'no_checkpoint', experiment_start_time)
                print(f"⚠ {model_id} on {data_id} - no checkpoint directory found")

        except Exception as e:
            # Record the failure and carry on with the next dataset
            result = _build_result(data_id, 'failed', experiment_start_time, error=str(e))
            print(f"✗ Error running {model_id} on {data_id}: {str(e)}")

        results_list.append(result)
        model_results.append(result)

        # Save intermediate results after each experiment
        intermediate_df = pd.DataFrame(results_list)
        intermediate_df.to_csv(os.path.join(RESULTS_DIR, 'intermediate_results.csv'), index=False)

        # Aggressive GPU memory clearing (for memory-intensive models like AttNHP)
        if aggressive_gpu_clearing:
            # Release the references so the collector can reclaim the model
            runner = None
            config = None
            gc.collect()
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
                torch.cuda.synchronize()

    # Save model-specific results
    model_df = pd.DataFrame(model_results)
    model_csv_path = os.path.join(RESULTS_DIR, f'{model_id}_results.csv')
    model_df.to_csv(model_csv_path, index=False)

    # Model summary
    model_elapsed = time.time() - model_start_time
    successful = sum(1 for r in model_results if r['status'] == 'success')
    failed = sum(1 for r in model_results if r['status'] == 'failed')

    print("\n" + "=" * 70)
    print(f"✓ {model_id} completed on all datasets")
    print(f"  - Time: {model_elapsed/60:.1f} minutes")
    print(f"  - Successful: {successful}/{len(data_spec_dict)}")
    print(f"  - Failed: {failed}/{len(data_spec_dict)}")
    print(f"  - Results saved to: {model_csv_path}")
    print("=" * 70)

    # Clear GPU memory for next model
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
        print("✓ GPU memory cleared for next model")

    return model_results

## Run Models On Data

In [None]:
# Initialize results storage.
# `results_list` and `total_experiments` are module-level names that
# run_model_on_all_datasets() reads (and, for results_list, appends to).
# Re-running this cell resets the accumulated results to an empty list.
results_list = []
# Every model is run on every dataset
total_experiments = len(model_spec_dict) * len(data_spec_dict)

print(f"Starting {total_experiments} experiments...")
print(f"Running {len(model_spec_dict)} models on {len(data_spec_dict)} datasets")
print(f"Results will be saved incrementally to: {RESULTS_DIR}")
print("=" * 70)

Starting 7 experiments...
Running 7 models on 1 datasets
Results will be saved incrementally to: /content/drive/MyDrive/Colab Notebooks/MilestoneFall2025/results/Task3


In [None]:
# Models are run in this order; results are collected per model id
model_ids = ['RMTPP', 'NHP', 'FullyNN', 'SAHP', 'THP', 'IntensityFree', 'AttNHP']
all_model_results = {}

for i, model_id in enumerate(model_ids, start=1):
    # Only AttNHP gets the per-dataset GPU clean-up
    aggressive = model_id == 'AttNHP'
    all_model_results[model_id] = run_model_on_all_datasets(
        model_id=model_id,
        model_number=i,
        total_models=len(model_ids),
        aggressive_gpu_clearing=aggressive,
    )

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
[38;20m2025-12-27 17:49:03,484 - tpp_runner.py[pid:3157;line:60:_init_model] - INFO: Num of model parameters 2342[0m
[38;20m2025-12-27 17:49:03,928 - base_runner.py[pid:3157;line:98:train] - INFO: Data 'nonstationary_hawkes' loaded...[0m
[38;20m2025-12-27 17:49:03,929 - base_runner.py[pid:3157;line:103:train] - INFO: Start RMTPP training...[0m
[38;20m2025-12-27 17:49:05,106 - tpp_runner.py[pid:3157;line:96:_train_model] - INFO: [ Epoch 0 (train) ]: train loglike is -1.2063269993485592, num_events is 32620[0m
[38;20m2025-12-27 17:49:05,327 - tpp_runner.py[pid:3157;line:107:_train_model] - INFO: [ Epoch 0 (valid) ]:  valid loglike is -1.1100988263248848, num_events is 10850, acc is 0.49806451612903224, rmse is 0.9226806218331444[0m
[38;20m2025-12-27 17:49:05,355 - tpp_runner.py[pid:3157;line:122:_train_model] - INFO: [ Epoch 0 (test) ]: test loglike is -1.1090106285245447, num_events is 10817, acc is 0.4932975871

## Save Model Results

### Function Definition

In [None]:
import pandas as pd
from typing import Optional, List

def format_results_table(
    results_df: pd.DataFrame,
    models: Optional[List[str]] = None,
    datasets: Optional[List[str]] = None,
    log_likelihood_col: str = 'log_likelihood',
    rmse_col: str = 'rmse',
    accuracy_col: str = 'accuracy',
    type_error_from_accuracy: bool = True,
    format_type: str = 'combined'
) -> pd.DataFrame:
    """
    Build a model x dataset table of formatted "mean ± std" metric strings.

    Rows of ``results_df`` are grouped by (model, dataset); the mean and sample
    standard deviation of each metric are computed per group and rendered as
    text. The input frame is never modified.

    Parameters
    ----------
    results_df : pd.DataFrame
        DataFrame containing columns: 'model', 'dataset', 'log_likelihood', 'rmse', 'accuracy'.
        Metric values that are missing or non-numeric are treated as NaN.
    models : List[str], optional
        List of model names to include. If None, uses all models in results_df
    datasets : List[str], optional
        List of dataset names to include. If None, uses all datasets in results_df
    log_likelihood_col : str, default='log_likelihood'
        Column name for log-likelihood values
    rmse_col : str, default='rmse'
        Column name for RMSE values
    accuracy_col : str, default='accuracy'
        Column name for accuracy values
    type_error_from_accuracy : bool, default=True
        If True, compute Type Error Rate as (1 - accuracy) * 100.
        If False, results_df must already contain a 'type_error_rate' column.
    format_type : str, default='combined'
        Output format type:
        - 'combined': "log_like/rmse/type_error% ± std_log/std_rmse/std_type"
        - 'log_likelihood': "log_like ± std_log"
        - 'rmse': "rmse ± std_rmse"
        - 'type_error': "type_error% ± std_type"
        Any other value is treated as 'combined'. In the single-metric formats
        the "± std" part is omitted when the std is 0 (e.g. a single run).

    Returns
    -------
    pd.DataFrame
        Formatted table with rows as models and columns as datasets (both
        sorted). Model/dataset combinations without any rows contain 'N/A'.
        An empty table is returned when no rows remain after filtering.

    Raises
    ------
    KeyError
        If a required column is missing from results_df.
    """
    # Filter models and datasets if specified
    if models is not None:
        results_df = results_df[results_df['model'].isin(models)]
    if datasets is not None:
        results_df = results_df[results_df['dataset'].isin(datasets)]

    # Work on a private copy so the columns written below never reach the
    # caller's DataFrame (previously the unfiltered input was modified in place).
    results_df = results_df.copy()

    if results_df.empty:
        return pd.DataFrame(
            index=pd.Index([], name='model'),
            columns=pd.Index([], name='dataset')
        )

    # Failed runs store None; when every value in a column is None it has object
    # dtype, which breaks arithmetic and mean/std. Coerce to float (None -> NaN).
    metric_cols = [log_likelihood_col, rmse_col]
    metric_cols.append(accuracy_col if type_error_from_accuracy else 'type_error_rate')
    for col in metric_cols:
        results_df[col] = pd.to_numeric(results_df[col], errors='coerce')

    # Compute type error rate if needed
    if type_error_from_accuracy:
        results_df['type_error_rate'] = (1 - results_df[accuracy_col]) * 100

    # Get unique models and datasets (sorted)
    all_models = sorted(results_df['model'].unique())
    all_datasets = sorted(results_df['dataset'].unique())

    # Mean and std for each model-dataset combination, with flat column names
    grouped = (
        results_df.groupby(['model', 'dataset'])
        .agg(
            log_like_mean=(log_likelihood_col, 'mean'),
            log_like_std=(log_likelihood_col, 'std'),
            rmse_mean=(rmse_col, 'mean'),
            rmse_std=(rmse_col, 'std'),
            type_error_mean=('type_error_rate', 'mean'),
            type_error_std=('type_error_rate', 'std'),
        )
        .reset_index()
    )

    # Fill NaN std with 0 (for single runs)
    std_cols = ['log_like_std', 'rmse_std', 'type_error_std']
    grouped[std_cols] = grouped[std_cols].fillna(0)

    def _single_metric(mean, std, mean_fmt):
        """Render one metric as 'mean' or 'mean ± std'; 'NaN' when the mean is missing."""
        if pd.isna(mean):
            return 'NaN'
        text = mean_fmt.format(mean)
        return text if std == 0 else f"{text} ± {std:.3f}"

    def format_cell(row):
        """Format one (model, dataset) row according to format_type."""
        if format_type == 'log_likelihood':
            return _single_metric(row['log_like_mean'], row['log_like_std'], "{:.3f}")
        if format_type == 'rmse':
            return _single_metric(row['rmse_mean'], row['rmse_std'], "{:.3f}")
        if format_type == 'type_error':
            return _single_metric(row['type_error_mean'], row['type_error_std'], "{:.1f}%")

        # 'combined' (default): show every metric that is available, even when
        # others are NaN. Each entry: (mean, std, mean format, text when missing)
        parts = [
            (row['log_like_mean'], row['log_like_std'], "{:.3f}", "NaN"),
            (row['rmse_mean'], row['rmse_std'], "{:.3f}", "NaN"),
            (row['type_error_mean'], row['type_error_std'], "{:.1f}%", "NaN%"),
        ]
        if all(pd.isna(mean) for mean, _, _, _ in parts):
            return 'NaN/NaN/NaN%'

        mean_str = "/".join(
            missing if pd.isna(mean) else fmt.format(mean)
            for mean, _, fmt, missing in parts
        )
        std_str = "/".join(
            "nan" if pd.isna(mean) else f"{std:.3f}"
            for mean, std, _, _ in parts
        )
        return f"{mean_str} ± {std_str}"

    grouped['formatted'] = grouped.apply(format_cell, axis=1)

    # Pivot to create the final table
    formatted_table = grouped.pivot(index='model', columns='dataset', values='formatted')

    # Ensure all models and datasets are present. reindex's fill_value only
    # covers labels it adds, so cells the pivot left empty (a model that was
    # not run on a dataset) are filled separately.
    formatted_table = formatted_table.reindex(index=all_models, columns=all_datasets, fill_value='N/A')
    formatted_table = formatted_table.fillna('N/A')

    return formatted_table


def save_results_easytpp_format(
    results_df: pd.DataFrame,
    output_path: str,
    models: Optional[List[str]] = None,
    datasets: Optional[List[str]] = None,
    save_csv: bool = True,
    save_latex: bool = True,
    save_markdown: bool = True,
    save_individual_metrics: bool = True,
    **format_kwargs
) -> dict:
    """
    Build the combined results table and write it as CSV, LaTeX and/or Markdown.

    Parameters
    ----------
    results_df : pd.DataFrame
        Results DataFrame with columns: model, dataset, log_likelihood, rmse, accuracy
    output_path : str
        Base path for output files (without extension). The parent folder is
        created if it does not exist yet.
    models : List[str], optional
        List of models to include
    datasets : List[str], optional
        List of datasets to include
    save_csv : bool, default=True
        Write ``<output_path>.csv``
    save_latex : bool, default=True
        Write ``<output_path>.tex``
    save_markdown : bool, default=True
        Write ``<output_path>.md``
    save_individual_metrics : bool, default=True
        Accepted for interface compatibility. NOTE(review): this function does
        not read the flag - only the combined table is generated and saved.
    **format_kwargs
        Additional keyword arguments passed to format_results_table
        (must not contain ``format_type``, which is fixed to ``'combined'``).

    Returns
    -------
    dict
        ``{'combined': <formatted table>}``
    """
    import os  # local import keeps the function usable after a runtime restart

    output_tables = {}

    # Generate combined table (log-likelihood/RMSE/Type Error in one cell)
    combined_table = format_results_table(
        results_df,
        models=models,
        datasets=datasets,
        format_type='combined',
        **format_kwargs
    )
    output_tables['combined'] = combined_table

    # Writing into a folder that does not exist yet would fail on the first file
    parent_dir = os.path.dirname(output_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    if save_csv:
        combined_table.to_csv(f"{output_path}.csv")
        print(f"✓ Results saved to: {output_path}.csv")

    if save_latex:
        # Cells contain '±' (non-ASCII) and '%' / '_' (LaTeX specials):
        # write UTF-8 explicitly and escape, independent of the pandas default.
        with open(f"{output_path}.tex", 'w', encoding='utf-8') as f:
            f.write(combined_table.to_latex(escape=True))
        print(f"✓ LaTeX table saved to: {output_path}.tex")

    if save_markdown:
        with open(f"{output_path}.md", 'w', encoding='utf-8') as f:
            f.write(combined_table.to_markdown())
        print(f"✓ Markdown table saved to: {output_path}.md")

    return output_tables


def _mean_metrics_by_model(results_df: pd.DataFrame, prefix: str) -> pd.DataFrame:
    """Average each metric per model and return columns named ``<prefix>_*``."""
    per_model = results_df.groupby('model')[['log_likelihood', 'rmse', 'accuracy']].mean()
    # 'accuracy' is a fraction in [0, 1]; report it as a type-error percentage
    per_model['accuracy'] = (1 - per_model['accuracy']) * 100
    return per_model.rename(columns={
        'log_likelihood': f'{prefix}_log_like',
        'rmse': f'{prefix}_rmse',
        'accuracy': f'{prefix}_type_error',
    })


def create_comparison_table(
    stationary_results_df: pd.DataFrame,
    nonstationary_results_df: pd.DataFrame,
    output_path: str,
    models: Optional[List[str]] = None
) -> pd.DataFrame:
    """
    Compare per-model mean metrics on stationary vs non-stationary data.

    Parameters
    ----------
    stationary_results_df : pd.DataFrame
        Results from stationary Hawkes data
        (columns: model, log_likelihood, rmse, accuracy).
    nonstationary_results_df : pd.DataFrame
        Results from non-stationary Hawkes data (same columns).
    output_path : str
        Base path for output files; the table is written to
        ``<output_path>_degradation.csv``.
    models : List[str], optional
        Models to include, in this order. Names without stationary results
        are dropped. ``None`` keeps every model found in the stationary results.

    Returns
    -------
    pd.DataFrame
        One row per model with the stationary / non-stationary means and:
        ``log_like_degradation_%`` and ``rmse_degradation_%`` (relative change
        in percent) and ``type_error_degradation_%`` (difference in percentage
        points). Models missing from the non-stationary results get NaN there.
    """
    stat_grouped = _mean_metrics_by_model(stationary_results_df, 'stat')
    nonstat_grouped = _mean_metrics_by_model(nonstationary_results_df, 'nonstat')

    # Left join: the stationary run is the baseline every change is measured against
    degradation = stat_grouped.join(nonstat_grouped)

    # Honour the requested model subset (previously the filter never reached the result)
    if models is not None:
        degradation = degradation.loc[[m for m in models if m in degradation.index]]

    # Calculate degradation percentages
    degradation['log_like_degradation_%'] = (
        (degradation['nonstat_log_like'] - degradation['stat_log_like']) /
        degradation['stat_log_like'].abs() * 100
    )
    degradation['rmse_degradation_%'] = (
        (degradation['nonstat_rmse'] - degradation['stat_rmse']) /
        degradation['stat_rmse'] * 100
    )
    # Both operands are already percentages, so this is a percentage-point difference
    degradation['type_error_degradation_%'] = (
        degradation['nonstat_type_error'] - degradation['stat_type_error']
    )

    # Save comparison
    degradation.to_csv(f"{output_path}_degradation.csv")
    print(f"✓ Degradation analysis saved to: {output_path}_degradation.csv")

    return degradation

### Function Execution

In [None]:
# Models to include in the formatted results table (passed as `models=` further down)
model_ids = ['RMTPP', 'NHP', 'FullyNN', 'SAHP', 'THP', 'IntensityFree', 'AttNHP']
# Datasets to include in the formatted results table (passed as `datasets=` further down)
data_ids = ['nonstationary_hawkes']

In [None]:
import pandas as pd

def flatten_model_results(all_model_results):
    """
    Turn ``{model_name: [result_row, ...]}`` into one DataFrame with a row per result.

    Passing the dictionary straight to ``pd.DataFrame`` would use the model
    names as COLUMNS; here the per-model lists are chained together first so
    that every result row becomes a DataFrame row.
    """
    flat_rows = [
        row
        for per_model_rows in all_model_results.values()
        for row in per_model_rows
    ]
    return pd.DataFrame(flat_rows)

# One row per (model, dataset) run; keep only the runs that finished successfully
results_df = flatten_model_results(all_model_results)
results_df = results_df.loc[results_df['status'] == 'success']

# Quick look at what survived the filter
print("✓ DataFrame created successfully!")
print(f"  Shape: {results_df.shape}")
print(f"  Columns: {list(results_df.columns)}")
print(f"\nFirst few rows:")
print(results_df.head())

# Model x dataset table of formatted metric strings
formatted_table = format_results_table(results_df, models=model_ids, datasets=data_ids)

print("\n✓ Formatted table:")
print(formatted_table)

# Persist the same table next to the other Task 3 results
saved_table = save_results_easytpp_format(
    results_df,
    models=model_ids,
    datasets=data_ids,
    output_path=f'{RESULTS_DIR}/Task3_results',
)

✓ DataFrame created successfully!
  Shape: (6, 8)
  Columns: ['model', 'dataset', 'log_likelihood', 'accuracy', 'rmse', 'status', 'time_seconds', 'error']

First few rows:
     model               dataset  log_likelihood  accuracy      rmse   status  \
0    RMTPP  nonstationary_hawkes       -0.857463  0.728853  1.111820  success   
1      NHP  nonstationary_hawkes       -0.819737  0.721827  1.047376  success   
2  FullyNN  nonstationary_hawkes       -1.118245  0.502727  1.076472  success   
3     SAHP  nonstationary_hawkes       -0.877497  0.707313  1.008119  success   
4      THP  nonstationary_hawkes       -0.861670  0.726819  1.017348  success   

   time_seconds error  
0     50.829686   NaN  
1    189.605509   NaN  
2     53.524981   NaN  
3     18.506867   NaN  
4     17.769019   NaN  

✓ Formatted table:
dataset                          nonstationary_hawkes
model                                                
FullyNN        -1.118/1.076/49.7% ± 0.000/0.000/0.000
IntensityFree  

## Data Conversion: EasyTPP → Meta-TPP Format

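Convert the Hawkes pickle files of the dataset selected by `dataset_name` to npz format for Meta-TPP:
- **Input format**: one pickle per split (`train.pkl`, `dev.pkl`, `test.pkl`), each a dict with `dim_process` and a list of sequences; every sequence is a list of event dicts (`time_since_start`, `type_event`, ...)
- **Meta-TPP format**: a single `.npz` with zero-padded `times`, `marks`, `masks` arrays, plus a `_split_info.json` file holding the split boundaries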

In [None]:
import pickle
import numpy as np
import json
import os


def convert_sparklen_hawkes_to_metatpp(dataset_dir, output_dir, dataset_name='stationary_hawkes'):
    """
    Convert a Sparklen-generated Hawkes dataset to Meta-TPP format.

    Reads ``train.pkl``, ``dev.pkl`` and ``test.pkl`` from ``dataset_dir``,
    stacks all sequences (train first, then dev, then test) into zero-padded
    arrays and writes two files into ``output_dir``:

    - ``<dataset_name>.npz`` with ``times`` (float32), ``marks`` (int64) and
      ``masks`` (float32, 1.0 on real events), each of shape
      (num_sequences, longest_sequence)
    - ``<dataset_name>_split_info.json`` with the [start, end) row range of
      each split, the number of event types and the padded length

    Expected pickle layout (one file per split):
    {
        'dim_process': 2,
        'train': [
            [
                {'idx_event': 1, 'type_event': 0, 'time_since_start': 0.876, ...},
                {'idx_event': 2, 'type_event': 1, 'time_since_start': 1.883, ...},
                ...
            ],
            ...
        ]
    }

    Parameters
    ----------
    dataset_dir : str
        Folder containing the three split pickles.
    output_dir : str
        Folder for the generated files (created if missing).
    dataset_name : str
        Stem used for the output file names.

    Returns
    -------
    str
        Path of the written ``.npz`` file.

    Raises
    ------
    FileNotFoundError
        If a split pickle is missing.
    KeyError
        If a pickle lacks its split key, or the last pickle lacks 'dim_process'.
    """
    os.makedirs(output_dir, exist_ok=True)

    print("="*70)
    print(f"Converting {dataset_name} to Meta-TPP format")
    print("="*70)

    # split name -> (file name, key inside the pickle)
    splits = {
        'train': ('train.pkl', 'train'),
        'dev': ('dev.pkl', 'dev'),
        'test': ('test.pkl', 'test')
    }

    time_seqs = []
    type_seqs = []
    split_indices = {}
    current_idx = 0

    for split_name, (filename, data_key) in splits.items():
        pickle_path = os.path.join(dataset_dir, filename)

        print(f"\n{split_name.upper()} split:")

        # NOTE(security): pickle.load can execute arbitrary code - only use on
        # dataset files you generated yourself.
        with open(pickle_path, 'rb') as f:
            data = pickle.load(f)

        sequences = data[data_key]
        num_seqs = len(sequences)

        # Each sequence is a list of event dicts; pull out times and marks
        split_lengths = []
        for seq in sequences:
            time_seqs.append([event['time_since_start'] for event in seq])
            type_seqs.append([event['type_event'] for event in seq])
            split_lengths.append(len(seq))

        split_indices[split_name] = (current_idx, current_idx + num_seqs)
        current_idx += num_seqs

        # An empty split is reported as length 0 instead of crashing max()/mean()
        max_len = max(split_lengths, default=0)
        avg_len = float(np.mean(split_lengths)) if split_lengths else 0.0

        print(f"  - Sequences: {num_seqs}")
        print(f"  - Max length: {max_len}")
        print(f"  - Avg length: {avg_len:.1f}")

    # Taken from the last pickle loaded (test), as before
    num_event_types = int(data['dim_process'])

    # Pad every sequence once, directly to the longest sequence over all splits
    total_seqs = len(time_seqs)
    global_max_len = max((len(seq) for seq in time_seqs), default=0)

    times_combined = np.zeros((total_seqs, global_max_len), dtype=np.float32)
    marks_combined = np.zeros((total_seqs, global_max_len), dtype=np.int64)
    masks_combined = np.zeros((total_seqs, global_max_len), dtype=np.float32)

    for row, (time_seq, type_seq) in enumerate(zip(time_seqs, type_seqs)):
        seq_len = len(time_seq)
        times_combined[row, :seq_len] = time_seq
        marks_combined[row, :seq_len] = type_seq
        masks_combined[row, :seq_len] = 1.0

    # Save npz
    output_path = os.path.join(output_dir, f'{dataset_name}.npz')
    np.savez(
        output_path,
        times=times_combined,
        marks=marks_combined,
        masks=masks_combined
    )

    # Save split info (the 'dev' split is stored under the key 'val')
    split_info_path = os.path.join(output_dir, f'{dataset_name}_split_info.json')
    with open(split_info_path, 'w') as f:
        json.dump({
            'train': split_indices['train'],
            'val': split_indices['dev'],
            'test': split_indices['test'],
            'num_event_types': num_event_types,
            'global_max_len': int(global_max_len)
        }, f, indent=2)

    print(f"\n{'='*70}")
    print(f"✓ Conversion complete!")
    print(f"  - Output NPZ: {output_path}")
    print(f"  - Split info: {split_info_path}")
    print(f"  - Total sequences: {times_combined.shape[0]}")
    print(f"  - Sequence length: {times_combined.shape[1]}")
    print(f"  - Train: {split_indices['train']}")
    print(f"  - Val: {split_indices['dev']}")
    print(f"  - Test: {split_indices['test']}")
    print("="*70)

    return output_path


# ============================================================================
# RUN CONVERSION
# ============================================================================

# Input: the Hawkes dataset folder selected by `dataset_name`; output: a shared 'metatpp_format' folder.
# NOTE(review): DATASET_DIR and dataset_name are not defined in this cell - they must already exist in the kernel.
hawkes_input_dir = os.path.join(DATASET_DIR, dataset_name)
metatpp_dataset_dir = os.path.join(DATASET_DIR, 'metatpp_format')

# Run the conversion; the returned value is the path of the written .npz file
converted_npz_path = convert_sparklen_hawkes_to_metatpp(
    dataset_dir=hawkes_input_dir,
    output_dir=metatpp_dataset_dir,
    dataset_name=dataset_name
)

Converting nonstationary_hawkes to Meta-TPP format

TRAIN split:
  - Sequences: 300
  - Max length: 163
  - Avg length: 109.7

DEV split:
  - Sequences: 100
  - Max length: 161
  - Avg length: 109.5

TEST split:
  - Sequences: 100
  - Max length: 152
  - Avg length: 109.2

✓ Conversion complete!
  - Output NPZ: /content/drive/MyDrive/Colab Notebooks/MilestoneFall2025/Datasets/metatpp_format/nonstationary_hawkes.npz
  - Split info: /content/drive/MyDrive/Colab Notebooks/MilestoneFall2025/Datasets/metatpp_format/nonstationary_hawkes_split_info.json
  - Total sequences: 500
  - Sequence length: 163
  - Train: (0, 300)
  - Val: (300, 400)
  - Test: (400, 500)


## Meta TPP

### Cell 1: Install Dependencies (CRITICAL - Run First, Then Restart Runtime)

In [None]:
# ==============================================================================
# CELL 1: Install Dependencies with Correct Versions
# ==============================================================================
# IMPORTANT: After running this cell, you MUST restart the runtime!
# Runtime → Restart runtime (or Ctrl+M .)

# Uninstall any existing stribor (Colab installs 0.2.0 which breaks)
!pip uninstall stribor -y -q 2>/dev/null

# Install stribor 0.1.0 (does NOT require torchtyping)
!pip install stribor==0.1.0 -q

# Install nfe (neural-flows-experiments) from GitHub
!SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True pip install git+https://github.com/mbilos/neural-flows-experiments -q

# Install other Meta-TPP dependencies
!pip install lightning pytorch-lightning hydra-core omegaconf hydra-colorlog pyrootutils torchmetrics rich -q

# Verify stribor version
import subprocess
result = subprocess.run(['pip', 'show', 'stribor'], capture_output=True, text=True)
version_line = [l for l in result.stdout.split('\n') if 'Version' in l][0]
print(f"✓ Installed: stribor {version_line.split(': ')[1]}")

print("\n" + "=" * 70)
print("⚠️  RESTART RUNTIME NOW: Runtime → Restart runtime")
print("   Then continue to Cell 2 (do NOT re-run this cell)")
print("=" * 70)

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m54.9/54.9 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.2/2.2 MB[0m [31m26.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
  Building wheel for nfe (setup.py) ... [?25l[?25hdone
  Building wheel for reverse_geocoder (setup.py) ... [?25l[?25hdone
  Building wheel for sklearn (setup.py) ... [?25l[?25hdone
✓ Installed: stribor 0.1.0

⚠️  RESTART RUNTIME NOW: Runtime → Restart runtime
   Then continue to Cell 2 (do NOT re-run this cell)


### Cell 2: Verify Imports (Run After Restart)

In [None]:
# ==============================================================================
# CELL 2: Verify Imports After Runtime Restart
# ==============================================================================
# Run this AFTER restarting the runtime

# This import should now work without torchtyping errors
import stribor

import torch
print(f"✓ PyTorch version: {torch.__version__}")

# Test that stribor's distributions work
imports_ok = True
try:
    from stribor.dist import Normal
    print("✓ stribor.dist imports work")
except Exception as e:
    imports_ok = False
    print(f"✗ Error: {e}")

# Only claim success when the check above actually passed
if imports_ok:
    print("\n✓ All imports successful - ready to proceed!")
else:
    print("\n✗ stribor.dist could not be imported - re-run Cell 1, restart the runtime, then try again")

✓ PyTorch version: 2.9.0+cu126
✓ stribor.dist imports work

✓ All imports successful - ready to proceed!


### Cell 3: Setup Paths and Clone Meta-TPP

In [None]:
# ==============================================================================
# CELL 3: Setup Paths and Clone Meta-TPP
# ==============================================================================
import os
import sys

# Mount Google Drive (datasets and results live there)
from google.colab import drive
drive.mount('/content/drive')

# Base directories: datasets/results on Drive, the Meta-TPP checkout under /content
BASE_DIR = '/content/drive/MyDrive/Colab Notebooks/MilestoneFall2025'
DATASET_DIR = os.path.join(BASE_DIR, 'Datasets')
RESULTS_DIR = os.path.join(BASE_DIR, 'results/Task3')
METATPP_DIR = '/content/meta-tpp'

os.makedirs(RESULTS_DIR, exist_ok=True)

# Clone Meta-TPP (skipped when the folder already exists)
# NOTE(review): the success message is printed whether or not `git clone` succeeded.
if not os.path.exists(METATPP_DIR):
    !git clone https://github.com/BorealisAI/meta-tpp.git {METATPP_DIR}
    print("✓ Meta-TPP cloned")
else:
    print("✓ Meta-TPP already exists")

# Add to Python path so `src.*` modules from the checkout can be imported
if METATPP_DIR not in sys.path:
    sys.path.insert(0, METATPP_DIR)

# Change to Meta-TPP directory (required for Hydra)
# This changes the working directory for every later cell as well.
os.chdir(METATPP_DIR)
print(f"✓ Working directory: {os.getcwd()}")

# Create data directories (the converted .npz is written to data/synthetic later)
os.makedirs(os.path.join(METATPP_DIR, 'data', 'synthetic'), exist_ok=True)
print("✓ Data directories ready")

# ==============================================================================
# Create .project-root indicator file (required by pyrootutils)
# ==============================================================================
project_root_file = os.path.join(METATPP_DIR, '.project-root')
if not os.path.exists(project_root_file):
    with open(project_root_file, 'w') as f:
        f.write('')  # Empty file is fine, just needs to exist
    print(f"✓ Created .project-root indicator file")
else:
    print(f"✓ .project-root already exists")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Cloning into '/content/meta-tpp'...
remote: Enumerating objects: 3610, done.[K
remote: Counting objects: 100% (226/226), done.[K
remote: Compressing objects: 100% (67/67), done.[K
remote: Total 3610 (delta 159), reused 211 (delta 155), pack-reused 3384 (from 1)[K
Receiving objects: 100% (3610/3610), 3.45 MiB | 19.85 MiB/s, done.
Resolving deltas: 100% (2132/2132), done.
✓ Meta-TPP cloned
✓ Working directory: /content/meta-tpp
✓ Data directories ready
✓ Created .project-root indicator file


### Cell 4: Convert Non-Stationary Hawkes Data to Meta-TPP Format

In [None]:
# ==============================================================================
# CELL 4: Convert Stationary Hawkes Data to Meta-TPP Format
# ==============================================================================
import numpy as np
import pickle

def convert_hawkes_to_metatpp_format(dataset_dir, output_path, dataset_name='nonstationary_hawkes'):
    """
    Convert Sparklen-generated Hawkes pickle files to Meta-TPP npz format.

    Reads ``train.pkl``, ``dev.pkl`` and ``test.pkl`` from ``dataset_dir``,
    stacks the sequences in that order and writes one npz file with keys
    'times', 'marks', 'masks', each of shape (num_sequences, max_seq_len)
    and zero-padded on the right.

    Meta-TPP is expected to split this file by row index (60% train,
    20% val, 20% test, or custom), so a warning is printed when the source
    splits do not end on those boundaries.

    Parameters
    ----------
    dataset_dir : str
        Folder containing the three split pickles.
    output_path : str
        Destination ``.npz`` path; its parent folder is created if missing.
    dataset_name : str
        Only used in the progress messages.

    Returns
    -------
    str
        ``output_path``.
    """
    print("=" * 70)
    print(f"Converting {dataset_name} to Meta-TPP format")
    print("=" * 70)

    all_times = []
    all_marks = []
    split_counts = {}
    splits = ['train', 'dev', 'test']
    split_files = {'train': 'train.pkl', 'dev': 'dev.pkl', 'test': 'test.pkl'}

    for split in splits:
        pkl_path = os.path.join(dataset_dir, split_files[split])
        print(f"\nLoading {split}...")

        # NOTE(security): pickle.load can execute arbitrary code - only use on
        # dataset files you generated yourself.
        with open(pkl_path, 'rb') as f:
            data = pickle.load(f)

        # Handle different pickle structures
        if split in data:
            sequences = data[split]
        elif 'train' in data or 'dev' in data or 'test' in data:
            # The requested key is absent; fall back to 'train' (or nothing),
            # but say so - silently reusing another split is easy to miss.
            sequences = data.get('train', [])
            print(f"  ⚠ '{split}' key not found in {split_files[split]}; using its 'train' entry instead")
        else:
            sequences = data if isinstance(data, list) else list(data.values())[0]

        print(f"  - {len(sequences)} sequences")
        split_counts[split] = len(sequences)

        for seq in sequences:
            if isinstance(seq, list) and len(seq) > 0 and isinstance(seq[0], dict):
                # Format: [{'time_since_start': t, 'type_event': m}, ...]
                times = [event['time_since_start'] for event in seq]
                marks = [event['type_event'] for event in seq]
            else:
                # Assume it's already (times, marks) format; bare numbers get mark 0
                times = [e[0] if isinstance(e, (list, tuple)) else e for e in seq]
                marks = [e[1] if isinstance(e, (list, tuple)) else 0 for e in seq]

            all_times.append(times)
            all_marks.append(marks)

    # Find max sequence length (0 when there are no sequences at all)
    max_len = max((len(t) for t in all_times), default=0)
    num_seqs = len(all_times)

    print(f"\n  Total sequences: {num_seqs}")
    print(f"  Max sequence length: {max_len}")

    # Create padded arrays
    times_arr = np.zeros((num_seqs, max_len), dtype=np.float32)
    marks_arr = np.zeros((num_seqs, max_len), dtype=np.int64)
    masks_arr = np.zeros((num_seqs, max_len), dtype=np.float32)

    for i, (t, m) in enumerate(zip(all_times, all_marks)):
        seq_len = len(t)
        times_arr[i, :seq_len] = t
        marks_arr[i, :seq_len] = m
        masks_arr[i, :seq_len] = 1.0

    # Save as npz (np.savez does not create missing folders itself)
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    np.savez(output_path, times=times_arr, marks=marks_arr, masks=masks_arr)

    print(f"\n✓ Saved to: {output_path}")
    print(f"  - times: {times_arr.shape}, dtype={times_arr.dtype}")
    print(f"  - marks: {marks_arr.shape}, dtype={marks_arr.dtype}")
    print(f"  - masks: {masks_arr.shape}, dtype={masks_arr.dtype}")
    print(f"  - Split: [0:{int(num_seqs*0.6)}] train, [{int(num_seqs*0.6)}:{int(num_seqs*0.8)}] val, [{int(num_seqs*0.8)}:] test")

    # The line above shows the index-based 60/20/20 cut; flag the case where the
    # source files were split differently, because rows would change split.
    train_end = split_counts['train']
    val_end = train_end + split_counts['dev']
    if (train_end, val_end) != (int(num_seqs * 0.6), int(num_seqs * 0.8)):
        print(f"  ⚠ Source splits end at rows {train_end} and {val_end}, which differs from the 60/20/20 cut above")
    print("=" * 70)

    return output_path

# Convert the data
# NOTE(review): the input folder follows `dataset_name`, but the output file name is
# hard-coded to 'nonstationary_hawkes.npz' and `dataset_name` is not passed to the
# converter (its default is used for the progress messages) - confirm this is intended.
hawkes_input_dir = os.path.join(DATASET_DIR, dataset_name)
metatpp_data_path = os.path.join(METATPP_DIR, 'data', 'synthetic', 'nonstationary_hawkes.npz')

convert_hawkes_to_metatpp_format(hawkes_input_dir, metatpp_data_path)

# Verify: reload the file just written and list the arrays it contains
# (`data` stays open after this cell; the NpzFile is not closed here)
data = np.load(metatpp_data_path)
print("\n✓ Verification:")
for key in data.keys():
    print(f"  {key}: shape={data[key].shape}, dtype={data[key].dtype}")

Converting nonstationary_hawkes to Meta-TPP format

Loading train...
  - 300 sequences

Loading dev...
  - 100 sequences

Loading test...
  - 100 sequences

  Total sequences: 500
  Max sequence length: 163

✓ Saved to: /content/meta-tpp/data/synthetic/nonstationary_hawkes.npz
  - times: (500, 163), dtype=float32
  - marks: (500, 163), dtype=int64
  - masks: (500, 163), dtype=float32
  - Split: [0:300] train, [300:400] val, [400:] test

✓ Verification:
  times: shape=(500, 163), dtype=float32
  marks: shape=(500, 163), dtype=int64
  masks: shape=(500, 163), dtype=float32


### Cell 5: Create Dataset Config and Patch TPPDataset

In [None]:
# ==============================================================================
# CELL 5: Create Dataset Config and Register Dataset
# ==============================================================================
import yaml

# Create dataset config YAML
dataset_config = {
    'dataset': dataset_name,
    'num_classes': 2
}

config_dir = os.path.join(METATPP_DIR, 'configs', 'data', 'datasets')
config_path = os.path.join(config_dir, 'nonstationary_hawkes.yaml')
os.makedirs(config_dir, exist_ok=True)  # no-op for a normal checkout

with open(config_path, 'w') as f:
    yaml.dump(dataset_config, f)

print(f"✓ Created dataset config: {config_path}")

# Patch TPPDataset to include our new dataset
tpp_dataset_path = os.path.join(METATPP_DIR, 'src', 'data', 'tpp_dataset.py')

with open(tpp_dataset_path, 'r') as f:
    content = f.read()

# Tracks whether the dataset name ends up in the synthetic_data list, so the
# final message cannot claim success after a patch that never happened.
dataset_registered = True

# Add nonstationary_hawkes to synthetic_data list if not already there
if 'nonstationary_hawkes' not in content:
    old_line = "synthetic_data = ['sin', 'hawkes_univariate', 'hawkes_multivariate_3d', \"hawkes_regime_changes\"]"
    new_line = "synthetic_data = ['sin', 'hawkes_univariate', 'hawkes_multivariate_3d', \"hawkes_regime_changes\", 'nonstationary_hawkes']"

    if old_line in content:
        content = content.replace(old_line, new_line)
        with open(tpp_dataset_path, 'w') as f:
            f.write(content)
        print("✓ Added 'nonstationary_hawkes' to synthetic_data list")
    else:
        # Try alternative format: any list literal assigned to synthetic_data
        import re
        pattern = r"(synthetic_data\s*=\s*\[.*?\])"
        match = re.search(pattern, content, re.DOTALL)
        if match:
            old = match.group(1)
            # Drop the closing bracket plus any trailing whitespace/comma so the
            # appended entry is valid for "[a, b]", "[a, b,\n]" and "[]" alike.
            head = old[:-1].rstrip().rstrip(',')
            separator = '' if head.endswith('[') else ', '
            new = f"{head}{separator}'nonstationary_hawkes']"
            content = content.replace(old, new)
            with open(tpp_dataset_path, 'w') as f:
                f.write(content)
            print("✓ Added 'nonstationary_hawkes' to synthetic_data list")
        else:
            dataset_registered = False
            print(f"✗ Could not find a synthetic_data list in {tpp_dataset_path}")
else:
    print("✓ 'nonstationary_hawkes' already registered")

# Reload the module: drop any cached copy so the next import reads the patched file
import importlib
if 'src.data.tpp_dataset' in sys.modules:
    del sys.modules['src.data.tpp_dataset']

if dataset_registered:
    print("✓ Dataset configuration complete")
else:
    print("✗ Dataset configuration incomplete - add 'nonstationary_hawkes' to synthetic_data by hand")

✓ Created dataset config: /content/meta-tpp/configs/data/datasets/nonstationary_hawkes.yaml
✓ Added 'nonstationary_hawkes' to synthetic_data list
✓ Dataset configuration complete


### Cell 6: Test Data Loading

In [None]:
# ==============================================================================
# CELL 6: Test Data Loading
# ==============================================================================
# Verify the data can be loaded by Meta-TPP's dataset class

from src.data.tpp_dataset import TPPDataset

# Test loading: TPPDataset is pointed at the Meta-TPP data folder
data_dir = os.path.join(METATPP_DIR, 'data')

print("Testing data loading...")
try:
    # One dataset object per split; num_classes=2 matches the dataset config written earlier
    train_ds = TPPDataset(data_dir, dataset_name, num_classes=2, mode='train')
    val_ds = TPPDataset(data_dir, dataset_name, num_classes=2, mode='val')
    test_ds = TPPDataset(data_dir, dataset_name, num_classes=2, mode='test')

    print(f"\n✓ Train: {len(train_ds)} sequences")
    print(f"✓ Val: {len(val_ds)} sequences")
    print(f"✓ Test: {len(test_ds)} sequences")

    # Test a sample: indexing returns a dict-like item with at least these three keys
    sample = train_ds[0]
    print(f"\nSample batch keys: {list(sample.keys())}")
    print(f"  times shape: {sample['times'].shape}")
    print(f"  marks shape: {sample['marks'].shape}")
    print(f"  masks shape: {sample['masks'].shape}")

    print("\n✓ Data loading test passed!")
except Exception as e:
    # Smoke test only: report the failure with a traceback instead of stopping the notebook
    print(f"✗ Error: {e}")
    import traceback
    traceback.print_exc()

Testing data loading...
time shape: torch.Size([300, 163, 1]), marks shape: torch.Size([300, 163, 1]), masks shape: torch.Size([300, 163, 1])
time shape: torch.Size([100, 163, 1]), marks shape: torch.Size([100, 163, 1]), masks shape: torch.Size([100, 163, 1])
time shape: torch.Size([100, 163, 1]), marks shape: torch.Size([100, 163, 1]), masks shape: torch.Size([100, 163, 1])

✓ Train: 300 sequences
✓ Val: 100 sequences
✓ Test: 100 sequences

Sample batch keys: ['times', 'marks', 'masks', 'missing_masks']
  times shape: torch.Size([163, 1])
  marks shape: torch.Size([163, 1])
  masks shape: torch.Size([163, 1])

✓ Data loading test passed!


### Cell 7: Train Meta-TPP Using Hydra

In [None]:
# ==============================================================================
# CELL 7: Train Meta-TPP Using Hydra
# ==============================================================================
import subprocess
import os

# Ensure we're in the Meta-TPP directory
os.chdir(METATPP_DIR)

# Training command using Hydra
# This replicates: python src/train.py data/datasets=nonstationary_hawkes
cmd = [
    'python', 'src/train.py',
    'data/datasets=nonstationary_hawkes',
    'model=thp_mix',
    'trainer.max_epochs=200',
    'trainer.accelerator=gpu',
    'trainer.devices=1',
    'data.batch_size=256',
    'tag=colab_run',
    'test=False',
    'seed=42'
]

print("=" * 70)
print("Starting Meta-TPP Training")
print("=" * 70)
print(f"Command: {' '.join(cmd)}")
print("=" * 70 + "\n")

# Run training. The context manager closes the stdout pipe and waits for the
# child on exit, so no file handle is left open if the cell is re-run.
with subprocess.Popen(
    cmd,
    stdout=subprocess.PIPE,
    stderr=subprocess.STDOUT,  # interleave stderr with stdout in one stream
    text=True,
    bufsize=1                  # line-buffered so progress shows up live
) as process:
    # Stream output line by line as the trainer produces it
    for line in process.stdout:
        print(line, end='')

if process.returncode == 0:
    print("\n" + "=" * 70)
    print("✓ Training completed successfully!")
    print("=" * 70)
else:
    print(f"\n✗ Training failed with return code: {process.returncode}")

Starting Meta-TPP Training
Command: python src/train.py data/datasets=nonstationary_hawkes model=thp_mix trainer.max_epochs=200 trainer.accelerator=gpu trainer.devices=1 data.batch_size=256 tag=colab_run test=False seed=42

[[36m2025-12-27 17:57:14,653[0m][[34msrc.utils.utils[0m][[32mINFO[0m] - Enforcing tags! <cfg.extras.enforce_tags=True>[0m
[[36m2025-12-27 17:57:14,657[0m][[34msrc.utils.utils[0m][[32mINFO[0m] - Printing config tree with Rich! <cfg.extras.print_config=True>[0m
CONFIG
├── data
│   └── _target_: src.data.tpp_dataset.TPPDataModule                            
│       data_dir: /content/meta-tpp/data/                                       
│       batch_size: 256                                                         
│       num_workers: 4                                                          
│       pin_memory: false                                                       
│       train_rate: 1.0                                                         


In [None]:
# ==============================================================================
# CELL 7b: Manual Testing with proper optimizer setup
# ==============================================================================
import torch
import lightning as L
from torch.utils.data import DataLoader
from functools import partial

# Fix for PyTorch 2.6 checkpoint loading
from omegaconf import ListConfig, DictConfig
torch.serialization.add_safe_globals([ListConfig, DictConfig])

from src.data.tpp_dataset import TPPDataset
from src.models.tpp_module import TPPLitModule
from src.models.tpp.tpp_network import TransformerMix

# Find the checkpoint to evaluate
import glob
ckpt_pattern = os.path.join(METATPP_DIR, 'logs', 'nonstationary_hawkes_thp_mix_colab_run', 'seed42', 'checkpoints', '*.ckpt')
ckpts = glob.glob(ckpt_pattern)
# Skip the rolling "last" checkpoint; match on the file name only so a folder
# that happens to contain "last" cannot hide every checkpoint.
ckpts = [c for c in ckpts if 'last' not in os.path.basename(c)]
# NOTE(review): this takes the alphabetically last file name, which is only the
# "best" checkpoint if the naming scheme sorts that way - confirm.
best_ckpt = sorted(ckpts)[-1] if ckpts else None

print(f"Best checkpoint: {best_ckpt}")

# Load data
data_dir = os.path.join(METATPP_DIR, 'data')
test_ds = TPPDataset(data_dir, dataset_name, num_classes=2, mode='test')
test_loader = DataLoader(test_ds, batch_size=16, shuffle=False, num_workers=2)

# Create model (hyper-parameters must match the ones the checkpoint was trained with)
net = TransformerMix(
    name='thp_mix', num_classes=2, d_model=64, d_inner=64, n_layers=2,
    cattn_n_layers=1, n_head=1, d_k=64, d_v=64, dropout=0.1,
    attn_l=0, base_l=0, perm_invar=False, use_avg=True, share_weights=True,
    attn_only=False, concat=False, num_latent=64, vi_method=None,
    num_z_samples=32, compute_acc=True,
    activation={'name': 'tanh', 'in_features': 64, 'trainable': True}
)

# Create optimizer as a partial function (this is what Hydra does)
optimizer = partial(
    torch.optim.Adam,
    lr=0.0001,
    weight_decay=0.0001,
    eps=1e-5,
    betas=(0.9, 0.999)
)

model = TPPLitModule(net=net, optimizer=optimizer)

# Load checkpoint
if best_ckpt:
    # Load onto the CPU: load_state_dict copies values into the existing
    # parameters and the Trainer below moves the module to the GPU itself.
    # weights_only=False unpickles arbitrary objects - only load checkpoints
    # produced by your own training run.
    ckpt = torch.load(best_ckpt, map_location='cpu', weights_only=False)
    model.load_state_dict(ckpt['state_dict'])
    print("✓ Checkpoint loaded")
else:
    # Without this warning the cell would quietly report metrics of random weights
    print(f"⚠️  No checkpoint matched {ckpt_pattern}")
    print("   The metrics below come from an UNTRAINED model - run training (Cell 7) first.")

# Set model to eval mode
model.eval()

# Test
trainer = L.Trainer(accelerator='gpu', devices=1, logger=False, enable_progress_bar=True)
test_results = trainer.test(model, test_loader)

print("\n" + "=" * 70)
print("Meta-TPP Test Results")
print("=" * 70)
for k, v in test_results[0].items():
    print(f"  {k}: {v:.4f}")


Meta-TPP Test Results
  test/nll: 2.7889
  test/rmse: 20.2164
  test/acc: 0.5061


### Cell 8: Extract and Save Results

In [None]:
# ==============================================================================
# CELL 9: Extract and Save Results Properly
# ==============================================================================
import pandas as pd


def get_metric(metrics, *possible_keys):
    """Return the first metric found under any of ``possible_keys``.

    Args:
        metrics: Mapping of metric name to value.
        *possible_keys: Candidate key spellings, tried in order.

    Returns:
        The value stored under the first key that is present, converted with
        ``.item()`` when the value provides it (e.g. a tensor), or
        ``float('nan')`` when none of the keys is present.
    """
    for key in possible_keys:
        if key in metrics:
            val = metrics[key]
            # Handle tensor values
            if hasattr(val, 'item'):
                return val.item()
            return val
    return float('nan')


# test_results is returned from trainer.test() - it's a list of dicts
# Print what we actually got to debug
print("=" * 70)
print("Raw test_results:")
print(test_results)
print("=" * 70)

# Extract metrics from test_results (it's a list with one dict)
have_results = bool(test_results)
if have_results:
    metrics = test_results[0]

    # Meta-TPP uses keys like 'test/nll', 'test/rmse', 'test/acc'
    # or 'test_nll', 'test_rmse', 'test_acc'; try each spelling in turn.
    nll = get_metric(metrics, 'test/nll', 'test_nll', 'test_loss')
    rmse = get_metric(metrics, 'test/rmse', 'test_rmse')
    acc = get_metric(metrics, 'test/acc', 'test_acc', 'test_accuracy')

    # Log-likelihood is negative NLL
    log_likelihood = -nll if not pd.isna(nll) else float('nan')

    print("\nExtracted metrics:")
    print(f"  NLL: {nll}")
    print(f"  Log-Likelihood: {log_likelihood}")
    print(f"  RMSE: {rmse}")
    print(f"  Accuracy: {acc}")
else:
    print("No test results found!")
    # Define every metric on this path too, so later code never hits a NameError.
    nll = float('nan')
    log_likelihood = float('nan')
    acc = float('nan')
    rmse = float('nan')

# Convert accuracy to a percentage only when it looks like a fraction (<= 1);
# values above 1 are assumed to already be percentages.
accuracy_pct = acc * 100 if not pd.isna(acc) and acc <= 1 else acc

# Create result entry
metatpp_result = {
    'model': 'Meta-TPP',
    'dataset': 'nonstationary_hawkes',
    'log_likelihood': log_likelihood,
    'accuracy': accuracy_pct,
    'rmse': rmse,
    # Only report success when trainer.test() actually returned metrics.
    'status': 'success' if have_results else 'failed'
}

# Save results
# NOTE(review): the file name says "stationary" but this row is for the
# nonstationary_hawkes dataset - confirm this does not overwrite another
# experiment's results before renaming (later cells may read this path).
results_df = pd.DataFrame([metatpp_result])
metatpp_results_path = os.path.join(RESULTS_DIR, 'metatpp_stationary_results.csv')
results_df.to_csv(metatpp_results_path, index=False)

print("\n" + "=" * 70)
print("Meta-TPP Results (Task 3b)")
print("=" * 70)
print(results_df.to_string(index=False))
print(f"\n✓ Saved to: {metatpp_results_path}")

# Also print in Table-1-like format for easy comparison
print("\n" + "=" * 70)
print("Table-1-like Format:")
print("=" * 70)
print("Model: Meta-TPP")
print("Dataset: nonstationary_hawkes")
print(f"Log-Likelihood: {log_likelihood:.3f}" if not pd.isna(log_likelihood) else "Log-Likelihood: N/A")
print(f"Accuracy: {metatpp_result['accuracy']:.1f}%" if not pd.isna(metatpp_result['accuracy']) else "Accuracy: N/A")
print(f"RMSE: {rmse:.3f}" if not pd.isna(rmse) else "RMSE: N/A")

Raw test_results:
[{'test/nll': 2.7888832092285156, 'test/rmse': 20.21638298034668, 'test/acc': 0.5061477422714233}]

Extracted metrics:
  NLL: 2.7888832092285156
  Log-Likelihood: -2.7888832092285156
  RMSE: 20.21638298034668
  Accuracy: 0.5061477422714233

Meta-TPP Results (Task 3b)
   model              dataset  log_likelihood  accuracy      rmse  status
Meta-TPP nonstationary_hawkes       -2.788883 50.614774 20.216383 success

✓ Saved to: /content/drive/MyDrive/Colab Notebooks/MilestoneFall2025/results/Task3/metatpp_stationary_results.csv

Table-1-like Format:
Model: Meta-TPP
Dataset: nonstationary_hawkes
Log-Likelihood: -2.789
Accuracy: 50.6%
RMSE: 20.216
