# Chronos Bolt on MUSED-FM using run_musedfm Functions

This notebook demonstrates how to run Chronos Bolt on the MUSED-FM benchmark using the functions from `run_musedfm.py`. This simplifies the outputs to make easier 

In [4]:
import os
import pandas as pd
import numpy as np
import torch
from pathlib import Path
from typing import Optional

# Import MUSED-FM components
from musedfm.data import Benchmark
from musedfm.metrics import MAPE, MAE, RMSE, NMAE

# Import functions from the examples package
from examples import (
    run_models_on_benchmark, 
    compare_model_performance, 
    export_hierarchical_results_to_csv,
    export_results_to_csv
)

print("MUSED-FM components imported successfully!")


MUSED-FM components imported successfully!


In [16]:
# Self-contained ChronosForecast class for the notebook (FIXED VERSION)
class ChronosForecast:
    """
    Chronos forecasting model wrapper for MUSED-FM evaluation.
    This class is self-contained within the notebook.
    """
    
    def __init__(self, model_path: str = "amazon/chronos-bolt-base", device: str = "cuda:0", num_samples: int = 20):
        """
        Initialize Chronos forecast model.
        
        Args:
            model_path: Path to Chronos model (HuggingFace model ID or local path)
            device: Device to run the model on
            num_samples: Number of samples for probabilistic forecasting
        """
        self.model_path = model_path
        self.device = device
        self.num_samples = num_samples
        self.pipeline = None
        self._load_model()
    
    def _load_model(self):
        """Load the Chronos model."""
        try:
            from chronos import BaseChronosPipeline, ForecastType
            
            self.pipeline = BaseChronosPipeline.from_pretrained(
                self.model_path,
                device_map=self.device,
            )
            print(f"Loaded Chronos model: {self.model_path}")
        except ImportError:
            raise ImportError("Chronos package not installed. Please install with: pip install chronos-forecasting")
        except Exception as e:
            raise RuntimeError(f"Failed to load Chronos model: {e}")
    
    def forecast(self, history: np.ndarray, covariates: Optional[np.ndarray] = None, forecast_horizon: Optional[int] = None) -> np.ndarray:
        """
        Generate forecast from historical data using Chronos.
        
        Args:
            history: Historical time series data
            covariates: Optional covariate data (ignored for Chronos)
            forecast_horizon: Number of future points to forecast (default: 1)
            
        Returns:
            Forecast values
        """
        if forecast_horizon is None:
            forecast_horizon = 1
        
        # Convert history to torch tensor
        if isinstance(history, np.ndarray):
            history_tensor = torch.tensor(history, dtype=torch.float32)
        else:
            history_tensor = torch.tensor(np.array(history), dtype=torch.float32)
        
        # Remove NaN values
        history_clean = history_tensor[~torch.isnan(history_tensor)]
        
        if len(history_clean) == 0:
            # If no valid data, return zeros
            return np.zeros(forecast_horizon)
        
        # Ensure we have enough history for forecasting
        if len(history_clean) < 2:
            # If insufficient data, return the last value repeated
            last_value = float(history_clean[-1]) if len(history_clean) > 0 else 0.0
            return np.full(forecast_horizon, last_value)
        
        # Generate forecast using Chronos
        # FIXED: Use predict_quantiles method with proper parameters
        quantiles, mean = self.pipeline.predict_quantiles(
            context=history_clean,
            prediction_length=forecast_horizon,
            quantile_levels=[0.1, 0.5, 0.9]
        )
        
        # Use the median (0.5 quantile) as our forecast
        # Handle different quantiles array structures
        if isinstance(quantiles, torch.Tensor):
            if quantiles.shape[0] >= 3:  # Has all 3 quantiles
                forecast_np = quantiles[1].detach().cpu().numpy()  # 0.5 quantile
            else:  # Only has 1 quantile
                forecast_np = quantiles[0].detach().cpu().numpy()
        else:
            if len(quantiles) >= 3:  # Has all 3 quantiles
                forecast_np = np.array(quantiles[1])  # 0.5 quantile
            else:  # Only has 1 quantile
                forecast_np = np.array(quantiles[0])
        
        # Handle different output shapes
        if forecast_np.ndim > 1:
            # If we have multiple samples, take the mean
            if forecast_np.shape[0] > 1:
                forecast_np = np.mean(forecast_np, axis=0)
            else:
                forecast_np = forecast_np[0]
        
        # Ensure we have the right length
        if len(forecast_np) != forecast_horizon:
            if len(forecast_np) > forecast_horizon:
                forecast_np = forecast_np[:forecast_horizon]
            else:
                # Pad with the last value if needed
                last_val = forecast_np[-1] if len(forecast_np) > 0 else 0.0
                forecast_np = np.pad(forecast_np, (0, forecast_horizon - len(forecast_np)), 'constant', constant_values=last_val)
        
        return forecast_np
            

print("ChronosForecast class defined successfully!")


ChronosForecast class defined successfully!


In [26]:
# Configuration
BENCHMARK_PATH = "/workspace/data/fm_eval_nested/"  # Adjust this path to your MUSED-FM data
MODEL_PATH = "amazon/chronos-bolt-base"  # Chronos Bolt model
DEVICE = "cuda:0"  # Use "cpu" if you don't have CUDA
NUM_SAMPLES = 20  # Number of samples for probabilistic forecasting
MAX_WINDOWS = 1000  # Limit windows per dataset for faster testing
OUTPUT_DIR = "./results/chronos_bolt"

# Create output directory
os.makedirs(OUTPUT_DIR, exist_ok=True)

print(f"Benchmark path: {BENCHMARK_PATH}")
print(f"Model: {MODEL_PATH}")
print(f"Device: {DEVICE}")
print(f"Output directory: {OUTPUT_DIR}")


Benchmark path: /workspace/data/fm_eval_nested/
Model: amazon/chronos-bolt-base
Device: cuda:0
Output directory: ./results/chronos_bolt


In [21]:
# Initialize Chronos model
try:
    chronos_model = ChronosForecast(
        model_path=MODEL_PATH,
        device=DEVICE,
        num_samples=NUM_SAMPLES
    )
    print("Chronos model initialized successfully!")
except Exception as e:
    print(f"Error initializing Chronos model: {e}")
    print("Make sure you have installed chronos-forecasting and have the required dependencies.")


Loaded Chronos model: amazon/chronos-bolt-base
Chronos model initialized successfully!


In [None]:
# Run Chronos evaluation using run_musedfm functions
print("Starting Chronos evaluation using run_musedfm functions...")

# Define models dictionary (same format as run_musedfm.py)
models = {
    "chronos": {"model": chronos_model, "univariate": True}
}

# Run evaluation using the efficient run_musedfm function
results = run_models_on_benchmark(
    benchmark_path=BENCHMARK_PATH,
    models=models,
    max_windows=MAX_WINDOWS,
    history_length=512,
    forecast_horizon=128,
    stride=256,
    load_cached_counts=True
)

print("Evaluation completed successfully!")


Starting Chronos evaluation using run_musedfm functions...
Running Multiple Models on Benchmark
Loading KITTI data from /workspace/data/fm_eval_nested/sequential/KITTI
Found 6114 parquet files
Successfully loaded 6114 valid files
Domain ALL_DATASETS not found in file hierarchy
successfully counted windows from cached JSON files
Domain ALL_DATASETS not found in file hierarchy
successfully counted windows from cached JSON files
Dataset aus_electricity not found in data_hierarchy.json
/workspace/data/fm_eval_nested/traditional/open_aq [PosixPath('/workspace/data/fm_eval_nested/traditional/open_aq/delhi_combined.parquet'), PosixPath('/workspace/data/fm_eval_nested/traditional/open_aq/reykjavik_combined.parquet'), PosixPath('/workspace/data/fm_eval_nested/traditional/open_aq/rotterdam_combined.parquet'), PosixPath('/workspace/data/fm_eval_nested/traditional/open_aq/winnipeg_combined.parquet')]
Domain ALL_DATASETS not found in file hierarchy
successfully counted windows from cached JSON file



    chronos: 1000 windows in 48.11s
  ✓ No NaN values detected in 1000 windows
  Completed dataset sequential/cifar100_timeseries_csvs

Processing dataset 2: sequential/KITTI
  Processing 1000 windows with 1 models...




In [23]:
# Compare model performance
compare_model_performance(results)



Model Performance Comparison
Model                MAPE (%)   MAE        RMSE       NMAE       Time (s)  
--------------------------------------------------------------------------------
chronos              1756.58    108611419.2457 129233905.5416 -18.7448   154.05    
chronos_univariate   nan        nan        nan        nan        0.00      

Best Performance:
  Lowest MAPE: chronos (1756.58%)
  Lowest MAE:  chronos (108611419.2457)
  Lowest RMSE: chronos (129233905.5416)
  Fastest:     chronos_univariate (0.00s)


In [24]:
# Export hierarchical results to CSV
export_hierarchical_results_to_csv(results, output_dir=OUTPUT_DIR)



Exporting Hierarchical Results to CSV
✓ Category results saved: chronos_category_results.csv
✓ Dataset results saved: chronos_dataset_results.csv

Hierarchical CSV files saved to:
  Categories: ./results/chronos_bolt/categories/
  Domains: ./results/chronos_bolt/domains/
  Datasets: ./results/chronos_bolt/datasets/


In [25]:
# Export detailed CSV results
export_results_to_csv(
    benchmark_path=BENCHMARK_PATH,
    models=models,
    max_windows=MAX_WINDOWS,
    output_dir=OUTPUT_DIR,
    history_length=512,
    forecast_horizon=128,
    stride=256,
    load_cached_counts=True
)



Exporting Results to CSV
Loading KITTI data from /workspace/data/fm_eval_nested/sequential/KITTI
Found 6114 parquet files
Successfully loaded 6114 valid files
Domain ALL_DATASETS not found in file hierarchy
successfully counted windows from cached JSON files
Domain ALL_DATASETS not found in file hierarchy
successfully counted windows from cached JSON files
Dataset aus_electricity not found in data_hierarchy.json
/workspace/data/fm_eval_nested/traditional/open_aq [PosixPath('/workspace/data/fm_eval_nested/traditional/open_aq/delhi_combined.parquet'), PosixPath('/workspace/data/fm_eval_nested/traditional/open_aq/reykjavik_combined.parquet'), PosixPath('/workspace/data/fm_eval_nested/traditional/open_aq/rotterdam_combined.parquet'), PosixPath('/workspace/data/fm_eval_nested/traditional/open_aq/winnipeg_combined.parquet')]
Domain ALL_DATASETS not found in file hierarchy
successfully counted windows from cached JSON files
Domain ALL_DATASETS not found in file hierarchy
successfully counted



TypeError: Window.submit_forecast() got an unexpected keyword argument 'univariate'

In [None]:
# Display results summary
if results and 'chronos' in results:
    chronos_results = results['chronos']
    print("\nChronos Results Summary:")
    print(f"Total windows processed: {chronos_results['windows']}")
    print(f"Total time: {chronos_results['time']:.2f} seconds")
    
    if chronos_results['metrics']:
        metrics = chronos_results['metrics']
        print(f"Average MAPE: {metrics.get('MAPE', 'N/A'):.2f}%")
        print(f"Average MAE: {metrics.get('MAE', 'N/A'):.4f}")
        print(f"Average RMSE: {metrics.get('RMSE', 'N/A'):.4f}")
        print(f"Average NMAE: {metrics.get('NMAE', 'N/A'):.4f}")
    
    print(f"\nResults saved to: {OUTPUT_DIR}")
else:
    print("No results available.")


## Summary

This notebook demonstrates how to run Chronos Bolt on the MUSED-FM benchmark using the efficient functions from `run_musedfm.py`. 

### Key Benefits:

1. **Efficient**: Uses optimized evaluation functions instead of custom loops
2. **Self-contained**: ChronosForecast class defined within the notebook
3. **Comprehensive**: Generates hierarchical CSV results and performance comparisons
4. **Minimal code**: Only ~10 cells vs. hundreds of lines in custom evaluation

### Functions Used:

- `run_models_on_benchmark()`: Main evaluation function
- `compare_model_performance()`: Performance comparison
- `export_hierarchical_results_to_csv()`: Hierarchical CSV export
- `export_results_to_csv()`: Detailed CSV export

This approach leverages the existing, well-tested evaluation framework while keeping the notebook clean and efficient.
