In [2]:
import os
import sys
import glob
import torch
import numpy as np
import pandas as pd
from collections import defaultdict
from sklearn.preprocessing import minmax_scale
from datetime import datetime

In [5]:
# ==========================================
# üìö CHRONOS IMPORTS
# ==========================================
try:
    from chronos import BaseChronosPipeline
    print("‚úÖ Chronos libraries loaded successfully.")
except ImportError as e:
    print(f"‚ùå Failed to load Chronos: {e}")
    print("üëâ Please run: pip install chronos-forecasting>=2.1")
    sys.exit(1)

# Metrics
from sklearn.metrics import (
    mean_absolute_percentage_error,
    mean_absolute_error,
    mean_squared_error,
    mean_squared_log_error,
)

def root_mean_squared_error(y_true, y_pred):
    return np.sqrt(mean_squared_error(y_true, y_pred))

try:
    from permetrics.regression import RegressionMetric
except ImportError:
    print("‚ùå Error: 'permetrics' library is missing.")
    sys.exit(1)

‚úÖ Chronos libraries loaded successfully.


In [3]:
# ==========================================
# ‚öôÔ∏è CONFIGURATION
# ==========================================
os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"

print(f"{'='*40}")
if torch.cuda.is_available():
    device_str = "cuda"
    print(f"üöÄ GPU DETECTED: {torch.cuda.get_device_name(0)}")
else:
    device_str = "cpu"
    print("‚ö†Ô∏è WARNING: GPU not detected. Running on CPU.")
print(f"{'='*40}")

# You can change this to 'amazon/chronos-t5-small', 'amazon/chronos-bolt-small', etc.
MODEL_NAME = 'amazon/chronos-t5-small' 
CONTEXT_LEN = 512  
PREDICTION_LEN = 24
BATCH_SIZE = 16    

DATA_CONFIG = [
    {
        "type": "in",
        "root_path": "/home/user/Thanish/test/in/", 
        "output_forecast_dir": "./chronos_forecasts/in",
        "output_results_dir": "./chronos_results/in"
    },
    {
        "type": "out",
        "root_path": "/home/user/Thanish/test/out/", 
        "output_forecast_dir": "./chronos_forecasts/out",
        "output_results_dir": "./chronos_results/out"
    }
]

üöÄ GPU DETECTED: NVIDIA GeForce RTX 5090


In [4]:
# ==========================================
# üß† MODEL LOADING
# ==========================================
def load_chronos_model():
    print(f"‚è≥ Loading Chronos Model ({MODEL_NAME})...")
    try:
        # Load pipeline (automatically handles Chronos-2 vs Bolt based on model name)
        pipeline = BaseChronosPipeline.from_pretrained(
            MODEL_NAME,
            device_map=device_str,
            torch_dtype=torch.bfloat16 if device_str == "cuda" else torch.float32,
        )
        print("‚úÖ Chronos Pipeline Loaded.")
        return pipeline
    except Exception as e:
        print(f"‚ùå Failed to load model: {e}")
        sys.exit(1)

chronos_pipeline = load_chronos_model()

‚è≥ Loading Chronos Model (amazon/chronos-t5-small)...
‚úÖ Chronos Pipeline Loaded.


In [5]:
# ==========================================
# üìä DATA BATCHING
# ==========================================
def get_batched_data_fn(sub_df, batch_size=64, context_len=512, horizon_len=24):
    examples = defaultdict(list)
    num_examples = 0
    sub_df["ds"] = pd.to_datetime(sub_df["ds"])
    y_full = sub_df["y"].tolist()
    ds_full = sub_df["ds"].tolist()
    total_len = len(sub_df)

    for start in range(0, total_len - (context_len + horizon_len), horizon_len):
        num_examples += 1
        context_end = start + context_len
        history_start = max(0, context_end - context_len)
        
        # Chronos expects 1D arrays for context
        examples["inputs"].append(np.array(y_full[history_start:context_end]))
        examples["outputs"].append(y_full[context_end:(context_end + horizon_len)])
        examples["inputs_ts"].append(ds_full[history_start:context_end])
        examples["outputs_ts"].append(ds_full[context_end:(context_end + horizon_len)])

    def data_fn():
        for i in range(1 + (num_examples - 1) // batch_size):
            yield {k: v[(i * batch_size):((i + 1) * batch_size)] for k, v in examples.items()}

    return data_fn

In [6]:
# ==========================================
# üîÆ FORECASTING
# ==========================================
def process_building(df, horizon_len=24, batch_size=16):
    input_data = get_batched_data_fn(df, batch_size=batch_size, context_len=CONTEXT_LEN, horizon_len=horizon_len)
    results_all = []

    for i, example in enumerate(input_data()):
        history_list = example["inputs"] # List of numpy arrays
        
        if not history_list: continue

        try:
            # üöÄ CHRONOS INFERENCE
            # 1. Convert numpy arrays to Torch Tensors
            context_tensors = [torch.tensor(x) for x in history_list]

            with torch.no_grad():
                # 2. Inference
                quantiles, _ = chronos_pipeline.predict_quantiles(
                    inputs=context_tensors,
                    prediction_length=horizon_len,
                    quantile_levels=[0.5], # Median
                    num_samples=20
                )
                # 3. Handle Output Shapes
                if isinstance(quantiles, (list, tuple)):
                    quantiles = torch.stack(quantiles)
                
                # üõë FIX: Check dimensions to handle Univariate vs Multivariate
                if quantiles.dim() == 3:
                    # Shape: [Batch, Quantiles, Length] -> e.g. [16, 1, 24]
                    # We want: [Batch, Length]
                    # Take the 0th index of dim 1 (Quantiles)
                    preds = quantiles[:, 0, :].cpu().numpy()
                    
                elif quantiles.dim() == 4:
                    # Shape: [Batch, Variates, Quantiles, Length]
                    # We want: [Batch, Length]
                    preds = quantiles[:, 0, 0, :].cpu().numpy()
                    
                else:
                    raise ValueError(f"Unknown output dimension: {quantiles.shape}")

        except Exception as e:
            print(f"  ‚ö†Ô∏è Chronos Failed Batch {i}: {e} -> Using Naive")
            fallback_preds = []
            for inp in example["inputs"]:
                naive_val = np.mean(inp) 
                fallback_preds.append([naive_val] * horizon_len)
            preds = np.array(fallback_preds)

        # Map predictions back to timestamps
        for ts, y_true, y_pred in zip(example["outputs_ts"], example["outputs"], preds):
            # Safety Truncate
            y_pred = np.asarray(y_pred).reshape(-1)
            min_len = min(len(ts), len(y_true), len(y_pred))
            
            res_df = pd.DataFrame({
                "ts": ts[:min_len],
                "y_true": y_true[:min_len],
                "y_pred": y_pred[:min_len]
            })
            results_all.append(res_df)

    if len(results_all) == 0:
        return pd.DataFrame(columns=["ts", "y_true", "y_pred"])

    results_all_df = pd.concat(results_all, ignore_index=True)
    return results_all_df

In [7]:
# ==========================================
# üìÇ FILE PROCESSING
# ==========================================
def process_file(filename):
    df = pd.read_parquet(filename)

    df.columns = [str(c) for c in df.columns]
    df = df.reset_index(drop=True)
    df = df.fillna(0) 

    results_all = []
    for building_name in df.columns:
        print(datetime.now(), f"‚Üí Processing: {building_name}", flush=True)
        df1 = df[[building_name]].reset_index()
        df1.columns = ["ds", "y"]
        # Note: Chronos handles unscaled data well, but since we are comparing 
        # using the same pipeline as FlowState, we keep MinMax scaling.
        df1["y"] = minmax_scale(df1["y"])

        res = process_building(df1, horizon_len=PREDICTION_LEN, batch_size=BATCH_SIZE)
        if res.empty: continue

        res["building"] = building_name
        results_all.append(res)

    if len(results_all) == 0: return None
    return pd.concat(results_all, ignore_index=True)

In [8]:
# ==========================================
# üöÄ MAIN EXECUTION
# ==========================================
for config in DATA_CONFIG:
    dist_type = config["type"]
    root_path = config["root_path"]
    
    print(f"\n{'='*60}")
    print(f"üöÄ Starting Processing: {dist_type.upper()}")
    print(f"üìÇ Source: {root_path}")
    print(f"{'='*60}")

    if not os.path.exists(root_path): continue

    all_items = os.listdir(root_path)
    datasets = [d for d in all_items if os.path.isdir(os.path.join(root_path, d))]

    for idx, dataset_name in enumerate(datasets):
        dataset_path = os.path.join(root_path, dataset_name)
        print(f"\nüìç Dataset: {dataset_name} | {idx+1}/{len(datasets)}")

        files_list = glob.glob(f"{dataset_path}/*.parquet")
        if not files_list: continue

        out_forecast_dir = os.path.join(config["output_forecast_dir"], dataset_name)
        out_results_dir = os.path.join(config["output_results_dir"], dataset_name)
        os.makedirs(out_forecast_dir, exist_ok=True)
        os.makedirs(out_results_dir, exist_ok=True)

        for filename in files_list:
            try:
                results = process_file(filename)
                if results is not None and not results.empty:
                    save_name = os.path.basename(filename).replace('.parquet', '.csv')
                    forecast_path = os.path.join(out_forecast_dir, save_name)
                    results.to_csv(forecast_path, index=False)
                    print(f"‚úÖ Saved: {save_name}")
            except Exception as e:
                print(f"‚ùå Error processing {filename}: {e}")

    print(f"‚úÖ Completed {dist_type.upper()}.")


üöÄ Starting Processing: IN
üìÇ Source: /home/user/Thanish/test/in/

üìç Dataset: IRH | 1/27
2025-12-24 17:27:25.630263 ‚Üí Processing: H2
2025-12-24 17:27:31.822113 ‚Üí Processing: H12
2025-12-24 17:27:37.809177 ‚Üí Processing: H18
2025-12-24 17:27:43.439307 ‚Üí Processing: H9
‚úÖ Saved: IRH-test-1H.csv

üìç Dataset: SAVE | 2/27
2025-12-24 17:27:49.181521 ‚Üí Processing: 956615418
2025-12-24 17:28:03.972852 ‚Üí Processing: 956610544
2025-12-24 17:28:18.334782 ‚Üí Processing: 956640760
2025-12-24 17:28:32.856457 ‚Üí Processing: 956662363
2025-12-24 17:28:47.448849 ‚Üí Processing: 956621519
2025-12-24 17:29:01.956632 ‚Üí Processing: 956621923
2025-12-24 17:29:16.200109 ‚Üí Processing: 956619415
2025-12-24 17:29:31.206472 ‚Üí Processing: 956660046
2025-12-24 17:29:45.557976 ‚Üí Processing: 956600128
2025-12-24 17:29:58.671073 ‚Üí Processing: 956619369
2025-12-24 17:30:12.376877 ‚Üí Processing: 956625058
2025-12-24 17:30:25.461248 ‚Üí Processing: 956661592
2025-12-24 17:30:38.193590 

In [6]:
# ==========================================
# üìä METRICS
# ==========================================
print("\nüìä Computing metrics...")
for config in DATA_CONFIG:
    dist_type = config["type"]
    forecast_base_dir = config["output_forecast_dir"]
    results_base_dir = config["output_results_dir"]
    
    if not os.path.exists(forecast_base_dir): continue
        
    all_results = []
    dir_list = os.listdir(forecast_base_dir)

    for dataset in dir_list:
        files_list = glob.glob(f"{forecast_base_dir}/{dataset}/*.csv")
        if not files_list: continue
            
        for filename in files_list:
            try:
                res = pd.read_csv(filename)
                metrics_all = []
                for g, data in res.groupby(["building"]):
                    data = data.dropna()
                    data = data[data.y_pred >= 0]
                    if data.empty: continue

                    rmse = root_mean_squared_error(data.y_true, data.y_pred)
                    mae = mean_absolute_error(data.y_true, data.y_pred)
                    mse = mean_squared_error(data.y_true, data.y_pred)
                    nrmse = rmse / (data.y_true.mean() + 1e-6)
                    
                    evaluator = RegressionMetric(data.y_true.to_list(), data.y_pred.to_list())
                    nrmse_eve = evaluator.normalized_root_mean_square_error()
                    smape = evaluator.symmetric_mean_absolute_percentage_error()

                    metrics = pd.DataFrame({
                        "building_name": [g], "mae": [mae], "mse": [mse], "rmse": [rmse],
                        "nrmse": [nrmse], "nrmse_eve": [nrmse_eve], "sMAPE": [smape],
                        "filename": [os.path.basename(filename)], "dataset": [dataset]
                    })
                    metrics_all.append(metrics)

                if metrics_all:
                    metrics_all_df = pd.concat(metrics_all, ignore_index=True)
                    out_path = os.path.join(results_base_dir, dataset, os.path.basename(filename))
                    os.makedirs(os.path.dirname(out_path), exist_ok=True)
                    metrics_all_df.to_csv(out_path, index=False)
                    all_results.append(metrics_all_df)
            except Exception as e:
                print(f"Metric calculation failed for {filename}: {e}")

    if all_results:
        metrics_all_files_df = pd.concat(all_results, ignore_index=True)
        summary_path = os.path.join(results_base_dir, "chronos_metrics_summary.csv")
        metrics_all_files_df.to_csv(summary_path, index=False)
        
        cols = ['dataset', 'nrmse']
        res_agg = metrics_all_files_df[cols].groupby(['dataset']).agg({'nrmse': ['median']})
        final_csv_name = f"{dist_type}_distribution_CHRONOS.csv"
        res_agg.mul(100).round(2).to_csv(final_csv_name)
        print(f"üìú Final aggregated report saved to: {final_csv_name}")
        print(res_agg.mul(100).round(2))

print("\nüéâ Done.")


üìä Computing metrics...


  result = rmse / y_pred.std(axis=0)
  result = rmse / y_pred.std(axis=0)


üìú Final aggregated report saved to: in_distribution_CHRONOS.csv
            nrmse
           median
dataset          
BDG-2        7.66
DESM       119.51
DGS         16.08
DTH         32.10
ECCC        46.82
ENERTALK    77.26
Enernoc      4.47
GoiEner    160.75
HES        164.60
HSG          0.33
HUE        125.08
IBlend      12.51
IRH         69.25
LEC        104.28
NEEA        94.88
NESEMP     102.85
Norwegian   39.25
PES         75.87
PSS         44.62
Plegma     153.28
RSL        120.48
SAVE         1.91
SGSC        74.95
SKC         16.80
UKST        87.83
UNICON      15.26
iFlex       45.86
üìú Final aggregated report saved to: out_distribution_CHRONOS.csv
                   nrmse
                  median
dataset                 
CEEW               90.65
ECWM               27.78
HONDA-Smart-Home   12.64
IPC-Commercial     15.13
MIHEC             168.59
NDB               117.71
RHC                84.58
RKP                29.51
SFAC               85.80
fIEECe             49.74
