<a href="https://colab.research.google.com/github/Mehdislik/Traffic-Predict/blob/main/traffic-prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install dateparser

Collecting dateparser
  Downloading dateparser-1.2.2-py3-none-any.whl.metadata (29 kB)
Downloading dateparser-1.2.2-py3-none-any.whl (315 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/315.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m315.5/315.5 kB[0m [31m26.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: dateparser
Successfully installed dateparser-1.2.2


In [None]:
# data cleaning
from functools import lru_cache

import pandas as pd
import dateparser

# Raw export: semicolon-separated, ISO-8859-1 encoded.
df = pd.read_csv("histo_trafic.csv", encoding="ISO-8859-1", sep=";")
df.columns = df.columns.str.strip().str.lower()

# .copy() gives an independent frame, so the column assignments below do not
# write into a slice of the full frame (avoids SettingWithCopyWarning).
df = df[["secteur", "site", "tstamp", "trafic_mbps"]].copy()

# Traffic values are written with a decimal comma; normalise to a dot before
# converting. Anything that still fails to parse becomes NaN and is dropped below.
df["trafic_mbps"] = df["trafic_mbps"].astype(str).str.replace(",", ".", regex=False)
df["trafic_mbps"] = pd.to_numeric(df["trafic_mbps"], errors="coerce")


@lru_cache(maxsize=None)
def _parse_french_timestamp(raw):
    """Parse one raw timestamp string with dateparser's French locale.

    Memoized: each distinct string is parsed only once, which matters because
    dateparser is slow and the same timestamp text can occur on many rows.
    Returns whatever dateparser.parse returns (None when parsing fails).
    """
    return dateparser.parse(raw, languages=['fr'])


df["tstamp"] = df["tstamp"].apply(lambda x: _parse_french_timestamp(str(x)))

# Drop rows whose timestamp or traffic value could not be parsed.
df = df.dropna(subset=["tstamp", "trafic_mbps"])
df = df.sort_values(by=["secteur", "tstamp"])
print(df.head())
df.to_csv("histo_trafic_cleaned.csv", index=False, encoding="utf-8")


      secteur    site     tstamp  trafic_mbps
1575  T36870A  T36870 2018-11-12     0.263481
1658  T36870A  T36870 2018-11-19     0.066913
1741  T36870A  T36870 2018-11-26     0.062066
1824  T36870A  T36870 2018-12-03     0.084320
1907  T36870A  T36870 2018-12-10     0.047759


In [None]:
!pip install statsmodels psutil -q

In [None]:
# 2. IMPORTS
from statsmodels.tsa.arima.model import ARIMA
import numpy as np
import pandas as pd
import time
import psutil
import warnings
warnings.filterwarnings('ignore')
from sklearn.metrics import mean_squared_error

# 3. UTILITY FUNCTIONS
def count_arima_operations(p, d, q, n_obs):
    """
    Analytical operation-count estimate for ARIMA(p,d,q):
    O(n · (p² + q²))
    with n the series length, p the AR order and q the MA order.
    The differencing order d is accepted but does not enter the estimate.
    """
    # Sum of the squared AR and MA orders, scaled by the number of observations
    squared_orders = sum(order ** 2 for order in (p, q))
    return n_obs * squared_orders

def safe_cpu_measure():
    """CPU measurement adapted for Colab.

    Returns:
        The value of psutil.cpu_percent sampled with interval=0.1,
        or 0 if the measurement raises an error.
    """
    try:
        # Measure over short period to avoid conflicts
        return psutil.cpu_percent(interval=0.1)
    except Exception:
        # Best-effort fallback. Catching Exception instead of a bare except
        # lets KeyboardInterrupt / SystemExit propagate, so the cell can
        # still be interrupted while a measurement is in progress.
        return 0

# 4. DATA VERIFICATION
# Relies on `df` produced by the data-cleaning cell.
print(" Data verification...")
print(f"DataFrame shape: {df.shape}")
print(f"Columns: {list(df.columns)}")
print(f"Number of unique sectors: {df['secteur'].nunique()}")
print(f"Data sample:")
print(df.head(3))

# 5. MAIN ARIMA ANALYSIS
# One ARIMA(2,1,2) model is fitted per sector on all points but the last;
# the last point is held out and compared with the one-step-ahead forecast.
print("\n Starting ARIMA analysis...")

arima_results = []
grouped = df.groupby("secteur")
total_sectors = len(grouped)

# Manual progress bar
processed = 0
errors = 0

for sector, data in grouped:
    processed += 1

    # Progress display
    if processed % 100 == 0 or processed in [1, 10, 50]:
        print(f" Progress: {processed}/{total_sectors} sectors ({processed/total_sectors*100:.1f}%)")

    # Data preparation
    y = data.sort_values("tstamp")["trafic_mbps"].values
    if len(y) <= 10:  # Skip if not enough data
        continue

    # Hold out the most recent observation as the single test point
    train, test = y[:-1], y[-1]

    try:
        # STEP 1: FITTING (training)
        cpu_before_fit = safe_cpu_measure()
        fit_start = time.perf_counter()  # More precise than time.time()

        model = ARIMA(train, order=(2,1,2))
        fitted_model = model.fit(method_kwargs={"warn_convergence": False})

        fit_time = time.perf_counter() - fit_start
        cpu_after_fit = safe_cpu_measure()

        # STEP 2: INFERENCE (pure prediction)
        cpu_before_inf = safe_cpu_measure()
        inference_start = time.perf_counter()

        pred = float(fitted_model.forecast()[0])

        inference_time = time.perf_counter() - inference_start
        cpu_after_inf = safe_cpu_measure()

        # METRICS CALCULATION
        # With a single test point the RMSE equals the absolute error
        rmse = float(np.sqrt(mean_squared_error([test], [pred])))

        #  FLOPS calculation: O(n · (p² + q²))
        flops = count_arima_operations(p=2, d=1, q=2, n_obs=len(train))

        # Power (average CPU measurements)
        avg_cpu_fit = (cpu_before_fit + cpu_after_fit) / 2
        avg_cpu_inf = (cpu_before_inf + cpu_after_inf) / 2

        # Store results
        arima_results.append({
            "sector": sector,
            "actual_value": float(test),
            "predicted_value": pred,
            "RMSE": rmse,
            "fit_time_s": fit_time,
            "inference_time_s": inference_time,
            "total_time_s": fit_time + inference_time,
            "FLOPS_estimated": flops,
            "cpu_fit_percent": avg_cpu_fit,
            "cpu_inference_percent": avg_cpu_inf,
            "n_observations": len(train)
        })

    except Exception as e:
        errors += 1
        if errors <= 5:  # Show only first 5 errors
            print(f" Error sector {sector}: {str(e)[:50]}...")
        continue

# 6. RESULTS ANALYSIS
# Skipped sectors count neither as success nor as error, so the denominator
# is zero when every sector was skipped; report 0.0 instead of raising.
attempted = len(arima_results) + errors
success_rate = len(arima_results) / attempted * 100 if attempted else 0.0

print(f"\n Analysis completed!")
print(f"   • Successful predictions: {len(arima_results)}")
print(f"   • Errors: {errors}")
print(f"   • Success rate: {success_rate:.1f}%")

if len(arima_results) == 0:
    print(" No successful predictions!")
else:
    # Create final DataFrame
    df_arima = pd.DataFrame(arima_results)

    # GLOBAL METRICS
    # Root of the mean squared per-sector error = RMSE pooled over all sectors
    global_rmse = np.sqrt(np.mean(df_arima["RMSE"]**2))
    avg_inference_time = df_arima["inference_time_s"].mean()
    avg_flops = df_arima["FLOPS_estimated"].mean()
    avg_power_inference = df_arima["cpu_inference_percent"].mean()

    # 7. FINAL RESULTS
    print("\n" + "="*50)
    print("ARIMA RESULTS - PROJECT METRICS")
    print("="*50)
    print(f" Global RMSE: {global_rmse:.4f}")
    print(f"  Average inference time: {avg_inference_time:.6f} seconds")
    print(f" Average FLOPS: {avg_flops:.0f} operations")
    print(f" Average CPU (inference): {avg_power_inference:.1f}%")
    print(f" Number of antennas: {len(df_arima)}")

    # Detailed statistics
    print(f"\n Detailed statistics:")
    print(f"   • RMSE min/max: {df_arima['RMSE'].min():.4f} / {df_arima['RMSE'].max():.4f}")
    print(f"   • Inference time min/max: {df_arima['inference_time_s'].min():.6f}s / {df_arima['inference_time_s'].max():.6f}s")

    # 8. SAVE RESULTS
    df_arima_sorted = df_arima.sort_values("RMSE")
    df_arima_sorted.to_csv("ARIMA_complete_results.csv", index=False)
    print(f"\n File saved: ARIMA_complete_results.csv")

    # 9. SUMMARY FOR FINAL COMPARISON
    arima_summary = {
        "model": "ARIMA(2,1,2)",
        "global_rmse": global_rmse,
        "avg_inference_time_s": avg_inference_time,
        "avg_flops": avg_flops,
        "avg_power_cpu_percent": avg_power_inference,
        "n_predictions": len(df_arima),
        "success_rate_percent": success_rate
    }

    print(f"\n Summary stored in 'arima_summary' for comparison!")

    # Preview of best/worst predictions with timing
    print(f"\n Top 5 best predictions (lowest RMSE):")
    print(df_arima_sorted[["sector", "actual_value", "predicted_value", "RMSE", "inference_time_s"]].head().to_string(index=False))

    print(f"\n Top 5 worst predictions (highest RMSE):")
    print(df_arima_sorted[["sector", "actual_value", "predicted_value", "RMSE", "inference_time_s"]].tail().to_string(index=False))

print(f"\n ARIMA analysis complete!")

🔍 Data verification...
DataFrame shape: (24486, 4)
Columns: ['secteur', 'site', 'tstamp', 'trafic_mbps']
Number of unique sectors: 86
Data sample:
      secteur    site     tstamp  trafic_mbps
1575  T36870A  T36870 2018-11-12     0.263481
1658  T36870A  T36870 2018-11-19     0.066913
1741  T36870A  T36870 2018-11-26     0.062066

🚀 Starting ARIMA analysis...
📊 Progress: 1/86 sectors (1.2%)
📊 Progress: 10/86 sectors (11.6%)
📊 Progress: 50/86 sectors (58.1%)

✅ Analysis completed!
   • Successful predictions: 86
   • Errors: 0
   • Success rate: 100.0%

📈 ARIMA RESULTS - PROJECT METRICS
🎯 Global RMSE: 7.7527
⏱️  Average inference time: 0.001493 seconds
🔥 Average FLOPS: 2270 operations
⚡ Average CPU (inference): 5.3%
📊 Number of antennas: 86

📋 Detailed statistics:
   • RMSE min/max: 0.0222 / 32.9842
   • Inference time min/max: 0.001254s / 0.002572s

💾 File saved: ARIMA_complete_results.csv

🎯 Summary stored in 'arima_summary' for comparison!

🏆 Top 5 best predictions (lowest RMSE):
 sec

In [None]:
# 1. INSTALLATIONS
!pip install tensorflow psutil -q

In [None]:
# 2. IMPORTS
import numpy as np
import pandas as pd
import time
import psutil
import warnings
warnings.filterwarnings('ignore')
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

# 3. UTILITY FUNCTIONS
def make_sequences(arr, window=5):
    """Create sliding-window sequences for LSTM training.

    Each sample is `window` consecutive values; its target is the value that
    immediately follows the window.

    Args:
        arr: 1-D sequence of values (list or NumPy array).
        window: Number of past values per sample.

    Returns:
        (X, y) where X has shape (n_samples, window, 1) and y has shape
        (n_samples,), with n_samples = len(arr) - window. When the input is
        too short to form a single sample, empty arrays with those shapes are
        returned instead of raising on the reshape.
    """
    arr = np.asarray(arr)
    n_samples = len(arr) - window

    if n_samples <= 0:
        # Not enough points for even one (window, target) pair
        return (np.empty((0, window, 1), dtype=arr.dtype),
                np.empty((0,), dtype=arr.dtype))

    X = np.stack([arr[i:i + window] for i in range(n_samples)])
    y = arr[window:].copy()  # targets are exactly the values after each window
    # Trailing axis of size 1 = single input feature per timestep
    X = X.reshape((n_samples, window, 1))
    return X, y

def count_lstm_flops(model, sequence_length, batch_size=1):
    """
    Calculate FLOPS for LSTM using analytical formula:
    FLOPS ≈ 4 × (h² + h·x + h) × timesteps
    where h is hidden units, x is input features, timesteps is sequence length

    Args:
        model: Model whose first layer of class name "LSTM" supplies h through
            its integer `units` attribute. If no such layer is found (or the
            object has no `layers`), h falls back to 50.
        sequence_length: Number of timesteps per input sequence.
        batch_size: Accepted for interface compatibility; not used by the estimate.

    Returns:
        Estimated operation count for one sequence.
    """
    default_units = 50  # Fallback h: the LSTM(50, ...) layer used in this notebook
    input_size = 1      # Single feature (x)
    timesteps = sequence_length

    # Read h from the model so the estimate follows the actual architecture
    # instead of silently going stale if the layer width is changed.
    lstm_units = default_units
    for layer in getattr(model, "layers", None) or []:
        units = getattr(layer, "units", None)
        # Match on class name: Dense layers also expose `units`
        if type(layer).__name__ == "LSTM" and isinstance(units, int):
            lstm_units = units
            break

    # Analytical formula from project specification
    # FLOPS ≈ 4 × (h² + h·x + h) × timesteps
    flops = 4 * (lstm_units**2 + lstm_units * input_size + lstm_units) * timesteps

    return flops

def safe_cpu_measure():
    """CPU measurement.

    Returns:
        The value of psutil.cpu_percent sampled with interval=0.1,
        or 0 if the measurement raises an error.
    """
    try:
        return psutil.cpu_percent(interval=0.1)
    except Exception:
        # Best-effort fallback. Catching Exception instead of a bare except
        # lets KeyboardInterrupt / SystemExit propagate, so the cell can
        # still be interrupted while a measurement is in progress.
        return 0

# 4. LSTM PARAMETERS
window_size = 5
epochs = 50
batch_size = 8
SEED = 42

# Seed the Python, NumPy and TensorFlow RNGs so weight initialisation and batch
# shuffling are repeatable from run to run.
tf.keras.utils.set_random_seed(SEED)

# Relies on `df` produced by the data-cleaning cell.
print("Data verification...")
print(f"DataFrame shape: {df.shape}")
print(f"Columns: {list(df.columns)}")
print(f"Number of unique sectors: {df['secteur'].nunique()}")

# 5. MAIN LSTM ANALYSIS
# One small LSTM is trained per sector on sliding windows; the last point of
# each series is held out and compared with the one-step-ahead prediction.
print(f"\n Starting LSTM analysis...")
print(f"   • Window size: {window_size}")
print(f"   • Epochs: {epochs}")
print(f"   • Batch size: {batch_size}")

lstm_results = []
grouped = df.groupby("secteur")
total_sectors = len(grouped)

processed = 0
errors = 0

# Suppress TensorFlow warnings
tf.get_logger().setLevel('ERROR')

for sector, data in grouped:
    processed += 1

    # Progress display
    if processed % 50 == 0 or processed in [1, 10, 25]:
        print(f"Progress: {processed}/{total_sectors} sectors ({processed/total_sectors*100:.1f}%)")

    # Data preparation
    serie = data.sort_values("tstamp")["trafic_mbps"].values
    if len(serie) <= window_size + 1:
        continue

    try:
        # Data scaling
        # Fit the scaler on the training portion only (everything except the
        # held-out last point) so the test value does not leak into the
        # scaling range; then apply that scaling to the whole series.
        scaler = MinMaxScaler()
        scaler.fit(serie[:-1].reshape(-1,1))
        y_scaled = scaler.transform(serie.reshape(-1,1)).flatten()

        # Create sequences
        X, y = make_sequences(y_scaled, window=window_size)
        if len(y) < 2:
            continue

        # Train/test split (last point for testing)
        X_train, y_train = X[:-1], y[:-1]
        X_test, y_test = X[-1:], y[-1:]

        # STEP 1: MODEL CREATION AND TRAINING
        cpu_before_train = safe_cpu_measure()
        train_start = time.perf_counter()

        model = Sequential([
            LSTM(50, activation='relu', input_shape=(window_size, 1)),
            Dense(1)
        ])
        model.compile(optimizer='adam', loss='mse')

        # Training (with progress suppressed)
        model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, verbose=0)

        train_time = time.perf_counter() - train_start
        cpu_after_train = safe_cpu_measure()

        # STEP 2: INFERENCE (pure prediction)
        cpu_before_inf = safe_cpu_measure()
        inference_start = time.perf_counter()

        y_pred = model.predict(X_test, verbose=0)

        inference_time = time.perf_counter() - inference_start
        cpu_after_inf = safe_cpu_measure()

        # STEP 3: METRICS CALCULATION
        # Inverse scaling
        y_test_inv = scaler.inverse_transform(y_test.reshape(-1,1)).flatten()[0]
        y_pred_inv = scaler.inverse_transform(y_pred.reshape(-1,1)).flatten()[0]

        # RMSE (single test point, so this equals the absolute error)
        rmse = float(np.sqrt(mean_squared_error([y_test_inv], [y_pred_inv])))

        # CORRECTED FLOPS estimation: 4 × (h² + h·x + h) × timesteps
        flops = count_lstm_flops(model, window_size)

        # Power (CPU averages)
        avg_cpu_train = (cpu_before_train + cpu_after_train) / 2
        avg_cpu_inf = (cpu_before_inf + cpu_after_inf) / 2

        # Store results
        lstm_results.append({
            "sector": sector,
            "actual_value": float(y_test_inv),
            "predicted_value": float(y_pred_inv),
            "RMSE": rmse,
            "train_time_s": train_time,
            "inference_time_s": inference_time,
            "total_time_s": train_time + inference_time,
            "FLOPS_estimated": flops,
            "cpu_train_percent": avg_cpu_train,
            "cpu_inference_percent": avg_cpu_inf,
            "n_observations": len(serie),
            "sequence_length": window_size
        })

    except Exception as e:
        errors += 1
        if errors <= 5:  # Show only first 5 errors
            print(f"Error sector {sector}: {str(e)[:50]}...")
        continue

    finally:
        # Release Keras state after every attempt, including failed ones, so
        # memory does not accumulate across sectors.
        tf.keras.backend.clear_session()

# 6. RESULTS ANALYSIS
# Skipped sectors count neither as success nor as error, so the denominator
# is zero when every sector was skipped; report 0.0 instead of raising.
attempted = len(lstm_results) + errors
success_rate = len(lstm_results) / attempted * 100 if attempted else 0.0

print(f"\n LSTM analysis completed!")
print(f"   • Successful predictions: {len(lstm_results)}")
print(f"   • Errors: {errors}")
print(f"   • Success rate: {success_rate:.1f}%")

if len(lstm_results) == 0:
    print(" No successful predictions!")
else:
    # Create final DataFrame
    df_lstm = pd.DataFrame(lstm_results)

    # GLOBAL METRICS (as requested in project)
    # Root of the mean squared per-sector error = RMSE pooled over all sectors
    global_rmse = np.sqrt(np.mean(df_lstm["RMSE"]**2))
    avg_inference_time = df_lstm["inference_time_s"].mean()
    avg_train_time = df_lstm["train_time_s"].mean()
    avg_flops = df_lstm["FLOPS_estimated"].mean()
    avg_power_inference = df_lstm["cpu_inference_percent"].mean()

    # 7. FINAL RESULTS
    print("\n" + "="*50)
    print(" LSTM RESULTS - PROJECT METRICS")
    print("="*50)
    print(f" Global RMSE: {global_rmse:.4f}")
    print(f"  Average inference time: {avg_inference_time:.6f} seconds")
    print(f"  Average training time: {avg_train_time:.3f} seconds")
    print(f" Average FLOPS: {avg_flops:.0f} operations")
    print(f" Average CPU (inference): {avg_power_inference:.1f}%")
    print(f" Number of antennas: {len(df_lstm)}")

    # Detailed statistics
    print(f"\n Detailed statistics:")
    print(f"   • RMSE min/max: {df_lstm['RMSE'].min():.4f} / {df_lstm['RMSE'].max():.4f}")
    print(f"   • Inference time min/max: {df_lstm['inference_time_s'].min():.6f}s / {df_lstm['inference_time_s'].max():.6f}s")
    print(f"   • Training time min/max: {df_lstm['train_time_s'].min():.3f}s / {df_lstm['train_time_s'].max():.3f}s")

    # 8. SAVE RESULTS
    df_lstm_sorted = df_lstm.sort_values("RMSE")
    df_lstm_sorted.to_csv("LSTM_complete_results.csv", index=False)
    print(f"\n File saved: LSTM_complete_results.csv")

    # 9. SUMMARY FOR FINAL COMPARISON
    lstm_summary = {
        "model": "LSTM(50_units)",
        "global_rmse": global_rmse,
        "avg_inference_time_s": avg_inference_time,
        "avg_train_time_s": avg_train_time,
        "avg_flops": avg_flops,
        "avg_power_cpu_percent": avg_power_inference,
        "n_predictions": len(df_lstm),
        "success_rate_percent": success_rate,
        "window_size": window_size,
        "epochs": epochs
    }

    print(f"\n Summary stored in 'lstm_summary' for comparison!")

    # Preview of best/worst predictions with timing
    print(f"\n Top 5 best predictions (lowest RMSE):")
    print(df_lstm_sorted[["sector", "actual_value", "predicted_value", "RMSE", "inference_time_s"]].head().to_string(index=False))

    print(f"\n Top 5 worst predictions (highest RMSE):")
    print(df_lstm_sorted[["sector", "actual_value", "predicted_value", "RMSE", "inference_time_s"]].tail().to_string(index=False))

    # Model complexity info
    print(f"\n Model complexity:")
    print(f"   • LSTM units: 50")
    print(f"   • Input sequence length: {window_size}")
    print(f"   • Training epochs: {epochs}")
    print(f"   • Estimated parameters: ~10,000")

print(f"\n LSTM analysis complete!")


🔍 Data verification...
DataFrame shape: (24486, 4)
Columns: ['secteur', 'site', 'tstamp', 'trafic_mbps']
Number of unique sectors: 86

🚀 Starting LSTM analysis...
   • Window size: 5
   • Epochs: 50
   • Batch size: 8
📊 Progress: 1/86 sectors (1.2%)
📊 Progress: 10/86 sectors (11.6%)
📊 Progress: 25/86 sectors (29.1%)
📊 Progress: 50/86 sectors (58.1%)

✅ LSTM analysis completed!
   • Successful predictions: 86
   • Errors: 0
   • Success rate: 100.0%

📈 LSTM RESULTS - PROJECT METRICS
🎯 Global RMSE: 7.6592
⏱️  Average inference time: 0.458046 seconds
🏋️  Average training time: 8.878 seconds
🔥 Average FLOPS: 52000 operations
⚡ Average CPU (inference): 1.5%
📊 Number of antennas: 86

📋 Detailed statistics:
   • RMSE min/max: 0.0382 / 29.8227
   • Inference time min/max: 0.448031s / 0.468795s
   • Training time min/max: 8.553s / 9.028s

💾 File saved: LSTM_complete_results.csv

🎯 Summary stored in 'lstm_summary' for comparison!

🏆 Top 5 best predictions (lowest RMSE):
 sector  actual_value  pr

In [None]:
# 1. INSTALLATIONS
!pip install git+https://github.com/amazon-science/chronos-forecasting.git torch psutil -q

In [None]:
# === CHRONOS BOLT CODE WITH COMPLETE PROJECT METRICS ===

# 2. IMPORTS
import time
import torch
import numpy as np
import pandas as pd
import psutil
import warnings
warnings.filterwarnings('ignore')
from chronos import BaseChronosPipeline

# 3. UTILITY FUNCTIONS
def rmse_np(y_true, y_pred):
    """Root-mean-square error between two array-likes, using NumPy only.

    Both inputs are converted to float64 arrays before the element-wise
    difference is taken; the result is returned as a plain Python float.
    """
    actual, forecast = (np.asarray(values, dtype=np.float64) for values in (y_true, y_pred))
    residuals = actual - forecast
    mean_squared = np.mean(np.square(residuals))
    return float(np.sqrt(mean_squared))

def safe_cpu_measure():
    """CPU measurement.

    Returns:
        The value of psutil.cpu_percent sampled with interval=0.1,
        or 0 if the measurement raises an error.
    """
    try:
        return psutil.cpu_percent(interval=0.1)
    except Exception:
        # Best-effort fallback. Catching Exception instead of a bare except
        # lets KeyboardInterrupt / SystemExit propagate, so the cell can
        # still be interrupted while a measurement is in progress.
        return 0

def count_transformer_flops(model, sequence_length, vocab_size_approx=32000):
    """
    Estimate FLOPS for Transformer-based model (Chronos).

    Primary estimate: 2 * (total parameter count) * sequence_length, with the
    parameter count read from model.parameters().
    Fallback (when the parameter count cannot be obtained): look for a size
    keyword ("tiny", "mini", "small", "base") in str(model) and use an
    approximate parameter count for it; 1_000_000 if no keyword matches.

    Args:
        model: Object exposing parameters() whose items expose numel();
            anything else goes through the name-based fallback.
        sequence_length: Number of context timesteps.
        vocab_size_approx: Accepted for interface compatibility; not used.

    Returns:
        Estimated operation count as an int.
    """
    try:
        # Get model parameters
        total_params = sum(p.numel() for p in model.parameters())

        # Rough estimation: 2 * params * sequence_length
        estimated_flops = 2 * total_params * sequence_length
        return int(estimated_flops)
    except Exception:
        # Catch Exception rather than everything, so KeyboardInterrupt /
        # SystemExit are not silently turned into a fallback estimate.

        # Fallback estimation based on model size
        model_size_map = {
            "tiny": 8_000_000,    # ~8M parameters
            "mini": 20_000_000,   # ~20M parameters
            "small": 46_000_000,  # ~46M parameters
            "base": 200_000_000,  # ~200M parameters
        }

        # Extract size from model name (string form computed once, not per key)
        model_text = str(model).lower()
        for size, params in model_size_map.items():
            if size in model_text:
                return int(2 * params * sequence_length)

        return 1_000_000  # Default fallback

def get_model_size_mb(model):
    """Estimate the in-memory size of a model's parameters in MB (MiB).

    Each parameter tensor contributes numel() * element_size() bytes, so the
    figure follows the dtype the model was actually loaded in (e.g. 2 bytes
    per value for bfloat16) instead of assuming 4-byte float32 everywhere.

    Args:
        model: Object exposing parameters() whose items expose numel() and
            element_size().

    Returns:
        Size in MB, or 0 if the parameters cannot be inspected.
    """
    try:
        total_bytes = sum(p.numel() * p.element_size() for p in model.parameters())
        size_mb = total_bytes / (1024 ** 2)
        return size_mb
    except Exception:
        # Best-effort: report 0 rather than abort the benchmark, while still
        # letting KeyboardInterrupt / SystemExit through.
        return 0

# 4. DATA VERIFICATION
print(" Data verification...")
# Reuse the in-memory frame when an earlier cell created it; otherwise reload
# the cleaned CSV written by the data-cleaning cell.
try:
    df
except NameError:
    df = pd.read_csv("histo_trafic_cleaned.csv", parse_dates=["tstamp"])
    print(f"DataFrame loaded from CSV: {df.shape}")
else:
    print(f"DataFrame loaded: {df.shape}")

# Check required columns
required_cols = {"secteur", "tstamp", "trafic_mbps"}
missing = required_cols - set(df.columns)
if len(missing) > 0:
    raise ValueError(f"Missing columns in df: {missing}")

for info_line in (
    f"Columns: {list(df.columns)}",
    f"Number of unique sectors: {df['secteur'].nunique()}",
):
    print(info_line)

# 5. MODEL CONFIGURATION
# Chronos-Bolt checkpoints to benchmark, in the order they are run
models = [
    "amazon/chronos-bolt-tiny",
    "amazon/chronos-bolt-mini",
    "amazon/chronos-bolt-small",
    "amazon/chronos-bolt-base",
]

# Run on the GPU when one is visible; bfloat16 is only requested on CUDA
if torch.cuda.is_available():
    DEVICE_MAP = "cuda"
else:
    DEVICE_MAP = "cpu"
USE_BF16 = DEVICE_MAP == "cuda"

for config_line in (
    f"\n🔧 Configuration:",
    f"   • Device: {DEVICE_MAP}",
    f"   • BF16: {USE_BF16}",
    f"   • Models to test: {len(models)}",
):
    print(config_line)

# 6. MAIN CHRONOS ANALYSIS FUNCTION
def run_bolt_for_model(model_name: str, df: pd.DataFrame, min_len: int = 6) -> dict:
    """Run one Chronos model over every sector with a last-point hold-out.

    For each sector the series is sorted by ``tstamp``; all values except the
    last form the context, and the median-quantile one-step forecast is
    compared with the held-out last value.

    Args:
        model_name: Checkpoint name passed to ``BaseChronosPipeline.from_pretrained``.
        df: Frame with ``secteur``, ``tstamp`` and ``trafic_mbps`` columns.
        min_len: Sectors with fewer than ``min_len + 1`` points are skipped.

    Returns:
        dict with
        - ``results_df``: one row per attempted sector, sorted by RMSE
          (failed rows last),
        - ``summary``: aggregate metrics for this model,
        - ``successful_df``: the rows that produced a prediction.
    """
    print(f"\n Loading {model_name} on {DEVICE_MAP}...")

    # Model loading with timing
    load_start = time.perf_counter()
    cpu_before_load = safe_cpu_measure()

    pipe = BaseChronosPipeline.from_pretrained(
        model_name,
        device_map=DEVICE_MAP,
        torch_dtype=(torch.bfloat16 if USE_BF16 else None),
    )

    load_time = time.perf_counter() - load_start
    cpu_after_load = safe_cpu_measure()
    model_device = next(pipe.model.parameters()).device

    # Model info
    model_size_mb = get_model_size_mb(pipe.model)
    print(f"   • Model size: {model_size_mb:.1f} MB")
    print(f"   • Load time: {load_time:.3f}s")

    # Quantiles & median index
    # Falls back to nine deciles (median at index 4) if the pipeline does not
    # expose a `quantiles` attribute.
    quantiles = getattr(pipe, "quantiles", [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9])
    median_idx = quantiles.index(0.5) if 0.5 in quantiles else 4

    # Column layout of a result row; used to build a well-formed empty frame
    # when no sector is long enough to be evaluated.
    result_columns = [
        "sector", "actual_value", "predicted_value", "RMSE",
        "inference_time_s", "FLOPS_estimated", "cpu_inference_percent",
        "sequence_length", "n_observations",
    ]

    results = []

    grouped = df.groupby("secteur")
    total_sectors = len(grouped)
    processed = 0
    errors = 0

    print(f"   • Processing {total_sectors} sectors...")

    for sector, data in grouped:
        processed += 1

        if processed % 100 == 0 or processed in [1, 10, 50]:
            print(f"     Progress: {processed}/{total_sectors} ({processed/total_sectors*100:.1f}%)")

        y = data.sort_values("tstamp")["trafic_mbps"].values.astype(np.float32)
        if len(y) < (min_len + 1):
            continue

        # Hold out the most recent observation as the single test point
        true_last = float(y[-1])
        history = y[:-1]

        # Prepare context
        context = torch.tensor(history, dtype=torch.float32).to(model_device)

        try:
            # INFERENCE
            cpu_before_inf = safe_cpu_measure()
            inference_start = time.perf_counter()

            # The context is passed positionally so the call does not depend
            # on the keyword name of predict()'s first parameter.
            y_hat = pipe.predict(
                context,
                prediction_length=1,
                limit_prediction_length=True,
            )  # (1, 9, 1)

            inference_time = time.perf_counter() - inference_start
            cpu_after_inf = safe_cpu_measure()

            # Extract prediction (median quantile)
            pred_last = float(y_hat[0, median_idx, 0].item())

            # Calculate metrics (single point, so RMSE equals absolute error)
            rmse_val = rmse_np([true_last], [pred_last])

            # FLOPS estimation
            sequence_len = len(history)
            flops = count_transformer_flops(pipe.model, sequence_len)

            # CPU power measurement
            avg_cpu_inf = (cpu_before_inf + cpu_after_inf) / 2

            # Store results
            results.append({
                "sector": sector,
                "actual_value": true_last,
                "predicted_value": pred_last,
                "RMSE": rmse_val,
                "inference_time_s": inference_time,
                "FLOPS_estimated": flops,
                "cpu_inference_percent": avg_cpu_inf,
                "sequence_length": sequence_len,
                "n_observations": len(y)
            })

        except Exception as e:
            errors += 1
            if errors <= 3:  # Show first 3 errors
                print(f"     Error sector {sector}: {str(e)[:40]}...")

            # Keep a placeholder row so failed sectors remain visible in the CSV
            results.append({
                "sector": sector,
                "actual_value": true_last,
                "predicted_value": None,
                "RMSE": None,
                "inference_time_s": None,
                "FLOPS_estimated": None,
                "cpu_inference_percent": None,
                "sequence_length": len(history),
                "n_observations": len(y),
                "error": str(e)
            })

    # Create results DataFrame
    # With no rows at all, pd.DataFrame([]) has no columns and the "RMSE"
    # lookups below would raise KeyError; build an empty frame with the
    # expected columns instead.
    if results:
        df_results = pd.DataFrame(results)
    else:
        df_results = pd.DataFrame(columns=result_columns)
    successful = df_results.dropna(subset=["RMSE"])

    # Calculate global metrics
    if not successful.empty:
        global_rmse = rmse_np(successful["actual_value"].values,
                            successful["predicted_value"].values)
        avg_inference_time = successful["inference_time_s"].mean()
        avg_flops = successful["FLOPS_estimated"].mean()
        avg_cpu = successful["cpu_inference_percent"].mean()
    else:
        global_rmse = None
        avg_inference_time = None
        avg_flops = None
        avg_cpu = None

    # Compare against None explicitly: a legitimate value of 0.0 is falsy and
    # would otherwise be reported as "No successful predictions".
    print(f"   Results: {len(successful)} successful predictions")
    print(f"    Global RMSE: {global_rmse:.4f}" if global_rmse is not None else "    No successful predictions")
    print(f"    Avg inference time: {avg_inference_time:.6f}s" if avg_inference_time is not None else "")

    # Show best/worst predictions like other models
    if not successful.empty:
        successful_sorted = successful.sort_values("RMSE")
        print(f"\n    Top 5 best predictions (lowest RMSE):")
        best_5 = successful_sorted[["sector", "actual_value", "predicted_value", "RMSE", "inference_time_s"]].head()
        for _, row in best_5.iterrows():
            print(f"      {row['sector']}: Real={row['actual_value']:.2f}, Pred={row['predicted_value']:.2f}, RMSE={row['RMSE']:.4f}, Time={row['inference_time_s']:.6f}s")

        print(f"\n   Top 5 worst predictions (highest RMSE):")
        worst_5 = successful_sorted[["sector", "actual_value", "predicted_value", "RMSE", "inference_time_s"]].tail()
        for _, row in worst_5.iterrows():
            print(f"      {row['sector']}: Real={row['actual_value']:.2f}, Pred={row['predicted_value']:.2f}, RMSE={row['RMSE']:.4f}, Time={row['inference_time_s']:.6f}s")

    # Skipped sectors count neither as success nor as error, so the
    # denominator is zero when every sector was skipped.
    attempted = len(successful) + errors
    success_rate = len(successful) / attempted * 100 if attempted else 0.0

    # Summary for comparison
    model_summary = {
        "model": model_name.split("/")[-1],  # Extract model name
        "global_rmse": global_rmse,
        "avg_inference_time_s": avg_inference_time,
        "model_load_time_s": load_time,
        "avg_flops": avg_flops,
        "avg_power_cpu_percent": avg_cpu,
        "model_size_mb": model_size_mb,
        "n_predictions": len(successful),
        "success_rate_percent": success_rate,
        "device": str(model_device),
        "bf16_enabled": USE_BF16
    }

    return {
        "results_df": df_results.sort_values("RMSE", na_position="last"),
        "summary": model_summary,
        "successful_df": successful
    }

# 7. RUN ALL CHRONOS MODELS
print(f"\n🎯 Starting analysis of {len(models)} Chronos models...")

all_results = {}    # model name -> per-sector results DataFrame
all_summaries = {}  # model name -> summary dict (or error stub on failure)

for i, model_name in enumerate(models):
    print(f"\n{'='*60}")
    print(f"📈 MODEL {i+1}/{len(models)}: {model_name}")
    print(f"{'='*60}")

    try:
        result = run_bolt_for_model(model_name, df)

        # Store results
        all_results[model_name] = result["results_df"]
        all_summaries[model_name] = result["summary"]

        # Save individual CSV ("/" in the model name is not valid in a file name)
        model_short = model_name.replace("/", "_")
        csv_filename = f"Chronos_{model_short}_complete_results.csv"
        result["results_df"].to_csv(csv_filename, index=False)
        print(f"   💾 Saved: {csv_filename}")

    except Exception as e:
        # A failing model must not stop the remaining ones; record a stub so
        # the failure is still visible in the summaries.
        print(f"    Failed to run {model_name}: {e}")
        all_summaries[model_name] = {
            "model": model_name.split("/")[-1],
            "global_rmse": None,
            "error": str(e)
        }

# 8. FINAL COMPARISON AND RESULTS
print(f"\n{'='*60}")
print(" CHRONOS BOLT MODELS - FINAL COMPARISON")
print(f"{'='*60}")

# Create comparison DataFrame (only models that produced a global RMSE)
comparison_data = []
for model_name, summary in all_summaries.items():
    if summary.get("global_rmse") is not None:
        comparison_data.append({
            "Model": summary["model"],
            "Global_RMSE": f"{summary['global_rmse']:.4f}",
            "Avg_Inference_Time_s": f"{summary['avg_inference_time_s']:.6f}",
            "Avg_FLOPS": f"{summary['avg_flops']:.0f}",
            "Model_Size_MB": f"{summary['model_size_mb']:.1f}",
            "Success_Rate_%": f"{summary['success_rate_percent']:.1f}",
            "N_Predictions": summary["n_predictions"]
        })

# Store summaries for final project comparison.
# Bound unconditionally so `chronos_summaries` exists even when no model
# produced a result (previously it was only defined on the success path).
chronos_summaries = all_summaries

if comparison_data:
    df_comparison = pd.DataFrame(comparison_data)
    print(df_comparison.to_string(index=False))

    # Save comparison
    df_comparison.to_csv("Chronos_models_comparison.csv", index=False)
    print(f"\n Comparison saved: Chronos_models_comparison.csv")

    print(f"\n All summaries stored in 'chronos_summaries' for final comparison!")
else:
    print(" No successful model runs for comparison.")

print(f"\n Chronos analysis complete!")

🔍 Data verification...
DataFrame loaded: (24486, 4)
Columns: ['secteur', 'site', 'tstamp', 'trafic_mbps']
Number of unique sectors: 86

🔧 Configuration:
   • Device: cuda
   • BF16: True
   • Models to test: 4

🎯 Starting analysis of 4 Chronos models...

📈 MODEL 1/4: amazon/chronos-bolt-tiny

🚀 Loading amazon/chronos-bolt-tiny on cuda...
   • Model size: 33.0 MB
   • Load time: 1.163s
   • Processing 86 sectors...
     📊 Progress: 1/86 (1.2%)
     📊 Progress: 10/86 (11.6%)
     📊 Progress: 50/86 (58.1%)
   ✅ Results: 86 successful predictions
   ✅ Global RMSE: 8.2450
   ✅ Avg inference time: 0.018732s

   🏆 Top 5 best predictions (lowest RMSE):
      T36870B: Real=5.58, Pred=5.57, RMSE=0.0020, Time=0.017170s
      T70747B: Real=14.65, Pred=14.63, RMSE=0.0193, Time=0.020658s
      T78279B: Real=2.47, Pred=2.39, RMSE=0.0800, Time=0.017307s
      T76995B: Real=6.70, Pred=6.60, RMSE=0.0948, Time=0.017961s
      T78273A: Real=21.79, Pred=21.63, RMSE=0.1587, Time=0.018880s

   ⚠️ Top 5 worst