## Comparison between AutoARIMA and Amazon Chronos


### Step 1: Load the Dataset

In [None]:
import pandas as pd
import numpy as np

# Load the raw semicolon-separated readings ('?' marks a missing value) and
# build the datetime index from the separate Date and Time columns.
#
# The timestamp is parsed with an explicit format instead of the nested
# `parse_dates` mapping plus `infer_datetime_format`: both are deprecated in
# pandas 2.x, and an explicit format removes any day-first/month-first guessing.
# NOTE(review): assumes Date is dd/mm/yyyy and Time is HH:MM:SS — confirm
# against the data file.
df = (
    pd.read_csv(
        'household_power_consumption.txt',
        sep=';',
        na_values='?',
        low_memory=False,
    )
    .assign(
        datetime=lambda frame: pd.to_datetime(
            frame['Date'] + ' ' + frame['Time'],
            format='%d/%m/%Y %H:%M:%S',
        )
    )
    .drop(columns=['Date', 'Time'])
    .set_index('datetime')
    # Every remaining column is a numeric measurement; float32 halves the memory.
    .astype('float32')
    .sort_index()
)
df

### Step 2: Resample to Hourly Frequency

We’ll resample the data to hourly frequency by taking the mean of all 1-minute readings per hour.


In [None]:
# Keep only the target column, then average the readings that fall within each hour.
# 'h' is the hourly offset alias; the uppercase 'H' spelling is deprecated in
# recent pandas releases.
df_hourly = df[['Global_active_power']].resample('h').mean()


### Step 3: Handle Missing Values

In [None]:
# Linearly interpolate gaps in the target, then discard any rows that are
# still missing afterwards.
df_hourly = df_hourly.assign(
    Global_active_power=lambda frame: frame['Global_active_power'].interpolate(method='linear')
).dropna()


### Step 4: Train/Test Split
Let’s split using 80% train / 20% test, preserving time order.

In [None]:
# Chronological split: the first 80% of rows are for training, the rest for testing.
split_index = int(len(df_hourly) * 0.8)
train, test = df_hourly.iloc[:split_index], df_hourly.iloc[split_index:]

# Target series of each partition
y_train, y_test = (part['Global_active_power'] for part in (train, test))


### Step 5: Preparing data for Chronos & AutoARIMA

In [None]:
# Long-format table: one row per timestamp with a constant series identifier.
chronos_df = df_hourly.reset_index()
chronos_df.columns = ['timestamp', 'target_value']

# Put the identifier first so the column order is item_id, timestamp, target_value
chronos_df.insert(0, 'item_id', 'household_1')

# Persist for the forecasting cells below, which read this file back in
chronos_df.to_csv('chronos_input.csv', index=False)


### Forecasting using Amazon Chronos

In [None]:
import torch
from chronos import BaseChronosPipeline

# Choose the device at runtime so the notebook also runs on machines without a GPU.
chronos_device = "cuda" if torch.cuda.is_available() else "cpu"
# bfloat16 on GPU; fall back to float32 on CPU.
chronos_dtype = torch.bfloat16 if chronos_device == "cuda" else torch.float32

# Load pre-trained Chronos model (change to chronos-bolt-small for faster inference)
pipeline = BaseChronosPipeline.from_pretrained(
    "amazon/chronos-t5-large",
    device_map=chronos_device,
    torch_dtype=chronos_dtype,
)


In [None]:
# Load the preprocessed CSV written earlier. A dedicated name is used so the
# raw frame bound to `df` in Step 1 is not overwritten; otherwise re-running
# the resampling cell after this one would operate on the wrong table.
chronos_input = pd.read_csv("chronos_input.csv")

# Target values of the single series as a NumPy array
series = chronos_input.loc[
    chronos_input['item_id'] == 'household_1', 'target_value'
].to_numpy()

# Forecast horizon; the final `prediction_length` points are held out as
# ground truth and everything before them is the model context.
prediction_length = 24
context_tensor = torch.tensor(series[:-prediction_length])

# Run prediction.
# The context is passed positionally so the call does not depend on the
# keyword name of the first parameter.
# NOTE(review): that keyword appears to differ between chronos-forecasting
# releases — confirm against the installed version.
quantiles, mean = pipeline.predict_quantiles(
    context_tensor,
    prediction_length=prediction_length,
    quantile_levels=[0.1, 0.5, 0.9],
)


In [None]:
import matplotlib.pyplot as plt
import numpy as np

# === CONFIG ===
plot_context_length = 96         # Number of trailing context hours to display
prediction_length = 24           # Forecast horizon

# === CONVERT TO NUMPY ARRAYS ===
context_np = context_tensor.detach().cpu().numpy()
mean_np = mean[0].detach().cpu().numpy()
# Indices 0 and 2 on the last axis are taken as the first and third requested
# quantile levels (0.1 and 0.9 in the prediction cell).
p10 = quantiles[0, :, 0].detach().cpu().numpy()
p90 = quantiles[0, :, 2].detach().cpu().numpy()

# === GROUND TRUTH ===
# Held-out actuals: the last `prediction_length` points of the full series.
# The metrics cell below reuses this `y_test`.
y_test = series[-prediction_length:]

# === CREATE X AXES ===
# Size the axes from the data actually shown, so a context shorter than
# `plot_context_length` still plots instead of raising a shape mismatch.
context_display = context_np[-plot_context_length:]
n_shown = len(context_display)
x_context = np.arange(n_shown)
x_forecast = np.arange(n_shown, n_shown + prediction_length)

# === PLOT ===
fig, ax = plt.subplots(figsize=(12, 6))

# Trailing context, actuals, point forecast and P10–P90 band
ax.plot(x_context, context_display, label=f'Training Context (last {n_shown}h)', color='blue')
ax.plot(x_forecast, y_test, label='Actual', color='black')
ax.plot(x_forecast, mean_np, label='Chronos Forecast (mean)', color='green')
ax.fill_between(x_forecast, p10, p90, color='lightgreen', alpha=0.5, label='P10–P90 Interval')

# Styling — labels are derived from the config so they cannot drift from it
ax.set_title(f"Chronos Forecast vs. Actual ({prediction_length}-Hour Horizon, {n_shown}h Context)")
ax.set_xlabel("Hour")
ax.set_ylabel("Global Active Power (kW)")
ax.grid(True)
ax.legend()
fig.tight_layout()
plt.show()


In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error
import numpy as np

# Point forecast as a NumPy vector
chronos_pred = mean[0].detach().cpu().numpy()

# `y_test` must already hold the held-out actuals (assigned in the plotting cell above).

# Error metrics
rmse = np.sqrt(mean_squared_error(y_test, chronos_pred))
mae = mean_absolute_error(y_test, chronos_pred)
abs_pct_error = np.abs((y_test - chronos_pred) / y_test)
mape = np.mean(abs_pct_error) * 100

# Report
print(
    "Chronos Forecast Accuracy:",
    f"RMSE: {rmse:.4f}",
    f"MAE:  {mae:.4f}",
    f"MAPE: {mape:.2f}%",
    sep="\n",
)


### Forecasting using AutoARIMA

In [None]:
import pandas as pd
import numpy as np
from pmdarima import auto_arima
from sklearn.metrics import mean_squared_error, mean_absolute_error
import matplotlib.pyplot as plt

# === CONFIGURATION ===
prediction_length = 24        # forecast horizon (hours)
plot_context_length = 96      # trailing training hours shown in the plot
max_context_window = 1024     # number of most recent hours used to fit the model
csv_path = 'chronos_input.csv'

# === STEP 1: Load Chronos CSV ===
# A dedicated name is used so the raw frame bound to `df` in Step 1 is not
# overwritten; otherwise re-running the earlier preprocessing cells after this
# one would operate on the wrong table.
chronos_input = pd.read_csv(csv_path)
series = chronos_input.loc[
    chronos_input['item_id'] == 'household_1', 'target_value'
].to_numpy()

# === STEP 2: Split Series Properly ===
# Train on the `max_context_window` points that immediately precede the
# held-out tail; test on the final `prediction_length` points.
y_train = series[-(prediction_length + max_context_window):-prediction_length]
y_test = series[-prediction_length:]

# === STEP 3: Train AutoARIMA ===
# Search bounds for the non-seasonal AR/MA orders (p, q) and their seasonal
# counterparts (P, Q).
order_bounds = dict(
    start_p=0, max_p=5,
    start_q=0, max_q=5,
    start_P=0, max_P=2,
    start_Q=0, max_Q=2,
)

model = auto_arima(
    y_train,
    seasonal=True,
    m=24,                     # 24-step season: daily cycle for hourly data
    d=None,                   # differencing orders are left for the search to determine
    D=None,
    stepwise=False,           # search the grid instead of using the stepwise procedure
    method='lbfgs',
    error_action='ignore',
    suppress_warnings=True,
    trace=True,
    **order_bounds,
)


# === STEP 4: Forecast ===
# Predict the next `prediction_length` steps after the training window
forecast = model.predict(n_periods=prediction_length)

# === STEP 5: Evaluation ===
rmse = np.sqrt(mean_squared_error(y_test, forecast))
mae = mean_absolute_error(y_test, forecast)
abs_pct_error = np.abs((y_test - forecast) / y_test)
mape = np.mean(abs_pct_error) * 100

print(
    "AutoARIMA Forecast Accuracy:",
    f"RMSE: {rmse:.4f}",
    f"MAE:  {mae:.4f}",
    f"MAPE: {mape:.2f}%",
    sep="\n",
)

# === STEP 6: Plot (trailing training window + forecast) ===
# Size the axes from the data actually shown, so a training series shorter
# than `plot_context_length` still plots instead of raising a shape mismatch.
context_display = y_train[-plot_context_length:]
n_shown = len(context_display)
x_context = np.arange(n_shown)
x_forecast = np.arange(n_shown, n_shown + prediction_length)

fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(x_context, context_display, label=f'Training Context (last {n_shown}h)', color='blue')
ax.plot(x_forecast, y_test, label='Actual', color='black')
ax.plot(x_forecast, forecast, label='AutoARIMA Forecast', color='red')

# Title reports the real horizon and training length rather than fixed
# numbers, so it stays correct when the configuration changes.
ax.set_title(
    f"AutoARIMA Forecast vs. Actual ({prediction_length}-Hour Horizon, {len(y_train)}h Context)"
)
ax.set_xlabel("Hour")
ax.set_ylabel("Global Active Power (kW)")
ax.grid(True)
ax.legend()
fig.tight_layout()
plt.show()


In [None]:
# Sanity check: dimensions of the context passed to Chronos
context_tensor.shape