In [1]:
import warnings
from datetime import datetime, timedelta

import numpy as np
import pandas as pd
import torch
from chronos import Chronos2Pipeline
from sklearn.metrics import mean_absolute_percentage_error, root_mean_squared_error

from data_pipelines import data_prep_train, data_prep_test_id
from functions import db_connection

warnings.filterwarnings('ignore')

In [2]:
# Database handle used by the data-preparation helpers in later cells.
engine = db_connection()

# Run on the GPU when one is visible to torch, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Half precision is only selected on a CUDA device reporting more than
# 6_000_000_000 bytes of memory. The check is gated on `device == "cuda"`:
# the string "cpu" is truthy, so testing `device` alone would still query
# GPU 0 on a CPU-only machine and raise.
if device == "cuda" and torch.cuda.get_device_properties(0).total_memory > 6_000_000_000:
    dtype = torch.float16
else:
    dtype = torch.float32

Successfully connected to the database!


In [3]:
# Build the forecasting pipeline from the checkpoint at `local_model_path`,
# placing it on the device and using the precision chosen in the setup cell.
local_model_path = "models/chronos-2"
pipeline = Chronos2Pipeline.from_pretrained(local_model_path, device_map=device, dtype=dtype)

In [4]:
def dedupe_index(df, keep="first"):
    """Return ``df`` with duplicate index labels removed and the index sorted.

    Duplicates are dropped *before* sorting. Sorting first (with pandas'
    default, non-stable sort) does not guarantee that rows sharing a label
    stay in their original order, so ``keep="first"`` / ``keep="last"`` could
    keep an arbitrary row. Dropping first makes the kept row follow the
    input order deterministically.

    Parameters
    ----------
    df : pandas.DataFrame or pandas.Series
        Object whose index may contain repeated labels.
    keep : {"first", "last", False}, default "first"
        Passed to ``Index.duplicated``: which occurrence of a repeated label
        to keep (``False`` drops every repeated label).

    Returns
    -------
    Same type as ``df``
        A new object with a unique index, sorted ascending.
    """
    is_repeat = df.index.duplicated(keep=keep)
    return df[~is_repeat].sort_index()

In [5]:
def chorons_model_safe(df_train, df_test, target, features, pipeline):
    """Forecast the ``df_test`` horizon with ``pipeline`` and attach the actuals.

    Both frames are expected to carry their timestamps in an index named
    ``ds``; the index is moved into a column and renamed to ``timestamp``.
    The target column is renamed to ``target`` and a constant ``id`` column
    (``"DE"``) is added so the data forms a single series.

    The inputs are not modified: all reshaping happens on copies.

    Parameters
    ----------
    df_train : pandas.DataFrame
        History, containing the target column and every name in ``features``.
    df_test : pandas.DataFrame
        Horizon to predict, containing every name in ``features`` and the
        target column (used only to fill ``y_true``).
    target : str or sequence of str
        Name of the target column in the input frames. Only the first entry of
        a sequence is used; ``None`` or an empty sequence falls back to ``"y"``.
    features : sequence of str
        Covariate columns passed to the pipeline for both history and horizon.
    pipeline : object
        Anything exposing ``predict_df`` with the keyword arguments used below
        (the notebook passes a ``Chronos2Pipeline``).

    Returns
    -------
    pandas.DataFrame
        Whatever ``pipeline.predict_df`` returns, plus a ``y_true`` column
        taken from the test target and aligned on the row index.

    Raises
    ------
    KeyError
        If a required column is missing from either frame.
    """
    id_column = "id"
    timestamp_column = "timestamp"
    target_column = "target"
    timeseries_id = "DE"

    # Resolve the caller's target name instead of silently discarding it.
    if isinstance(target, str):
        source_target = target
    else:
        candidates = list(target) if target is not None else []
        source_target = candidates[0] if candidates else "y"

    renames = {"ds": timestamp_column, source_target: target_column}
    feature_cols = list(features)

    def _standardise(frame):
        # reset_index/rename/assign each return a new frame, so the caller's
        # DataFrame never gains the id column or loses its index.
        return (
            frame.reset_index()
            .rename(columns=renames)
            .assign(**{id_column: timeseries_id})
        )

    history = _standardise(df_train)
    horizon = _standardise(df_test)

    history = history[[id_column, timestamp_column, target_column] + feature_cols]
    future_covariates = horizon[[id_column, timestamp_column] + feature_cols]

    pred_df = pipeline.predict_df(
        history,
        future_df=future_covariates,
        # One forecast step per row of the test frame.
        prediction_length=len(df_test),
        quantile_levels=[0.1, 0.5, 0.9],
        id_column=id_column,
        timestamp_column=timestamp_column,
        target=[target_column],
    )
    # Index-aligned assignment: row i of the forecast gets row i of the test
    # frame (both carry a fresh 0..n-1 index after reset_index for one series).
    pred_df['y_true'] = horizon[target_column]
    return pred_df

In [6]:
# Backtest: one forecast per target day from the day after `start_date`
# up to the day after `end_date`, scored on the final 96 rows of each forecast.

# --- Configuration ---
start_date = '2025-12-31'
# Last prediction date is two days before today.
end_date = ((pd.Timestamp.now().date())-pd.Timedelta(days=2)).strftime('%Y-%m-%d')
model_id = 202
pred_dates = pd.date_range(start=start_date, end=end_date, freq='D')
# Look-back arguments forwarded to data_prep_train as `lby` / `lbm`.
lby = 0
lbm = 4
sp_id = 2
target = ['y']
features = [
    'hi',
    'is_day_of_week',
    'normal_holiday',
    # Other candidate covariates, currently disabled:
    # 'temp', 'is_weekend', 'special_day',
    # 'is_day_before_holiday', 'is_day_after_holiday',
    # 'nh_dow_interaction', 'sd_dow_interaction',
    # 'minute_sin', 'minute_cos', 'hour_sin', 'hour_cos',
    # 'day_of_week_sin', 'day_of_week_cos', 'month_sin', 'month_cos',
]

# --- Evaluation loop ---
# Collect one record per day and build the frame once at the end; growing a
# DataFrame with pd.concat inside the loop re-copies all earlier rows each time.
metric_rows = []
for pred_date in pred_dates:
    # The day being forecast is the day after `pred_date`.
    tdate = pred_date + timedelta(days=1)
    raw_train = data_prep_train(input_date=tdate, lby=lby, lbm=lbm, sp_id=sp_id, hrs_end=23, engine=engine)
    # NOTE(review): the test data uses sp_id=4 while training uses sp_id=2 — confirm this is intended.
    raw_test = data_prep_test_id(input_date=tdate, fcwt='t_forecasted_weather', sp_id=4, hrs_start=0, engine=engine)

    # Unique, sorted index on a regular 15-minute grid, values rounded to 2 dp.
    df_train_in = dedupe_index(raw_train).asfreq("15min").round(2)
    df_test_in = dedupe_index(raw_test).asfreq("15min").round(2)

    preds = chorons_model_safe(df_train_in, df_test_in, target, features, pipeline=pipeline)
    # Keep the last 96 rows (96 x 15 min = 24 h) and only the columns scored below.
    preds = preds.iloc[-96:][['timestamp', 'predictions', 'y_true']]

    mape = mean_absolute_percentage_error(y_true=preds['y_true'], y_pred=preds['predictions']) * 100
    rmse = root_mean_squared_error(y_true=preds['y_true'], y_pred=preds['predictions'])

    date_label = tdate.strftime('%Y-%m-%d')
    metric_rows.append({'date': date_label, 'mape': round(mape, 2), 'rmse': round(rmse, 2)})
    print(date_label, round(mape, 2), round(rmse, 2))

# Explicit columns keep the frame well-formed even when no day was evaluated.
metrics = pd.DataFrame(metric_rows, columns=['date', 'mape', 'rmse'])
    

2026-01-01 4.71 65.74
2026-01-02 2.1 31.11
2026-01-03 0.62 9.07
2026-01-04 1.79 24.35
2026-01-05 1.16 16.97
2026-01-06 0.99 16.54
2026-01-07 1.18 18.08
2026-01-08 1.31 18.16
2026-01-09 0.79 10.38
2026-01-10 2.4 35.48
2026-01-11 0.94 12.5
2026-01-12 1.4 19.78
2026-01-13 3.78 55.97
2026-01-14 2.04 36.36
2026-01-15 3.9 56.96
2026-01-16 2.38 40.35
2026-01-17 1.62 26.51
2026-01-18 2.54 33.86
2026-01-19 1.76 24.79
2026-01-20 1.64 24.08
2026-01-21 2.47 36.08
2026-01-22 0.74 12.39
2026-01-23 2.08 29.68
2026-01-24 3.41 46.9
2026-01-25 1.27 17.08
2026-01-26 8.63 124.98


In [7]:
# Parse the date strings, derive the calendar month, and round everything
# to two decimals before summarising.
metrics = metrics.assign(
    date=lambda frame: pd.to_datetime(frame['date']),
    month=lambda frame: frame['date'].dt.month,
).round(2)

# Per-month day count plus mean MAPE / RMSE; the bare expression is the cell output.
metrics.groupby('month').agg(
    no_of_days=pd.NamedAgg(column='date', aggfunc='count'),
    mape_mean=pd.NamedAgg(column='mape', aggfunc='mean'),
    rmse_mean=pd.NamedAgg(column='rmse', aggfunc='mean'),
).round(2)

Unnamed: 0_level_0,no_of_days,mape_mean,rmse_mean
month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,26,2.22,32.47


In [None]:
# Intraday run for today: train on data up to hour 5 (`hrs_end=5`) and
# forecast from hour 6 (`hrs_start=6`), then score the forecast rows dated
# today or later.

# --- Configuration ---
start_date = ((pd.Timestamp.now().date())-pd.Timedelta(days=0)).strftime('%Y-%m-%d')
end_date = start_date
model_id = 500
pred_dates = pd.date_range(start=start_date, end=end_date, freq='D')
# Look-back arguments forwarded to data_prep_train as `lby` / `lbm`.
lby = 0
lbm = 6
# Alternative (lby, lbm) look-back pairs; not iterated over at the moment.
lbs = [(5,0),(4,0),(3,0),(2,0),(1,0),(0,9),(0,6),(0,4),(0,3)]
sp_id = 2
target = ['y']
features = [
    'hi',
    'normal_holiday',
    # Other candidate covariates, currently disabled:
    # 'temp', 'is_day_of_week', 'is_weekend', 'special_day',
    # 'is_day_before_holiday', 'is_day_after_holiday',
    # 'nh_dow_interaction', 'sd_dow_interaction',
    # 'minute_sin', 'minute_cos', 'hour_sin', 'hour_cos',
    # 'day_of_week_sin', 'day_of_week_cos', 'month_sin', 'month_cos',
]

# --- Forecast and score ---
# Collect one record per run and build the frame once after the loop.
metric_rows = []
for pred_date in pred_dates:
    raw_train = data_prep_train(input_date=pred_date, lby=lby, lbm=lbm, sp_id=sp_id, hrs_end=5, engine=engine)
    # NOTE(review): the original called `data_prep_test`, which is never imported
    # in this notebook and raised NameError. `data_prep_test_id` is the imported
    # helper and is called with the same keywords in the backtest cell — confirm
    # it is the intended loader here.
    raw_test = data_prep_test_id(input_date=pred_date, fcwt='t_forecasted_weather', sp_id=4, hrs_start=6, engine=engine)

    # Unique, sorted index on a regular 15-minute grid, values rounded to 2 dp.
    df_train_in = dedupe_index(raw_train).asfreq("15min").round(2)
    df_test_in = dedupe_index(raw_test).asfreq("15min").round(2)

    preds1 = chorons_model_safe(df_train_in, df_test_in, target, features, pipeline=pipeline)

    # Number the forecast rows 1..96 cyclically (row position modulo 96).
    preds1['block'] = (np.arange(len(preds1)) % 96) + 1

    # Index of the 15-minute block containing "now minus 23 minutes"
    # (00:15 -> 1, 00:30 -> 2, ..., 23:45 -> 95).
    # NOTE(review): `last_block` is computed but not used for slicing below;
    # the 23-minute lag is presumably the data latency — confirm.
    now = datetime.now()
    last_actual_time = now - timedelta(minutes=23)
    last_block = (last_actual_time.hour * 60 + last_actual_time.minute) // 15

    # Score only the rows whose timestamp is on or after `start_date`.
    preds = preds1[preds1['timestamp'] >= start_date].copy()

    mape = mean_absolute_percentage_error(y_true=preds['y_true'], y_pred=preds['predictions']) * 100
    rmse = root_mean_squared_error(y_true=preds['y_true'], y_pred=preds['predictions'])
    metric_rows.append({
        'date': pred_date.strftime('%Y-%m-%d'),
        'mape': round(mape, 2),
        'rmse': round(rmse, 2),
    })
    print('lby',lby,'lbm',lbm,'till',preds.timestamp.max(),round(mape,2),round(rmse,2))

# Explicit columns keep the frame well-formed even when nothing was evaluated.
metrics = pd.DataFrame(metric_rows, columns=['date', 'mape', 'rmse'])
    