## Experimentation
Backtesting using expanding window approach with multiple models

In [None]:
import datetime
import os
from statistics import mean

import numpy as np
import pandas as pd
import plotly.graph_objects as go
import requests
from lightgbm import LGBMRegressor
from mlforecast import MLForecast
from mlforecast.target_transforms import Differences
from mlforecast.utils import PredictionIntervals
from plotly.subplots import make_subplots
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import Ridge, Lasso, LinearRegression
from sklearn.neural_network import MLPRegressor
from utilsforecast.plotting import plot_series
from window_ops.expanding_window import expanding_mean
from xgboost import XGBRegressor

In [None]:
# Load the raw data and reshape it into the long format used by the rest of the
# notebook: one row per timestamp with `unique_id`, `ds` (timestamp) and `y` (target).
df = pd.read_csv('data/data.csv')

# Build `ts` as a new frame in a single chain instead of assigning into a column
# slice of `df`, which triggers pandas' chained-assignment warning.
ts = (
    df[['period', 'value']]
    .assign(period=lambda d: pd.to_datetime(d['period']))
    .sort_values('period')
    .rename(columns={'period': 'ds', 'value': 'y'})
    # The backtesting output is later melted, pivoted and grouped on `unique_id`,
    # so the single series needs an explicit identifier column.
    .assign(unique_id=1)
)

# Keep only the most recent 24*31*25 hours (25 blocks of 31 days) of history.
end = ts['ds'].max()
start = end - datetime.timedelta(hours=24 * 31 * 25)
ts = ts[ts['ds'] >= start]

# Ask the Nixtla libraries to return the series id as a regular column.
os.environ['NIXTLA_ID_AS_COL'] = '1'

# Seed shared by every model that accepts one, so that a Restart & Run All
# reproduces the same backtesting scores.
RANDOM_SEED = 42

# Candidate regressors, keyed by the label used for their forecast columns.
ml_models = {
    'lightgbm': LGBMRegressor(n_estimators=500, verbosity=-1, random_state=RANDOM_SEED),
    'xgboost': XGBRegressor(random_state=RANDOM_SEED),
    'ridge': Ridge(),
    'lasso': Lasso(),
    'linear_regression': LinearRegression(),
    'mlp': MLPRegressor(max_iter=500, random_state=RANDOM_SEED),
    'random_forest': RandomForestRegressor(random_state=RANDOM_SEED)
}

# Forecasting pipeline: hourly frequency, lags 1..23 of the target and
# calendar features derived from the timestamp.
mlf = MLForecast(
    models=ml_models,
    freq='h',
    lags=list(range(1, 24)),
    date_features=['month', 'day', 'dayofweek', 'week', 'hour',  # seasonal features
                   'quarter'])

# window setting
partitions = 10   # number of backtesting partitions (cross-validation windows)
step_size = 24    # number of periods between two consecutive cutoffs
h = 72            # forecast horizon of each partition, in periods

# prediction intervals
n_windows = 5
method = 'conformal_distribution'
pi = PredictionIntervals(h=h, n_windows=n_windows, method=method)
levels = [95]     # confidence level(s), in percent

# backtesting with expanding window
# NOTE: the confidence levels are passed through the `level` keyword;
# `cross_validation` has no `levels` parameter.
bkt_df = mlf.cross_validation(
    df=ts,
    h=h,
    step_size=step_size,
    n_windows=partitions,
    prediction_intervals=pi,
    level=levels,
)


# Plot every model's backtesting forecast against the actuals, one subplot per partition.
partitions_labels = bkt_df["cutoff"].unique()

# Actuals drawn in every panel: start one day before the first forecasted timestamp
# so the window covers all partitions, however many were run.
ts_sub = ts[ts["ds"] >= bkt_df["ds"].min() - datetime.timedelta(hours=24)]

# Create one subplot row per partition (the number of rows follows the backtest output)
fig = make_subplots(
    rows=len(partitions_labels),
    cols=1,
    subplot_titles=["Partitions: " + str(i) for i in partitions_labels],
)

# Line colours for the model traces; reused cyclically if there are more models than colours.
palette = ['#2a9d8f', '#0077b6', '#ffc8dd', '#e76f51', '#8338ec', '#fb8500', '#6a994e']

for r, cutoff_label in enumerate(partitions_labels, start=1):
    # Only the first panel contributes legend entries; legend groups tie the rest to them.
    showlegend = r == 1
    bkt_sub = bkt_df[bkt_df["cutoff"] == cutoff_label]

    # Add actual values to the plot
    fig.add_trace(
        go.Scatter(x=ts_sub["ds"], y=ts_sub["y"], legendgroup="actual", showlegend=showlegend,
                   mode='lines', name='Actual', line=dict(color='#023047', width=2)),
        row=r, col=1)

    # Add one forecast trace per fitted model; the forecast columns are named after
    # the keys of `ml_models`.
    for j, label in enumerate(ml_models):
        fig.add_trace(
            go.Scatter(x=bkt_sub["ds"], y=bkt_sub[label], mode='lines', name=label,
                       legendgroup=label, showlegend=showlegend,
                       line=dict(color=palette[j % len(palette)], width=1.5, dash="dot")),
            row=r, col=1)

# Scale the figure height with the number of panels so each one stays readable.
fig.update_layout(height=max(500, 200 * len(partitions_labels)))
fig.show()

### Model Scoring

In [None]:
# Reformat the backtesting results into a long table.

# Number the partitions 1..N in the order their cutoffs appear.
cutoff = bkt_df["cutoff"].unique()
partitions_mapping = pd.DataFrame({
    'cutoff': cutoff,
    'partition': range(1, len(cutoff) + 1)
})

# Derive labels and estimator class names from `ml_models` so this table cannot
# drift from the models that were actually fitted.
model_label = list(ml_models)
model_name = [type(model).__name__ for model in ml_models.values()]
models_mapping = pd.DataFrame({
    'model_label': model_label,
    'model_name': model_name
})

# Stack the point forecasts and the 95% interval bounds of every model into
# (model_label, value) pairs. `melt` is called as a method, so the frame itself
# must not be passed again as the first argument.
bkt_long = bkt_df.melt(
    id_vars=['unique_id', 'ds', 'cutoff', 'y'],
    value_vars=(model_label
                + [f'{model}-lo-95' for model in model_label]
                + [f'{model}-hi-95' for model in model_label]),
    var_name='model_label',
    value_name='value'
)

def split_model_confidence(model_name):
    """Split a melted column name into its model label and value type.

    Names containing '-lo-95' or '-hi-95' are interval bounds: the marker is
    removed and the type is 'lower' or 'upper' respectively. Any other name is
    returned unchanged with type 'forecast'.

    Returns:
        A (model_label, type) tuple.
    """
    bound_markers = (('-lo-95', 'lower'), ('-hi-95', 'upper'))
    for marker, bound_type in bound_markers:
        if marker in model_name:
            return model_name.replace(marker, ''), bound_type
    return model_name, 'forecast'
    
# Separate each melted column name into the model it belongs to and the kind of
# value it carries (forecast / lower / upper).
bkt_long['model_label'], bkt_long['type'] = zip(
    *bkt_long['model_label'].map(split_model_confidence)
)

# Attach the partition number of every cutoff.
bkt_long = bkt_long.merge(partitions_mapping, how='left', on='cutoff')

# Spread the value types back into columns (one row per series, timestamp,
# partition and model), then add the estimator class name.
bkt = (
    bkt_long
    .pivot(
        index=['unique_id', 'ds', 'cutoff', 'y', 'partition', 'model_label'],
        columns='type',
        values='value',
    )
    .reset_index()
    .merge(models_mapping, how='left', on='model_label')
)

# model evaluation
def mape(y_true, y_pred):
    """Mean absolute percentage error of `y_pred` against `y_true`.

    Returns the mean of |(y_true - y_pred) / y_true| as a fraction (not
    multiplied by 100).
    """
    abs_pct_errors = abs((y_true - y_pred) / y_true)
    return mean(abs_pct_errors)

def rmse(y_true, y_pred):
    """Root mean squared error of `y_pred` against `y_true`.

    Returns the square root of the mean of (y_true - y_pred) ** 2.
    """
    squared_errors = (y_true - y_pred) ** 2
    # `sqrt` is never imported in this notebook; take the root with `** 0.5`
    # so the function does not depend on an extra name.
    return mean(squared_errors) ** 0.5

def coverage(y_true, lower_bound, upper_bound):
    """Share of observations that fall inside their prediction interval.

    An observation counts as covered when
    lower_bound <= y_true <= upper_bound (both ends inclusive).
    """
    above_lower = y_true >= lower_bound
    below_upper = y_true <= upper_bound
    return mean(above_lower & below_upper)

def score(df):
    """Compute the evaluation metrics for one group of backtesting results.

    Args:
        df: frame with the columns 'y' (actuals), 'forecast' (point forecast),
            'lower' and 'upper' (prediction interval bounds).

    Returns:
        pandas Series indexed by 'mape', 'rmse' and 'coverage'.
    """
    # Arguments are passed positionally: the helpers are declared with
    # (y_true, y_pred) and (y_true, lower_bound, upper_bound), so the previous
    # keywords y=/yhat=/lower=/upper= raised a TypeError.
    mape_score = mape(df["y"], df["forecast"])
    rmse_score = rmse(df["y"], df["forecast"])
    coverage_score = coverage(df["y"], df["lower"], df["upper"])
    cols = ["mape", "rmse", "coverage"]
    return pd.Series([mape_score, rmse_score, coverage_score], index=cols)

# Score every (series, model, partition) combination: `score` is applied to each
# group and its metrics become the columns of `score_df`.
score_df = (
    bkt
    .groupby(['unique_id', 'model_label', 'model_name', 'partition'])[
        ['unique_id', 'model_label', 'model_name', 'partition',
         'y', 'forecast', 'lower', 'upper']
    ]
    .apply(score)
    .reset_index()
)



MLflow workflow
1. Define the experiment
2. Run the experiment
3. Log the parameters
4. Log the KPIs
5. Compare and analyze the experiments' results


In [None]:
import mlflow
import datetime

# Name of the MLflow experiment and the location where its artifacts are stored.
experiment_name = "ml_forecast"
mlflow_path = "./mlruns"

# Experiment-level tags: the backtesting and prediction-interval settings used above.
tags = {
    'h': h,
    'step_size': step_size,
    'partitions': partitions,
    'intervals_type': 'ConformalIntervals',
    'intervals_h': h,
    'intervals_n_windows': n_windows,
    # Reuse the configured method rather than repeating the literal.
    'intervals_method': method,
    'levels': levels
}

# log backtesting results
# Look the experiment up first and create it only when it does not exist yet.
# This replaces a bare `except:` that reported "experiment exists" for any
# failure (bad path, tracking server down, ...), hiding the real error.
meta = mlflow.get_experiment_by_name(experiment_name)
if meta is None:
    mlflow.create_experiment(name=experiment_name,
                             artifact_location=mlflow_path,
                             tags=tags)
    meta = mlflow.get_experiment_by_name(experiment_name)
    print(f'Set a new experiment {experiment_name}')
    print('Pulling the metadata')
else:
    print(f'Experiment {experiment_name} exists, pulling the metadata')

# run time: shared by all runs logged in this pass so they can be told apart from
# earlier executions of the notebook
run_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

# Log one MLflow run per row of `score_df`, i.e. per model and backtesting partition.
for _, row in score_df.iterrows():
    # Was `run_name - ...`: a subtraction on an undefined name instead of an assignment.
    run_name = row['model_label'] + '_' + run_time
    with mlflow.start_run(experiment_id=meta.experiment_id,
                          run_name=run_name,
                          tags={'type': 'backtesting',
                                'partition': row['partition'],
                                'unique_id': row['unique_id'],
                                'model_label': row['model_label'],
                                'model_name': row['model_name'],
                                'run_name': run_name}):
        # Parameters: the estimator's own hyperparameters plus the feature settings.
        model_params = ml_models[row['model_label']].get_params()
        model_params['model_name'] = row['model_name']
        model_params['model_label'] = row['model_label']
        model_params['partition'] = row['partition']
        model_params['lags'] = list(range(1, 24))
        # Keep in sync with the `date_features` given to MLForecast ('quarter' was missing).
        model_params['date_features'] = ['month', 'day', 'dayofweek', 'week', 'hour', 'quarter']
        mlflow.log_params(model_params)
        # KPIs
        mlflow.log_metric('mape', row['mape'])
        mlflow.log_metric('rmse', row['rmse'])
        mlflow.log_metric('coverage', row['coverage'])

In [None]:
# Set the experiment name
experiment_name = "hyperparameter_tuning"

# Reuse the experiment when it already exists: `create_experiment` raises on a
# duplicate name, which broke every re-run of this cell.
existing_experiment = mlflow.get_experiment_by_name(experiment_name)
if existing_experiment is None:
    experiment_id = mlflow.create_experiment(experiment_name)
else:
    experiment_id = existing_experiment.experiment_id

# Loop through the scoring results.
# NOTE(review): this previously iterated over `df`, the raw input data, which has
# none of the columns read below. `score_df` is the only frame in this notebook
# with model_label / model_name / partition / mape / rmse -- confirm it is the
# intended source.
for idx, row in score_df.iterrows():
    # Start a run
    with mlflow.start_run(experiment_id=experiment_id):
        model_params = ml_models[row["model_label"]].get_params()
        model_params["model_name"] = row["model_name"]
        model_params["model_label"] = row["model_label"]
        model_params["partition"] = row["partition"]
        model_params["lags"] = list(range(1, 24))
        # Keep in sync with the `date_features` given to MLForecast ('quarter' was missing).
        model_params["date_features"] = ["month", "day", "dayofweek", "week", "hour", "quarter"]
        mlflow.log_params(model_params)
        mlflow.log_metric("mape", row["mape"])
        mlflow.log_metric("rmse", row["rmse"])
        mlflow.log_metric("coverage", row["coverage"])
    
# Experiment whose runs are compared
experiment_name = "hyperparameter_tuning"

# Pull every run logged under that experiment into a DataFrame
all_results = mlflow.search_runs(experiment_names=[experiment_name])

# Order the runs by MAPE (ascending) and keep the first one, i.e. the lowest MAPE
runs_by_mape = all_results.sort_values(by="metrics.mape")
best_mape_model = runs_by_mape.iloc[:1]

# Show which model achieved it and its score
print(best_mape_model[["params.model_name", "metrics.mape"]])

To browse and compare the logged runs, launch the MLflow UI from a terminal in the project directory:
mlflow ui

Model evaluation and selection
1. benchmarking
2. residual analysis
3. backtesting analysis

Potential improvements
1. Different models
2. New features
3. Tuning hyperparameters


Since lightgbm performed best, we will proceed with it for hyperparameter tuning and final model training.

Hypothesis: performance will improve by
1. using a lower learning rate, and
2. training with more trees.

In [None]:
# Tuning grid for LightGBM: three tree counts (100 / 250 / 500) for each of two
# learning rates (0.1 and 0.05).
ml_models2 = {
    'lightgbm1': LGBMRegressor(n_estimators=100, learning_rate=0.1),
    'lightgbm2': LGBMRegressor(n_estimators=250, learning_rate=0.1),
    # Key was misspelled 'ligthgbm3', breaking the lightgbm1..6 naming scheme.
    'lightgbm3': LGBMRegressor(n_estimators=500, learning_rate=0.1),
    'lightgbm4': LGBMRegressor(n_estimators=100, learning_rate=0.05),
    'lightgbm5': LGBMRegressor(n_estimators=250, learning_rate=0.05),
    'lightgbm6': LGBMRegressor(n_estimators=500, learning_rate=0.05),
}



`lightgbm6` (500 trees, learning rate 0.05) looks good based on the backtesting results. You can proceed with it, or continue tuning if your computational resources allow.