<a href="https://colab.research.google.com/github/souhirbenamor/EPF/blob/main/2025_Lasso_RegressionLARX_model_Bridging_paper.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

All LARX Experiments

In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import Lasso
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error
import math
import csv

# --- Reproducibility ---
# NOTE: hash randomization of the running interpreter is fixed at startup, so
# assigning PYTHONHASHSEED here is only inherited by child processes.
os.environ['PYTHONHASHSEED'] = '0'
np.random.seed(42)

# -------------------------------------------------------------
# 1. Load & Preprocess Data
# -------------------------------------------------------------
DATA_PATH = 'EPF_2015_2020.xlsx'

# Read the workbook, keep the first row of every repeated 'Date' value,
# parse the column as datetimes and promote it to the index.
data = (
    pd.read_excel(DATA_PATH)
    .drop_duplicates(subset='Date', keep='first')
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
    .set_index('Date')
)

# Discard everything stamped before 2015-01-01.
data = data.loc[data.index >= '2015-01-01']

# -------------------------------------------------------------
# 2. Define Experiments
# -------------------------------------------------------------
# Each experiment is a (feature columns, label) pair. The labelled variants
# differ only in whether 'MCP' and/or the exogenous columns are used.
exogenous_columns = [
    'Demand Day-ahead DE',
    'Wind and PV Day ahead (MWh/h)',
    'Gas',
    'Coal',
    'CO2',
]
experiments = [
    (list(exogenous_columns), 'LARX'),
    (['MCP', *exogenous_columns], 'LARX_ESM+'),
    (['MCP'], 'LARX_ESM'),
]

train_years = [2015, 2016, 2017, 2018]

# Every other calendar year present in the index is held out for testing.
year_of_row = data.index.year
test_years = sorted(set(year_of_row.unique()) - set(train_years))

train_data = data.loc[year_of_row.isin(train_years)]
test_data = data.loc[year_of_row.isin(test_years)]

# -------------------------------------------------------------
# 3. Experiment Loop
# -------------------------------------------------------------
# For every experiment, and for every test day in chronological order:
#   1. fit fresh MinMax scalers and a Lasso model on the history so far,
#   2. predict 'Price' for all rows of that day,
#   3. append the day's rows to the history used for the next day.
# Results are accumulated in `all_forecasts` (one column per experiment
# label) and `metrics` (one dict per experiment with RMSE and MAE).
all_forecasts = pd.DataFrame()
metrics = []

alpha_value = 0.001

test_days = pd.to_datetime(test_data.index.date).unique()
test_days = np.sort(test_days)

# Midnight-normalized timestamp of every test row. Computed once because the
# same per-day masks are needed by every experiment.
test_day_of_row = test_data.index.normalize()

for features_list, label in experiments:
    print(f"\nStarting experiment: {label}")

    # Initialize training history with only the columns this experiment
    # reads, so the daily concat below does not drag unused columns along.
    updated_history = train_data[features_list + ['Price']].copy()

    # Per-day forecast frames; concatenated once after the loop instead of
    # re-copying a growing frame on every iteration.
    daily_forecasts = []

    for day in test_days:
        day_mask = test_day_of_row == pd.Timestamp(day)
        X_forecast_raw = test_data.loc[day_mask, features_list].copy()

        # Days with fewer than 24 rows (presumably one row per hour) are
        # not forecast; this also covers the empty case.
        if len(X_forecast_raw) < 24:
            print(f"Insufficient data for {day}. Skipping.")
            continue

        if X_forecast_raw.isna().any().any():
            # fillna(method=...) is deprecated in pandas 2.x; ffill()/bfill()
            # are the supported equivalents.
            X_forecast_raw = X_forecast_raw.ffill().bfill()

        X_hist = updated_history[features_list]
        y_hist = updated_history['Price']

        # Scalers are re-fitted on the current history each day, so the
        # scaling range follows the growing training set.
        scaler_X = MinMaxScaler(feature_range=(-1, 1))
        scaler_y = MinMaxScaler(feature_range=(-1, 1))

        X_hist_scaled = pd.DataFrame(scaler_X.fit_transform(X_hist),
                                     index=X_hist.index,
                                     columns=X_hist.columns)
        y_hist_scaled = scaler_y.fit_transform(
            y_hist.to_numpy().reshape(-1, 1)
        ).ravel()

        lasso_model = Lasso(alpha=alpha_value, random_state=42)
        lasso_model.fit(X_hist_scaled, y_hist_scaled)

        X_forecast_scaled = pd.DataFrame(scaler_X.transform(X_forecast_raw),
                                         index=X_forecast_raw.index,
                                         columns=X_forecast_raw.columns)

        y_forecast_scaled = lasso_model.predict(X_forecast_scaled)
        # Map the prediction back from the scaled range to price units.
        y_forecast = scaler_y.inverse_transform(y_forecast_scaled.reshape(-1, 1))

        df_forecast = pd.DataFrame({'Price': y_forecast.ravel()},
                                   index=X_forecast_raw.index)

        # NOTE(review): the history is extended with the model's own
        # forecast, not the realized price from test_data. Confirm this
        # feedback is intended before changing it.
        new_training_rows = X_forecast_raw.assign(Price=df_forecast['Price'])
        updated_history = pd.concat([updated_history, new_training_rows]).sort_index()

        daily_forecasts.append(df_forecast)

    if not daily_forecasts:
        # Nothing to align or score; avoids a KeyError on an empty frame.
        print(f"No forecasts produced for {label}. Skipping metrics.")
        continue

    # Align forecasts with actuals
    forecast_results = pd.concat(daily_forecasts).sort_index()
    actual_test = test_data[['Price']].loc[forecast_results.index]
    results = actual_test.copy()
    results[label] = forecast_results['Price']

    # Collect forecasts
    all_forecasts = all_forecasts.join(results[[label]], how='outer')

    # Metrics
    rmse = math.sqrt(mean_squared_error(actual_test['Price'], forecast_results['Price']))
    mae = mean_absolute_error(actual_test['Price'], forecast_results['Price'])
    metrics.append({'Label': label, 'RMSE': rmse, 'MAE': mae})

# -------------------------------------------------------------
# 4. Save Combined Results
# -------------------------------------------------------------
# Single source of truth for the output file, so the final message cannot
# drift from the path that is actually written.
OUTPUT_PATH = 'Combined_LARX_Experiments_2015_2020.xlsx'

# Explicit columns keep set_index('Label') valid even when `metrics` is empty.
metrics_df = pd.DataFrame(metrics, columns=['Label', 'RMSE', 'MAE']).set_index('Label')

# One workbook with two sheets: per-timestamp forecasts and per-model errors.
with pd.ExcelWriter(OUTPUT_PATH) as writer:
    all_forecasts.to_excel(writer, sheet_name='Forecasts')
    metrics_df.to_excel(writer, sheet_name='Metrics')

# List the labels that actually produced metrics rather than a fixed set.
saved_labels = ', '.join(str(name) for name in metrics_df.index)
print(f"\nDone! Saved {OUTPUT_PATH} with {saved_labels}")


Starting experiment: LARX
Insufficient data for 2021-01-01T00:00:00.000000000. Skipping.

Starting experiment: LARX_ESM+
Insufficient data for 2021-01-01T00:00:00.000000000. Skipping.

Starting experiment: LARX_ESM
Insufficient data for 2021-01-01T00:00:00.000000000. Skipping.

Done! Saved Combined_LARX_Experiments.xlsx with LARX, LARX_ESM+, LARX_ESM
