In [1]:
# Mount Google Drive inside the Colab VM so the project folder is reachable.
from google.colab import drive
drive.mount('/content/drive')
import os
# Every relative path used later (e.g. 'data/merged-data.csv') resolves against this folder.
os.chdir('/content/drive/MyDrive/Dissertation')

Mounted at /content/drive


### Setup Imports

In [2]:
import sys
!{sys.executable} -m pip install optuna lightgbm xgboost==1.7.5 pandas numpy matplotlib seaborn scikit-learn tensorflow joblib -q

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import RobustScaler, MinMaxScaler, StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import TimeSeriesSplit
import optuna
from optuna.samplers import TPESampler
import xgboost as xgb
import lightgbm as lgb
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Bidirectional, LSTM, Dense, Dropout, Input
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.regularizers import l2
import joblib

# Set random seeds for reproducibility.
# Only the NumPy and TensorFlow global generators are seeded here; the Optuna
# samplers (TPESampler(seed=42)) and the XGBoost models (random_state=42) are
# seeded separately at the point where they are created.
np.random.seed(42)
tf.random.set_seed(42)

ERROR: Could not install packages due to an OSError: [Errno 2] No such file or directory: 'C:\\Users\\shiha\\OneDrive\\Desktop\\FinalYearProject\\FYProject\\appliance-scheduler\\ml_model\\.venv\\Lib\\site-packages\\tensorflow\\include\\tensorflow\\compiler\\xla\\mlir_hlo\\_virtual_includes\\chlo_legalize_to_hlo_inc_gen\\chlo_legalize_to_hlo\\generated_chlo_legalize_to_hlo.inc'


[notice] A new release of pip is available: 23.2.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip
  from .autonotebook import tqdm as notebook_tqdm


ModuleNotFoundError: No module named 'tensorflow.python'

#### Helper Functions

In [None]:
def load_and_preprocess_data(data):
    """Normalise a raw CSV frame into the schema used by the rest of the notebook.

    Steps, in order:
      1. Strip/lower-case the headers and rename known ones to standard names.
      2. Coerce the known numeric columns from text (thousands commas, spaces
         and en-dash minus signs are cleaned first); unparsable cells become NaN.
      3. Parse ``StartDateTime`` (``dd/mm/yyyy HH:MM``), drop rows whose
         timestamp cannot be parsed, and sort chronologically.
      4. Encode ``DayOfWeek`` as an integer with Monday=0 ... Sunday=6. Day
         names are mapped, numeric values are kept, and anything else (or a
         missing column) is derived from ``StartDateTime``.
      5. Interpolate / forward-fill / back-fill every float64 or int64 column.

    Parameters
    ----------
    data : pandas.DataFrame
        Raw frame as returned by ``pd.read_csv``. It is not modified.

    Returns
    -------
    pandas.DataFrame
        A new, chronologically sorted frame with standardised column names.

    Raises
    ------
    KeyError
        If no timestamp column ('start date/time' or 'StartDateTime') exists.
    """
    # Work on a copy so the caller's frame is not renamed/retyped as a side effect.
    data = data.copy()

    # Normalize column names
    data.columns = data.columns.str.strip().str.lower()

    # Mapping from (lower-cased) CSV column names to standard names.
    # 'startdatetime' and 'dayofweek' are what already-standardised headers
    # look like after lower-casing, so they must be mapped back as well.
    mapping = {
        'start date/time': 'StartDateTime',
        'startdatetime': 'StartDateTime',
        'temperature': 'temperature',
        'relative humidity': 'relative_humidity',
        'precipitation': 'precipitation',
        'rain': 'rain',
        'snowfall': 'snowfall',
        'weather code': 'weather_code',
        'wind speed': 'wind_speed',
        'price': 'Price',
        'grid load': 'total_consumption',
        'total generation': 'total_generation',
        'day of week': 'DayOfWeek',
        'dayofweek': 'DayOfWeek'
    }
    data = data.rename(columns={col: mapping[col] for col in data.columns if col in mapping})

    if 'StartDateTime' not in data.columns:
        raise KeyError("StartDateTime: no 'start date/time' column found in the input data")

    # Numeric columns that may be present in the frame
    potential_numeric_cols = [
        'Price', 'total_consumption', 'temperature', 'precipitation', 'rain',
        'snowfall', 'wind_speed', 'relative_humidity', 'weather_code', 'total_generation'
    ]
    numeric_cols = [col for col in potential_numeric_cols if col in data.columns]

    # Convert numeric columns from text
    for col in numeric_cols:
        cleaned = (
            data[col].astype(str)
            .str.replace(',', '', regex=False)   # thousands separators
            .str.replace(' ', '', regex=False)
            .str.replace('–', '-', regex=False)  # en dash used as a minus sign
        )
        data[col] = pd.to_numeric(cleaned, errors='coerce')

    # Parse datetime format dd/mm/yyyy HH:MM
    data['StartDateTime'] = pd.to_datetime(data['StartDateTime'], format='%d/%m/%Y %H:%M', errors='coerce')
    data = data.sort_values('StartDateTime').dropna(subset=['StartDateTime']).reset_index(drop=True)

    # Encode the weekday. The previous guard tested for a 'dayofweek' column,
    # which never exists after the rename above, so names were never mapped.
    day_map = {'monday': 0, 'tuesday': 1, 'wednesday': 2, 'thursday': 3,
               'friday': 4, 'saturday': 5, 'sunday': 6}
    calendar_dow = data['StartDateTime'].dt.dayofweek  # Monday=0, same convention as day_map
    if 'DayOfWeek' in data.columns:
        raw_dow = data['DayOfWeek']
        by_name = raw_dow.astype(str).str.strip().str.lower().map(day_map)
        by_number = pd.to_numeric(raw_dow, errors='coerce')
        # Prefer the file's own value; fall back to the calendar instead of dropping rows.
        data['DayOfWeek'] = by_name.fillna(by_number).fillna(calendar_dow)
    else:
        data['DayOfWeek'] = calendar_dow
    data['DayOfWeek'] = data['DayOfWeek'].astype('int64')

    # Interpolate numeric columns and fill missing values
    for col in data.columns:
        if data[col].dtype in [np.float64, np.int64]:
            data[col] = data[col].interpolate(method='linear', limit_direction='both')
            data[col] = data[col].ffill().bfill()

    print("NaN counts after preprocessing:", data[numeric_cols].isna().sum())
    return data

def evaluate_model(y_true, y_pred, model_name, return_metrics=False):
    """Print (and optionally return) regression metrics for a set of predictions.

    Parameters
    ----------
    y_true, y_pred : array-like
        Actual and predicted values. Both are flattened to 1-D float arrays and
        compared position by position (any pandas index is ignored).
    model_name : str
        Label used in the printed header.
    return_metrics : bool, default False
        When True, return ``(rmse, mae, r2, mape, smape)``; otherwise return None.

    Raises
    ------
    ValueError
        If the two inputs do not contain the same number of values.
    """
    # Plain 1-D arrays: avoids pandas index alignment and (n,) vs (n, 1) broadcasting,
    # both of which would silently corrupt the hand-computed MAPE/SMAPE below.
    y_true = np.asarray(y_true, dtype=float).ravel()
    y_pred = np.asarray(y_pred, dtype=float).ravel()
    if y_true.shape != y_pred.shape:
        raise ValueError(
            f"y_true and y_pred must have the same length, got {y_true.size} and {y_pred.size}"
        )

    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    mae = mean_absolute_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)

    abs_err = np.abs(y_true - y_pred)
    # Floor the *magnitude* of the actual value. Clipping the signed value turned every
    # negative price into 0.01 and inflated the percentage error by orders of magnitude.
    mape = np.mean(abs_err / np.clip(np.abs(y_true), a_min=0.01, a_max=None)) * 100
    # 1e-8 guards against 0/0 when both actual and predicted are exactly zero.
    smape = 100 / len(y_true) * np.sum(2 * abs_err / (np.abs(y_true) + np.abs(y_pred) + 1e-8))

    print(f"=== {model_name} Evaluation ===")
    print(f"RMSE: {rmse:.2f}")
    print(f"MAE: {mae:.2f}")
    print(f"R²: {r2:.4f}")
    print(f"MAPE: {mape:.2f}%")
    print(f"SMAPE: {smape:.2f}%")
    if return_metrics:
        return rmse, mae, r2, mape, smape

def plot_predictions(dates, y_true, y_pred, title):
    """Plot actual and predicted prices against time on one figure and show it.

    ``dates`` supplies the x-axis; ``y_true`` is drawn in red with 'x' markers
    and ``y_pred`` in blue with 'o' markers. ``title`` becomes the figure title.
    """
    plt.figure(figsize=(14, 7))

    # (values, legend label, colour, marker) for each curve, drawn in this order.
    curves = (
        (y_true, 'Actual', 'red', 'x'),
        (y_pred, 'Predicted', 'blue', 'o'),
    )
    for values, legend_label, colour, marker_style in curves:
        plt.plot(dates, values, label=legend_label, color=colour, marker=marker_style)

    plt.xlabel('Date and Time')
    plt.ylabel('Price [Euro/MWh]')
    plt.title(title)
    plt.legend()
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()

def engineer_features(data):
    """Return a copy of ``data`` with calendar, lag and rolling features added.

    Added columns: ``Hour``, ``Day``, ``DayOfWeek`` (numeric, Monday=0),
    ``Hour_sin``/``Hour_cos``, ``Lag_Price_1h``, ``Lag_Price_24h``,
    ``Price_RollingStd24``, ``Rolling_Temp_24h``, ``Rolling_Wind_24h`` and
    ``Rolling_Load_24h``.

    Parameters
    ----------
    data : pandas.DataFrame
        Chronologically sorted frame with a datetime ``StartDateTime`` column and
        ``temperature``, ``wind_speed`` and ``total_consumption`` columns.
        ``Price`` and ``DayOfWeek`` are optional.

    Returns
    -------
    pandas.DataFrame
        New frame; the input is left untouched.

    Notes
    -----
    All price-derived features use only prices from *earlier* rows. The rolling
    standard deviation is therefore computed on the 1-row-shifted price, so the
    row's own price (the prediction target) never leaks into its features.
    """
    df = data.copy()
    df['Hour'] = df['StartDateTime'].dt.hour
    df['Day'] = df['StartDateTime'].dt.day

    # Numeric weekday; anything missing or non-numeric (e.g. day names) is
    # taken from the timestamp, which uses the same Monday=0 convention.
    calendar_dow = df['StartDateTime'].dt.dayofweek
    if 'DayOfWeek' in df.columns:
        df['DayOfWeek'] = pd.to_numeric(df['DayOfWeek'], errors='coerce').fillna(calendar_dow)
    else:
        df['DayOfWeek'] = calendar_dow

    # Create cyclic features
    hour_angle = 2 * np.pi * df['Hour'] / 24
    df['Hour_sin'] = np.sin(hour_angle)
    df['Hour_cos'] = np.cos(hour_angle)

    # Lag features for Price
    price_feature_cols = ['Lag_Price_1h', 'Lag_Price_24h', 'Price_RollingStd24']
    if 'Price' in df.columns:
        previous_price = df['Price'].shift(1)
        df['Lag_Price_1h'] = previous_price
        df['Lag_Price_24h'] = df['Price'].shift(24)
        # Volatility of the 24 prices *before* this row (shifted to exclude the current price).
        df['Price_RollingStd24'] = previous_price.rolling(window=24, min_periods=1).std()
        # The first rows have no history; fill them from the nearest valid value.
        for col in price_feature_cols:
            df[col] = df[col].interpolate(method='linear', limit_direction='both').ffill().bfill()
    else:
        for col in price_feature_cols:
            df[col] = np.nan

    # Rolling features
    df['Rolling_Temp_24h'] = df['temperature'].rolling(window=24, min_periods=1).mean()
    df['Rolling_Wind_24h'] = df['wind_speed'].rolling(window=24, min_periods=1).mean()
    df['Rolling_Load_24h'] = df['total_consumption'].rolling(window=24, min_periods=1).mean()
    return df

#### Load and Preprocess Data

In [None]:
# Read both CSV exports first, then run each through the shared preprocessing helper.
historical_data, future_data = (
    pd.read_csv(csv_path)
    for csv_path in ('data/merged-data.csv', 'data/future-data.csv')
)
historical_data, future_data = map(load_and_preprocess_data, (historical_data, future_data))

# Restrict the future frame to the forecast window (both bounds inclusive).
forecast_start_dt = pd.to_datetime('2025-02-10 00:00:00')
forecast_end_dt = pd.to_datetime('2025-02-16 23:00:00')
future_data = future_data[
    future_data['StartDateTime'].between(forecast_start_dt, forecast_end_dt)
].reset_index(drop=True)

# Derive model features for both frames.
historical_data = engineer_features(historical_data)
future_data = engineer_features(future_data)

# Keep the historical timestamps aside: later cells select only feature columns.
dates = historical_data['StartDateTime'].copy()
print("Dates saved. First few entries:\n", dates.head())

### Feature Selection & Correlation Analysis

In [None]:
target = 'Price'
common_features = [
    'temperature',
    'precipitation',
    'relative_humidity',
    'total_consumption',
    'total_generation',
    'Hour',
    'Hour_sin',
    'Hour_cos',
    'Lag_Price_1h',
    'Lag_Price_24h',
    'Rolling_Temp_24h',
    'Rolling_Load_24h',
    'Price_RollingStd24',
]

# Sanity-check every modelling column: missing, entirely NaN, or constant.
for column_name in [*common_features, target]:
    if column_name not in historical_data.columns:
        print(f"Warning: Column {column_name} not in historical_data.")
        continue
    column_values = historical_data[column_name]
    if column_values.isna().all():
        print(f"Warning: Column {column_name} is entirely NaN.")
    elif column_values.nunique() == 1:
        print(f"Warning: Column {column_name} is constant with value {column_values.iloc[0]}.")

# Pairwise correlations between all features and the target, shown as a heatmap.
corr_matrix = historical_data[common_features + [target]].corr()
plt.figure(figsize=(12, 10))
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', fmt=".2f")
plt.title("Correlation Heatmap of Features and Target")
plt.show()

# Feature selection: keep features whose |correlation| with the target exceeds 0.1.
target_corr = corr_matrix.loc[:, target].drop(labels=target)
selected_features = target_corr[target_corr.abs().gt(0.1)].index.tolist()
print("Features with absolute correlation > 0.1 with target:", selected_features)

def drop_highly_correlated_features(df, features, threshold=0.9):
    """Split ``features`` into those to keep and those to drop as near-duplicates.

    A feature is dropped when its absolute correlation with any feature listed
    *before* it in ``features`` is greater than ``threshold``.

    Returns
    -------
    (kept, dropped) : tuple of lists
        ``kept`` preserves the order of ``features``; ``dropped`` lists the
        removed column names.
    """
    abs_corr = df[features].corr().abs()
    # Keep only the strictly-upper triangle so each pair is examined once and
    # the diagonal (self-correlation of 1.0) is ignored.
    above_diagonal = np.triu(np.ones(abs_corr.shape), k=1).astype(bool)
    pairwise = abs_corr.where(above_diagonal)

    too_similar = pairwise.gt(threshold).any(axis=0)
    to_drop = too_similar[too_similar].index.tolist()
    kept = [name for name in features if name not in to_drop]
    return kept, to_drop

# Among the features that passed the target-correlation filter, drop any that
# correlate above 0.9 with an earlier feature in the list.
selected_features, dropped_features = drop_highly_correlated_features(historical_data, selected_features, threshold=0.9)
print("Selected features after dropping highly correlated ones:", selected_features)
print("Dropped features due to high inter-correlation:", dropped_features)

# NOTE(review): the selection above is reported only. The modelling cells index
# the data with `common_features`, not `selected_features`.
print("Final feature set for modeling:", common_features)

#### Split Historical Data

In [None]:
# Keep only the modelling columns and discard rows with any missing value.
features_and_target = [*common_features, target]
historical_data = (
    historical_data.loc[:, features_and_target]
    .dropna()
    .reset_index(drop=True)
)

# Chronological 60% / 20% / 20% boundaries (train / validation / test).
n = len(historical_data)
train_end, val_end = int(0.6 * n), int(0.8 * n)

# Slice in row order so no later observation ends up in an earlier split.
train_data, val_data, test_data = (
    historical_data.iloc[start:stop].reset_index(drop=True)
    for start, stop in ((None, train_end), (train_end, val_end), (val_end, None))
)

print(f"Train: {len(train_data)} rows, Validation: {len(val_data)} rows, Test: {len(test_data)} rows")

for split_name, split_frame in (
    ("train_data", train_data),
    ("val_data", val_data),
    ("test_data", test_data),
):
    print(f"Missing values in {split_name}:", split_frame.isna().sum().sum())

#### XGBoost

In [None]:
# Training pool = train + validation rows, minus the first 24 rows
# (presumably because their 24h lag feature is only back-filled; confirm).
train_val = (
    pd.concat([train_data, val_data])
    .iloc[24:]
    .reset_index(drop=True)
)
y = train_val[target].copy()
X = train_val[common_features].copy().ffill().bfill().fillna(0)

# Scale features: the scaler is fitted on the training pool only and reused for the test set.
scaler_xgb = RobustScaler()
X_scaled = scaler_xgb.fit(X).transform(X)

def objective_xgb(trial):
    """Optuna objective: mean cross-validated MSE of an XGBoost regressor.

    Samples one hyper-parameter configuration from ``trial``, fits a fresh
    ``XGBRegressor`` on each of the 5 ``TimeSeriesSplit`` folds of the
    module-level ``X_scaled`` / ``y``, and returns the average validation MSE
    (lower is better).
    """
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 50, 200),
        'max_depth': trial.suggest_int('max_depth', 3, 6),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.1),
        'subsample': trial.suggest_float('subsample', 0.7, 0.9),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.7, 0.9),
        'gamma': trial.suggest_float('gamma', 0.1, 1.0),
        'min_child_weight': trial.suggest_int('min_child_weight', 3, 10),
        'reg_alpha': trial.suggest_float('reg_alpha', 1, 10),
        'reg_lambda': trial.suggest_float('reg_lambda', 1, 10),
        # Fixed (not tuned) settings
        'random_state': 42,
        'objective': 'reg:squarederror',
        'eval_metric': 'rmse'
    }
    tscv = TimeSeriesSplit(n_splits=5)
    scores = []
    for train_idx, val_idx in tscv.split(X_scaled):
        # X_scaled is a NumPy array (positional indexing); y is a pandas Series, hence .iloc
        X_train, X_val = X_scaled[train_idx], X_scaled[val_idx]
        y_train, y_val = y.iloc[train_idx], y.iloc[val_idx]
        model = xgb.XGBRegressor(**params)
        # eval_set is only monitored: no early stopping is configured, so all n_estimators trees are built
        model.fit(X_train, y_train, eval_set=[(X_val, y_val)], verbose=False)
        y_pred = model.predict(X_val)
        scores.append(mean_squared_error(y_val, y_pred))
    return np.mean(scores)

# Search 20 configurations with a seeded TPE sampler, minimising the CV MSE returned by objective_xgb.
study_xgb = optuna.create_study(direction='minimize', sampler=TPESampler(seed=42))
study_xgb.optimize(objective_xgb, n_trials=20)
best_params_xgb = study_xgb.best_params
print("Best parameters for XGBoost:", best_params_xgb)

# Refit once on the whole train+validation pool with the best sampled parameters.
model_xgb = xgb.XGBRegressor(**best_params_xgb, random_state=42)
model_xgb.fit(X_scaled, y)

# Evaluate on test set (scaled with the scaler fitted on the training pool)
X_test = test_data[common_features].copy().ffill().bfill().fillna(0)
y_test = test_data[target]
X_test_scaled = scaler_xgb.transform(X_test)
y_pred_xgb = model_xgb.predict(X_test_scaled)
evaluate_model(y_test, y_pred_xgb, "XGBoost (Test)")

# Define test_dates_xgb
# NOTE(review): `dates` was saved before the dropna() in the split cell, so these
# positions match test_data only if that dropna() removed no rows.
test_dates_xgb = dates.iloc[val_end:].reset_index(drop=True)

# Plot actual vs predicted prices for the final three months of the test period
last_date_xgb = test_dates_xgb.max()
three_months_prior_xgb = last_date_xgb - pd.DateOffset(months=3)
# Position of the first test timestamp at or after the cut-off (test dates are in ascending order)
start_idx_xgb = test_dates_xgb.searchsorted(three_months_prior_xgb, side='left')
test_dates_last_3m_xgb = test_dates_xgb.iloc[start_idx_xgb:]
y_test_last_3m_xgb = y_test.iloc[start_idx_xgb:]
y_pred_last_3m_xgb = y_pred_xgb[start_idx_xgb:]
plot_predictions(test_dates_last_3m_xgb, y_test_last_3m_xgb, y_pred_last_3m_xgb, "XGBoost: Actual vs Predicted Prices (Last 3 Months of Test)")

# Persist the XGBoost model and its scaler (the author reports it as the best-performing model).
# Files are written relative to the current working directory.
joblib.dump(model_xgb, 'xgb_model.pkl')
joblib.dump(scaler_xgb, 'scaler_xgb.pkl')
print("XGBoost model and scaler saved successfully.")

#### LSTM

In [None]:
# Features/target for the LSTM, skipping the first 24 rows of the historical frame.
X_lstm = historical_data[common_features].iloc[24:].copy()
# Select the target by the same index labels so X and y stay row-aligned.
y_lstm = historical_data.loc[X_lstm.index, target].copy()
X_lstm = X_lstm.ffill().bfill().fillna(0)  # Handle missing values

# Scale features with StandardScaler
# NOTE(review): both scalers are fitted on the whole series, including the rows
# that are later sliced off as the LSTM test set.
scaler_X_lstm = StandardScaler()
X_lstm_scaled = scaler_X_lstm.fit_transform(X_lstm)

# The target gets its own scaler so predictions can be inverse-transformed back to price units.
scaler_y_lstm = StandardScaler()
y_lstm_scaled = scaler_y_lstm.fit_transform(y_lstm.values.reshape(-1, 1)).flatten()

# Create sequences
time_steps = 24  # number of consecutive rows fed to the network per sample
def create_sequences(X, y, time_steps):
    """Turn aligned series into (window, next-value) training pairs.

    For each start position ``s`` the sample is ``X[s : s + time_steps]`` and
    its label is ``y[s + time_steps]``, i.e. the value right after the window.
    Returns two NumPy arrays with ``len(X) - time_steps`` entries each (empty
    when the input is not longer than ``time_steps``).
    """
    window_starts = range(len(X) - time_steps)
    windows = [X[start:start + time_steps] for start in window_starts]
    labels = [y[start + time_steps] for start in window_starts]
    return np.array(windows), np.array(labels)

# Build the (window, next value) samples from the scaled arrays.
X_seq, y_seq = create_sequences(X_lstm_scaled, y_lstm_scaled, time_steps)
# Keep only samples with no NaN anywhere in the window (axes 1 and 2) and a non-NaN label.
mask = ~np.isnan(X_seq).any(axis=(1, 2)) & ~np.isnan(y_seq)
X_seq = X_seq[mask]
y_seq = y_seq[mask]

# Hyperparameter tuning
def objective_lstm(trial):
    """Optuna objective: best validation loss of a stacked bidirectional LSTM.

    Samples the two LSTM layer widths, the dropout rate and the learning rate
    from ``trial``, trains the network on the module-level ``X_seq`` / ``y_seq``
    with 10% of the samples held out for validation, and returns the lowest
    ``val_loss`` seen during training (lower is better).
    """
    lstm_units_1 = trial.suggest_int('lstm_units_1', 16, 64)
    lstm_units_2 = trial.suggest_int('lstm_units_2', 8, 32)
    dropout_rate = trial.suggest_float('dropout_rate', 0.2, 0.5)
    learning_rate = trial.suggest_float('learning_rate', 1e-4, 1e-2, log=True)

    # Build model
    # Same layer stack as build_final_lstm_model: BiLSTM -> Dropout -> BiLSTM -> Dropout -> Dense(16) -> Dense(1)
    inputs = Input(shape=(time_steps, len(common_features)))
    x = Bidirectional(LSTM(lstm_units_1, return_sequences=True, kernel_regularizer=l2(0.01)))(inputs)
    x = Dropout(dropout_rate)(x)
    x = Bidirectional(LSTM(lstm_units_2, return_sequences=False, kernel_regularizer=l2(0.01)))(x)
    x = Dropout(dropout_rate)(x)
    x = Dense(16, activation='relu', kernel_regularizer=l2(0.01))(x)
    outputs = Dense(1)(x)
    model = Model(inputs, outputs)
    model.compile(optimizer=Adam(learning_rate=learning_rate, clipnorm=1.0), loss='mean_squared_error')

    # Stop after 30 epochs without val_loss improvement; halve the learning rate after 15.
    early_stop = EarlyStopping(monitor='val_loss', patience=30, restore_best_weights=True)
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=15, min_lr=1e-6)

    # Train model
    # NOTE(review): X_seq covers the full history, so the tuning sees the rows later reported as "test".
    history = model.fit(
        X_seq, y_seq,
        epochs=150,
        batch_size=32,
        validation_split=0.1,
        callbacks=[early_stop, reduce_lr],
        verbose=0
    )

    # Return validation loss
    return min(history.history['val_loss'])

# Search 20 configurations with a seeded TPE sampler, minimising the validation loss returned by objective_lstm.
# Each trial trains a network for up to 150 epochs, so this is the slow part of the cell.
study_lstm = optuna.create_study(direction='minimize', sampler=TPESampler(seed=42))
study_lstm.optimize(objective_lstm, n_trials=20)
best_params_lstm = study_lstm.best_params
print("Best parameters for LSTM:", best_params_lstm)

# Build final LSTM model
def build_final_lstm_model(time_steps, n_features, params):
    """Build and compile the stacked bidirectional-LSTM regressor.

    Architecture: two (BiLSTM -> Dropout) stages, the first returning the full
    sequence and the second only its final state, followed by Dense(16, relu)
    and a single linear output unit. Recurrent and dense kernels carry an L2
    penalty of 0.01.

    ``params`` must provide 'lstm_units_1', 'lstm_units_2', 'dropout_rate' and
    'learning_rate'. Returns the compiled Keras ``Model`` (MSE loss, Adam with
    gradient-norm clipping at 1.0).
    """
    net_input = Input(shape=(time_steps, n_features))

    hidden = net_input
    # (params key holding the layer width, whether the layer emits the whole sequence)
    recurrent_stages = (('lstm_units_1', True), ('lstm_units_2', False))
    for units_key, emit_sequence in recurrent_stages:
        recurrent = LSTM(params[units_key], return_sequences=emit_sequence, kernel_regularizer=l2(0.01))
        hidden = Bidirectional(recurrent)(hidden)
        hidden = Dropout(params['dropout_rate'])(hidden)

    hidden = Dense(16, activation='relu', kernel_regularizer=l2(0.01))(hidden)
    prediction = Dense(1)(hidden)

    model = Model(net_input, prediction)
    optimizer = Adam(learning_rate=params['learning_rate'], clipnorm=1.0)
    model.compile(optimizer=optimizer, loss='mean_squared_error')
    return model

model_lstm = build_final_lstm_model(time_steps, len(common_features), best_params_lstm)

early_stop = EarlyStopping(monitor='val_loss', patience=30, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=15, min_lr=1e-6)

# Chronological hold-out: the most recent 20% of the sequences are reserved for
# testing and are NOT passed to fit(). Previously the model was fitted on all of
# X_seq and then "tested" on its own last 20%, so the reported test metrics were
# computed on training/validation samples.
train_size = int(len(X_seq) * 0.8)
X_train_lstm, y_train_lstm_scaled = X_seq[:train_size], y_seq[:train_size]
X_test_lstm, y_test_lstm_scaled = X_seq[train_size:], y_seq[train_size:]

history_lstm = model_lstm.fit(
    X_train_lstm, y_train_lstm_scaled,
    epochs=150,
    batch_size=32,
    # Keras takes the validation samples from the END of the arrays passed in,
    # i.e. the most recent 10% of the training slice; the test slice is untouched.
    validation_split=0.1,
    callbacks=[early_stop, reduce_lr],
    verbose=1
)

# Evaluate on test data (inverse-transform back to price units first)
# NOTE(review): the scalers and the Optuna search above were still fitted on the
# full series, so a small amount of look-ahead remains in this estimate.
y_pred_lstm_scaled = model_lstm.predict(X_test_lstm).flatten()
y_pred_lstm = scaler_y_lstm.inverse_transform(y_pred_lstm_scaled.reshape(-1, 1)).flatten()
y_test_lstm = scaler_y_lstm.inverse_transform(y_test_lstm_scaled.reshape(-1, 1)).flatten()
evaluate_model(y_test_lstm, y_pred_lstm, "LSTM (Test)")

# Sequence j predicts historical row 24 (rows skipped up front) + time_steps + j.
start_test_idx = 24 + time_steps + train_size
end_test_idx = 24 + time_steps + len(X_seq)
test_dates_lstm = dates.iloc[start_test_idx:end_test_idx].reset_index(drop=True)

# Plot test predictions for the final three months of the test period
last_date_lstm = test_dates_lstm.max()
three_months_prior_lstm = last_date_lstm - pd.DateOffset(months=3)
start_idx_lstm = test_dates_lstm.searchsorted(three_months_prior_lstm, side='left')
test_dates_last_3m_lstm = test_dates_lstm.iloc[start_idx_lstm:]
y_test_last_3m_lstm = y_test_lstm[start_idx_lstm:]
y_pred_last_3m_lstm = y_pred_lstm[start_idx_lstm:]
plot_predictions(test_dates_last_3m_lstm, y_test_last_3m_lstm, y_pred_last_3m_lstm, "LSTM: Actual vs Predicted Prices (Last 3 Months of Test)")

#### Hybrid Model (XGB + LSTM)

In [None]:
# Score the whole history with the trained LSTM, reusing the LSTM's feature scaler.
X_full = historical_data[common_features].copy()
X_full = X_full.ffill().bfill().fillna(0)
X_full_scaled = scaler_X_lstm.transform(X_full)
# One window per row i >= time_steps, made of the time_steps rows that precede row i.
X_full_sequences = np.array([
    X_full_scaled[i - time_steps:i]
    for i in range(time_steps, len(X_full_scaled))
])

# Generate predictions and inverse transform to original scale
lstm_preds_scaled = model_lstm.predict(X_full_sequences, verbose=0).flatten()
lstm_preds = scaler_y_lstm.inverse_transform(lstm_preds_scaled.reshape(-1, 1)).flatten()

# Merge LSTM predictions back into historical data
# reset_index(drop=False) keeps the previous index as an extra column.
df_lstm = historical_data.reset_index(drop=False)
df_lstm['global_idx'] = df_lstm.index
# Predictions exist only from row `time_steps` onward, hence the offset in the key.
df_preds = pd.DataFrame({
    'global_idx': df_lstm.index[time_steps:],
    'LSTM_pred': lstm_preds
})

df_merged = pd.merge(df_lstm, df_preds, on='global_idx', how='left')
# The first `time_steps` rows have no prediction and nothing to forward-fill from, so they become 0.
df_merged['LSTM_pred'] = df_merged['LSTM_pred'].ffill().fillna(0)
df_merged = df_merged.dropna(subset=[target])

# Create hybrid features
# NOTE(review): LSTM_pred comes from a model fitted on (part of) this same history,
# so it is an in-sample prediction for those rows.
hybrid_features = common_features + ['LSTM_pred']
# The target is modelled as log1p(price); prices below 0.01 (including negatives) are raised to 0.01 first.
y_hybrid_log = np.log1p(df_merged[target].clip(lower=0.01))
X_hybrid_full = df_merged[hybrid_features].copy()

# Scale hybrid features
scaler_hybrid = RobustScaler()
X_hybrid_full_scaled = scaler_hybrid.fit_transform(X_hybrid_full)

# Set up TimeSeriesSplit and Optuna tuning
tscv_hybrid = TimeSeriesSplit(n_splits=5)

def objective_hybrid(trial):
    """Optuna objective: mean cross-validated MSE (in log1p space) of the hybrid XGBoost model.

    Samples one hyper-parameter configuration from ``trial``, fits a fresh
    ``XGBRegressor`` on each ``tscv_hybrid`` fold of the module-level
    ``X_hybrid_full_scaled`` / ``y_hybrid_log``, and returns the average
    validation MSE between the log-transformed target and prediction.
    """
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 100, 500),
        'max_depth': trial.suggest_int('max_depth', 3, 12),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
        'gamma': trial.suggest_float('gamma', 0, 0.5),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 7),
        'reg_alpha': trial.suggest_float('reg_alpha', 0, 20),
        'reg_lambda': trial.suggest_float('reg_lambda', 0, 20),
        # Fixed (not tuned) settings
        'random_state': 42,
        'objective': 'reg:squarederror',
        'eval_metric': 'rmse'
    }
    scores = []
    for train_idx, val_idx in tscv_hybrid.split(X_hybrid_full_scaled):
        # Features are a NumPy array (positional indexing); the target is a pandas Series, hence .iloc
        X_train, X_val = X_hybrid_full_scaled[train_idx], X_hybrid_full_scaled[val_idx]
        y_train, y_val = y_hybrid_log.iloc[train_idx], y_hybrid_log.iloc[val_idx]
        model = xgb.XGBRegressor(**params)
        # eval_set is only monitored: no early stopping is configured here
        model.fit(X_train, y_train, eval_set=[(X_val, y_val)], verbose=False)
        y_pred_log = model.predict(X_val)
        # Scored in log space, i.e. without np.expm1 back-transformation
        scores.append(mean_squared_error(y_val, y_pred_log))
    return np.mean(scores)

# Run Optuna optimization
study_hybrid = optuna.create_study(direction='minimize', sampler=TPESampler(seed=42))
study_hybrid.optimize(objective_hybrid, n_trials=10)
best_params_hybrid = study_hybrid.best_params
print("Best parameters for Hybrid Model:", best_params_hybrid)

# Hold-out = the last TimeSeriesSplit fold (its test indices are the most recent rows).
train_idx_hyb, test_idx_hyb = list(tscv_hybrid.split(X_hybrid_full_scaled))[-1]
X_train_hyb, X_test_hyb = X_hybrid_full_scaled[train_idx_hyb], X_hybrid_full_scaled[test_idx_hyb]
y_train_hyb_log, y_test_hyb_log = y_hybrid_log.iloc[train_idx_hyb], y_hybrid_log.iloc[test_idx_hyb]

# Evaluation model: fitted on the training part of that fold ONLY. Previously the
# model was fitted on every row and then scored on the last fold, i.e. on data it
# had already been trained on, which made the reported metrics in-sample.
# NOTE(review): the Optuna search above still used this fold as one of its
# validation folds, so the hyper-parameters are not fully independent of it.
model_hybrid_eval = xgb.XGBRegressor(**best_params_hybrid, random_state=42)
model_hybrid_eval.fit(X_train_hyb, y_train_hyb_log)

# Evaluate on test set (back-transform from log1p space to price units)
y_pred_hyb_log = model_hybrid_eval.predict(X_test_hyb)
y_test_hyb = np.expm1(y_test_hyb_log)
y_pred_hyb = np.expm1(y_pred_hyb_log)

evaluate_model(y_test_hyb, y_pred_hyb, "Hybrid (XGBoost + LSTM) (Historical)")
test_dates_hybrid = dates.iloc[test_idx_hyb].reset_index(drop=True)

# Final model for downstream use: refit on all available rows with the tuned
# parameters. The former early stopping monitored the training data itself, which
# cannot detect over-fitting, so it is dropped; this also matches how the
# parameters were tuned (every tree built, no early stopping).
model_hybrid = xgb.XGBRegressor(**best_params_hybrid, random_state=42)
model_hybrid.fit(X_hybrid_full_scaled, y_hybrid_log)

# Plot actual vs predicted prices for the final three months of the hold-out
last_date_hyb = test_dates_hybrid.max()
three_months_prior_hyb = last_date_hyb - pd.DateOffset(months=3)
start_idx_hyb = test_dates_hybrid.searchsorted(three_months_prior_hyb, side='left')
test_dates_last_3m_hyb = test_dates_hybrid.iloc[start_idx_hyb:]
y_test_last_3m_hyb = y_test_hyb[start_idx_hyb:]
y_pred_last_3m_hyb = y_pred_hyb[start_idx_hyb:]
plot_predictions(test_dates_last_3m_hyb, y_test_last_3m_hyb, y_pred_last_3m_hyb, "Hybrid (XGBoost + LSTM): Actual vs Predicted Prices (Last 3 Months of Test)")

#### Future Forecasting

In [None]:
# Actual prices for testing
actual_data = [
    {"start date/time": "2025-02-10 00:00:00", "actual_price": 118.58},
    {"start date/time": "2025-02-10 01:00:00", "actual_price": 111.82},
    {"start date/time": "2025-02-10 02:00:00", "actual_price": 109.22},
    {"start date/time": "2025-02-10 03:00:00", "actual_price": 106.29},
    {"start date/time": "2025-02-10 04:00:00", "actual_price": 105.85},
    {"start date/time": "2025-02-10 05:00:00", "actual_price": 112.00},
    {"start date/time": "2025-02-10 06:00:00", "actual_price": 145.38},
    {"start date/time": "2025-02-10 07:00:00", "actual_price": 162.02},
    {"start date/time": "2025-02-10 08:00:00", "actual_price": 181.36},
    {"start date/time": "2025-02-10 09:00:00", "actual_price": 163.18},
    {"start date/time": "2025-02-10 10:00:00", "actual_price": 135.90},
    {"start date/time": "2025-02-10 11:00:00", "actual_price": 115.28},
    {"start date/time": "2025-02-10 12:00:00", "actual_price": 109.36},
    {"start date/time": "2025-02-10 13:00:00", "actual_price": 109.74},
    {"start date/time": "2025-02-10 14:00:00", "actual_price": 123.07},
    {"start date/time": "2025-02-10 15:00:00", "actual_price": 128.54},
    {"start date/time": "2025-02-10 16:00:00", "actual_price": 133.58},
    {"start date/time": "2025-02-10 17:00:00", "actual_price": 133.82},
    {"start date/time": "2025-02-10 18:00:00", "actual_price": 142.11},
    {"start date/time": "2025-02-10 19:00:00", "actual_price": 114.85},
    {"start date/time": "2025-02-10 20:00:00", "actual_price": 94.37},
    {"start date/time": "2025-02-10 21:00:00", "actual_price": 91.48},
    {"start date/time": "2025-02-10 22:00:00", "actual_price": 88.87},
    {"start date/time": "2025-02-10 23:00:00", "actual_price": 87.38},
    {"start date/time": "2025-02-11 00:00:00", "actual_price": 77.45},
    {"start date/time": "2025-02-11 01:00:00", "actual_price": 71.10},
    {"start date/time": "2025-02-11 02:00:00", "actual_price": 71.62},
    {"start date/time": "2025-02-11 03:00:00", "actual_price": 74.89},
    {"start date/time": "2025-02-11 04:00:00", "actual_price": 77.45},
    {"start date/time": "2025-02-11 05:00:00", "actual_price": 87.44},
    {"start date/time": "2025-02-11 06:00:00", "actual_price": 100.46},
    {"start date/time": "2025-02-11 07:00:00", "actual_price": 134.93},
    {"start date/time": "2025-02-11 08:00:00", "actual_price": 155.53},
    {"start date/time": "2025-02-11 09:00:00", "actual_price": 141.42},
    {"start date/time": "2025-02-11 10:00:00", "actual_price": 96.65},
    {"start date/time": "2025-02-11 11:00:00", "actual_price": 92.78},
    {"start date/time": "2025-02-11 12:00:00", "actual_price": 97.68},
    {"start date/time": "2025-02-11 13:00:00", "actual_price": 111.11},
    {"start date/time": "2025-02-11 14:00:00", "actual_price": 123.46},
    {"start date/time": "2025-02-11 15:00:00", "actual_price": 143.69},
    {"start date/time": "2025-02-11 16:00:00", "actual_price": 144.47},
    {"start date/time": "2025-02-11 17:00:00", "actual_price": 159.97},
    {"start date/time": "2025-02-11 18:00:00", "actual_price": 168.19},
    {"start date/time": "2025-02-11 19:00:00", "actual_price": 167.45},
    {"start date/time": "2025-02-11 20:00:00", "actual_price": 162.63},
    {"start date/time": "2025-02-11 21:00:00", "actual_price": 148.75},
    {"start date/time": "2025-02-11 22:00:00", "actual_price": 142.95},
    {"start date/time": "2025-02-11 23:00:00", "actual_price": 128.24},
    {"start date/time": "2025-02-12 00:00:00", "actual_price": 129.13},
    {"start date/time": "2025-02-12 01:00:00", "actual_price": 127.77},
    {"start date/time": "2025-02-12 02:00:00", "actual_price": 126.82},
    {"start date/time": "2025-02-12 03:00:00", "actual_price": 126.59},
    {"start date/time": "2025-02-12 04:00:00", "actual_price": 128.74},
    {"start date/time": "2025-02-12 05:00:00", "actual_price": 131.24},
    {"start date/time": "2025-02-12 06:00:00", "actual_price": 151.13},
    {"start date/time": "2025-02-12 07:00:00", "actual_price": 163.64},
    {"start date/time": "2025-02-12 08:00:00", "actual_price": 180.00},
    {"start date/time": "2025-02-12 09:00:00", "actual_price": 174.92},
    {"start date/time": "2025-02-12 10:00:00", "actual_price": 162.05},
    {"start date/time": "2025-02-12 11:00:00", "actual_price": 144.01},
    {"start date/time": "2025-02-12 12:00:00", "actual_price": 137.28},
    {"start date/time": "2025-02-12 13:00:00", "actual_price": 142.03},
    {"start date/time": "2025-02-12 14:00:00", "actual_price": 149.40},
    {"start date/time": "2025-02-12 15:00:00", "actual_price": 159.67},
    {"start date/time": "2025-02-12 16:00:00", "actual_price": 173.21},
    {"start date/time": "2025-02-12 17:00:00", "actual_price": 184.59},
    {"start date/time": "2025-02-12 18:00:00", "actual_price": 197.13},
    {"start date/time": "2025-02-12 19:00:00", "actual_price": 192.57},
    {"start date/time": "2025-02-12 20:00:00", "actual_price": 173.90},
    {"start date/time": "2025-02-12 21:00:00", "actual_price": 155.78},
    {"start date/time": "2025-02-12 22:00:00", "actual_price": 150.38},
    {"start date/time": "2025-02-12 23:00:00", "actual_price": 141.81},
    {"start date/time": "2025-02-13 00:00:00", "actual_price": 140.33},
    {"start date/time": "2025-02-13 01:00:00", "actual_price": 137.75},
    {"start date/time": "2025-02-13 02:00:00", "actual_price": 140.79},
    {"start date/time": "2025-02-13 03:00:00", "actual_price": 136.59},
    {"start date/time": "2025-02-13 04:00:00", "actual_price": 140.00},
    {"start date/time": "2025-02-13 05:00:00", "actual_price": 142.00},
    {"start date/time": "2025-02-13 06:00:00", "actual_price": 159.99},
    {"start date/time": "2025-02-13 07:00:00", "actual_price": 190.33},
    {"start date/time": "2025-02-13 08:00:00", "actual_price": 222.36},
    {"start date/time": "2025-02-13 09:00:00", "actual_price": 220.05},
    {"start date/time": "2025-02-13 10:00:00", "actual_price": 203.89},
    {"start date/time": "2025-02-13 11:00:00", "actual_price": 184.68},
    {"start date/time": "2025-02-13 12:00:00", "actual_price": 174.52},
    {"start date/time": "2025-02-13 13:00:00", "actual_price": 170.10},
    {"start date/time": "2025-02-13 14:00:00", "actual_price": 166.70},
    {"start date/time": "2025-02-13 15:00:00", "actual_price": 170.10},
    {"start date/time": "2025-02-13 16:00:00", "actual_price": 182.00},
    {"start date/time": "2025-02-13 17:00:00", "actual_price": 213.05},
    {"start date/time": "2025-02-13 18:00:00", "actual_price": 207.17},
    {"start date/time": "2025-02-13 19:00:00", "actual_price": 196.52},
    {"start date/time": "2025-02-13 20:00:00", "actual_price": 177.03},
    {"start date/time": "2025-02-13 21:00:00", "actual_price": 162.92},
    {"start date/time": "2025-02-13 22:00:00", "actual_price": 153.12},
    {"start date/time": "2025-02-13 23:00:00", "actual_price": 138.97},
    {"start date/time": "2025-02-14 00:00:00", "actual_price": 139.21},
    {"start date/time": "2025-02-14 01:00:00", "actual_price": 132.80},
    {"start date/time": "2025-02-14 02:00:00", "actual_price": 128.50},
    {"start date/time": "2025-02-14 03:00:00", "actual_price": 127.65},
    {"start date/time": "2025-02-14 04:00:00", "actual_price": 129.58},
    {"start date/time": "2025-02-14 05:00:00", "actual_price": 139.59},
    {"start date/time": "2025-02-14 06:00:00", "actual_price": 162.89},
    {"start date/time": "2025-02-14 07:00:00", "actual_price": 227.08},
    {"start date/time": "2025-02-14 08:00:00", "actual_price": 265.64},
    {"start date/time": "2025-02-14 09:00:00", "actual_price": 215.11},
    {"start date/time": "2025-02-14 10:00:00", "actual_price": 181.03},
    {"start date/time": "2025-02-14 11:00:00", "actual_price": 163.01},
    {"start date/time": "2025-02-14 12:00:00", "actual_price": 149.86},
    {"start date/time": "2025-02-14 13:00:00", "actual_price": 148.61},
    {"start date/time": "2025-02-14 14:00:00", "actual_price": 155.80},
    {"start date/time": "2025-02-14 15:00:00", "actual_price": 175.06},
    {"start date/time": "2025-02-14 16:00:00", "actual_price": 200.00},
    {"start date/time": "2025-02-14 17:00:00", "actual_price": 298.91},
    {"start date/time": "2025-02-14 18:00:00", "actual_price": 293.06},
    {"start date/time": "2025-02-14 19:00:00", "actual_price": 225.70},
    {"start date/time": "2025-02-14 20:00:00", "actual_price": 183.55},
    {"start date/time": "2025-02-14 21:00:00", "actual_price": 163.79},
    {"start date/time": "2025-02-14 22:00:00", "actual_price": 153.00},
    {"start date/time": "2025-02-14 23:00:00", "actual_price": 140.03},
    {"start date/time": "2025-02-15 00:00:00", "actual_price": 135.29},
    {"start date/time": "2025-02-15 01:00:00", "actual_price": 130.34},
    {"start date/time": "2025-02-15 02:00:00", "actual_price": 120.00},
    {"start date/time": "2025-02-15 03:00:00", "actual_price": 118.24},
    {"start date/time": "2025-02-15 04:00:00", "actual_price": 118.45},
    {"start date/time": "2025-02-15 05:00:00", "actual_price": 120.37},
    {"start date/time": "2025-02-15 06:00:00", "actual_price": 133.74},
    {"start date/time": "2025-02-15 07:00:00", "actual_price": 148.58},
    {"start date/time": "2025-02-15 08:00:00", "actual_price": 150.67},
    {"start date/time": "2025-02-15 09:00:00", "actual_price": 137.01},
    {"start date/time": "2025-02-15 10:00:00", "actual_price": 124.59},
    {"start date/time": "2025-02-15 11:00:00", "actual_price": 118.36},
    {"start date/time": "2025-02-15 12:00:00", "actual_price": 112.39},
    {"start date/time": "2025-02-15 13:00:00", "actual_price": 110.01},
    {"start date/time": "2025-02-15 14:00:00", "actual_price": 116.15},
    {"start date/time": "2025-02-15 15:00:00", "actual_price": 128.83},
    {"start date/time": "2025-02-15 16:00:00", "actual_price": 145.89},
    {"start date/time": "2025-02-15 17:00:00", "actual_price": 164.54},
    {"start date/time": "2025-02-15 18:00:00", "actual_price": 174.70},
    {"start date/time": "2025-02-15 19:00:00", "actual_price": 164.76},
    {"start date/time": "2025-02-15 20:00:00", "actual_price": 149.65},
    {"start date/time": "2025-02-15 21:00:00", "actual_price": 128.31},
    {"start date/time": "2025-02-15 22:00:00", "actual_price": 132.37},
    {"start date/time": "2025-02-15 23:00:00", "actual_price": 124.54},
    {"start date/time": "2025-02-16 00:00:00", "actual_price": 134.11},
    {"start date/time": "2025-02-16 01:00:00", "actual_price": 128.90},
    {"start date/time": "2025-02-16 02:00:00", "actual_price": 123.57},
    {"start date/time": "2025-02-16 03:00:00", "actual_price": 119.90},
    {"start date/time": "2025-02-16 04:00:00", "actual_price": 119.57},
    {"start date/time": "2025-02-16 05:00:00", "actual_price": 120.18},
    {"start date/time": "2025-02-16 06:00:00", "actual_price": 119.92},
    {"start date/time": "2025-02-16 07:00:00", "actual_price": 127.51},
    {"start date/time": "2025-02-16 08:00:00", "actual_price": 129.60},
    {"start date/time": "2025-02-16 09:00:00", "actual_price": 130.49},
    {"start date/time": "2025-02-16 10:00:00", "actual_price": 127.70},
    {"start date/time": "2025-02-16 11:00:00", "actual_price": 124.03},
    {"start date/time": "2025-02-16 12:00:00", "actual_price": 124.67},
    {"start date/time": "2025-02-16 13:00:00", "actual_price": 118.77},
    {"start date/time": "2025-02-16 14:00:00", "actual_price": 121.02},
    {"start date/time": "2025-02-16 15:00:00", "actual_price": 129.55},
    {"start date/time": "2025-02-16 16:00:00", "actual_price": 137.68},
    {"start date/time": "2025-02-16 17:00:00", "actual_price": 153.98},
    {"start date/time": "2025-02-16 18:00:00", "actual_price": 169.98},
    {"start date/time": "2025-02-16 19:00:00", "actual_price": 160.29},
    {"start date/time": "2025-02-16 20:00:00", "actual_price": 153.72},
    {"start date/time": "2025-02-16 21:00:00", "actual_price": 139.34},
    {"start date/time": "2025-02-16 22:00:00", "actual_price": 137.62},
    {"start date/time": "2025-02-16 23:00:00", "actual_price": 131.51},
    {"start date/time": "2025-02-17 00:00:00", "actual_price": 128.05},
    {"start date/time": "2025-02-17 01:00:00", "actual_price": 121.06},
    {"start date/time": "2025-02-17 02:00:00", "actual_price": 120.34},
    {"start date/time": "2025-02-17 03:00:00", "actual_price": 125.52},
    {"start date/time": "2025-02-17 04:00:00", "actual_price": 128.94},
    {"start date/time": "2025-02-17 05:00:00", "actual_price": 131.54},
    {"start date/time": "2025-02-17 06:00:00", "actual_price": 153.16},
    {"start date/time": "2025-02-17 07:00:00", "actual_price": 204.74},
    {"start date/time": "2025-02-17 08:00:00", "actual_price": 209.79},
    {"start date/time": "2025-02-17 09:00:00", "actual_price": 171.30},
    {"start date/time": "2025-02-17 10:00:00", "actual_price": 139.67},
    {"start date/time": "2025-02-17 11:00:00", "actual_price": 117.19},
    {"start date/time": "2025-02-17 12:00:00", "actual_price": 109.47},
    {"start date/time": "2025-02-17 13:00:00", "actual_price": 106.78},
    {"start date/time": "2025-02-17 14:00:00", "actual_price": 111.89},
    {"start date/time": "2025-02-17 15:00:00", "actual_price": 132.27},
    {"start date/time": "2025-02-17 16:00:00", "actual_price": 158.96},
    {"start date/time": "2025-02-17 17:00:00", "actual_price": 222.92},
    {"start date/time": "2025-02-17 18:00:00", "actual_price": 271.00},
    {"start date/time": "2025-02-17 19:00:00", "actual_price": 215.36},
    {"start date/time": "2025-02-17 20:00:00", "actual_price": 177.86},
    {"start date/time": "2025-02-17 21:00:00", "actual_price": 155.19},
    {"start date/time": "2025-02-17 22:00:00", "actual_price": 139.39},
    {"start date/time": "2025-02-17 23:00:00", "actual_price": 129.57},
    {"start date/time": "2025-02-18 00:00:00", "actual_price": 132.95},
    {"start date/time": "2025-02-18 01:00:00", "actual_price": 125.00},
    {"start date/time": "2025-02-18 02:00:00", "actual_price": 123.56},
    {"start date/time": "2025-02-18 03:00:00", "actual_price": 117.11},
    {"start date/time": "2025-02-18 04:00:00", "actual_price": 115.66},
    {"start date/time": "2025-02-18 05:00:00", "actual_price": 121.93},
    {"start date/time": "2025-02-18 06:00:00", "actual_price": 143.61},
    {"start date/time": "2025-02-18 07:00:00", "actual_price": 190.34},
    {"start date/time": "2025-02-18 08:00:00", "actual_price": 193.80},
    {"start date/time": "2025-02-18 09:00:00", "actual_price": 143.53},
    {"start date/time": "2025-02-18 10:00:00", "actual_price": 124.24},
    {"start date/time": "2025-02-18 11:00:00", "actual_price": 109.71},
    {"start date/time": "2025-02-18 12:00:00", "actual_price": 96.41},
    {"start date/time": "2025-02-18 13:00:00", "actual_price": 88.53},
    {"start date/time": "2025-02-18 14:00:00", "actual_price": 98.35},
    {"start date/time": "2025-02-18 15:00:00", "actual_price": 110.00},
    {"start date/time": "2025-02-18 16:00:00", "actual_price": 128.16},
    {"start date/time": "2025-02-18 17:00:00", "actual_price": 177.61},
    {"start date/time": "2025-02-18 18:00:00", "actual_price": 174.32},
    {"start date/time": "2025-02-18 19:00:00", "actual_price": 155.91},
    {"start date/time": "2025-02-18 20:00:00", "actual_price": 142.42},
    {"start date/time": "2025-02-18 21:00:00", "actual_price": 129.41},
    {"start date/time": "2025-02-18 22:00:00", "actual_price": 121.90},
    {"start date/time": "2025-02-18 23:00:00", "actual_price": 114.45},
    {"start date/time": "2025-02-19 00:00:00", "actual_price": 104.54},
    {"start date/time": "2025-02-19 01:00:00", "actual_price": 99.29},
    {"start date/time": "2025-02-19 02:00:00", "actual_price": 99.60},
    {"start date/time": "2025-02-19 03:00:00", "actual_price": 97.29},
    {"start date/time": "2025-02-19 04:00:00", "actual_price": 94.50},
    {"start date/time": "2025-02-19 05:00:00", "actual_price": 97.48},
    {"start date/time": "2025-02-19 06:00:00", "actual_price": 115.00},
    {"start date/time": "2025-02-19 07:00:00", "actual_price": 149.50},
    {"start date/time": "2025-02-19 08:00:00", "actual_price": 166.37},
    {"start date/time": "2025-02-19 09:00:00", "actual_price": 128.76},
    {"start date/time": "2025-02-19 10:00:00", "actual_price": 107.91},
    {"start date/time": "2025-02-19 11:00:00", "actual_price": 89.28},
    {"start date/time": "2025-02-19 12:00:00", "actual_price": 81.52},
    {"start date/time": "2025-02-19 13:00:00", "actual_price": 82.74},
    {"start date/time": "2025-02-19 14:00:00", "actual_price": 85.20},
    {"start date/time": "2025-02-19 15:00:00", "actual_price": 90.63},
    {"start date/time": "2025-02-19 16:00:00", "actual_price": 110.03},
    {"start date/time": "2025-02-19 17:00:00", "actual_price": 131.28},
    {"start date/time": "2025-02-19 18:00:00", "actual_price": 142.98},
    {"start date/time": "2025-02-19 19:00:00", "actual_price": 144.42},
    {"start date/time": "2025-02-19 20:00:00", "actual_price": 120.90},
    {"start date/time": "2025-02-19 21:00:00", "actual_price": 113.75},
    {"start date/time": "2025-02-19 22:00:00", "actual_price": 107.13},
    {"start date/time": "2025-02-19 23:00:00", "actual_price": 99.81},
    {"start date/time": "2025-02-20 00:00:00", "actual_price": 86.17},
    {"start date/time": "2025-02-20 01:00:00", "actual_price": 85.20},
    {"start date/time": "2025-02-20 02:00:00", "actual_price": 84.56},
    {"start date/time": "2025-02-20 03:00:00", "actual_price": 83.64},
    {"start date/time": "2025-02-20 04:00:00", "actual_price": 82.18},
    {"start date/time": "2025-02-20 05:00:00", "actual_price": 83.84},
    {"start date/time": "2025-02-20 06:00:00", "actual_price": 86.31},
    {"start date/time": "2025-02-20 07:00:00", "actual_price": 110.41},
    {"start date/time": "2025-02-20 08:00:00", "actual_price": 127.98},
    {"start date/time": "2025-02-20 09:00:00", "actual_price": 105.87},
    {"start date/time": "2025-02-20 10:00:00", "actual_price": 93.72},
    {"start date/time": "2025-02-20 11:00:00", "actual_price": 87.23},
    {"start date/time": "2025-02-20 12:00:00", "actual_price": 86.87},
    {"start date/time": "2025-02-20 13:00:00", "actual_price": 84.51},
    {"start date/time": "2025-02-20 14:00:00", "actual_price": 84.78},
    {"start date/time": "2025-02-20 15:00:00", "actual_price": 90.05},
    {"start date/time": "2025-02-20 16:00:00", "actual_price": 98.34},
    {"start date/time": "2025-02-20 17:00:00", "actual_price": 110.13},
    {"start date/time": "2025-02-20 18:00:00", "actual_price": 112.38},
    {"start date/time": "2025-02-20 19:00:00", "actual_price": 122.45},
    {"start date/time": "2025-02-20 20:00:00", "actual_price": 105.91},
    {"start date/time": "2025-02-20 21:00:00", "actual_price": 92.41},
    {"start date/time": "2025-02-20 22:00:00", "actual_price": 90.84},
    {"start date/time": "2025-02-20 23:00:00", "actual_price": 83.00},
    {"start date/time": "2025-02-21 00:00:00", "actual_price": 75.51},
    {"start date/time": "2025-02-21 01:00:00", "actual_price": 74.99},
    {"start date/time": "2025-02-21 02:00:00", "actual_price": 79.80},
    {"start date/time": "2025-02-21 03:00:00", "actual_price": 83.40},
    {"start date/time": "2025-02-21 04:00:00", "actual_price": 82.40},
    {"start date/time": "2025-02-21 05:00:00", "actual_price": 82.03},
    {"start date/time": "2025-02-21 06:00:00", "actual_price": 106.47},
    {"start date/time": "2025-02-21 07:00:00", "actual_price": 152.50},
    {"start date/time": "2025-02-21 08:00:00", "actual_price": 145.21},
    {"start date/time": "2025-02-21 09:00:00", "actual_price": 120.98},
    {"start date/time": "2025-02-21 10:00:00", "actual_price": 98.68},
    {"start date/time": "2025-02-21 11:00:00", "actual_price": 83.40},
    {"start date/time": "2025-02-21 12:00:00", "actual_price": 78.00},
    {"start date/time": "2025-02-21 13:00:00", "actual_price": 76.41},
    {"start date/time": "2025-02-21 14:00:00", "actual_price": 79.52},
    {"start date/time": "2025-02-21 15:00:00", "actual_price": 82.94},
    {"start date/time": "2025-02-21 16:00:00", "actual_price": 103.47},
    {"start date/time": "2025-02-21 17:00:00", "actual_price": 103.45},
    {"start date/time": "2025-02-21 18:00:00", "actual_price": 101.98},
    {"start date/time": "2025-02-21 19:00:00", "actual_price": 81.95},
    {"start date/time": "2025-02-21 20:00:00", "actual_price": 80.25},
    {"start date/time": "2025-02-21 21:00:00", "actual_price": 70.85},
    {"start date/time": "2025-02-21 22:00:00", "actual_price": 70.64},
    {"start date/time": "2025-02-21 23:00:00", "actual_price": 64.33},
    {"start date/time": "2025-02-22 00:00:00", "actual_price": 50.99},
    {"start date/time": "2025-02-22 01:00:00", "actual_price": 48.62},
    {"start date/time": "2025-02-22 02:00:00", "actual_price": 47.96},
    {"start date/time": "2025-02-22 03:00:00", "actual_price": 42.62},
    {"start date/time": "2025-02-22 04:00:00", "actual_price": 41.20},
    {"start date/time": "2025-02-22 05:00:00", "actual_price": 49.15},
    {"start date/time": "2025-02-22 06:00:00", "actual_price": 49.11},
    {"start date/time": "2025-02-22 07:00:00", "actual_price": 61.22},
    {"start date/time": "2025-02-22 08:00:00", "actual_price": 68.25},
    {"start date/time": "2025-02-22 09:00:00", "actual_price": 67.74},
    {"start date/time": "2025-02-22 10:00:00", "actual_price": 53.21},
    {"start date/time": "2025-02-22 11:00:00", "actual_price": 47.13},
    {"start date/time": "2025-02-22 12:00:00", "actual_price": 40.40},
    {"start date/time": "2025-02-22 13:00:00", "actual_price": 52.69},
    {"start date/time": "2025-02-22 14:00:00", "actual_price": 68.00},
    {"start date/time": "2025-02-22 15:00:00", "actual_price": 85.60},
    {"start date/time": "2025-02-22 16:00:00", "actual_price": 100.95},
    {"start date/time": "2025-02-22 17:00:00", "actual_price": 108.38},
    {"start date/time": "2025-02-22 18:00:00", "actual_price": 106.98},
    {"start date/time": "2025-02-22 19:00:00", "actual_price": 115.18},
    {"start date/time": "2025-02-22 20:00:00", "actual_price": 103.00},
    {"start date/time": "2025-02-22 21:00:00", "actual_price": 101.58},
    {"start date/time": "2025-02-22 22:00:00", "actual_price": 94.27},
    {"start date/time": "2025-02-22 23:00:00", "actual_price": 83.51},
    {"start date/time": "2025-02-23 00:00:00", "actual_price": 91.10},
    {"start date/time": "2025-02-23 01:00:00", "actual_price": 84.99},
    {"start date/time": "2025-02-23 02:00:00", "actual_price": 86.21},
    {"start date/time": "2025-02-23 03:00:00", "actual_price": 83.63},
    {"start date/time": "2025-02-23 04:00:00", "actual_price": 85.77},
    {"start date/time": "2025-02-23 05:00:00", "actual_price": 87.33},
    {"start date/time": "2025-02-23 06:00:00", "actual_price": 87.70},
    {"start date/time": "2025-02-23 07:00:00", "actual_price": 108.00},
    {"start date/time": "2025-02-23 08:00:00", "actual_price": 112.24},
    {"start date/time": "2025-02-23 09:00:00", "actual_price": 99.35},
    {"start date/time": "2025-02-23 10:00:00", "actual_price": 83.00},
    {"start date/time": "2025-02-23 11:00:00", "actual_price": 81.50},
    {"start date/time": "2025-02-23 12:00:00", "actual_price": 71.12},
    {"start date/time": "2025-02-23 13:00:00", "actual_price": 69.20},
    {"start date/time": "2025-02-23 14:00:00", "actual_price": 64.75},
    {"start date/time": "2025-02-23 15:00:00", "actual_price": 69.11},
    {"start date/time": "2025-02-23 16:00:00", "actual_price": 84.70},
    {"start date/time": "2025-02-23 17:00:00", "actual_price": 150.45},
    {"start date/time": "2025-02-23 18:00:00", "actual_price": 154.69},
    {"start date/time": "2025-02-23 19:00:00", "actual_price": 117.02},
    {"start date/time": "2025-02-23 20:00:00", "actual_price": 96.03},
    {"start date/time": "2025-02-23 21:00:00", "actual_price": 77.32},
    {"start date/time": "2025-02-23 22:00:00", "actual_price": 73.81},
    {"start date/time": "2025-02-23 23:00:00", "actual_price": 65.99}
]

# Ground-truth prices as a DataFrame, with the timestamp strings parsed into a
# datetime column named 'StartDateTime' (the key used when merging with forecasts).
actual_df = pd.DataFrame(actual_data).assign(
    StartDateTime=lambda frame: pd.to_datetime(frame['start date/time'])
)

# XGBoost Forecasting
# Recursive one-step-ahead forecast: each predicted price is pushed back into
# the price history so that later rows can use it as a lag feature.
last_hist_xgb = historical_data.tail(48)
price_history_xgb = list(last_hist_xgb['Price'])
predictions_xgb = []

for _, future_row in future_data.iterrows():
    model_inputs = []
    for name in common_features:
        if name in ('Lag_Price_1h', 'Lag_Price_24h'):
            # The lag in hours is encoded in the feature name's suffix.
            hours_back = int(name.rsplit('_', 1)[-1].replace('h', ''))
            if len(predictions_xgb) >= hours_back:
                lagged_price = predictions_xgb[-hours_back]
            else:
                lagged_price = price_history_xgb[-hours_back]
            model_inputs.append(lagged_price)
        elif 'Rolling' in name or 'Std' in name:
            # Missing rolling statistics fall back to the mean over the last 48 historical rows.
            rolling_value = future_row[name]
            if pd.isna(rolling_value):
                rolling_value = last_hist_xgb[name].mean()
            model_inputs.append(rolling_value)
        else:
            model_inputs.append(future_row[name])
    scaled_inputs = scaler_xgb.transform([model_inputs])
    pred_price = model_xgb.predict(scaled_inputs)[0]
    predictions_xgb.append(pred_price)
    price_history_xgb.append(pred_price)

future_data['Predicted Price [Euro/MWh] XGBoost'] = predictions_xgb

# LSTM Forecasting
# Recursive multi-step forecast. A window of the last `time_steps` scaled
# feature rows is slid forward one row per future timestamp, and each predicted
# price is appended to the price history so it can serve as a lag feature for
# later rows.

# .copy() makes the tail an independent frame, so the fill below cannot raise
# SettingWithCopyWarning or write through to historical_data.
last_hist_lstm = historical_data.tail(time_steps).copy()
last_hist_lstm[common_features] = last_hist_lstm[common_features].ffill().bfill()
initial_sequence = scaler_X_lstm.transform(last_hist_lstm[common_features].values)
price_history_lstm = list(historical_data['Price'])
predictions_lstm = []
# Fallback values for missing features; loop-invariant, so computed once.
historical_feature_means = historical_data[common_features].mean()

for idx, row in future_data.iterrows():
    features_for_pred = []
    for feature in common_features:
        if 'Lag_Price' in feature:
            # The lag in hours is parsed from the feature name's suffix (e.g. '..._24h' -> 24).
            lag = int(feature.split('_')[-1].replace('h', ''))
            # Prefer an already-predicted price; otherwise read from the running history.
            value = (predictions_lstm[-lag] if len(predictions_lstm) >= lag and not np.isnan(predictions_lstm[-lag])
                     else price_history_lstm[-lag])
            features_for_pred.append(value)
        elif 'Rolling' in feature or 'Std' in feature:
            # Missing rolling statistics fall back to the mean over the seed window.
            features_for_pred.append(row[feature] if pd.notna(row[feature]) else last_hist_lstm[feature].mean())
        else:
            features_for_pred.append(row[feature] if pd.notna(row[feature]) else historical_feature_means[feature])

    # Safety net: anything still NaN is replaced by the historical mean of its feature.
    features_for_pred = pd.Series(features_for_pred, index=common_features).fillna(historical_feature_means).values
    features_scaled = scaler_X_lstm.transform([features_for_pred])
    # Slide the window: drop the oldest row and place the new scaled row last.
    initial_sequence = np.roll(initial_sequence, -1, axis=0)
    initial_sequence[-1] = features_scaled[0]
    input_seq = initial_sequence.reshape(1, time_steps, len(common_features))
    pred_scaled = model_lstm.predict(input_seq, verbose=0).flatten()[0]

    # If the model emits NaN, fall back to the most recent price (in scaled space).
    if np.isnan(pred_scaled):
        pred_scaled = scaler_y_lstm.transform([[price_history_lstm[-1]]])[0][0]

    pred_price = scaler_y_lstm.inverse_transform([[pred_scaled]])[0][0]
    predictions_lstm.append(pred_price)
    price_history_lstm.append(pred_price)

future_data['Predicted Price [Euro/MWh] LSTM'] = predictions_lstm

# Hybrid Forecasting (LSTM + XGBoost)
# Recursive forecast: the hybrid model receives the common features plus the
# LSTM prediction for the same row. Its output is passed through expm1 below,
# i.e. it is treated as log1p(price).
last_hist_hybrid = historical_data.tail(24)
price_history_hybrid = list(last_hist_hybrid['Price'])
predictions_hybrid = []
hybrid_features = common_features + ['LSTM_pred']

# enumerate() supplies the row *position*: predictions_lstm is a plain list, so
# indexing it with the DataFrame index label is only correct for a 0-based
# RangeIndex.
for position, (_, row) in enumerate(future_data.iterrows()):
    # Starts as all-NaN; features not matched by any branch below stay NaN and
    # are zero-filled before scaling.
    new_row = pd.Series(index=hybrid_features, dtype=float)
    for feature in common_features:
        if feature in ['total_consumption', 'temperature', 'wind_speed', 'relative_humidity', 'Hour_sin', 'Hour_cos']:
            new_row[feature] = row[feature]
        elif feature == 'Lag_Price_1h':
            new_row[feature] = predictions_hybrid[-1] if predictions_hybrid else price_history_hybrid[-1]
        elif feature == 'Lag_Price_24h':
            new_row[feature] = predictions_hybrid[-24] if len(predictions_hybrid) >= 24 else price_history_hybrid[-24]
        elif 'Rolling' in feature or 'Std' in feature:
            if feature == 'Price_RollingStd24':
                # price_history_hybrid already holds every prediction made so
                # far (appended at the end of each iteration), so it is the
                # complete series. Concatenating predictions_hybrid onto it
                # would double-count predictions inside the 24-hour window.
                prices = price_history_hybrid
                # NOTE(review): np.std is the population std (ddof=0), while
                # pandas rolling std defaults to ddof=1 — confirm which one the
                # training feature used.
                new_row[feature] = np.std(prices[-24:]) if len(prices) >= 24 else 0
            else:
                new_row[feature] = row[feature] if pd.notna(row[feature]) else last_hist_hybrid[feature].mean()
    new_row['LSTM_pred'] = predictions_lstm[position]
    new_row = new_row.fillna(0)
    new_row_scaled = scaler_hybrid.transform(new_row.to_frame().T)
    pred_price_log = model_hybrid.predict(new_row_scaled)[0]
    # Invert the log1p transform to get back to price units.
    pred_price = np.expm1(pred_price_log)
    predictions_hybrid.append(pred_price)
    price_history_hybrid.append(pred_price)

future_data['Predicted Price [Euro/MWh] Hybrid'] = predictions_hybrid

# Model Evaluation
# For each model, align its forecasts with the actual prices on the shared
# timestamps, plot predicted vs actual, and collect the metrics returned by
# evaluate_model. Models with no overlapping timestamps are reported and skipped.
evaluation_results = {}
for model_name, pred_col in [('XGBoost', 'Predicted Price [Euro/MWh] XGBoost'),
                             ('LSTM', 'Predicted Price [Euro/MWh] LSTM'),
                             ('Hybrid', 'Predicted Price [Euro/MWh] Hybrid')]:
    # Inner join keeps only timestamps present in both forecasts and actuals.
    merged_df = pd.merge(
        future_data[['StartDateTime', pred_col]],
        actual_df[['StartDateTime', 'actual_price']],
        on='StartDateTime',
        how='inner'
    )
    if not merged_df.empty:
        period_start = merged_df['StartDateTime'].min()
        period_end = merged_df['StartDateTime'].max()
        print(f"\nEvaluating {model_name} on period: {period_start} to {period_end}")
        # The title's date range comes from the merged data, so it always
        # matches the period actually evaluated instead of a hard-coded range.
        plot_predictions(
            merged_df['StartDateTime'],
            merged_df['actual_price'],
            merged_df[pred_col],
            f"{model_name}: Predicted vs Actual Prices ({period_start:%Y-%m-%d} to {period_end:%Y-%m-%d})"
        )
        metrics = evaluate_model(merged_df['actual_price'], merged_df[pred_col], f"{model_name} (Future)", return_metrics=True)
        evaluation_results[model_name] = metrics
    else:
        print(f"No overlapping data found for {model_name} evaluation.")

# Compare model performance
# One summary line per evaluated model; nothing is printed when no model had
# overlapping actuals to evaluate against.
if evaluation_results:
    print("\nModel Performance Comparison:")
    for model_name in evaluation_results:
        rmse, mae, r2, mape, smape = evaluation_results[model_name]
        print(f"{model_name}: RMSE={rmse:.2f}, MAE={mae:.2f}, R²={r2:.3f}, MAPE={mape:.2f}%, SMAPE={smape:.2f}%")

#### End