In [2]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta, timezone # Added timezone
import json
import pickle
import os
import sys
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from supabase import create_client, Client
from lightgbm import LGBMRegressor
from sklearn.multioutput import MultiOutputRegressor
import plotly.express as px
import traceback

np.set_printoptions(suppress=True)
try:
    import tensorflow as tf
    from keras.models import Sequential, save_model, load_model
    from keras.layers import Dense, LSTM, Input, Dropout
    from keras.callbacks import EarlyStopping
    KERAS_AVAILABLE = True
except ImportError:
    print("TensorFlow/Keras not found. Keras models cannot be trained/saved natively.")
    KERAS_AVAILABLE = False
    # Define dummy classes if needed for type checking, though not strictly necessary here
    Sequential, save_model, load_model = object, lambda x, y: None, lambda x: None
    Dense, LSTM, Input, Dropout, EarlyStopping = object, object, object, object, object


try:
    from padasip.filters import FilterRLS
    PADASIP_AVAILABLE = True
except ImportError:
    print("padasip not found. RLS filters cannot be trained/used.")
    PADASIP_AVAILABLE = False
    FilterRLS = object


# --- Configuration & Constants --- (Same as before)
SUPABASE_URL = os.environ.get("SUPABASE_URL")
SUPABASE_KEY = os.environ.get("SUPABASE_SECRET_KEY")
if not SUPABASE_URL or not SUPABASE_KEY or "YOUR_SUPABASE_URL" in SUPABASE_URL:
     print("Error: SUPABASE_URL and SUPABASE_KEY environment variables must be set.")
     # sys.exit(1)

supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY)
DATA_SCHEMA = "data"
ML_SCHEMA = "ml"
STORAGE_BUCKET = "models"
FEEDER_ID_TO_TRAIN = 1
SCENARIO = "Day" # Example
MODEL_VERSION = "v1.1_Is_Weekend"  # Updated version
TRAIN_START_DATE = "2024-01-01 00:00:00+00"
TRAIN_END_DATE = "2024-05-31 23:59:59+00"
VALIDATION_START_DATE = "2024-06-01 00:00:00+00"
VALIDATION_END_DATE = "2024-06-30 23:59:59+00"
DAY_HOURS = list(range(6, 20 + 1))
NIGHT_HOURS = list(range(0, 6)) + list(range(21, 24))
script_dir = os.path.dirname(os.path.abspath(__file__)) if '__file__' in locals() else os.getcwd()
TEMP_DIR = os.path.join(script_dir, "tmp")


# --- Database Interaction Functions --- (fetch_data, log_model_metadata - unchanged)
def fetch_data(feeder_id, start_date, end_date):
    """Fetches combined feeder and weather data from Supabase."""
    print(f"Fetching data for Feeder {feeder_id} from {start_date} to {end_date}...")
    end_date_dt = pd.to_datetime(end_date) + timedelta(days=1) # Include the end date in the range
    end_date_str = end_date_dt.strftime("%Y-%m-%d %H:%M:%S%z")
    try:
        supabase.postgrest.schema(DATA_SCHEMA)
        response = (supabase.table(f"Feeder_Weather_Combined_Data").select("*").eq("Feeder_ID", feeder_id).gte("Timestamp", start_date).lt("Timestamp", end_date_str).order("Timestamp", desc=False).execute())
        if not response.data: print(f"Warning: No data found for Feeder {feeder_id} in the specified range."); return pd.DataFrame()
        df = pd.DataFrame(response.data); df["Timestamp"] = pd.to_datetime(df["Timestamp"]); df = df.set_index("Timestamp")
        print(f"Fetched {len(df)} records."); return df
    except Exception as e: print(f"Error fetching data: {e}"); raise

def log_model_metadata(metadata):
    """Inserts model metadata into the ml.Models table."""
    print(f"Logging metadata for model: {metadata.get('model_artifact_path')}")
    try:
        supabase.postgrest.schema(ML_SCHEMA)
        response = supabase.table(f"models").insert(metadata).execute()
        if hasattr(response, "data") and response.data: print("Metadata logged successfully."); return response.data[0]["model_id"]
        elif hasattr(response, "error") and response.error: print(f"Error logging metadata: {response.error}"); raise Exception(f"Failed to log model metadata: {response.error}")
        elif not hasattr(response, "error") and not hasattr(response, "data"): print("Metadata logged successfully (assumed based on response)."); return None
        else: print(f"Unknown error logging metadata. Response: {response}"); raise Exception("Unknown error logging metadata.")
    except Exception as e: print(f"Error inserting metadata into {ML_SCHEMA}.Models: {e}"); raise


# --- save_pickle_artifact (Renamed & Simplified: ONLY handles pickling) ---
def save_pickle_artifact(artifact_object, feeder_id, model_arch, scenario, version_tag):
    """Saves a Python object using pickle to Supabase Storage."""
    file_name = f"{model_arch}_{scenario}_{version_tag}.pkl" # Use specific tag
    storage_path = f"models/feeder_{feeder_id}/{file_name}"
    local_tmp_dir = TEMP_DIR # Use the globally defined TEMP_DIR
    local_temp_path = os.path.join(local_tmp_dir, file_name)

    os.makedirs(local_tmp_dir, exist_ok=True)
    print(f"Saving pickled artifact temporarily to: {local_temp_path}")
    print(f"Uploading pickled artifact to Supabase Storage path: {storage_path}...")
    try:
        # 1. Save artifact locally using pickle
        with open(local_temp_path, "wb") as f:
            pickle.dump(artifact_object, f)
        print(f"Artifact pickled locally: {local_temp_path}")

        # 2. Upload the local file to Supabase Storage
        if not os.path.exists(local_temp_path): raise FileNotFoundError(f"Temporary file not found after saving: {local_temp_path}")
        with open(local_temp_path, "rb") as f:
            response = supabase.storage.from_(STORAGE_BUCKET).upload(path=storage_path, file=f, file_options={"content-type": "application/octet-stream", "upsert": "true"})
            print(f"Supabase storage upload response: {response}")

        # 3. Clean up the temporary local file
        os.remove(local_temp_path)
        print(f"Temporary file {local_temp_path} removed.")
        print("Pickled artifact saved successfully to Supabase Storage.")
        return storage_path
    except Exception as e:
        print(f"Error saving pickled artifact: {e}")
        if os.path.exists(local_temp_path):
            try: os.remove(local_temp_path); print(f"Cleaned up temporary file {local_temp_path} after error.")
            except OSError as rm_err: print(f"Error removing temporary file {local_temp_path} after error: {rm_err}")
        raise

# --- Data Preparation Functions --- (prepare_daily_vectors, feature_engineer_and_scale - unchanged)
def prepare_daily_vectors(df, feature_cols, target_col_list, scenario_hours): # Expect list
    """Pivots hourly data into daily vectors (one row per day)."""
    print("Pivoting data into daily vectors...")
    df_copy = df.copy(); df_copy["date"] = df_copy.index.date; df_copy["hour"] = df_copy.index.hour
    if scenario_hours: df_copy = df_copy[df_copy["hour"].isin(scenario_hours)]
    pivoted_X = df_copy.pivot_table(index="date", columns="hour", values=feature_cols)
    pivoted_X.columns = [f"{col[0]}_Hour_{col[1]}" for col in pivoted_X.columns]
    pivoted_y = df_copy.pivot_table(index="date", columns="hour", values=target_col_list)
    if len(target_col_list) == 1:
         if isinstance(pivoted_y, pd.Series): pivoted_y = pivoted_y.to_frame()
         pivoted_y.columns = [f"{target_col_list[0]}_Hour_{col[1]}" for col in pivoted_y.columns] # Adjusted column naming
    else: pivoted_y.columns = [f"{col[0]}_Hour_{col[1]}" for col in pivoted_y.columns]
    expected_hours = scenario_hours if scenario_hours else list(range(24))
    ordered_X_columns = [f"{feat}_Hour_{hr}" for feat in feature_cols for hr in expected_hours]
    ordered_y_columns = [f"{tgt}_Hour_{hr}" for tgt in target_col_list for hr in expected_hours]
    pivoted_X = pivoted_X.reindex(columns=ordered_X_columns); pivoted_y = pivoted_y.reindex(columns=ordered_y_columns)
    pivoted_X.index = pd.to_datetime(pivoted_X.index); pivoted_X["Day_Of_Week"] = pivoted_X.index.dayofweek
    pivoted_X["Is_Holiday"] = 0
    
    pivoted_X['Is_Weekend'] = pivoted_X['Day_Of_Week'].apply(lambda x: 1 if x in [5, 6] else 0) # Saturday=5, Sunday=6
    
    pivoted_X = pd.get_dummies(pivoted_X, columns=["Day_Of_Week"], prefix="DOW", dtype="int")
    original_days = len(pivoted_X); valid_indices = pivoted_X.dropna().index
    pivoted_X = pivoted_X.loc[valid_indices]; pivoted_y = pivoted_y.loc[valid_indices]
    if len(pivoted_X) < original_days: print(f"Warning: Dropped {original_days - len(pivoted_X)} days due to missing data after pivoting/feature eng.")
    print(f"Reshaped data: X shape {pivoted_X.shape}, y shape {pivoted_y.shape}")
    return pivoted_X, pivoted_y

def feature_engineer_and_scale(df, scenario, x_scaler=None, y_scaler=None, change_in_load=False, apply_scaling=True):
    """Prepares features, reshapes data, applies MinMaxScaler to X and y."""
    print(f"Starting feature engineering for scenario: {scenario}...")
    df_processed = df.copy()
    df_processed["Net_Load_Change"] = df_processed["Net_Load_Demand"].diff(24).fillna(0)
    df_processed["Prev_Day_Net_Load_Demand"] = df_processed["Net_Load_Demand"].shift(24)
    df_processed["Prev_Day_Temperature_Historic"] = df_processed["Temperature_Historic"].shift(24)
    df_processed["Prev_Day_Shortwave_Radiation_Historic"] = df_processed["Shortwave_Radiation_Historic"].shift(24)
    df_processed["Prev_Day_Cloud_Cover_Historic"] = df_processed["Cloud_Cover_Historic"].shift(24)
    # df_processed.rename(columns={'temperature_2m_forecast': 'Temperature_Forecast', 'shortwave_radiation_forecast': 'Shortwave_Radiation_Forecast', 'cloud_cover_forecast': 'Cloud_Cover_Forecast'}, inplace=True)
    feature_cols = ['Prev_Day_Net_Load_Demand', 'Prev_Day_Temperature_Historic', 'Temperature_Forecast', 'Prev_Day_Shortwave_Radiation_Historic', 'Shortwave_Radiation_Forecast', 'Prev_Day_Cloud_Cover_Historic', 'Cloud_Cover_Forecast']
    target_col = ['Net_Load_Change'] if change_in_load else ['Net_Load_Demand']
    df_processed = df_processed.dropna(subset=feature_cols + target_col)
    scenario_hours = DAY_HOURS if scenario == "Day" else NIGHT_HOURS if scenario == "Night" else None
    X, y = prepare_daily_vectors(df_processed, feature_cols, target_col, scenario_hours)
    
    print("++++++++++++++++++++++++++++++")
    print(X)
    print("+++++++++++++++++++++++++++++++++++")
    if X.empty or y.empty: print("Warning: No data left after feature engineering and reshaping."); return X, y, None, None
    if not apply_scaling: print("Scaling is disabled. Returning original data without scaling."); return X, y, None, None
    is_training = (x_scaler is None) and (y_scaler is None)
    if is_training:
        print("Fitting new MinMaxScaler on training data (X and y)..."); x_scaler = MinMaxScaler(); y_scaler = MinMaxScaler()
        X_scaled = x_scaler.fit_transform(X); y_scaled = y_scaler.fit_transform(y); print("Scalers fitted.")
        X_scaled_df = pd.DataFrame(X_scaled, index=X.index, columns=X.columns); y_scaled_df = pd.DataFrame(y_scaled, index=y.index, columns=y.columns)
        return X_scaled_df, y_scaled_df, x_scaler, y_scaler
    else:
        if x_scaler is None or y_scaler is None: raise ValueError("Both x_scaler and y_scaler must be provided for non-training mode.")
        print("Transforming data using provided scalers (X and y)...")
        if not (hasattr(x_scaler, "transform") and hasattr(y_scaler, "transform")): raise ValueError("Provided scaler objects must have a 'transform' method.")
        try:
            if hasattr(x_scaler, "feature_names_in_") and list(X.columns) != list(x_scaler.feature_names_in_): raise ValueError("Input features mismatch between data and X scaler.")
            elif hasattr(x_scaler, "n_features_in_") and X.shape[1] != x_scaler.n_features_in_: raise ValueError(f"Input feature count mismatch: data has {X.shape[1]}, X scaler expects {x_scaler.n_features_in_}")
            if hasattr(y_scaler, "n_features_in_") and y.shape[1] != y_scaler.n_features_in_: raise ValueError(f"Target feature count mismatch: data has {y.shape[1]}, y scaler expects {y_scaler.n_features_in_}")
            X_scaled = x_scaler.transform(X); y_scaled = y_scaler.transform(y); print("Data transformed.")
            X_scaled_df = pd.DataFrame(X_scaled, index=X.index, columns=X.columns); y_scaled_df = pd.DataFrame(y_scaled, index=y.index, columns=y.columns)
            return X_scaled_df, y_scaled_df, None, None
        except Exception as e: print(f"Error applying scaler transform: {e}"); traceback.print_exc(); raise

# --- Model Training Functions --- (train_lightgbm_model, train_ann_model, train_lstm_model - unchanged)
# --- RLS Functions --- (convert_change_in_load_to_base_load, train_padasip_rls_combiner, predict_with_padasip_rls, run_rls_combination_stage - unchanged)
# --- [Include the definitions for these functions from your previous script here] ---
# Placeholder comment - make sure these functions are defined above run_training
def train_lightgbm_model(X_train_scaled, y_train_scaled, X_val_scaled, y_val_original, y_scaler):
    """Trains LightGBM model."""
    print(f"Training LightGBM model...")
    lgbm_estimator = LGBMRegressor(n_jobs=-1, random_state=42); model = MultiOutputRegressor(lgbm_estimator, n_jobs=-1)
    print("Fitting MultiOutputRegressor with LGBM on scaled data..."); model.fit(X_train_scaled, y_train_scaled); print("Model fitting complete.")
    print("Predicting on scaled validation set..."); y_pred_val_scaled = model.predict(X_val_scaled)
    print("Inverse transforming predictions to original scale...")
    try:
        if y_pred_val_scaled.shape[1] != y_scaler.n_features_in_: raise ValueError(f"Prediction shape mismatch: predicted {y_pred_val_scaled.shape[1]} features, y_scaler expects {y_scaler.n_features_in_}")
        y_pred_val_original = y_scaler.inverse_transform(y_pred_val_scaled)
    except Exception as e: print(f"Error during inverse transform: {e}"); traceback.print_exc(); return model, {"mae": np.nan, "rmse": np.nan, "smape": np.nan}, None # Return None for preds
    print("Calculating validation metrics on original scale...")
    y_val_np = y_val_original.values if isinstance(y_val_original, pd.DataFrame) else y_val_original; y_pred_val_np = y_pred_val_original
    try:
        mae = mean_absolute_error(y_val_np, y_pred_val_np); rmse = np.sqrt(mean_squared_error(y_val_np, y_pred_val_np))
        denominator = np.abs(y_val_np) + np.abs(y_pred_val_np); safe_denominator = np.where(denominator == 0, 1, denominator)
        smape_values = 200 * np.abs(y_pred_val_np - y_val_np) / safe_denominator; smape = np.mean(smape_values)
        validation_metrics = {"mae": mae, "rmse": rmse, "smape": smape}
    except ValueError as metric_err: print(f"ERROR calculating metrics: {metric_err}"); traceback.print_exc(); validation_metrics = {"mae": np.nan, "rmse": np.nan, "smape": np.nan}
    print("LightGBM training complete."); return model, validation_metrics, y_pred_val_original # Return preds

def convert_change_in_load_to_base_load(X_original, y_pred_change_original):
    """Converts predicted change_in_load back to base_load."""
    X_original_np = X_original.values if isinstance(X_original, pd.DataFrame) else X_original
    y_pred_change_np = y_pred_change_original.values if isinstance(y_pred_change_original, pd.DataFrame) else y_pred_change_original
    prev_day_cols = [col for col in X_original.columns if col.startswith("Prev_Day_Net_Load_Demand_Hour_")]
    if len(prev_day_cols) != y_pred_change_np.shape[1]: raise ValueError(f"Mismatch between number of previous day load columns ({len(prev_day_cols)}) and prediction columns ({y_pred_change_np.shape[1]})")
    num_hours = y_pred_change_np.shape[1]; prev_day_indices = [X_original.columns.get_loc(col) for col in prev_day_cols]
    prev_day_load = X_original_np[:, prev_day_indices]
    y_pred_base_np = prev_day_load.astype(float) + y_pred_change_np.astype(float)
    print("Converted change_in_load prediction back to base_load prediction.")
    return y_pred_base_np

def train_ann_model(X_train_scaled, y_train_scaled, X_val_scaled, y_val_scaled, X_train_original, X_val_original, y_train_original, y_val_original, y_scaler, change_in_load=False):
    """Trains ANN model and returns predictions on train and val sets."""
    print(f"Training ANN model (change_in_load={change_in_load})...")
    ann_model = Sequential()
    ann_model.add(Input(shape=(X_train_scaled.shape[1],)))
    ann_model.add(Dense(50, activation="sigmoid"))
    # ann_model.add(Dropout(0.2))
    ann_model.add(Dense(y_train_scaled.shape[1]))
    ann_model.compile(optimizer="adam", loss="mean_squared_error", metrics=["mae"])
    callbacks = [EarlyStopping(monitor="val_loss", patience=8, verbose=1, restore_best_weights=True)]
    print("Fitting ANN on scaled data..."); history = ann_model.fit(X_train_scaled, y_train_scaled, validation_data=(X_val_scaled, y_val_scaled), epochs=50, batch_size=32, callbacks=callbacks, verbose=1); print("Model fitting complete.")
    print("Predicting on scaled training set..."); y_pred_train_scaled = ann_model.predict(X_train_scaled)
    print("Inverse transforming training predictions..."); y_pred_train_original = y_scaler.inverse_transform(y_pred_train_scaled)
    if change_in_load: y_pred_train_original = convert_change_in_load_to_base_load(X_train_original, y_pred_train_original)
    print("Predicting on scaled validation set..."); y_pred_val_scaled = ann_model.predict(X_val_scaled)
    print("Inverse transforming validation predictions..."); y_pred_val_original = y_scaler.inverse_transform(y_pred_val_scaled)
    if change_in_load: y_pred_val_original = convert_change_in_load_to_base_load(X_val_original, y_pred_val_original)
    print("Calculating validation metrics on original scale...")
    y_val_np = y_val_original.values if isinstance(y_val_original, pd.DataFrame) else y_val_original
    y_pred_val_np = y_pred_val_original # Already converted if needed
    try:
        mae = mean_absolute_error(y_val_np, y_pred_val_np); rmse = np.sqrt(mean_squared_error(y_val_np, y_pred_val_np))
        denominator = np.abs(y_val_np) + np.abs(y_pred_val_np); safe_denominator = np.where(denominator == 0, 1, denominator)
        smape_values = 200 * np.abs(y_pred_val_np - y_val_np) / safe_denominator; smape = np.mean(smape_values)
        validation_metrics = {"mae": mae, "rmse": rmse, "smape": smape}
    except ValueError as metric_err: print(f"ERROR calculating metrics: {metric_err}"); traceback.print_exc(); validation_metrics = {"mae": np.nan, "rmse": np.nan, "smape": np.nan}
    print(f"ANN Model training complete. Validation Metrics (Original Scale): {validation_metrics}")
    return ann_model, validation_metrics, y_pred_train_original, y_pred_val_original

def train_lstm_model(X_train_scaled, y_train_scaled, X_val_scaled, y_val_scaled, X_train_original, X_val_original, y_train_original, y_val_original, y_scaler, change_in_load=False):
    """Trains LSTM model and returns predictions on train and val sets."""
    print(f"Training LSTM model (change_in_load={change_in_load})...")
    X_train_lstm = X_train_scaled.values.reshape((X_train_scaled.shape[0], 1, X_train_scaled.shape[1]))
    X_val_lstm = X_val_scaled.values.reshape((X_val_scaled.shape[0], 1, X_val_scaled.shape[1]))
    print(f"Reshaped X for LSTM: Train shape {X_train_lstm.shape}, Val shape {X_val_lstm.shape}")
    print(f"NaN check before LSTM fit:"); print(f"  X_train_lstm NaNs: {np.isnan(X_train_lstm).any()}"); print(f"  y_train_scaled NaNs: {np.isnan(y_train_scaled.values).any()}"); print(f"  X_val_lstm NaNs: {np.isnan(X_val_lstm).any()}"); print(f"  y_val_scaled NaNs: {np.isnan(y_val_scaled.values).any()}")
    if np.isnan(X_train_lstm).any() or np.isnan(y_train_scaled.values).any(): print("ERROR: NaNs detected in TRAINING data before LSTM fit!"); return None, {"mae": np.nan, "rmse": np.nan, "smape": np.nan}, None, None
    if np.isnan(X_val_lstm).any() or np.isnan(y_val_scaled.values).any(): print("ERROR: NaNs detected in VALIDATION data before LSTM fit!")
    
    lstm_model = Sequential(); lstm_model.add(Input(shape=(X_train_lstm.shape[1], X_train_lstm.shape[2])))
    lstm_model.add(LSTM(50, return_sequences=False))
    lstm_model.add(Dropout(0.3))
    lstm_model.add(Dense(25, activation="relu"))
    lstm_model.add(Dense(y_train_scaled.shape[1], activation='sigmoid'))
    
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.01, clipvalue=1.0)
    lstm_model.compile(optimizer=optimizer, loss='mean_squared_error', metrics=['mae']); lstm_model.summary()
    callbacks = [EarlyStopping(monitor='val_loss', patience=8, verbose=1, restore_best_weights=True)]
    print("Fitting LSTM on scaled data..."); history = lstm_model.fit(X_train_lstm, y_train_scaled, validation_data=(X_val_lstm, y_val_scaled), epochs=50, batch_size=32, callbacks=callbacks, verbose=1); print("LSTM Model fitting complete.")
    if np.isnan(history.history['loss']).any() or np.isnan(history.history['val_loss']).any(): print("WARNING: NaN detected in loss during training history.")
    print("Predicting on scaled training set with LSTM..."); y_pred_train_scaled = lstm_model.predict(X_train_lstm)
    print("Inverse transforming training predictions..."); y_pred_train_original = y_scaler.inverse_transform(y_pred_train_scaled)
    if change_in_load: y_pred_train_original = convert_change_in_load_to_base_load(X_train_original, y_pred_train_original)
    print("Predicting on scaled validation set with LSTM..."); y_pred_val_scaled = lstm_model.predict(X_val_lstm)
    print("Inverse transforming validation predictions..."); y_pred_val_original = y_scaler.inverse_transform(y_pred_val_scaled)
    if change_in_load: y_pred_val_original = convert_change_in_load_to_base_load(X_val_original, y_pred_val_original)
    print("Calculating LSTM validation metrics on original scale...")
    y_val_np = y_val_original.values if isinstance(y_val_original, pd.DataFrame) else y_val_original
    y_pred_val_np = y_pred_val_original # Already converted if needed
    try:
        if np.isnan(y_val_np).any(): raise ValueError("NaN found in y_val_np for metrics")
        if np.isnan(y_pred_val_np).any(): raise ValueError("NaN found in y_pred_val_np for metrics")
        mae = mean_absolute_error(y_val_np, y_pred_val_np); rmse = np.sqrt(mean_squared_error(y_val_np, y_pred_val_np))
        denominator = np.abs(y_val_np) + np.abs(y_pred_val_np); safe_denominator = np.where(denominator == 0, 1, denominator)
        smape_values = 200 * np.abs(y_pred_val_np - y_val_np) / safe_denominator; smape = np.mean(smape_values)
        validation_metrics = {"mae": mae, "rmse": rmse, "smape": smape}
    except ValueError as metric_err: print(f"ERROR calculating metrics: {metric_err}"); traceback.print_exc(); validation_metrics = {"mae": np.nan, "rmse": np.nan, "smape": np.nan}
    print(f"LSTM Model training complete. Validation Metrics (Original Scale): {validation_metrics}")
    return lstm_model, validation_metrics, y_pred_train_original, y_pred_val_original

def train_padasip_rls_combiner(predictions1, predictions2, actuals, mu=0.98, eps=0.01):
    """Trains a list of padasip FilterRLS objects, one for each output hour."""
    n_samples, n_outputs = actuals.shape; n_inputs = 2
    print(f"Initializing {n_outputs} RLS filters (mu={mu}, eps={eps})...")
    rls_filters = [FilterRLS(n=n_inputs, mu=mu, w="zeros") for _ in range(n_outputs)]
    print("Adapting RLS filters sample by sample...")
    for t in range(n_samples): # Iterate through all samples
        for k in range(n_outputs):
            x_k = np.array([predictions1[t, k], predictions2[t, k]])
            d_k = actuals[t, k]
            try:
                if np.isnan(x_k).any() or np.isinf(x_k).any() or np.isnan(d_k) or np.isinf(d_k): print(f"Warning: Skipping RLS adapt at sample {t}, hour {k} due to NaN/Inf input/target."); continue
                rls_filters[k].adapt(d_k, x_k)
            except Exception as adapt_err: print(f"ERROR during RLS adapt at sample {t}, hour {k}: {adapt_err}"); pass
    print("RLS filter adaptation complete."); return rls_filters

def predict_with_padasip_rls(rls_filters, predictions1, predictions2):
    """Combines predictions using a list of fitted padasip RLS filters."""
    n_samples, n_outputs = predictions1.shape
    if len(rls_filters) != n_outputs: raise ValueError("Number of RLS filters does not match number of prediction outputs.")
    combined_predictions = np.zeros_like(predictions1)
    for t in range(n_samples):
        for k in range(n_outputs):
            x_k = np.array([predictions1[t, k], predictions2[t, k]])
            combined_predictions[t, k] = rls_filters[k].predict(x_k)
    return combined_predictions

# --- Helper function for RLS stages (Modified) ---
def run_rls_combination_stage(train_df_raw, val_df_raw, scenario, model_type):
    """
    Runs the RLS combination for either ANN or LSTM.
    Trains base models, adapts RLS on training preds, predicts RLS on validation preds.
    Returns:
        y_pred_rls_combined_train (np.array): RLS predictions on training set.
        y_pred_rls_combined_val (np.array): RLS predictions on validation set.
        y_val_base_orig (pd.DataFrame): Original validation actuals (base load).
        rls_filter_list (list): List of RLS filters fitted on training data.
    """
    print(f"\n--- Running Internal {model_type} RLS Combination Stage ---")
    base_model_func = train_ann_model if model_type == 'ANN' else train_lstm_model
    change_model_func = train_ann_model if model_type == 'ANN' else train_lstm_model
    rls_filter_list = None

    try:
        # --- Prepare Data ---
        # Train Data
        X_train_base_s, y_train_base_s, x_scaler_base, y_scaler_base = feature_engineer_and_scale(train_df_raw, scenario, change_in_load=False)
        X_train_change_s, y_train_change_s, x_scaler_change, y_scaler_change = feature_engineer_and_scale(train_df_raw, scenario, change_in_load=True)
        X_train_base_orig, y_train_base_orig, _, _ = feature_engineer_and_scale(train_df_raw, scenario, x_scaler=x_scaler_base, y_scaler=y_scaler_base, change_in_load=False, apply_scaling=False)
        X_train_change_orig, _, _, _ = feature_engineer_and_scale(train_df_raw, scenario, x_scaler=x_scaler_change, y_scaler=y_scaler_change, change_in_load=True, apply_scaling=False)
        # Align original train data
        common_train_index = y_train_base_s.index.intersection(y_train_change_s.index)
        X_train_base_orig = X_train_base_orig.loc[common_train_index]; y_train_base_orig = y_train_base_orig.loc[common_train_index]
        X_train_change_orig = X_train_change_orig.loc[common_train_index]
        X_train_base_s = X_train_base_s.loc[common_train_index]; y_train_base_s = y_train_base_s.loc[common_train_index]
        X_train_change_s = X_train_change_s.loc[common_train_index]; y_train_change_s = y_train_change_s.loc[common_train_index]

        # Validation Data
        X_val_base_s, y_val_base_s, _, _ = feature_engineer_and_scale(val_df_raw, scenario, x_scaler=x_scaler_base, y_scaler=y_scaler_base, change_in_load=False)
        X_val_change_s, y_val_change_s, _, _ = feature_engineer_and_scale(val_df_raw, scenario, x_scaler=x_scaler_change, y_scaler=y_scaler_change, change_in_load=True)
        X_val_base_orig, y_val_base_orig, _, _ = feature_engineer_and_scale(val_df_raw, scenario, x_scaler=x_scaler_base, y_scaler=y_scaler_base, change_in_load=False, apply_scaling=False)
        X_val_change_orig, _, _, _ = feature_engineer_and_scale(val_df_raw, scenario, x_scaler=x_scaler_change, y_scaler=y_scaler_change, change_in_load=True, apply_scaling=False)
        # Align original val data
        common_val_index = y_val_base_s.index.intersection(y_val_change_s.index)
        X_val_base_orig = X_val_base_orig.loc[common_val_index]; y_val_base_orig = y_val_base_orig.loc[common_val_index]
        X_val_change_orig = X_val_change_orig.loc[common_val_index]
        X_val_base_s = X_val_base_s.loc[common_val_index]; y_val_base_s = y_val_base_s.loc[common_val_index]
        X_val_change_s = X_val_change_s.loc[common_val_index]; y_val_change_s = y_val_change_s.loc[common_val_index]

        # --- Train Base Models & Get Train+Val Predictions ---
        # Base model
        base_model, _, y_pred_base_train_orig, y_pred_base_val_orig = base_model_func(
            X_train_base_s, y_train_base_s, X_val_base_s, y_val_base_s,
            X_train_base_orig, X_val_base_orig, y_train_base_orig, y_val_base_orig, # Pass train originals too
            y_scaler_base, change_in_load=False
        )
        if base_model is None: raise RuntimeError(f"{model_type}_Baseload training failed.")

        # Change model
        change_model, _, y_pred_change_converted_train_orig, y_pred_change_converted_val_orig = change_model_func(
            X_train_change_s, y_train_change_s, X_val_change_s, y_val_change_s,
            X_train_change_orig, X_val_change_orig, y_train_base_orig, y_val_base_orig, # Pass train originals, use BASE actuals for metrics
            y_scaler_change, change_in_load=True
        )
        if change_model is None: raise RuntimeError(f"{model_type}_Change_in_Load training failed.")

        # --- Convert Change Predictions (Train & Val) ---
        # y_pred_change_converted_train_orig = convert_change_in_load_to_base_load(X_train_change_orig, y_pred_change_train_orig)
        # y_pred_change_converted_val_orig = convert_change_in_load_to_base_load(X_val_change_orig, y_pred_change_val_orig)

        # --- Align Training Data for RLS Adaptation ---
        y_pred_base_train_aligned_np = pd.DataFrame(y_pred_base_train_orig, index=common_train_index).reindex(common_train_index).values
        y_pred_change_converted_train_aligned_np = pd.DataFrame(y_pred_change_converted_train_orig, index=common_train_index).reindex(common_train_index).values
        y_train_base_orig_np = y_train_base_orig.reindex(common_train_index).values

        # --- Train RLS Filters on Training Data ---
        print(f"\n--- Training {model_type} RLS Combiner on Training Data ---")
        rls_filter_list = train_padasip_rls_combiner(
            y_pred_base_train_aligned_np,
            y_pred_change_converted_train_aligned_np,
            y_train_base_orig_np,
            mu=0.5, eps=0.1 # Use defaults for intermediate stage
        )

        # --- Predict RLS on Validation Data ---
        print(f"\n--- Predicting {model_type} RLS Combiner on Validation Data ---")
        y_pred_base_val_aligned_np = pd.DataFrame(y_pred_base_val_orig, index=common_val_index).reindex(common_val_index).values
        y_pred_change_converted_val_aligned_np = pd.DataFrame(y_pred_change_converted_val_orig, index=common_val_index).reindex(common_val_index).values

        y_pred_rls_combined_val = predict_with_padasip_rls(
            rls_filter_list,
            y_pred_base_val_aligned_np,
            y_pred_change_converted_val_aligned_np
        )

        # --- Predict RLS on Training Data (Needed for Final Combiner) ---
        print(f"\n--- Predicting {model_type} RLS Combiner on Training Data ---")
        y_pred_rls_combined_train = predict_with_padasip_rls(
            rls_filter_list,
            y_pred_base_train_aligned_np,
            y_pred_change_converted_train_aligned_np
        )
        
        

        print(f"--- Internal {model_type} RLS Combination Stage Complete ---")
        # Return TRAIN RLS preds, VAL RLS preds, VAL actuals df, fitted filters
        return y_pred_rls_combined_train, y_pred_rls_combined_val, y_val_base_orig, rls_filter_list

    except Exception as e:
        print(f"Error during internal {model_type} RLS combination stage: {e}")
        traceback.print_exc()
        return None, None, None, None


# --- Main Training Workflow (Modified Saving Logic) ---
def run_training(feeder_id, model_arch, scenario, version, train_start, train_end, val_start, val_end):
    """Orchestrates the training process for different architectures."""
    print(f"\n--- Starting Training Run ---")
    print(f"Feeder: {feeder_id}, Arch: {model_arch}, Scenario: {scenario}, Version: {version}")
    print(f"Train Period: {train_start} to {train_end}")
    print(f"Validation Period: {val_start} to {val_end}")

    # Determine if the architecture uses Keras
    is_keras_model = model_arch in ['ANN_Baseload', 'ANN_Change_in_Load', 'LSTM_Baseload', 'LSTM_Change_in_Load']
    if is_keras_model and not KERAS_AVAILABLE:
        print(f"ERROR: Keras is required for architecture '{model_arch}' but is not installed.")
        return

    is_single_model_run = model_arch in ['LightGBM_Baseline', 'ANN_Baseload', 'ANN_Change_in_Load', 'LSTM_Baseload', 'LSTM_Change_in_Load']
    is_ann_rls_run = model_arch == 'ANN_RLS_Combined'
    is_lstm_rls_run = model_arch == 'LSTM_RLS_Combined'
    is_final_rls_run = model_arch == 'Final_RLS_Combined'

    fetch_train_start = (pd.to_datetime(train_start) - timedelta(days=1)).strftime("%Y-%m-%d %H:%M:%S%z")
    train_df_raw = fetch_data(feeder_id, fetch_train_start, train_end)
    fetch_val_start = (pd.to_datetime(val_start) - timedelta(days=1)).strftime("%Y-%m-%d %H:%M:%S%z")
    val_df_raw = fetch_data(feeder_id, fetch_val_start, val_end)
    if train_df_raw.empty or val_df_raw.empty: print("Insufficient raw data fetched. Aborting training."); return

    model_object_trained = None # Holds the primary trained object (model or filters)
    final_validation_metrics = None
    hyperparameters = {}
    feature_config = {}
    y_pred_val_plot = None
    y_val_original = None # Will store the appropriate actuals df for plotting
    # Variables needed for saving scalers separately
    fitted_x_scaler = None
    fitted_y_scaler = None
    feature_columns_list = None
    target_columns_list = None

    try:
        # =============================================
        # === Handle Single Model Architectures =====
        # =============================================
        if is_single_model_run:
            change_in_load = model_arch in ['ANN_Change_in_Load', 'LSTM_Change_in_Load']
            apply_scaling = True # Always scale for these models
            # Perform feature engineering and scaling
            X_train_scaled, y_train_scaled, fitted_x_scaler, fitted_y_scaler = feature_engineer_and_scale(train_df_raw, scenario, x_scaler=None, y_scaler=None, change_in_load=change_in_load, apply_scaling=apply_scaling)
            X_train_original, y_train_original_base, _, _ = feature_engineer_and_scale(train_df_raw, scenario, x_scaler=fitted_x_scaler, y_scaler=fitted_y_scaler, change_in_load=False, apply_scaling=False)
            y_train_original_base = y_train_original_base.loc[y_train_scaled.index] # Align
            X_val_scaled, y_val_scaled, _, _ = feature_engineer_and_scale(val_df_raw, scenario, x_scaler=fitted_x_scaler, y_scaler=fitted_y_scaler, change_in_load=change_in_load, apply_scaling=apply_scaling)
            X_val_original, y_val_original_base, _, _ = feature_engineer_and_scale(val_df_raw, scenario, x_scaler=fitted_x_scaler, y_scaler=fitted_y_scaler, change_in_load=False, apply_scaling=False)
            y_val_original = y_val_original_base.loc[y_val_scaled.index] # Actuals for plotting/metrics are base load
            if X_train_scaled.empty or X_val_scaled.empty or fitted_x_scaler is None or fitted_y_scaler is None: print("Data processing failed. Aborting."); return
            # Store column names needed later for saving scaler info
            feature_columns_list = X_train_scaled.columns.tolist()
            target_columns_list = y_train_original_base.columns.tolist()

            # Train the specific model
            if model_arch == 'LightGBM_Baseline':
                hyperparameters = {'n_estimators': 100, 'learning_rate': 0.1, 'num_leaves': 31, 'random_state': 42}
                feature_config = {'target': 'Net_Load_Demand', 'scaling_X': 'MinMaxScaler', 'scaling_y': 'MinMaxScaler', 'output_hours': y_train_scaled.shape[1]}
                model_object_trained, final_validation_metrics, y_pred_val_lgbm_orig = train_lightgbm_model(X_train_scaled, y_train_scaled, X_val_scaled, y_val_original, fitted_y_scaler)
                y_pred_val_plot = y_pred_val_lgbm_orig
            elif model_arch == 'ANN_Baseload' or model_arch == 'ANN_Change_in_Load':
                hyperparameters = {'layers': [50], 'dropout': 0.2, 'optimizer': 'adam', 'epochs': 50, 'batch_size': 32, 'patience': 8}
                feature_config = {'target': 'Net_Load_Change' if change_in_load else 'Net_Load_Demand', 'scaling_X': 'MinMaxScaler', 'scaling_y': 'MinMaxScaler', 'output_hours': y_train_scaled.shape[1]}
                model_object_trained, final_validation_metrics, _, y_pred_val_ann_orig = train_ann_model(X_train_scaled, y_train_scaled, X_val_scaled, y_val_scaled, X_train_original, X_val_original, y_train_original_base, y_val_original, fitted_y_scaler, change_in_load)
                y_pred_val_plot = y_pred_val_ann_orig
            elif model_arch == 'LSTM_Baseload' or model_arch == 'LSTM_Change_in_Load':
                hyperparameters = {'lstm_units': 50, 'dropout': 0.2, 'optimizer': 'adam', 'epochs': 50, 'batch_size': 32, 'patience': 8}
                feature_config = {'target': 'Net_Load_Change' if change_in_load else 'Net_Load_Demand', 'scaling_X': 'MinMaxScaler', 'scaling_y': 'MinMaxScaler', 'output_hours': y_train_scaled.shape[1]}
                model_object_trained, final_validation_metrics, _, y_pred_val_lstm_orig = train_lstm_model(X_train_scaled, y_train_scaled, X_val_scaled, y_val_scaled, X_train_original, X_val_original, y_train_original_base, y_val_original, fitted_y_scaler, change_in_load)
                y_pred_val_plot = y_pred_val_lstm_orig
            else: print(f"Error: Unknown single model architecture '{model_arch}'"); return

        # =======================================================
        # === Handle Intermediate RLS Combined Stages =========
        # =======================================================
        elif is_ann_rls_run or is_lstm_rls_run:
            model_type = 'ANN' if is_ann_rls_run else 'LSTM'
            _, y_pred_rls_combined_val, y_val_rls_orig_df, rls_filter_list_stage = run_rls_combination_stage(train_df_raw, val_df_raw, scenario, model_type)
            if y_pred_rls_combined_val is None: return
            model_object_trained = rls_filter_list_stage # Save filters
            y_val_original = y_val_rls_orig_df # Actuals for metrics/plotting
            target_columns_list = y_val_original.columns.tolist() # Get target columns
            # Calculate metrics
            y_val_rls_orig_np = y_val_original.values
            mae = mean_absolute_error(y_val_rls_orig_np, y_pred_rls_combined_val); rmse = np.sqrt(mean_squared_error(y_val_rls_orig_np, y_pred_rls_combined_val))
            denominator = np.abs(y_val_rls_orig_np) + np.abs(y_pred_rls_combined_val); safe_denominator = np.where(denominator == 0, 1, denominator)
            smape_values = 200 * np.abs(y_pred_rls_combined_val - y_val_rls_orig_np) / safe_denominator; smape = np.mean(smape_values)
            final_validation_metrics = {"mae": mae, "rmse": rmse, "smape": smape}
            hyperparameters = {'rls_mu': model_object_trained[0].mu if model_object_trained and isinstance(model_object_trained, list) else None,
                               'rls_eps': model_object_trained[0].eps if model_object_trained and isinstance(model_object_trained, list) else None}
            feature_config = {'input_models': [f'{model_type}_Baseload', f'{model_type}_Change_in_Load'], 'target': 'Net_Load_Demand', 'combiner': 'padasip.FilterRLS'}
            y_pred_val_plot = y_pred_rls_combined_val

        # =======================================================
        # === Handle Final RLS Combined =========================
        # =======================================================
        elif is_final_rls_run:
            print("\n--- Getting ANN_RLS Stage Results ---")
            y_pred_ann_rls_train, y_pred_ann_rls_val, y_val_ann_orig_df, _ = run_rls_combination_stage(train_df_raw, val_df_raw, scenario, 'ANN')
            if y_pred_ann_rls_train is None: return
            print("\n--- Getting LSTM_RLS Stage Results ---")
            y_pred_lstm_rls_train, y_pred_lstm_rls_val, y_val_lstm_orig_df, _ = run_rls_combination_stage(train_df_raw, val_df_raw, scenario, 'LSTM')
            if y_pred_lstm_rls_train is None: return
            print("\n--- Aligning Inputs for Final RLS Combiner Training ---")
            _, y_train_original_base, _, _ = feature_engineer_and_scale(train_df_raw, scenario, change_in_load=False, apply_scaling=False)
            common_train_index = y_train_original_base.index
            print("************************* \n", common_train_index)
            print("y_train_original_base.index: \n", y_train_original_base.index)
            print("y_pred_ann_rls_train.index: \n", pd.DataFrame(y_pred_ann_rls_train).index)
            print("y_pred_lstm_rls_train.index: \n", pd.DataFrame(y_pred_lstm_rls_train).index)
            print("*************************")
            y_train_final_orig_np = y_train_original_base.reindex(common_train_index).values
            y_pred_ann_rls_train_aligned_np = pd.DataFrame(y_pred_ann_rls_train, index=common_train_index).reindex(common_train_index).values # Use common index
            y_pred_lstm_rls_train_aligned_np = pd.DataFrame(y_pred_lstm_rls_train, index=common_train_index).reindex(common_train_index).values # Use common index
            print("\n--- Training Final RLS Combiner on Training Data ---")
            final_rls_mu = 0.98; final_rls_eps = 0.1
            final_rls_filter_list = train_padasip_rls_combiner(y_pred_ann_rls_train_aligned_np, y_pred_lstm_rls_train_aligned_np, y_train_final_orig_np, mu=final_rls_mu, eps=final_rls_eps)
            model_object_trained = final_rls_filter_list # Save final filters
            print("\n--- Predicting Final RLS Combiner on Validation Data ---")
            common_val_index = y_val_ann_orig_df.index
            y_pred_ann_rls_val_aligned_np = pd.DataFrame(y_pred_ann_rls_val, index=common_val_index).reindex(common_val_index).values
            y_pred_lstm_rls_val_aligned_np = pd.DataFrame(y_pred_lstm_rls_val, index=common_val_index).reindex(common_val_index).values
            y_pred_final_combined_val = predict_with_padasip_rls(final_rls_filter_list, y_pred_ann_rls_val_aligned_np, y_pred_lstm_rls_val_aligned_np)
            print("\n--- Evaluating Final RLS Combiner on Validation Data ---")
            y_val_final_orig_np = y_val_ann_orig_df.reindex(common_val_index).values
            if np.isnan(y_pred_final_combined_val).any() or np.isinf(y_pred_final_combined_val).any(): print("ERROR: NaN or Inf detected in FINAL combined validation predictions!"); final_validation_metrics = {"mae": np.nan, "rmse": np.nan, "smape": np.nan}
            else:
                 mae = mean_absolute_error(y_val_final_orig_np, y_pred_final_combined_val); rmse = np.sqrt(mean_squared_error(y_val_final_orig_np, y_pred_final_combined_val))
                 denominator = np.abs(y_val_final_orig_np) + np.abs(y_pred_final_combined_val); safe_denominator = np.where(denominator == 0, 1, denominator)
                 smape_values = 200 * np.abs(y_pred_final_combined_val - y_val_final_orig_np) / safe_denominator; smape = np.mean(smape_values)
                 final_validation_metrics = {"mae": mae, "rmse": rmse, "smape": smape}
            hyperparameters = {'final_rls_mu': final_rls_mu, 'final_rls_eps': final_rls_eps}
            feature_config = {'input_models': ['ANN_RLS_Combined', 'LSTM_RLS_Combined'], 'target': 'Net_Load_Demand', 'combiner': 'padasip.FilterRLS'}
            y_pred_val_plot = y_pred_final_combined_val
            y_val_original = y_val_ann_orig_df.reindex(common_val_index) # Use aligned validation actuals
            target_columns_list = y_val_original.columns.tolist() # Get target columns

        else: print(f"Error: Logic error, model architecture '{model_arch}' not handled."); return

        # =============================================
        # === Saving and Logging (Modified Logic) =====
        # =============================================
        if model_object_trained is None: print("No model object trained to save. Aborting."); return

        print(f"\nFinal Validation Metrics (Original Scale): {final_validation_metrics}")
        # --- Plotting (Unchanged) ---
        if y_pred_val_plot is not None and y_val_original is not None:
            try:
                plot_title_prefix = f"Validation ({model_arch})"
                print(f"Generating validation plot ({plot_title_prefix} - Original Scale)...")
                if not isinstance(y_val_original, pd.DataFrame): print("Warning: y_val_original is not a DataFrame, cannot extract column names for plotting."); raise TypeError("Cannot determine target columns for plotting.")
                else: plot_target_columns = y_val_original.columns.tolist()
                actual_flat = y_val_original.values.flatten(); pred_flat = y_pred_val_plot.flatten()
                actual_hours = sorted([int(col.split("_Hour_")[-1]) for col in plot_target_columns])
                num_hours = len(actual_hours); num_days = len(y_val_original)
                base_dates = pd.to_datetime(np.repeat(y_val_original.index, num_hours))
                hour_offsets = pd.to_timedelta(np.tile(actual_hours, num_days), unit="h")
                plot_index = base_dates + hour_offsets
                if len(actual_flat) != len(pred_flat) or len(actual_flat) != len(plot_index): min_len = min(len(actual_flat), len(pred_flat), len(plot_index)); print(f"Warning: Length mismatch plotting. Truncating to {min_len}"); actual_flat, pred_flat, plot_index = actual_flat[:min_len], pred_flat[:min_len], plot_index[:min_len]
                results_df = pd.DataFrame({"Actual": actual_flat, "Predicted": pred_flat}, index=plot_index)
                results_df = results_df.sort_index()
                print(f"Sample of {plot_title_prefix} Actual vs Predicted (Original Scale):"); print(results_df.head(min(72, len(results_df)))); print(results_df.describe())
                fig = px.line(results_df, title=f'{plot_title_prefix}: Feeder {feeder_id} - {model_arch} ({scenario}'); fig.show()
            except Exception as plot_err: print(f"Could not generate validation plot: {plot_err}"); traceback.print_exc()
        else: print("Skipping plotting: Prediction or actual data not available.")

        # --- Save Artifact(s) ---
        artifact_path_for_db = None # This will hold the single path or JSON string
        try:
            if is_keras_model:
                print("Saving Keras model natively and scalers separately...")
                # 1. Save Keras model
                keras_filename = f"{model_arch}_{scenario}_{version}.keras"
                keras_local_path = os.path.join(TEMP_DIR, keras_filename)
                model_object_trained.save(keras_local_path) # Use Keras native save
                keras_storage_path = f"models/feeder_{feeder_id}/{keras_filename}"
                with open(keras_local_path, "rb") as f:
                    supabase.storage.from_(STORAGE_BUCKET).upload(path=keras_storage_path, file=f, file_options={"upsert": "true"})
                os.remove(keras_local_path)
                print(f"Keras model saved to: {keras_storage_path}")

                # 2. Save Scalers and Columns via Pickle
                scaler_info = {
                    'x_scaler': fitted_x_scaler,
                    'y_scaler': fitted_y_scaler,
                    'feature_columns': feature_columns_list,
                    'target_columns': target_columns_list
                }
                # Use a distinct version tag for the scaler file
                scaler_version_tag = f"{version}_scalers"
                scaler_storage_path = save_pickle_artifact(scaler_info, feeder_id, model_arch, scenario, scaler_version_tag)
                print(f"Scalers/Columns saved to: {scaler_storage_path}")

                # 3. Store both paths as JSON in the DB path field
                artifact_path_for_db = json.dumps({
                    "keras_model": keras_storage_path,
                    "scalers_pkl": scaler_storage_path
                })

            else:
                # For non-Keras (LGBM, RLS filters), save bundled object using pickle
                print("Saving non-Keras model/filters and potentially scalers via pickle...")
                # RLS stages already have model_object_trained as the filters list
                if is_ann_rls_run or is_lstm_rls_run or is_final_rls_run:
                     artifact_to_save = {'rls_filters': model_object_trained, 'target_columns': target_columns_list}
                     # Scalers are not directly used by RLS filters, don't save them here
                elif model_arch == 'LightGBM_Baseline':
                     artifact_to_save = {
                         'model': model_object_trained,
                         'x_scaler': fitted_x_scaler,
                         'y_scaler': fitted_y_scaler,
                         'feature_columns': feature_columns_list,
                         'target_columns': target_columns_list
                     }
                else:
                     print(f"Warning: Unhandled non-Keras architecture for saving: {model_arch}")
                     artifact_to_save = {'model': model_object_trained} # Save model only as fallback

                # Use the main version tag for the single pickle file
                artifact_path_for_db = save_pickle_artifact(artifact_to_save, feeder_id, model_arch, scenario, version)

        except Exception as e:
            print(f"Error saving artifact(s): {e}")
            traceback.print_exc()
            return # Don't log metadata if saving failed

        # --- Log Metadata ---
        if artifact_path_for_db is None:
             print("Error: Artifact path for database is null. Cannot log metadata.")
             return

        metadata = {
            'feeder_id': feeder_id,
            'model_architecture_type': model_arch,
            'scenario_type': scenario,
            'model_version': version,
            'train_data_start_timestamp': train_start,
            'train_data_end_timestamp': train_end,
            'model_hyperparameters': json.dumps(hyperparameters),
            'feature_engineering_config': json.dumps(feature_config),
            'model_artifact_path': artifact_path_for_db, # Store single path or JSON string
            'validation_metrics': json.dumps(final_validation_metrics),
            'is_active_for_forecast': False
        }
        try:
            log_model_metadata(metadata)
        except Exception as e:
            print(f"Error logging metadata: {e}")
            # Consider cleanup of saved artifacts if logging fails

    except Exception as e:
        print(f"An error occurred during the training run for {model_arch}: {e}")
        traceback.print_exc()
    finally:
        supabase.postgrest.schema("public") # Reset schema

    print(f"--- Training Run Completed for Feeder {feeder_id}, {model_arch}, {scenario} ---")


# --- Example Execution --- (Unchanged)
if __name__ == "__main__":
    if not SUPABASE_URL or not SUPABASE_KEY or "YOUR_SUPABASE_URL" in SUPABASE_URL:
        print("Error: SUPABASE_URL and SUPABASE_KEY environment variables must be set.")
    else:
        run_training(
            feeder_id=2,
            model_arch='LSTM_Change_in_Load', # Example: Train a Keras model
            scenario='24hr',
            version=MODEL_VERSION,
            train_start=TRAIN_START_DATE,
            train_end=TRAIN_END_DATE,
            val_start=VALIDATION_START_DATE,
            val_end=VALIDATION_END_DATE,
        )
        # run_training(
        #     feeder_id=FEEDER_ID_TO_TRAIN,
        #     model_arch='LightGBM_Baseline', # Example: Train a non-Keras model
        #     scenario='24hr',
        #     version=MODEL_VERSION,
        #     train_start=TRAIN_START_DATE,
        #     train_end=TRAIN_END_DATE,
        #     val_start=VALIDATION_START_DATE,
        #     val_end=VALIDATION_END_DATE,
        # )


--- Starting Training Run ---
Feeder: 2, Arch: LSTM_Change_in_Load, Scenario: 24hr, Version: v1.1_Is_Weekend
Train Period: 2024-01-01 00:00:00+00 to 2024-05-31 23:59:59+00
Validation Period: 2024-06-01 00:00:00+00 to 2024-06-30 23:59:59+00
Fetching data for Feeder 2 from 2023-12-31 00:00:00+0000 to 2024-05-31 23:59:59+00...
Fetched 3672 records.
Fetching data for Feeder 2 from 2024-05-31 00:00:00+0000 to 2024-06-30 23:59:59+00...
Fetched 720 records.
Starting feature engineering for scenario: 24hr...
Pivoting data into daily vectors...
Reshaped data: X shape (152, 177), y shape (152, 24)
++++++++++++++++++++++++++++++
            Prev_Day_Net_Load_Demand_Hour_0  Prev_Day_Net_Load_Demand_Hour_1  \
date                                                                           
2024-01-02                            276.0                            276.0   
2024-01-03                            259.2                            264.0   
2024-01-04                            327.0          

Saving Keras model natively and scalers separately...
Keras model saved to: models/feeder_2/LSTM_Change_in_Load_24hr_v1.1_Is_Weekend.keras
Saving pickled artifact temporarily to: h:\My Drive\Barbados_Forecasting_Tool_Final\tmp\LSTM_Change_in_Load_24hr_v1.1_Is_Weekend_scalers.pkl
Uploading pickled artifact to Supabase Storage path: models/feeder_2/LSTM_Change_in_Load_24hr_v1.1_Is_Weekend_scalers.pkl...
Artifact pickled locally: h:\My Drive\Barbados_Forecasting_Tool_Final\tmp\LSTM_Change_in_Load_24hr_v1.1_Is_Weekend_scalers.pkl
Supabase storage upload response: UploadResponse(path='models/feeder_2/LSTM_Change_in_Load_24hr_v1.1_Is_Weekend_scalers.pkl', full_path='models/models/feeder_2/LSTM_Change_in_Load_24hr_v1.1_Is_Weekend_scalers.pkl', fullPath='models/models/feeder_2/LSTM_Change_in_Load_24hr_v1.1_Is_Weekend_scalers.pkl')
Temporary file h:\My Drive\Barbados_Forecasting_Tool_Final\tmp\LSTM_Change_in_Load_24hr_v1.1_Is_Weekend_scalers.pkl removed.
Pickled artifact saved successfully to S

In [4]:
supabase.postgrest.schema("ml")

response = supabase.table("forecasts").select("forecast_value, actual_value").eq("feeder_id", 2).eq("model_id", 3).gte("target_timestamp", "2024-07-07").lt("target_timestamp", "2024-07-16").order("target_timestamp", desc=False).execute()

px.line(pd.DataFrame(response.data))