In [1]:
import logging
import os

import optuna
import pandas as pd
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_pinball_loss
from sklearn.model_selection import train_test_split

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Location of the features CSV. Set the HEFTCOM_FEATURES_CSV environment variable to
# point at a different copy; when it is unset, the original local path is used.
FEATURES_CSV = os.environ.get(
    "HEFTCOM_FEATURES_CSV",
    "/Users/florian/Documents/github/DP2/Energy_production_price_prediction/HEFTcom24/data/features.csv",
)
df = pd.read_csv(FEATURES_CSV)

In [3]:
# Remove the 'Unnamed: 0' column (presumably an index column written when the CSV
# was saved - TODO confirm) and drop every row that contains a missing value.
# errors="ignore" keeps this cell re-runnable: without it, running the cell a second
# time raises KeyError because the column has already been removed from df.
df = df.drop(columns=["Unnamed: 0"], errors="ignore").dropna()

In [4]:
# Display the column labels of df.
df.columns

Index(['valid_time', 'Solar_MWh_credit', 'SolarDownwardRadiation_Mean',
       'SolarDownwardRadiation_ncep_Mean', 'SolarDownwardRadiation_dwd_Mean',
       'SolarDownwardRadiation_RW_Mean_1h',
       'SolarDownwardRadiation_RW_dwd_Mean_30min',
       'SolarDownwardRadiation_RW_dwd_Mean_1h',
       'SolarDownwardRadiation_dwd_Mean_Lag_30min',
       'SolarDownwardRadiation_Mean_Lag_1h',
       'SolarDownwardRadiation_Mean_Lag_24h', 'Panel_Efficiency_Mean',
       'Panel_Efficiency_Std', 'Panel_Temperature_Mean',
       'Panel_Temperature_Std', 'Temperature_dwd_Std', 'Temperature_dwd_Mean',
       'cos_hour', 'cos_day'],
      dtype='object')

In [5]:
# Split into model inputs and target: the last 20% of the rows become the test set.
# shuffle=False keeps the original row order, so the split is positional rather than
# random (per the scikit-learn docs, random_state is not used in that case).
X_train, X_test, y_train, y_test = train_test_split(
    df.drop(columns=["valid_time", "Solar_MWh_credit"]),  # inputs: everything but timestamp and target
    df["Solar_MWh_credit"],                               # target column
    test_size=0.2,
    random_state=42,
    shuffle=False,
)

In [6]:
# Display the test-split target Series (pandas truncates the repr of long Series).
y_test

50906    541.010965
50907    533.581000
50908    573.276220
50909    538.836155
50910    450.362355
            ...    
63616      0.000000
63617      0.000000
63618      0.000000
63619      0.000000
63620      0.000000
Name: Solar_MWh_credit, Length: 12715, dtype: float64

In [7]:
# Configure the root logger at INFO level so that the logging.info(...) records
# emitted during the hyperparameter search show up in the notebook output.
# (`logging` is imported in the notebook's import cell.)
logging.basicConfig(level=logging.INFO)

In [8]:
# Quantile levels to tune a model for: 0.1, 0.2, ..., 0.9.
alphas = [step / 10 for step in range(1, 10)]

def objective(trial, alpha):
    """
    Objective function for the Optuna optimization. Trains a Gradient Boosting Regressor
    with the quantile loss, using hyperparameters sampled from the trial, and scores it
    with the mean pinball loss.

    The model is fitted on the notebook-level X_train / y_train and scored on
    X_test / y_test.

    NOTE(review): the value that drives the search is computed on X_test / y_test, so
    the selected hyperparameters are tuned against that split; consider scoring on a
    separate validation split instead.

    No trial.report() / trial.should_prune() is used: the model is fitted in a single
    call, so the only value available is the final loss. Reporting it at step 0 could
    only let the pruner discard a trial whose training cost has already been paid.

    args:   trial: optuna.trial.Trial
            alpha: float, the quantile to be used in the loss function
    returns: float, the mean pinball loss at quantile alpha (lower is better)
    """
    # Sample the search space. The dict literal is evaluated top to bottom, so the
    # suggestions are requested from the trial in this order.
    params = {
        "n_estimators": trial.suggest_int("n_estimators", 100, 1000),
        "max_depth": trial.suggest_int("max_depth", 3, 20),
        "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.1, log=True),
        "min_samples_split": trial.suggest_int("min_samples_split", 2, 50),
        "min_samples_leaf": trial.suggest_int("min_samples_leaf", 1, 20),
    }

    # Fixed random_state so that two trials with the same parameters give the same model.
    model = GradientBoostingRegressor(
        loss="quantile",
        alpha=alpha,
        random_state=0,
        **params,
    )
    model.fit(X_train, y_train)

    # Score the fitted model with the pinball loss at the same quantile it was trained for.
    y_pred = model.predict(X_test)
    loss = mean_pinball_loss(y_test, y_pred, alpha=alpha)

    # Log the trial number, quantile, loss and sampled parameters
    logging.info(f"Trial {trial.number} - Alpha: {alpha}, Loss: {loss}, Params: {trial.params}")

    return loss



In [9]:
# best_params maps each quantile level to the hyperparameters of its best trial.
best_params = {}
# NOTE(review): all_trials is created here but nothing in this cell fills it.
all_trials = []

# Run one independent Optuna study (50 trials, minimizing the objective) per quantile.
for alpha in alphas:
    study = optuna.create_study(study_name=f"alpha_{alpha}", direction="minimize")
    # Bind the current quantile as a default argument of the callback.
    study.optimize(lambda trial, alpha=alpha: objective(trial, alpha), n_trials=50)

    # Keep the winning configuration for this quantile.
    trial = study.best_trial
    best_params[alpha] = trial.params

    # Report the winner: its objective value, then one line per hyperparameter.
    logging.info(f"Best trial for alpha {alpha}:")
    logging.info(f"  Value: {trial.value}")
    logging.info("  Params: ")
    for key, value in best_params[alpha].items():
        logging.info(f"    {key}: {value}")

[I 2024-10-08 12:14:35,116] A new study created in memory with name: alpha_0.1
INFO:root:Trial 0 - Alpha: 0.1, Loss: 10.274475614231271, Params: {'n_estimators': 220, 'max_depth': 5, 'learning_rate': 0.09347694659806383, 'min_samples_split': 25, 'min_samples_leaf': 6}
[I 2024-10-08 12:14:35,680] Trial 0 finished with value: 10.274475614231271 and parameters: {'n_estimators': 220, 'max_depth': 5, 'learning_rate': 0.09347694659806383, 'min_samples_split': 25, 'min_samples_leaf': 6}. Best is trial 0 with value: 10.274475614231271.
INFO:root:Trial 1 - Alpha: 0.1, Loss: 10.274475614231271, Params: {'n_estimators': 109, 'max_depth': 3, 'learning_rate': 0.017649090404394423, 'min_samples_split': 34, 'min_samples_leaf': 19}
[I 2024-10-08 12:14:35,952] Trial 1 finished with value: 10.274475614231271 and parameters: {'n_estimators': 109, 'max_depth': 3, 'learning_rate': 0.017649090404394423, 'min_samples_split': 34, 'min_samples_leaf': 19}. Best is trial 0 with value: 10.274475614231271.
INFO:ro

KeyboardInterrupt: 