In [6]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import torch
import torch.nn as nn
import torch.optim as optim
import pickle
import os

In [7]:
def resample_and_interpolate(group):
    return group.reset_index(level=[1, 2]).resample('30T').asfreq().interpolate()

Load Weather Data, solar total and wind total

In [41]:
base_dir = os.getcwd()

weather_df = pd.read_csv(os.path.join(base_dir, '..', 'weather_data', 'DWD_ICON-EU.csv'))
solar_total = pd.read_csv(os.path.join(base_dir, '..', 'basic_files', 'solar_total_production.csv'))
wind_total = pd.read_csv(os.path.join(base_dir, '..', 'basic_files', 'wind_total_production.csv'))
solar_total.generation_mw = solar_total.generation_mw * 0.5
wind_total.generation_mw = wind_total.generation_mw * 0.5 - wind_total.boa
weather_df.sort_values(by='ref_datetime', inplace=True)
weather_df = weather_df.groupby(["valid_datetime","latitude","longitude"]).last().reset_index()
weather_df.reset_index(inplace=True)

Interpolate weather data to 30 minutes periodes

In [42]:
weather_df.valid_datetime = pd.to_datetime(weather_df.valid_datetime)
weather_df = weather_df.set_index(["valid_datetime","latitude","longitude"])
df_resampled = weather_df.groupby(['latitude', 'longitude'], group_keys=False).apply(resample_and_interpolate)
df_resampled = df_resampled.reset_index()

  return group.reset_index(level=[1, 2]).resample('30T').asfreq().interpolate()
  return group.reset_index(level=[1, 2]).resample('30T').asfreq().interpolate()
  return group.reset_index(level=[1, 2]).resample('30T').asfreq().interpolate()
  return group.reset_index(level=[1, 2]).resample('30T').asfreq().interpolate()
  return group.reset_index(level=[1, 2]).resample('30T').asfreq().interpolate()
  return group.reset_index(level=[1, 2]).resample('30T').asfreq().interpolate()
  return group.reset_index(level=[1, 2]).resample('30T').asfreq().interpolate()
  return group.reset_index(level=[1, 2]).resample('30T').asfreq().interpolate()
  return group.reset_index(level=[1, 2]).resample('30T').asfreq().interpolate()
  return group.reset_index(level=[1, 2]).resample('30T').asfreq().interpolate()
  return group.reset_index(level=[1, 2]).resample('30T').asfreq().interpolate()
  return group.reset_index(level=[1, 2]).resample('30T').asfreq().interpolate()
  return group.reset_index(level=[1, 2])

merge wind total and create own wind and soalr dfs with their respective longtitude and latitude

In [43]:
solar_total.timestamp_utc = pd.to_datetime(solar_total.timestamp_utc)
wind_total.timestamp_utc = pd.to_datetime(wind_total.timestamp_utc)
df_resampled.drop(columns=['index','ref_datetime'], inplace=True)
df_resampled_merged = pd.merge(df_resampled, solar_total, how='left', left_on='valid_datetime', right_on='timestamp_utc')
df_resampled_merged_solar = df_resampled_merged.loc[~(df_resampled_merged.latitude == 53.935) & ~(df_resampled_merged.longitude == 1.8645)]
df_resampled_merged_solar1 = df_resampled_merged_solar.groupby("valid_datetime").mean().reset_index()
distinct_lat_lon_pairs = df_resampled_merged_solar[['latitude', 'longitude']].drop_duplicates()

In [44]:
df_resampled_merged_wind = pd.merge(df_resampled, wind_total, how='inner', left_on='valid_datetime', right_on='timestamp_utc')
df_resampled_merged_wind = df_resampled_merged_wind.loc[(df_resampled_merged_wind.latitude == 53.935) & (df_resampled_merged_wind.longitude == 1.8645)]
df_resampled_merged_wind.drop_duplicates(inplace=True)

In [47]:
def set_up_wind_features(df):
    R_d = 287.05  # Specific gas constant for dry air (J/(kg·K))
    R_v = 461.5   # Specific gas constant for water vapor (J/(kg·K))
    p = 101325    # Standard atmospheric pressure in Pa
    # Calculate saturation vapor pressure (using temperature in Celsius), Tetens formula
    df['Temperature_K'] = df['Temperature'] + 273.15
    e_s = 0.61078 * np.exp((17.27 * (df['Temperature'])) / (df['Temperature'] +237.3))
    # in pa
    e_s = 1000 * e_s
    # Calculate actual vapor pressure
    e = df['RelativeHumidity'] / 100 * e_s
    df['AirDensity'] = (p - e) / (R_d * df['Temperature_K']) + (e / (R_v * df['Temperature_K']))
    # Turbine stats
    rotor_diameter = 154  # in meters
    approximated_total_efficiency = 0.348
    limiter = 0.94
    minimum_wind_speed = 3  # in m/s
    maximum_wind_speed_for_power_curve = 12.5  # in m/s
    maximum_wind_speed_for_operation = 25  # in m/s
    rotor_area = np.pi * (rotor_diameter / 2) ** 2  # in m²
    # turbine requires 3m/s to start rotating
    const_internal_friction_coefficient = 0.5 * 1.240 * np.pi * 77**2 * 3**3 * approximated_total_efficiency * 174 / 1000000
    maximum_power_per_turbine = 7 # in MW
    # Same for full
    df['WindSpeed_full_avg'] = (df['WindSpeed'] + df['WindSpeed:100']) / 2
    df['WindPower_full'] = 0.5 * df['AirDensity'] * rotor_area * df['WindSpeed:100'] ** 3 * 174 / 1000000
    df['UsableWindPower_full'] = np.minimum(df['WindPower_full'], maximum_power_per_turbine * 174 * limiter / approximated_total_efficiency)
    df['PowerOutput_full'] = np.where((df['WindSpeed:100'] >= minimum_wind_speed) & (df['WindSpeed:100'] <= maximum_wind_speed_for_operation), df['UsableWindPower_full'] * approximated_total_efficiency - const_internal_friction_coefficient, 0)

    # wind_df["Temperature_avg"] = (wind_df["Temperature"] + wind_df["Temperature:100"]) / 2
    # wind_df["RelativeHumidity_avg"] = (wind_df["RelativeHumidity"] + wind_df["RelativeHumidity:100"]) / 2
    df["Temperature_avg"] = df["Temperature"]
    df["RelativeHumidity_avg"] = df["RelativeHumidity"]   
    df["WindSpeed:100_dwd_lag1"] = df["WindSpeed:100"].shift(1)
    df["WindSpeed:100_dwd_lag2"] = df["WindSpeed:100"].shift(2)
    df["WindSpeed:100_dwd_lag3"] = df["WindSpeed:100"].shift(3)
    df["UsableWindPower_opt"] = df.UsableWindPower_full
    df["WindSpeed:100_dwd"] = df["WindSpeed:100"].shift(1)
    return df

In [48]:
df_resampled_merged_wind_2 = set_up_wind_features(df_resampled_merged_wind)
df_resampled_merged_wind_2.dropna(inplace=True)
X_wind = df_resampled_merged_wind_2[['WindSpeed:100_dwd', 'Temperature_avg', 'RelativeHumidity_avg', 'AirDensity', 'WindSpeed:100_dwd_lag1', 'WindSpeed:100_dwd_lag2', 'WindSpeed:100_dwd_lag3','UsableWindPower_opt']]
y_wind = df_resampled_merged_wind_2['generation_mw']

In [49]:
def set_up_solar_features(df):
    df["hour"] = df.valid_datetime.dt.hour
    df["day_of_year"] = df.valid_datetime.dt.dayofyear
    df["cos_day_of_year"] = np.cos(2 * np.pi * df.day_of_year / 365)
    df["cos_hour"] = np.cos(2 * np.pi * df.hour / 24)
    df["Mean_SolarDownwardRadiation"] = df.SolarDownwardRadiation
    df["Mean_Temperature"] = df.Temperature
    df["Std_Temperature"] = df_resampled_merged_solar.groupby("valid_datetime").std().reset_index().Temperature
    df["SolarDownwardRadiation_RW_Mean_30min"] = df.Mean_SolarDownwardRadiation.rolling(window=1, min_periods=1).mean()
    df["SolarDownwardRadiation_RW_Mean_1hour"] = df.Mean_SolarDownwardRadiation.rolling(window=2, min_periods=1).mean()
    df["SolarDownwardRadiation_dwd_Mean_Lag_30min"] = df.Mean_SolarDownwardRadiation.shift(1)
    df["SolarDownwardRadiation_dwd_Mean_Lag_1h"] = df.Mean_SolarDownwardRadiation.shift(2)
    df["SolarDownwardRadiation_dwd_Mean_Lag_24h"] = df.Mean_SolarDownwardRadiation.shift(48)
    for i in range(len(distinct_lat_lon_pairs)):
        lat = distinct_lat_lon_pairs.latitude.iloc[i]
        lon = distinct_lat_lon_pairs.longitude.iloc[i]
        mask = (df_resampled_merged_solar.latitude == lat) & (df_resampled_merged_solar.longitude == lon)
        df[f"Temperature_{i}"] = pd.Series(df_resampled_merged_solar.Temperature[mask].values)[:len(df)]  # Fill gaps with NaN
        df[f"SolarDownwardRadiation_{i}"] = pd.Series(df_resampled_merged_solar.SolarDownwardRadiation[mask].values)[:len(df)]  # Fill gaps with NaN
    return df
df_resampled_merged_solar2 = set_up_solar_features(df_resampled_merged_solar1)

In [50]:
def pv_temperature_efficiency(irradiance, ambient_temp, NOCT=45, wind_speed=1, eta_0=0.18, beta=0.004):
    # Calculate cell temperature using the simplified NOCT model
    Tc = ambient_temp + (NOCT - 20) * (irradiance / 800)
    
    # Calculate the efficiency loss due to increased cell temperature
    efficiency = eta_0 * (1 - beta * (Tc - 25))
    
    return Tc, efficiency

In [51]:
for i in range(20):
    temp_col = f'Temperature_{i}'
    irradiance_col = f'SolarDownwardRadiation_{i}'
    panel_temp_col = f'Panel_Temperature_Point{i}'
    panel_eff_col = f'Panel_Efficiency_Point{i}'
    df_resampled_merged_solar2[panel_temp_col], df_resampled_merged_solar2[panel_eff_col] = pv_temperature_efficiency(df_resampled_merged_solar2[irradiance_col], df_resampled_merged_solar2[temp_col])
df_resampled_merged_solar2["Panel_Temperature_dwd_mean"] = df_resampled_merged_solar2.filter(regex= r"Panel_Temperature.*").mean(axis= 1)
df_resampled_merged_solar2["Panel_Efficiency_dwd_mean"] = df_resampled_merged_solar2.filter(regex= r"Panel_Efficiency.*").mean(axis= 1)
df_resampled_merged_solar2["Panel_Temperature_dwd_std"] = df_resampled_merged_solar2.filter(regex= r"Panel_Temperature.*").std(axis= 1)
df_resampled_merged_solar2["Panel_Efficiency_dwd_std"] = df_resampled_merged_solar2.filter(regex= r"Panel_Efficiency.*").std(axis= 1)
df_resampled_merged_solar2["solar_mw_lag_48h"] = df_resampled_merged_solar2.generation_mw.shift(periods= 96)
df_resampled_merged_solar2["capacity_mwp_lag_48h"] = df_resampled_merged_solar2.capacity_mwp.shift(periods= 96)
df_resampled_merged_solar2["Target_Capacity_MWP%"] = df_resampled_merged_solar2.generation_mw / df_resampled_merged_solar2.capacity_mwp
df_resampled_merged_solar2["Target_Capacity_MWP%_lag_48h"] = df_resampled_merged_solar2["Target_Capacity_MWP%"].shift(periods= 96)


  df_resampled_merged_solar2["Target_Capacity_MWP%_lag_48h"] = df_resampled_merged_solar2["Target_Capacity_MWP%"].shift(periods= 96)


In [52]:
df_resampled_merged_solar3 = df_resampled_merged_solar2[[ 
    "Mean_SolarDownwardRadiation",
    "SolarDownwardRadiation_RW_Mean_1hour",
    "SolarDownwardRadiation_RW_Mean_30min",
    "SolarDownwardRadiation_dwd_Mean_Lag_30min",
    "SolarDownwardRadiation_dwd_Mean_Lag_1h",
    "SolarDownwardRadiation_dwd_Mean_Lag_24h",
    "Panel_Efficiency_dwd_mean",
    "Panel_Efficiency_dwd_std",
    "Panel_Temperature_dwd_mean",
    "Panel_Temperature_dwd_std",
    "Std_Temperature",
    "Mean_Temperature",
    "cos_hour",
    "cos_day_of_year",
    "solar_mw_lag_48h",
    "capacity_mwp_lag_48h",
    "Target_Capacity_MWP%_lag_48h",
    "Target_Capacity_MWP%"
    ]]
df_resampled_merged_solar3.dropna(inplace=True)
Y_solar = df_resampled_merged_solar3["Target_Capacity_MWP%"]
X_solar = df_resampled_merged_solar3.drop(columns=["Target_Capacity_MWP%"])
X_solar.capacity_mwp_lag_48h.mean()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_resampled_merged_solar3.dropna(inplace=True)


2778.992852002693

In [53]:
def modified_pinball_loss(y_true, y_pred, quantile):
    delta = y_true - y_pred
    return np.mean(np.maximum(quantile * delta, (quantile - 1) * delta))

In [54]:
from sklearn.ensemble import HistGradientBoostingRegressor  # Dies ist nur für die Typisierung notwendig

In [55]:
import pickle
import numpy as np
import pandas as pd
from sklearn.ensemble import HistGradientBoostingRegressor
from sklearn.exceptions import NotFittedError

In [56]:
import joblib
def load_pickle1(path):
    return joblib.load(path)

In [57]:
import plotly.express as px
import plotly.graph_objects as go
from sklearn.exceptions import InconsistentVersionWarning
import warnings

# Suppress the version mismatch warning
warnings.filterwarnings("ignore", category=InconsistentVersionWarning)
def modified_pinball_loss(y_true, y_pred, quantile):
    error = y_true - y_pred
    return np.mean(np.maximum(quantile * error, (quantile - 1) * error))

path = os.path.join(base_dir, '..', 'Generation_forecast', 'Wind_forecast', 'models', 'gbr_quantile_0.')
quantiles = [1, 2, 3, 4, 5, 6, 7, 8, 9]
losses = []

for quantile in quantiles:
    with open(f"{path}{quantile}_boa_v3_res-True_calc-False.pkl", "rb") as f:
        model_wind = load_pickle1(f)
    
    if not hasattr(model_wind, '_preprocessor'):
        model_wind._preprocessor = None

    predictions_wind = model_wind.predict(X_wind)
    df_resampled_merged_wind_2[f"generation_mw_quantile_{quantile}"] = predictions_wind + df_resampled_merged_wind_2.PowerOutput_full /2
    loss = modified_pinball_loss(y_wind, df_resampled_merged_wind_2[f"generation_mw_quantile_{quantile}"], quantile/10)
    losses.append(loss)
    print(f"Quantile {quantile} loss: {loss}")
print(f"Mean loss: {np.mean(losses)}")

Quantile 1 loss: 13.782812913582198
Quantile 2 loss: 19.793336970924532
Quantile 3 loss: 25.86278654636211
Quantile 4 loss: 25.304253905695592
Quantile 5 loss: 25.93714996047382
Quantile 6 loss: 24.93072709938239
Quantile 7 loss: 22.958426369920673
Quantile 8 loss: 19.588305351504534
Quantile 9 loss: 10.308355685585807
Mean loss: 20.940683867047962


In [64]:
losses = []
for quantile in quantiles:
    with open(f"{path}{quantile}_boa_v3_res-True_calc-False.pkl", "rb") as f:
        model_wind = load_pickle1(f)
    
    if not hasattr(model_wind, '_preprocessor'):
        model_wind._preprocessor = None

    predictions_wind = model_wind.predict(X_wind)
    df_resampled_merged_wind_2[f"generation_mw_quantile_{quantile}"] = predictions_wind + df_resampled_merged_wind_2.PowerOutput_full /2
    loss = modified_pinball_loss(y_wind, df_resampled_merged_wind_2[f"generation_mw_quantile_{quantile}"], quantile/10)
    losses.append(loss)
    print(f"Quantile {quantile} loss: {loss}")
print(f"Mean loss: {np.mean(losses)}")

Quantile 1 loss: 13.782812913582198
Quantile 2 loss: 19.793336970924532
Quantile 3 loss: 25.86278654636211
Quantile 4 loss: 25.304253905695592
Quantile 5 loss: 25.93714996047382
Quantile 6 loss: 24.93072709938239
Quantile 7 loss: 22.958426369920673
Quantile 8 loss: 19.588305351504534
Quantile 9 loss: 10.308355685585807
Mean loss: 20.940683867047962


In [21]:
df_resampled_merged_wind_2

Unnamed: 0,valid_datetime,latitude,longitude,Temperature,WindSpeed,WindSpeed:100,WindDirection:100,CloudCover,RelativeHumidity,PressureReducedMSL,...,WindSpeed:100_dwd,generation_mw_quantile_1,generation_mw_quantile_2,generation_mw_quantile_3,generation_mw_quantile_4,generation_mw_quantile_5,generation_mw_quantile_6,generation_mw_quantile_7,generation_mw_quantile_8,generation_mw_quantile_9
83,2024-10-07 01:30:00+00:00,53.935,1.8645,14.865694,9.685278,12.327083,184.986806,1.000000,94.389861,99286.175278,...,13.151389,302.619719,405.116629,435.168681,465.508207,488.039479,502.353209,513.394406,535.641854,553.694477
104,2024-10-07 02:00:00+00:00,53.935,1.8645,14.878056,9.134167,11.502778,204.012222,1.000000,93.634444,99241.963056,...,12.327083,197.497526,331.716790,379.531646,366.723707,436.092742,442.173692,520.529883,569.130119,614.825115
125,2024-10-07 02:30:00+00:00,53.935,1.8645,14.742222,9.188611,11.431667,219.084722,0.993472,93.060556,99267.524028,...,11.502778,188.340166,322.367433,370.182289,409.862050,444.557040,480.512643,519.402252,568.666988,620.509447
146,2024-10-07 03:00:00+00:00,53.935,1.8645,14.606389,9.243056,11.360556,234.157222,0.986944,92.486667,99293.085000,...,11.431667,195.047115,313.126317,361.262535,400.120119,451.173769,490.688554,514.186322,559.798814,620.043748
167,2024-10-07 03:30:00+00:00,53.935,1.8645,14.319583,9.863611,11.946944,241.937917,0.960833,91.888333,99312.772917,...,11.360556,300.558348,381.195685,429.486161,449.795440,479.407933,493.295487,501.304738,519.772286,549.001983
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14132,2024-10-20 19:30:00+00:00,53.935,1.8645,15.198333,14.084410,19.197612,213.740694,0.858056,80.060139,100574.530000,...,19.067271,420.257372,470.836566,481.290215,495.678525,512.124149,523.959360,533.761851,539.430085,556.226542
14153,2024-10-20 20:00:00+00:00,53.935,1.8645,15.125556,14.259988,19.327952,213.492778,0.902778,79.446944,100598.362778,...,19.197612,420.257372,470.836566,481.290215,495.678525,512.124149,523.959360,533.616836,539.430085,556.226542
14194,2024-10-20 20:30:00+00:00,53.935,1.8645,14.907500,14.772636,19.834505,212.610417,0.884722,79.988611,100619.087361,...,19.327952,420.257372,470.836566,481.290215,495.678525,512.124149,523.959360,533.761851,539.430085,556.226542
14216,2024-10-20 21:00:00+00:00,53.935,1.8645,14.689444,15.285284,20.341057,211.728056,0.866667,80.530278,100639.811944,...,19.834505,420.257372,470.836566,481.290215,495.678525,512.124149,523.959360,533.761851,540.716714,556.557235


In [18]:
import plotly.graph_objects as go

# Create an empty figure
fig = go.Figure()

# Plot actual values
fig.add_trace(go.Scatter(
    x=df_resampled_merged_wind_2.index,  # Assuming the index is time or some sequence
    y=y_wind,  # Actual values
    mode='lines',
    customdata=df_resampled_merged_wind_2.WindSpeed:100	,
    hovertemplate='Wind Speed: %{customdata} m/s<br>Generation: %{y} MW<br>%{x}<extra></extra>',
    name='Actual Generation',
    line=dict(color='black', width=2)  # Style for actual values
))

# Plot predicted values for each quantile
for quantile in quantiles:
    fig.add_trace(go.Scatter(
        x=df_resampled_merged_wind_2.index,  # Assuming same x-axis for predictions
        y=df_resampled_merged_wind_2[f'generation_mw_quantile_{quantile}'],  # Predictions for each quantile
        mode='lines',
        name=f'Quantile {quantile} Prediction',
        line=dict(width=1.5)  # Style for predicted values
    ))

# Update layout for better visualization
fig.update_layout(
    title='Actual vs Predicted Wind Power Generation',
    xaxis_title='Time',
    yaxis_title='Power Generation (MW)',
    legend_title='Legend',
    hovermode='x',
    template='plotly_white'
)

# Show the plot
fig.show()


In [19]:
path = "D:/Users/paulh/Desktop/Domäneprojekt2/Energy_production_price_prediction/Generation_forecast/Solar_forecast/models/lgbr_model/models/i5_models/lgbr_q"
quantiles = [1, 2, 3, 4, 5, 6, 7, 8, 9]
losses = []

# Berechnung der Verluste für jedes Quantil
for i in quantiles:
    model_light = pickle.load(open(path + str(i) + ".pkl", 'rb'))
    predictions = model_light.predict(X_solar)
    loss = modified_pinball_loss(Y_solar.values, predictions, i/10)
    losses.append(loss)
    print(f"Quantile {i/10}: {loss*2779.3337282577586}")

Quantile 0.1: 4.74462303246347
Quantile 0.2: 6.760044310893633
Quantile 0.3: 8.786987678681788
Quantile 0.4: 9.07612388168284
Quantile 0.5: 9.330828299491756
Quantile 0.6: 9.5931075994488
Quantile 0.7: 9.433897843574556
Quantile 0.8: 9.25513395725032
Quantile 0.9: 4.881881376497694


In [46]:
import plotly.express as px
# Erstellen eines DataFrames für Plotly
df = pd.DataFrame({
    'Quantile': [i/10 for i in quantiles],
    'Loss': losses
})

# Plotten der Verluste mit Plotly
fig = px.line(df, x='Quantile', y='Loss', markers=True, title='Modified Pinball Loss for Different Quantiles')
fig.update_layout(
    xaxis_title='Quantile',
    yaxis_title='Modified Pinball Loss',
    template='plotly_white'
)
fig.show()

In [47]:
import pickle
import plotly.express as px
import pandas as pd

# Pfad zu den Modellen
path = "D:/Users/paulh/Desktop/Domäneprojekt2/Energy_production_price_prediction/Generation_forecast/Solar_forecast/models/lgbr_model/models/i5_models/lgbr_q"
quantiles = [1, 2, 3, 4, 5, 6, 7, 8, 9]
predictions_dict = {}

# Berechnung der Vorhersagen für jedes Quantil
for i in quantiles:
    model_light = pickle.load(open(path + str(i) + ".pkl", 'rb'))
    predictions = model_light.predict(X_solar)
    predictions_dict[f'Quantile {i/10}'] = predictions

# Erstellen eines DataFrames für Plotly
df = pd.DataFrame(predictions_dict)
df['Actual'] = Y_solar.values

# Plotten der Vorhersagen und der tatsächlichen Werte mit Plotly
fig = px.line(df, title='Predictions and Actual Values for Different Quantiles')
for quantile in predictions_dict.keys():
    fig.add_scatter(x=df.index, y=df[quantile], mode='lines', name=quantile)
fig.add_scatter(x=df.index, y=df['Actual'], mode='lines', name='Actual', line=dict(color='black', width=2))
fig.update_layout(
    xaxis_title='Index',
    yaxis_title='Value',
    template='plotly_white'
)
fig.show()

### create training data

In [63]:
df_solar_history = pd.read_csv('D:/Users/paulh/Desktop/Domäneprojekt2/Energy_production_price_prediction/paul_analyse/train.csv')
df_wind_history = pd.read_csv('D:/Users/paulh/Desktop/Domäneprojekt2/Energy_production_price_prediction/HEFTcom24/data/wind2.csv')
df_wind_history.valid_time = pd.to_datetime(df_wind_history.valid_time)
df_day_ahead = pd.read_csv('D:/Users/paulh/Desktop/Domäneprojekt2/Energy_production_price_prediction/basic_files/day_ahead_price.csv')
df_imbalance = pd.read_csv('D:/Users/paulh/Desktop/Domäneprojekt2/Energy_production_price_prediction/basic_files/imbalance_price.csv')
market_index = pd.read_csv('D:/Users/paulh/Desktop/Domäneprojekt2/Energy_production_price_prediction/basic_files/market_index.csv')

In [57]:
df_together = pd.merge(df_solar_history, df_wind_history, how='left', left_on='timestamp_utc', right_on='valid_time')
df_together["Target_MW"] = df_together["Wind_MWh_credit"] + df_together["Solar_MWh_credit"]
df_together.timestamp_utc = pd.to_datetime(df_together.timestamp_utc)
df_together.columns

Index(['timestamp_utc', 'Mean_SolarRadiation_dwd',
       'SolarDownwardRadiation_RW_dwd_Mean_30min',
       'SolarDownwardRadiation_RW_dwd_Mean_1h',
       'SolarDownwardRadiation_dwd_Mean_Lag_30min',
       'SolarDownwardRadiation_dwd_Mean_Lag_1h',
       'SolarDownwardRadiation_dwd_Mean_Lag_24h', 'Panel_Efficiency_dwd_mean',
       'Panel_Efficiency_dwd_std', 'Panel_Temperature_dwd_mean',
       'Panel_Temperature_dwd_std', 'Std_Temperature_dwd',
       'Mean_Temperature_dwd', 'cos_hour', 'cos_day', 'solar_mw_lag_48h',
       'capacity_mwp_lag_48h', 'Target_Capacity_MWP%_lag_48h',
       'Target_Capacity_MWP%', 'Solar_MWh_credit', 'reference_time',
       'valid_time', 'RelativeHumidity_dwd', 'Temperature_dwd',
       'WindDirection_dwd', 'WindDirection:100_dwd', 'WindSpeed^3_dwd',
       'WindSpeed:100^3_dwd', 'WindSpeed_dwd', 'WindSpeed:100_dwd',
       'WindSpeed^3:100_dwd', 'RelativeHumidity_ncep', 'Temperature_ncep',
       'WindDirection_ncep', 'WindDirection:100_ncep', 'WindS

In [64]:
full_time_index = pd.date_range(start=df_wind_history['valid_time'].min(), end=df_wind_history['valid_time'].max(), freq='30T')
missing_intervals = full_time_index.difference(df_wind_history['valid_time'])
print("Fehlende Zeitintervalle:", missing_intervals)

Fehlende Zeitintervalle: DatetimeIndex(['2020-11-05 17:30:00+00:00', '2020-11-05 18:00:00+00:00',
               '2020-11-05 18:30:00+00:00', '2020-11-05 19:00:00+00:00',
               '2020-11-05 19:30:00+00:00', '2020-11-05 20:00:00+00:00',
               '2020-11-05 20:30:00+00:00', '2020-11-05 21:00:00+00:00',
               '2020-11-05 21:30:00+00:00', '2020-11-05 22:00:00+00:00',
               ...
               '2023-11-13 06:30:00+00:00', '2023-11-13 07:00:00+00:00',
               '2023-11-13 07:30:00+00:00', '2023-11-13 08:00:00+00:00',
               '2024-01-10 04:00:00+00:00', '2024-01-10 04:30:00+00:00',
               '2024-01-13 13:00:00+00:00', '2024-01-14 06:30:00+00:00',
               '2024-01-14 16:30:00+00:00', '2024-04-13 07:00:00+00:00'],
              dtype='datetime64[ns, UTC]', length=603, freq=None)


In [None]:
df['timestamp_utc'] = pd.to_datetime(df['timestamp_utc'])


In [30]:
df_together_solar = df_together[[ 
    "Mean_SolarRadiation_dwd",
    "SolarDownwardRadiation_RW_dwd_Mean_1h",
    "SolarDownwardRadiation_RW_dwd_Mean_30min",
    "SolarDownwardRadiation_dwd_Mean_Lag_30min",
    "SolarDownwardRadiation_dwd_Mean_Lag_1h",
    "SolarDownwardRadiation_dwd_Mean_Lag_24h",
    "Panel_Efficiency_dwd_mean",
    "Panel_Efficiency_dwd_std",
    "Panel_Temperature_dwd_mean",
    "Panel_Temperature_dwd_std",
    "Std_Temperature_dwd",
    "Mean_Temperature_dwd",
    "cos_hour",
    "cos_day",
    "solar_mw_lag_48h",
    "capacity_mwp_lag_48h",
    "Target_Capacity_MWP%_lag_48h",
    ]]
df_together_solar.dropna(inplace=True)
mean_to_multiply = df_together_solar["capacity_mwp_lag_48h"].mean()

path = "D:/Users/paulh/Desktop/Domäneprojekt2/Energy_production_price_prediction/Generation_forecast/Solar_forecast/models/lgbr_model/models/i5_models/lgbr_q"
quantiles = [1, 2, 3, 4, 5, 6, 7, 8, 9]
losses = []

# Berechnung der Verluste für jedes Quantil
for i in quantiles:
    model_light = pickle.load(open(path + str(i) + ".pkl", 'rb'))
    predictions = model_light.predict(df_together_solar)
    predictions = predictions * mean_to_multiply
    df_together[f"{i}"] = predictions



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [31]:
df_together.isna().sum()

timestamp_utc                                0
Mean_SolarRadiation_dwd                      0
SolarDownwardRadiation_RW_dwd_Mean_30min     0
SolarDownwardRadiation_RW_dwd_Mean_1h        0
SolarDownwardRadiation_dwd_Mean_Lag_30min    0
SolarDownwardRadiation_dwd_Mean_Lag_1h       0
SolarDownwardRadiation_dwd_Mean_Lag_24h      0
Panel_Efficiency_dwd_mean                    0
Panel_Efficiency_dwd_std                     0
Panel_Temperature_dwd_mean                   0
Panel_Temperature_dwd_std                    0
Std_Temperature_dwd                          0
Mean_Temperature_dwd                         0
cos_hour                                     0
cos_day                                      0
solar_mw_lag_48h                             0
capacity_mwp_lag_48h                         0
Target_Capacity_MWP%_lag_48h                 0
Target_Capacity_MWP%                         0
Solar_MWh_credit                             0
reference_time                               0
valid_time   

In [33]:
df_together["Temperature"] = df_together["Temperature_dwd"]
df_together["RelativeHumidity"] = df_together["RelativeHumidity_dwd"]
df_together["WindSpeed"] = df_together["WindSpeed_dwd"]
df_together["WindSpeed:100"] = df_together["WindSpeed:100_dwd"]

In [34]:
df_together_wind = set_up_wind_features(df_together)

In [37]:
df_together_wind1 = df_together_wind[[
    'WindSpeed:100_dwd', 'Temperature_avg', 'RelativeHumidity_avg', 'AirDensity', 'WindSpeed:100_dwd_lag1', 'WindSpeed:100_dwd_lag2', 'WindSpeed:100_dwd_lag3','UsableWindPower_opt'
]]
df_together_wind1.dropna(inplace=True)
value_to_add = df_together_wind["PowerOutput_full"] / 2



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [38]:
path = 'D:/Users/paulh/Desktop/Domäneprojekt2/Energy_production_price_prediction/Generation_forecast/Wind_forecast/models/gbr_quantile_0.'
quantiles = [1, 2, 3, 4, 5, 6, 7, 8, 9]
losses = []

for quantile in quantiles:
    with open(f"{path}{quantile}_res-True_calc-False.pkl", "rb") as f:
        model_wind = load_pickle1(f)
    
    if not hasattr(model_wind, '_preprocessor'):
        model_wind._preprocessor = None

    predictions_wind = model_wind.predict(df_together_wind1)
    predictions_wind = predictions_wind + value_to_add[3:]
    df_together[f"{quantile}"] = df_together[f"{quantile}"][4:] + predictions_wind

In [39]:
df_together

Unnamed: 0,timestamp_utc,Mean_SolarRadiation_dwd,SolarDownwardRadiation_RW_dwd_Mean_30min,SolarDownwardRadiation_RW_dwd_Mean_1h,SolarDownwardRadiation_dwd_Mean_Lag_30min,SolarDownwardRadiation_dwd_Mean_Lag_1h,SolarDownwardRadiation_dwd_Mean_Lag_24h,Panel_Efficiency_dwd_mean,Panel_Efficiency_dwd_std,Panel_Temperature_dwd_mean,...,WindSpeed_full_avg,WindPower_full,UsableWindPower_full,PowerOutput_full,Temperature_avg,RelativeHumidity_avg,WindSpeed:100_dwd_lag1,WindSpeed:100_dwd_lag2,WindSpeed:100_dwd_lag3,UsableWindPower_opt
0,2020-09-22 00:00:00+00:00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.188614,0.000551,13.036279,...,5.245064,509.390097,509.390097,158.387177,16.023140,85.735146,,,,509.390097
1,2020-09-22 00:30:00+00:00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.188661,0.000563,12.971021,...,5.121709,478.569212,478.569212,147.661509,16.041790,85.351560,6.374486,,,478.569212
2,2020-09-22 01:00:00+00:00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.188708,0.000577,12.905762,...,4.998354,449.018088,449.018088,137.377717,16.060438,84.967970,6.243333,6.374486,,449.018088
3,2020-09-22 01:30:00+00:00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.188702,0.000595,12.914154,...,4.868452,406.265595,406.265595,122.499850,16.015215,83.303420,6.112179,6.243333,6.374486,406.265595
4,2020-09-22 02:00:00+00:00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.188696,0.000616,12.922546,...,4.738551,366.309231,366.309231,108.595035,15.969991,81.638880,5.911122,6.112179,6.243333,366.309231
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
50026,2023-08-25 23:30:00+00:00,0.009818,0.009818,0.004048,-0.001721,-0.003729,0.004615,0.187994,0.000419,13.896639,...,6.096540,517.393352,517.393352,161.172309,16.262043,80.340330,7.193197,7.253932,7.314668,517.393352
50027,2023-08-26 00:00:00+00:00,0.021356,0.021356,0.015587,0.009818,-0.001721,0.017088,0.188246,0.000398,13.547243,...,5.425915,352.220570,352.220570,103.692181,15.913723,84.427540,6.408854,7.193197,7.253932,352.220570
50028,2023-08-26 00:30:00+00:00,0.012875,0.012875,0.017116,0.021356,0.009818,-0.000814,0.188469,0.000386,13.237374,...,5.305044,327.673709,327.673709,95.149873,15.783813,84.867800,5.635854,6.408854,7.193197,327.673709
50029,2023-08-26 01:00:00+00:00,0.004395,0.004395,0.008635,0.012875,0.021356,-0.018716,0.188692,0.000381,12.927505,...,5.184173,304.287730,304.287730,87.011553,15.653904,85.308050,5.500908,5.635854,6.408854,304.287730


In [40]:
df_together.dropna(inplace=True)

In [41]:
df_together.timestamp_utc = pd.to_datetime(df_together.timestamp_utc)
df_day_ahead.timestamp_utc = pd.to_datetime(df_day_ahead.timestamp_utc)
df_imbalance.timestamp_utc = pd.to_datetime(df_imbalance.timestamp_utc)
market_index.timestamp_utc = pd.to_datetime(market_index.timestamp_utc)

In [42]:
df_day_ahead.dtypes

timestamp_utc        datetime64[ns, UTC]
settlement_date                   object
settlement_period                  int64
price                            float64
dtype: object

In [43]:
df_together1 = pd.merge(df_together, df_day_ahead, how='inner', left_on='timestamp_utc', right_on='timestamp_utc')
df_together2 = pd.merge(df_together1, df_imbalance, how='inner', left_on='timestamp_utc', right_on='timestamp_utc')
df_together3 = pd.merge(df_together2, market_index, how='inner', left_on='timestamp_utc', right_on='timestamp_utc')
df_together3

Unnamed: 0,timestamp_utc,Mean_SolarRadiation_dwd,SolarDownwardRadiation_RW_dwd_Mean_30min,SolarDownwardRadiation_RW_dwd_Mean_1h,SolarDownwardRadiation_dwd_Mean_Lag_30min,SolarDownwardRadiation_dwd_Mean_Lag_1h,SolarDownwardRadiation_dwd_Mean_Lag_24h,Panel_Efficiency_dwd_mean,Panel_Efficiency_dwd_std,Panel_Temperature_dwd_mean,...,settlement_period_x,price_x,settlement_date_y,settlement_period_y,imbalance_price,settlement_date,settlement_period,data_provider,price_y,volume
0,2021-02-19 23:00:00+00:00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.191314,0.000215,9.285992,...,47,32.38,2021-02-19,47,32.75000,2021-02-19,47,APXMIDP,26.46,671.75
1,2021-02-19 23:30:00+00:00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.191230,0.000199,9.402743,...,48,32.38,2021-02-19,48,62.50000,2021-02-19,48,APXMIDP,29.81,905.45
2,2021-02-21 00:00:00+00:00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.190715,0.000309,10.118579,...,1,17.34,2021-02-21,1,11.79015,2021-02-21,1,APXMIDP,7.67,781.05
3,2021-02-21 00:30:00+00:00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.190812,0.000289,9.983511,...,2,17.34,2021-02-21,2,12.29000,2021-02-21,2,APXMIDP,9.82,694.45
4,2021-02-21 01:00:00+00:00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.190909,0.000278,9.848444,...,3,14.25,2021-02-21,3,16.88000,2021-02-21,3,APXMIDP,10.40,847.35
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
42280,2023-08-25 23:30:00+00:00,0.009818,0.009818,0.004048,-0.001721,-0.003729,0.004615,0.187994,0.000419,13.896639,...,2,84.41,2023-08-26,2,66.01000,2023-08-26,2,APXMIDP,83.91,1201.85
42281,2023-08-26 00:00:00+00:00,0.021356,0.021356,0.015587,0.009818,-0.001721,0.017088,0.188246,0.000398,13.547243,...,3,79.96,2023-08-26,3,66.01000,2023-08-26,3,APXMIDP,82.13,1066.95
42282,2023-08-26 00:30:00+00:00,0.012875,0.012875,0.017116,0.021356,0.009818,-0.000814,0.188469,0.000386,13.237374,...,4,79.96,2023-08-26,4,66.01000,2023-08-26,4,APXMIDP,78.76,997.60
42283,2023-08-26 01:00:00+00:00,0.004395,0.004395,0.008635,0.012875,0.021356,-0.018716,0.188692,0.000381,12.927505,...,5,73.84,2023-08-26,5,66.01000,2023-08-26,5,APXMIDP,77.47,913.25


In [44]:
df_together3.to_csv("bidding_training.csv", index=False)