In [None]:
import pandas as pd
import warnings
warnings.filterwarnings("ignore")
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
import xgboost as xgb
from sklearn.metrics import mean_squared_error, mean_absolute_error, root_mean_squared_error
import pickle

In [None]:
# Test set used by every model below; the path is relative to the notebook's working directory.
test_data_path = "../Daten/test_data.csv"
df_test = pd.read_csv(test_data_path)

In [None]:
# Parse the timestamp column (mixed formats, day first) and make it the index.
# Entries that cannot be parsed become NaT instead of raising.
# Note: this cell moves the column into the index, so re-running it without
# reloading df_test raises a KeyError.
parsed_timestamps = pd.to_datetime(
    df_test['Date and time'],
    format='mixed',
    dayfirst=True,
    errors='coerce',
)
df_test['Date and time'] = parsed_timestamps
df_test = df_test.set_index('Date and time')

## SARIMAX

In [None]:
# Restore the pickled SARIMAX model object from the working directory.
# pickle can execute arbitrary code while loading; only open trusted files.
with open('sarimax_model.pkl', mode='rb') as model_file:
    sarimax = pickle.load(model_file)

In [None]:
# One forecast step per test row, with the wind speed column as exogenous input.
n_forecast_steps = len(df_test)
wind_speed_exog = df_test["Wind speed (m/s)"]
sarimax_forecast = sarimax.get_forecast(steps=n_forecast_steps, exog=wind_speed_exog)

In [None]:
# Cap the forecast at 2050.
# NOTE(review): 2050 is presumably the turbine's rated power in kW — confirm.
# The capped series is written back onto the forecast object so that later
# cells reading `sarimax_forecast.predicted_mean` see the capped values.
capped_mean = sarimax_forecast.predicted_mean.clip(upper=2050)
sarimax_forecast.predicted_mean = capped_mean

In [None]:
from sklearn.metrics import mean_absolute_error
import numpy as np

# Evaluation window: every test row except the last 2016.
# NOTE(review): the reason for holding back 2016 rows is not visible here — confirm.
# Both sides are converted to plain NumPy arrays so the subtraction is positional
# and does not depend on the forecast's own index.
actual_values = df_test["Power (kW)"].iloc[:-2016].values
predicted_values_sar = sarimax_forecast.predicted_mean[:-2016].values

errors_sar = actual_values - predicted_values_sar
abs_errors_sar = np.abs(errors_sar)

# Root mean squared error.
rmse = np.sqrt(np.mean(errors_sar ** 2))

# Median absolute deviation of the errors.
mad = np.median(abs_errors_sar)

# Symmetric MAPE. A sample with actual == predicted == 0 yields 0/0 = NaN, and on
# raw NumPy arrays a single NaN turns the whole mean into NaN. Such samples carry
# no relative-error information, so they are excluded explicitly. This matches what
# the pandas-based metric cells do implicitly (Series.mean skips NaN).
smape_denominator = (np.abs(actual_values) + np.abs(predicted_values_sar)) / 2
has_scale = smape_denominator != 0
smape = 100 * np.mean(abs_errors_sar[has_scale] / smape_denominator[has_scale])

# Mean error (bias): positive means the model under-predicts on average.
me = np.mean(errors_sar)

print("RMSE:", rmse)
print("MAD:", mad)
print("sMAPE:", smape, "%")
print("ME:", me)

In [None]:
# Sanity check: warn when the observed values average out to exactly zero.
mean_is_zero = actual_values.mean() == 0
if mean_is_zero:
    print("Warnung: Der Mittelwert der tatsächlichen Werte ist 0!")

In [None]:
# Plot window in row positions of df_test; `horizon` is the exclusive end position.
start = 288
horizon = 432
window = slice(start, horizon)

fig, ax = plt.subplots(figsize=(10, 6))
ax.set_title("SARIMAX Nowcast")
ax.plot(df_test.index[window], df_test["Power (kW)"].iloc[window], label="Tatsächliche Werte")
ax.plot(df_test.index[window], predicted_values_sar[window], label="Nowcasts")
ax.set_ylabel("Power (kW)")
ax.set_xlabel("Datum")
ax.legend()
plt.show()

## GAM

In [None]:
# Restore the pickled GAM model object from the working directory.
# pickle can execute arbitrary code while loading; only open trusted files.
with open('gam_model.pkl', mode='rb') as model_file:
    gam = pickle.load(model_file)

In [None]:
# The GAM predicts power from the raw wind speed column only.
gam_input = df_test["Wind speed (m/s)"]
gam_forecast = gam.predict(gam_input)

In [None]:
from sklearn.metrics import mean_absolute_error, r2_score
import numpy as np

# Evaluation window: every test row except the last 2016.
# NOTE(review): the reason for holding back 2016 rows is not visible here — confirm.
actual_values = df_test["Power (kW)"].iloc[:-2016]
predicted_values_gam = gam_forecast[:-2016]

# Signed and absolute errors are shared by all four metrics.
gam_errors = actual_values - predicted_values_gam
gam_abs_errors = np.abs(gam_errors)

rmse = np.sqrt(np.mean(gam_errors ** 2))   # root mean squared error
mad = np.median(gam_abs_errors)            # median absolute deviation
smape = 100 * np.mean(                     # symmetric MAPE in percent
    gam_abs_errors / ((np.abs(actual_values) + np.abs(predicted_values_gam)) / 2)
)
me = np.mean(gam_errors)                   # mean error (bias)

for report_line in (("RMSE:", rmse), ("MAD:", mad), ("sMAPE:", smape, "%"), ("ME:", me)):
    print(*report_line)

In [None]:
# Plot window in row positions of df_test; `horizon` is the exclusive end position.
start = 288
horizon = 432
window = slice(start, horizon)

fig, ax = plt.subplots(figsize=(10, 6))
ax.set_title("GAM Nowcast")
ax.plot(df_test.index[window], df_test["Power (kW)"].iloc[window], label="Tatsächliche Werte (Validierung)")
ax.plot(df_test.index[window], predicted_values_gam[window], label="Vorhersagen")
ax.set_ylabel("Power (kW)")
ax.set_xlabel("Datum")
ax.legend()
plt.show()

## Random Forest

In [None]:
# Restore the pickled random-forest model object from the working directory.
# pickle can execute arbitrary code while loading; only open trusted files.
with open('rf_model.pkl', mode='rb') as model_file:
    rf = pickle.load(model_file)

In [None]:
# Single-column feature frame (double brackets keep it two-dimensional for predict()).
rf_features = df_test[["Density adjusted wind speed (m/s)"]]
rf_forecast = rf.predict(rf_features)

In [None]:
from sklearn.metrics import mean_absolute_error, r2_score
import numpy as np

# Evaluation window: every test row except the last 2016.
# NOTE(review): the reason for holding back 2016 rows is not visible here — confirm.
actual_values = df_test["Power (kW)"].iloc[:-2016]
predicted_values_rf = rf_forecast[:-2016]

# Signed and absolute errors are shared by all four metrics.
rf_errors = actual_values - predicted_values_rf
rf_abs_errors = np.abs(rf_errors)

rmse = np.sqrt(np.mean(rf_errors ** 2))   # root mean squared error
mad = np.median(rf_abs_errors)            # median absolute deviation
smape = 100 * np.mean(                    # symmetric MAPE in percent
    rf_abs_errors / ((np.abs(actual_values) + np.abs(predicted_values_rf)) / 2)
)
me = np.mean(rf_errors)                   # mean error (bias)

for report_line in (("RMSE:", rmse), ("MAD:", mad), ("sMAPE:", smape, "%"), ("ME:", me)):
    print(*report_line)

In [None]:
# Plot window in row positions of df_test; `horizon` is the exclusive end position.
start = 288
horizon = 432
window = slice(start, horizon)

fig, ax = plt.subplots(figsize=(10, 6))
ax.set_title("Random Forest Nowcast")
ax.plot(df_test.index[window], df_test["Power (kW)"].iloc[window], label="Tatsächliche Werte (Validierung)")
ax.plot(df_test.index[window], predicted_values_rf[window], label="Vorhersagen")
ax.set_ylabel("Power (kW)")
ax.set_xlabel("Datum")
ax.legend()
plt.show()

## Gradient Boosting

In [None]:
# Restore the pickled gradient-boosting model object from the working directory.
# pickle can execute arbitrary code while loading; only open trusted files.
with open('gb_model.pkl', mode='rb') as model_file:
    gb = pickle.load(model_file)

In [None]:
# Single-column feature frame (double brackets keep it two-dimensional for predict()).
gb_features = df_test[["Density adjusted wind speed (m/s)"]]
gb_forecast = gb.predict(gb_features)

In [None]:
from sklearn.metrics import mean_absolute_error, r2_score
import numpy as np

# Evaluation window: every test row except the last 2016.
# NOTE(review): the reason for holding back 2016 rows is not visible here — confirm.
actual_values = df_test["Power (kW)"].iloc[:-2016]
predicted_values_gb = gb_forecast[:-2016]

# Signed and absolute errors are shared by all four metrics.
gb_errors = actual_values - predicted_values_gb
gb_abs_errors = np.abs(gb_errors)

rmse = np.sqrt(np.mean(gb_errors ** 2))   # root mean squared error
mad = np.median(gb_abs_errors)            # median absolute deviation
smape = 100 * np.mean(                    # symmetric MAPE in percent
    gb_abs_errors / ((np.abs(actual_values) + np.abs(predicted_values_gb)) / 2)
)
me = np.mean(gb_errors)                   # mean error (bias)

for report_line in (("RMSE:", rmse), ("MAD:", mad), ("sMAPE:", smape, "%"), ("ME:", me)):
    print(*report_line)

In [None]:
# Plot window in row positions of df_test; `horizon` is the exclusive end position.
start = 288
horizon = 432
window = slice(start, horizon)

fig, ax = plt.subplots(figsize=(10, 6))
ax.set_title("Gradient Boosting Nowcast")
ax.plot(df_test.index[window], df_test["Power (kW)"].iloc[window], label="Tatsächliche Werte (Validierung)")
ax.plot(df_test.index[window], predicted_values_gb[window], label="Vorhersagen")
ax.set_ylabel("Power (kW)")
ax.set_xlabel("Datum")
ax.legend()
plt.show()

## XGBoost

In [None]:
# Restore the pickled XGBoost model object from the working directory.
# NOTE(review): binding the model to `xgb` replaces the `import xgboost as xgb`
# module alias from the import cell. The name is kept because the prediction
# cell calls `xgb.predict(...)`; rename both together if the module is needed later.
# pickle can execute arbitrary code while loading; only open trusted files.
with open('xgb_model.pkl', mode='rb') as model_file:
    xgb = pickle.load(model_file)

In [None]:
# `xgb` is the model loaded from xgb_model.pkl here, not the xgboost module.
# Single-column feature frame (double brackets keep it two-dimensional for predict()).
xgb_features = df_test[["Density adjusted wind speed (m/s)"]]
xgb_forecast = xgb.predict(xgb_features)

In [None]:
from sklearn.metrics import mean_absolute_error, r2_score
import numpy as np

# Evaluation window: every test row except the last 2016.
# NOTE(review): the reason for holding back 2016 rows is not visible here — confirm.
actual_values = df_test["Power (kW)"].iloc[:-2016]
predicted_values_xgb = xgb_forecast[:-2016]

# Signed and absolute errors are shared by all four metrics.
xgb_errors = actual_values - predicted_values_xgb
xgb_abs_errors = np.abs(xgb_errors)

rmse = np.sqrt(np.mean(xgb_errors ** 2))   # root mean squared error
mad = np.median(xgb_abs_errors)            # median absolute deviation
smape = 100 * np.mean(                     # symmetric MAPE in percent
    xgb_abs_errors / ((np.abs(actual_values) + np.abs(predicted_values_xgb)) / 2)
)
me = np.mean(xgb_errors)                   # mean error (bias)

for report_line in (("RMSE:", rmse), ("MAD:", mad), ("sMAPE:", smape, "%"), ("ME:", me)):
    print(*report_line)

In [None]:
# Plot window in row positions of df_test; `horizon` is the exclusive end position.
start = 288
horizon = 432
window = slice(start, horizon)

fig, ax = plt.subplots(figsize=(10, 6))
ax.set_title("XGBoost Nowcast")
ax.plot(df_test.index[window], df_test["Power (kW)"].iloc[window], label="Tatsächliche Werte (Validierung)")
ax.plot(df_test.index[window], predicted_values_xgb[window], label="Vorhersagen")
ax.set_ylabel("Power (kW)")
ax.set_xlabel("Datum")
ax.legend()
plt.show()

## S-Kurve

In [None]:

# Upper asymptote of the logistic curve; read as a global by s_curve below.
# NOTE(review): presumably the turbine's rated power in kW — confirm.
P_max_fixed = 2050 

def s_curve(v, k, v_0):
    """Logistic (S-shaped) curve: P_max_fixed / (1 + exp(-k * (v - v_0))).

    v   -- input value(s); the notebook passes the density-adjusted wind speed column
    k   -- steepness of the curve
    v_0 -- midpoint; at v == v_0 the result is P_max_fixed / 2
    """
    return P_max_fixed / (1 + np.exp(-k * (v - v_0)))

# The pickle holds a (function, parameters) pair; loading it rebinds the name
# `s_curve`, replacing the definition above.
# NOTE(review): presumably the def above exists so pickle can resolve the function
# by name while loading — confirm before removing either one.
# pickle can execute arbitrary code while loading; only open trusted files.
with open("s_curve_model.pkl", "rb") as file:
    s_curve, params = pickle.load(file)

In [None]:
# Evaluate the fitted S-curve on every test row.
new_wind_speeds = df_test["Density adjusted wind speed (m/s)"]

predicted_power_s = s_curve(new_wind_speeds, *params)

# Show only a short preview. Printing one line per test row floods the notebook
# output and bloats the saved file without adding information.
PREVIEW_ROWS = 10
# zip() stops at the shorter iterable, so truncating the wind speeds is enough.
for wind, power in zip(new_wind_speeds.head(PREVIEW_ROWS), predicted_power_s):
    print(f"Windgeschwindigkeit: {wind:.1f} m/s -> Vorhergesagte Leistung: {power:.2f} kW")

omitted_rows = len(new_wind_speeds) - PREVIEW_ROWS
if omitted_rows > 0:
    print(f"... ({omitted_rows} weitere Zeilen nicht angezeigt)")

In [None]:
from sklearn.metrics import mean_absolute_error, r2_score
import numpy as np

# Evaluation window: every test row except the last 2016.
# NOTE(review): the reason for holding back 2016 rows is not visible here — confirm.
actual_values = df_test["Power (kW)"].iloc[:-2016]
predicted_values_s = predicted_power_s[:-2016]

# Signed and absolute errors are shared by all four metrics.
s_curve_errors = actual_values - predicted_values_s
s_curve_abs_errors = np.abs(s_curve_errors)

rmse = np.sqrt(np.mean(s_curve_errors ** 2))   # root mean squared error
mad = np.median(s_curve_abs_errors)            # median absolute deviation
smape = 100 * np.mean(                         # symmetric MAPE in percent
    s_curve_abs_errors / ((np.abs(actual_values) + np.abs(predicted_values_s)) / 2)
)
me = np.mean(s_curve_errors)                   # mean error (bias)

for report_line in (("RMSE:", rmse), ("MAD:", mad), ("sMAPE:", smape, "%"), ("ME:", me)):
    print(*report_line)

In [None]:
# Display the smallest observed power value in the evaluation window.
# NOTE(review): presumably a sanity check for negative/zero values that affect sMAPE — confirm.
actual_values.min()

In [None]:
# Display the smallest S-curve prediction over the full test set (not only the evaluation window).
predicted_power_s.min()

In [None]:
# Plot window in row positions of df_test; `horizon` is the exclusive end position.
# Unlike the other per-model plots, this one shows the first 144 rows.
start = 0
horizon = 144
window = slice(start, horizon)

fig, ax = plt.subplots(figsize=(10, 6))
ax.set_title("S-Kurve Nowcast")
ax.plot(df_test.index[window], df_test["Power (kW)"].iloc[window], label="Tatsächliche Werte (Validierung)")
ax.plot(df_test.index[window], predicted_values_s[window], label="Vorhersagen")
ax.set_ylabel("Power (kW)")
ax.set_xlabel("Datum")
ax.legend()
plt.show()

In [None]:
# Set the font size BEFORE any artist is created. Matplotlib resolves the font
# size when a text object (title, axis label) is built, so updating rcParams just
# before legend() affected only the legend on a fresh kernel and everything on
# a re-run. The setting is still global and persists for later cells, as before.
plt.rcParams.update({'font.size': 14})

# Plot window in row positions of df_test; `horizon` is the exclusive end position.
start = 288
horizon = 432
window = slice(start, horizon)
window_timestamps = df_test.index[window]

# Legend label -> nowcast series, in the order they are drawn.
model_nowcasts = {
    "S-Kurve": predicted_values_s,
    "SARIMAX": predicted_values_sar,
    "GAM": predicted_values_gam,
    "Random Forest": predicted_values_rf,
    "Gradient Boosting": predicted_values_gb,
    "XGBoost": predicted_values_xgb,
}

fig, ax = plt.subplots(figsize=(12, 6))
ax.set_title("Nowcasts aller Modelle im Vergleich")
ax.plot(window_timestamps, df_test["Power (kW)"].iloc[window], label="Tatsächliche Werte (Testdaten)")
for model_label, nowcast in model_nowcasts.items():
    ax.plot(window_timestamps, nowcast[window], label=model_label)
ax.set_ylabel("Leistung (kW)")  # fixed typo: was "Leisung"
ax.set_xlabel("Datum")
ax.legend()
plt.show()

In [None]:
import matplotlib.pyplot as plt
import pandas as pd

# Set the font size BEFORE any artist is created. Matplotlib resolves the font
# size when a text object is built, so updating rcParams just before show() had
# no effect on this figure's title and labels on a fresh kernel.
plt.rcParams.update({'font.size': 15})

# Residual window in row positions of the evaluation data. The negative end
# position drops the last 10000 evaluated rows.
# NOTE(review): if the evaluation window has 10000 rows or fewer this slice is
# empty and the boxplot has no data — confirm the test set is large enough.
horizon = -10000
start = 0
window = slice(start, horizon)

# `actual_values` is whatever the most recently executed metrics cell assigned
# (the S-curve cell when the notebook is run top to bottom).
actual_window = actual_values[window]

# Column label -> predictions, in the order the boxes are drawn.
model_predictions = {
    "S-Kurve": predicted_values_s,
    "SARIMAX": predicted_values_sar,
    "GAM": predicted_values_gam,
    "Random Forest": predicted_values_rf,
    "Gradient Boosting": predicted_values_gb,
    "XGBoost": predicted_values_xgb,
}

# Residual = actual - predicted, per model.
residuals = {
    model_label: actual_window - predictions[window]
    for model_label, predictions in model_predictions.items()
}

residuals_df = pd.DataFrame(residuals)

plt.figure(figsize=(12, 6))
plt.title("Abweichungen der Vorhersagen der Modelle (Boxplot)")
residuals_df.boxplot(notch=True)  # draws onto the current axes
plt.ylabel("Abweichungen (kW)")
plt.xlabel("Modelle")
plt.xticks(rotation=45)
plt.grid(False)  # pandas' boxplot enables the grid by default
plt.show()