In [41]:
import pandas as pd
import numpy as np
from xgboost import XGBRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error
import matplotlib.pyplot as plt

In [44]:
df = pd.read_csv("data/data_wind.csv")

# Parse time and set index
df["Time"] = pd.to_datetime(df["Time"])
df = df.set_index("Time")


In [45]:
lags = [0, 1, 2, 3, 4]  # 5–60 minutes history

for lag in lags:
    df[f"wp_lag_{lag}"] = df["Wind_production"].shift(lag)
    df[f"ws_lag_{lag}"] = df["Wind_speed"].shift(lag)

# Rolling statistics
df["wp_roll_mean_6"] = df["Wind_production"].shift(1).rolling(6).mean()
df["ws_roll_mean_6"] = df["Wind_speed"].shift(1).rolling(6).mean()

# Time features
df["hour"] = df.index.hour
df["minute"] = df.index.minute

In [46]:
import joblib
from sklearn.preprocessing import MinMaxScaler

# 1. Clean data
df = df.dropna()

# 2. Drop specific solar-related columns
# GHI (Global Horizontal Irradiance), DNI (Direct Normal), DHI (Diffuse Horizontal)
df = df.drop(columns=['GHI', 'DNI', 'DHI'])

# ================================
# 3. Apply MinMax Scaler
# ================================
scale_cols = ['Wind_speed', 'Humidity', 'Temperature']
scaler = MinMaxScaler()

# Scale only the requested columns
df[scale_cols] = scaler.fit_transform(df[scale_cols])

# Save the scaler
joblib.dump(scaler, 'windscaler5.pkl')

# ================================
# 4. Define X and y (5-min ahead)
# ================================
X = df.drop("Wind_production", axis=1)
y = df["Wind_production"]

In [47]:
train_size = int(len(df) * 0.8)

X_train = X.iloc[:train_size]
y_train = y.iloc[:train_size]

X_test = X.iloc[train_size:]
y_test = y.iloc[train_size:]


In [48]:
model = XGBRegressor(
    n_estimators=400,
    learning_rate=0.05,
    max_depth=5,
    subsample=0.8,
    colsample_bytree=0.8,
    objective="reg:squarederror",
    random_state=42
)

In [49]:
model.fit(X_train, y_train)

# ================================
# 7. Prediction (5-minute ahead)
# ================================
y_pred = model.predict(X_test)

# Physical limits
y_pred = np.clip(y_pred, 0, y.max())

In [50]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np

mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
r2 = r2_score(y_test, y_pred)

print(f"MAE  : {mae:.4f}")
print(f"RMSE : {rmse:.4f}")
print(f"R²   : {r2:.4f}")


MAE  : 0.0012
RMSE : 0.0030
R²   : 0.9998


In [51]:
# Persistence model: P(t+1) = P(t)
y_persist = y_test.shift(1).dropna()
y_true = y_test.iloc[1:]

from sklearn.metrics import r2_score
print("Persistence R²:", r2_score(y_true, y_persist))


Persistence R²: 0.9984350909157432


In [52]:
model.save_model('wind_5.pkl')

  self.get_booster().save_model(fname)
