In [1]:
!pip install pandas numpy matplotlib seaborn scikit-learn
!pip install pytorch-lightning
!pip install torch
!pip install optuna

Collecting pytorch-lightning
  Downloading pytorch_lightning-2.6.1-py3-none-any.whl.metadata (21 kB)
Collecting torchmetrics>0.7.0 (from pytorch-lightning)
  Downloading torchmetrics-1.8.2-py3-none-any.whl.metadata (22 kB)
Collecting lightning-utilities>=0.10.0 (from pytorch-lightning)
  Downloading lightning_utilities-0.15.3-py3-none-any.whl.metadata (5.5 kB)
Downloading pytorch_lightning-2.6.1-py3-none-any.whl (857 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m857.3/857.3 kB[0m [31m21.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading lightning_utilities-0.15.3-py3-none-any.whl (31 kB)
Downloading torchmetrics-1.8.2-py3-none-any.whl (983 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m983.2/983.2 kB[0m [31m54.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: lightning-utilities, torchmetrics, pytorch-lightning
Successfully installed lightning-utilities-0.15.3 pytorch-lightning-2.6.1 torchmetrics-1.8.2
Collecting optuna
  Dow

In [2]:
# =====================================================
# 1. IMPORT LIBRARIES
# =====================================================

import pandas as pd
import numpy as np

from xgboost import XGBRegressor
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

In [4]:
# =====================================================
# 2. LOAD & MERGE DATA
# =====================================================

historical = pd.read_csv("/content/drive/MyDrive/Skripsi/Dataset/Wind Power/wind farm historical data.csv")
nwp = pd.read_csv("/content/drive/MyDrive/Skripsi/Dataset/Wind Power/NWP.csv")

historical.columns = historical.columns.str.strip()
nwp.columns = nwp.columns.str.strip()

# Convert datetime
historical['Date'] = pd.to_datetime(historical['Date'], dayfirst=True)
nwp['time'] = pd.to_datetime(nwp['time'], format='mixed')

# Rename
historical = historical.rename(columns={
    'Date':'timestamp',
    'Speed':'wind_speed',
    'Direction':'wind_direction',
    'Energy':'power'
})

nwp = nwp.rename(columns={
    'time':'timestamp',
    'mod':'wind_speed_nwp',
    'dir':'wind_dir_nwp',
    'temp':'temperature_nwp',
    'rh':'humidity_nwp',
    'mslp':'pressure_nwp'
})

# Merge
data = pd.merge(historical, nwp, on='timestamp', how='inner')
data = data.sort_values('timestamp').reset_index(drop=True)
data = data.ffill().dropna()

print("Merged shape:", data.shape)

Merged shape: (8784, 9)


In [5]:
# =====================================================
# 3. FEATURE ENGINEERING
# =====================================================

data_fe = data.copy()

# Time features
data_fe['hour'] = data_fe['timestamp'].dt.hour
data_fe['month'] = data_fe['timestamp'].dt.month

data_fe['hour_sin'] = np.sin(2*np.pi*data_fe['hour']/24)
data_fe['hour_cos'] = np.cos(2*np.pi*data_fe['hour']/24)
data_fe['month_sin'] = np.sin(2*np.pi*data_fe['month']/12)
data_fe['month_cos'] = np.cos(2*np.pi*data_fe['month']/12)

# Lag features
for lag in [1,2,3,6,12,24]:
    data_fe[f'power_lag_{lag}'] = data_fe['power'].shift(lag)

# Rolling statistics
data_fe['rolling_mean_6'] = data_fe['power'].rolling(6).mean()
data_fe['rolling_std_6'] = data_fe['power'].rolling(6).std()

# Physics-informed features
data_fe['wind_speed_cubed'] = data_fe['wind_speed']**3
data_fe['wind_speed_nwp_cubed'] = data_fe['wind_speed_nwp']**3

data_fe = data_fe.dropna().reset_index(drop=True)

print("After feature engineering:", data_fe.shape)

After feature engineering: (8760, 25)


In [6]:
# =====================================================
# 4. MULTI-HORIZON TARGET CREATION
# =====================================================

data_mh = data_fe.copy()

data_mh['power_t+1'] = data_mh['power'].shift(-1)
data_mh['power_t+3'] = data_mh['power'].shift(-3)
data_mh['power_t+6'] = data_mh['power'].shift(-6)

data_mh = data_mh.dropna().reset_index(drop=True)

print("After multi-horizon shift:", data_mh.shape)

After multi-horizon shift: (8754, 28)


In [7]:
# =====================================================
# 5. TIME-BASED SPLIT
# =====================================================

train_size = int(len(data_mh)*0.7)
val_size = int(len(data_mh)*0.15)

train = data_mh[:train_size]
val = data_mh[train_size:train_size+val_size]
test = data_mh[train_size+val_size:]

features = data_mh.drop([
    'timestamp',
    'power',
    'power_t+1',
    'power_t+3',
    'power_t+6'
], axis=1).columns

In [8]:
# =====================================================
# 6. SCALING (TRAIN ONLY)
# =====================================================

scaler = MinMaxScaler()

X_train = scaler.fit_transform(train[features])
X_val = scaler.transform(val[features])
X_test = scaler.transform(test[features])

In [9]:
# =====================================================
# 7. TRAIN & EVALUATE PER HORIZON
# =====================================================

horizons = ['power_t+1', 'power_t+3', 'power_t+6']
results = {}

for target in horizons:

    y_train = train[target]
    y_test = test[target]

    model = XGBRegressor(
        n_estimators=800,
        max_depth=6,
        learning_rate=0.05,
        subsample=0.8,
        colsample_bytree=0.8,
        random_state=42
    )

    model.fit(X_train, y_train)

    preds = model.predict(X_test)

    rmse = np.sqrt(mean_squared_error(y_test, preds))
    mae = mean_absolute_error(y_test, preds)
    r2 = r2_score(y_test, preds)
    nrmse = rmse / np.mean(y_test)

    results[target] = {
        'RMSE': rmse,
        'MAE': mae,
        'R2': r2,
        'nRMSE': nrmse
    }

    print(f"\n===== Horizon {target} =====")
    print("RMSE:", round(rmse,4))
    print("MAE:", round(mae,4))
    print("R2:", round(r2,4))
    print("nRMSE:", round(nrmse,4))


===== Horizon power_t+1 =====
RMSE: 275.2514
MAE: 190.0442
R2: 0.7467
nRMSE: 0.597

===== Horizon power_t+3 =====
RMSE: 356.9052
MAE: 253.4486
R2: 0.5745
nRMSE: 0.7746

===== Horizon power_t+6 =====
RMSE: 428.0059
MAE: 313.2754
R2: 0.388
nRMSE: 0.929


In [10]:
# =====================================================
# 8. SUMMARY COMPARISON
# =====================================================

print("\n========== FINAL COMPARISON ==========")
for h in results:
    print(f"{h} -> RMSE: {round(results[h]['RMSE'],2)} | R2: {round(results[h]['R2'],4)}")


power_t+1 -> RMSE: 275.25 | R2: 0.7467
power_t+3 -> RMSE: 356.91 | R2: 0.5745
power_t+6 -> RMSE: 428.01 | R2: 0.388
