In [1]:
import xgboost as xgb
import pandas as pd
import numpy as np
import torch
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error

In [2]:
# Load Training Data
data_path = "../data/cache/wind_farm_data/wind_data_train_seq24_pred3_num5_normrobust_minmax_normalize_modetrain_clusternearest.pt"
loaded_data = torch.load(data_path)
input_sequences = loaded_data['input_sequences_tensor'].numpy()
ground_truth = loaded_data['ground_truth_tensor'].numpy()
input_sequences = input_sequences[:, :-3, :]

In [3]:
# Load Test Data
test_data_path = "../data/cache/wind_farm_data/wind_data_test_seq24_pred3_num5_normrobust_minmax_normalize_modetest_clusternearest.pt"
loaded_test_data = torch.load(test_data_path)
input_sequences_test = loaded_test_data['input_sequences_tensor'].numpy()
ground_truth_test = loaded_test_data['ground_truth_tensor'].numpy()
input_sequences_test = input_sequences_test[:, :-3, :]

In [4]:
# Load Weather Data for Training
weather_data_path = "../data/cache/weather/wind_data_train_seq24_pred3_num5_normrobust_minmax_normalize_modetrain_clusternearest_temp2m_rh2m_wind100m_winddir100m.pt"
loaded_weather_train = torch.load(weather_data_path)
weather_features_train = loaded_weather_train['weather_data_tensor'].numpy()

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [5]:
# Load Weather Data for Test
weather_data_test_path = "../data/cache/weather/wind_data_test_seq24_pred3_num5_normrobust_minmax_normalize_modetest_clusternearest_temp2m_rh2m_wind100m_winddir100m.pt"
loaded_weather_test = torch.load(weather_data_test_path)
weather_features_test = loaded_weather_test['weather_data_tensor'].numpy()

In [6]:
# Load Spatial Data for Training
spatial_data_path = "../data/cache/spatial/train_seq24_pred3_num5_clusternearest.pt"
spatial_data = torch.load(spatial_data_path)
correlation = spatial_data['correlation'].numpy()
distance = spatial_data['distance'].numpy()

In [7]:
# Load Spatial Data for Test
spatial_data_test_path = "../data/cache/spatial/test_seq24_pred3_num5_clusternearest.pt"
spatial_data_test = torch.load(spatial_data_test_path)
correlation_test = spatial_data_test['correlation'].numpy()
distance_test = spatial_data_test['distance'].numpy()

In [8]:
# Flatten the weather data for training and testing
weather_features_train_flat = weather_features_train.reshape(weather_features_train.shape[0], -1)
weather_features_test_flat = weather_features_test.reshape(weather_features_test.shape[0], -1)

In [9]:
# Prepare Training Data
X_flat = input_sequences.reshape(input_sequences.shape[0], -1)
y_train = ground_truth

# Prepare Testing Data
X_test_flat = input_sequences_test.reshape(input_sequences_test.shape[0], -1)
y_test = ground_truth_test

In [10]:
X_augmented_train = np.concatenate([X_flat, weather_features_train_flat, correlation, distance], axis=1)
X_augmented_test = np.concatenate([X_test_flat, weather_features_test_flat, correlation_test, distance_test], axis=1)

In [11]:
models = []
predictions = []

for i in range(3):
    model = xgb.XGBRegressor(objective='reg:squarederror')
    model.fit(X_augmented_train, y_train[:, i]) 
    models.append(model)
    pred = model.predict(X_augmented_test)
    predictions.append(pred)

In [15]:
mse_values = []
mae_values = []
rmse_values = []
mbe_values = []
maape_values = []

for i in range(3):
    mse = mean_squared_error(y_test[:, i, :], predictions[i].reshape(-1, 5))
    mae = mean_absolute_error(y_test[:, i, :], predictions[i].reshape(-1, 5))
    rmse = np.sqrt(mse)
    mbe = np.mean(predictions[i].reshape(-1, 5) - y_test[:, i, :])
    maape = np.mean(np.arctan(np.abs((y_test[:, i, :] - predictions[i].reshape(-1, 5))) / np.abs(y_test[:, i, :])))
    
    mse_values.append(mse)
    mae_values.append(mae)
    rmse_values.append(rmse)
    mbe_values.append(mbe)
    maape_values.append(maape)

overall_mse = np.mean(mse_values)
overall_mae = np.mean(mae_values)
overall_rmse = np.mean(rmse_values)
overall_mbe = np.mean(mbe_values)
overall_maape = np.mean(maape_values)

error_metrics_df = pd.DataFrame({
    'Metric': ['MSE', 'RMSE', 'MAE', 'MBE', 'MAAPE'],
    'Target 1': [mse_values[0], rmse_values[0], mae_values[0], mbe_values[0], maape_values[0]],
    'Target 2': [mse_values[1], rmse_values[1], mae_values[1], mbe_values[1], maape_values[1]],
    'Target 3': [mse_values[2], rmse_values[2], mae_values[2], mbe_values[2], maape_values[2]],
    'Overall': [overall_mse, overall_rmse, overall_mae, overall_mbe, overall_maape]
})

print(error_metrics_df)

  Metric  Target 1  Target 2  Target 3   Overall
0    MSE  0.024103  0.035430  0.047597  0.035710
1   RMSE  0.155251  0.188229  0.218168  0.187216
2    MAE  0.111349  0.139989  0.165913  0.139084
3    MBE  0.000716  0.001493  0.002297  0.001502
4  MAAPE  0.357780  0.439015  0.553706  0.450167


  maape = np.mean(np.arctan(np.abs((y_test[:, i, :] - predictions[i].reshape(-1, 5))) / np.abs(y_test[:, i, :])))
  maape = np.mean(np.arctan(np.abs((y_test[:, i, :] - predictions[i].reshape(-1, 5))) / np.abs(y_test[:, i, :])))
  maape = np.mean(np.arctan(np.abs((y_test[:, i, :] - predictions[i].reshape(-1, 5))) / np.abs(y_test[:, i, :])))


In [16]:
import seaborn as sns
import ipywidgets as widgets
from ipywidgets import Layout, Button, HBox, VBox

sns.set_theme(style="darkgrid", palette="mako")
sns.set_context("talk")

predictions_reshaped = np.array(predictions).T.reshape(-1, 3, 5)
example_index_slider = widgets.IntSlider(value=0, min=0, max=len(y_test)-1, step=1, description='Example Index:', readout=True, style={'description_width': 'initial'})
output_plot = widgets.Output()

def plot_results(example_index):
    output_plot.clear_output()
    with output_plot:
        for feature in range(5):
            fig, ax = plt.subplots(figsize=(17, 1.5))
            historical_series = input_sequences[example_index, :, feature]
            
            x_historical_series = list(range(1, 25))
            sns.lineplot(x=x_historical_series, y=historical_series, marker='o', dashes=False, color='#165DB1', ax=ax)
            
            full_ground_truth_series = np.concatenate([historical_series[-1:], y_test[example_index, :, feature]])
            full_prediction_series = np.concatenate([historical_series[-1:], predictions_reshaped[example_index, :, feature]])
            
            
            x_extended_series = list(range(24, 28))
            
            sns.lineplot(x=x_extended_series, y=full_ground_truth_series, marker='o', dashes=True, color='#165DB1', ax=ax)
            sns.lineplot(x=x_extended_series, y=full_prediction_series, marker='o', dashes=True, color='#C680BB', ax=ax)

            ax.set_xlabel('')
            ax.set_ylabel('')
            ax.set_xticks([])
            ax.set_yticks([])

            mae = mean_absolute_error(y_test[example_index, :, feature], predictions_reshaped[example_index, :, feature])
            mse = mean_squared_error(y_test[example_index, :, feature], predictions_reshaped[example_index, :, feature])
            
            metrics_text = f"Wind Farm {feature+1}\n\nMAE: {mae:.5f}\nMSE: {mse:.5f}"
            ax.text(1.05, 0.5, metrics_text, transform=ax.transAxes, fontsize=15, verticalalignment='center', bbox=dict(facecolor='white', edgecolor='black', boxstyle='round,pad=1'))

            sns.despine()
            plt.legend([],[], frameon=False)
            plt.show()

def example_index_changed(change):
    plot_results(change['new'])

example_index_slider.observe(example_index_changed, names='value')

display(example_index_slider, output_plot)
plot_results(example_index_slider.value)


IntSlider(value=0, description='Example Index:', max=66409, style=SliderStyle(description_width='initial'))

Output()