In [22]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from keras.models import Sequential
from keras.layers import Dense
from sklearn.metrics import r2_score

In [23]:
data_frame = pd.read_csv("electricity_prices.csv", na_values=['?'])
data_frame.head()
data_frame = data_frame.dropna()
data_frame.info()
X = data_frame[['ActualWindProduction', 'SystemLoadEP2', 'SMPEA', 'SystemLoadEA', 'ForecastWindProduction', 
     'DayOfWeek', 'Year', 'ORKWindspeed', 'CO2Intensity', 'PeriodOfDay']]
y = data_frame['SMPEP2']

<class 'pandas.core.frame.DataFrame'>
Int64Index: 37682 entries, 0 to 38013
Data columns (total 18 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   DateTime                37682 non-null  object 
 1   Holiday                 37682 non-null  object 
 2   HolidayFlag             37682 non-null  int64  
 3   DayOfWeek               37682 non-null  int64  
 4   WeekOfYear              37682 non-null  int64  
 5   Day                     37682 non-null  int64  
 6   Month                   37682 non-null  int64  
 7   Year                    37682 non-null  int64  
 8   PeriodOfDay             37682 non-null  int64  
 9   ForecastWindProduction  37682 non-null  float64
 10  SystemLoadEA            37682 non-null  float64
 11  SMPEA                   37682 non-null  float64
 12  ORKTemperature          37682 non-null  float64
 13  ORKWindspeed            37682 non-null  float64
 14  CO2Intensity            37682 non-null

In [24]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train RandomForestRegressor

In [32]:
%%time
rf_regressor = RandomForestRegressor(n_estimators=100, random_state=42, n_jobs=5)
rf_regressor.fit(X_train, y_train)

Wall time: 10.9 s


# Train XGBoost

In [26]:
xgb_regressor = XGBRegressor(n_estimators=100, random_state=42)
xgb_regressor.fit(X_train, y_train)

# Train Keras.Sequential

In [27]:
model = Sequential()
model.add(Dense(64, activation='relu', input_dim=X_train.shape[1]))
model.add(Dense(32, activation='relu'))
model.add(Dense(1, activation='linear'))
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(X_train, y_train, epochs=50, batch_size=32, verbose=0)

<keras.callbacks.History at 0x1a4abfc2a90>

# Ensemble Predictions

In [44]:
%%time
from time import process_time

t1_start = process_time()
%time rf_predictions = rf_regressor.predict(X_test)
xgb_predictions = xgb_regressor.predict(X_test)
keras_predictions = model.predict(X_test)

# Combine predictions using simple averaging
ensemble_predictions = (rf_predictions + xgb_predictions + keras_predictions.flatten()) / 3
t1_stop = process_time()
print(t1_stop-t1_start)

Wall time: 185 ms
0.984375
Wall time: 706 ms


# Evaluate Ensemble

In [30]:
ensemble_mse = np.mean((ensemble_predictions - y_test) ** 2)
print(f"Ensemble Mean Squared Error: {ensemble_mse:.2f}")

# Calculate R-squared for the ensemble
ensemble_r2 = r2_score(y_test, ensemble_predictions)
print(f"Ensemble R-squared: {ensemble_r2:.2f}")

Ensemble Mean Squared Error: 618.95
Ensemble R-squared: 0.52
