In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import explained_variance_score
from sklearn.metrics import r2_score
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error

Select the target to predict: `label` can be set to any of the columns listed in `potential_labels`

In [2]:
# Columns of features.csv that can serve as the prediction target.
potential_labels = ['solar_generation_kw', 'power_use_kw', 'wind_generation_kw', 'green_energy']

# The one target this run predicts; switch it to any other entry above.
label = 'green_energy'

# Load the table and discard every candidate target except the selected one,
# so the remaining targets cannot be used as model inputs.
features = (
    pd.read_csv('features.csv', index_col=0, float_precision='high')
    .drop(columns=[name for name in potential_labels if name != label])
)

# Move the target column out of the frame: `labels` receives it and
# `features` keeps only the input columns.
labels = features.pop(label)

Separating the data into a training dataset and a testing dataset

In [3]:
# Hold out 25% of the rows for testing; the fixed random_state keeps the
# split identical from run to run.
# The result is unpacked directly rather than parked in `_`: in IPython/Jupyter
# `_` is the last-output variable, and assigning to it stops the shell from
# updating it for the rest of the session.
train_features, test_features, train_labels, test_labels = train_test_split(
    features, labels, test_size=0.25, random_state=1337
)

Fitting the model and generating predictions

In [4]:
# Build a 400-tree random forest regressor and fit it on the training split.
# `fit` returns the estimator itself, so `rf` is the trained model.
rf = RandomForestRegressor(
    n_estimators=400,
    max_features='sqrt',
    max_depth=None,
    min_samples_split=2,
    min_samples_leaf=1,
    bootstrap=False,
    random_state=1337,
    n_jobs=-1,
).fit(train_features, train_labels)

# Predictions for the held-out rows; they are scored against test_labels later.
predictions = rf.predict(test_features)

<h1>Final results</h1>

In [5]:
def print_results(labels, predictions):
    """Print four regression metrics for a set of predictions.

    Each metric is called as ``metric(labels, predictions)``, rounded to six
    decimal places and printed on its own line behind a fixed-width caption.
    A blank line is printed after the last metric.

    Args:
        labels: The reference (true) target values.
        predictions: The predicted values to score against ``labels``.

    Returns:
        None. The report is written to standard output.
    """
    # Captions are padded to a common width so the values line up in a column.
    report = (
        ('explained_variance_score:', explained_variance_score),
        ('r2_score:                ', r2_score),
        ('mean_squared_error:      ', mean_squared_error),
        ('mean_absolute_error:     ', mean_absolute_error),
    )
    for caption, metric in report:
        print(caption, round(metric(labels, predictions), 6))
    print()

# Score the held-out predictions; both inputs are converted to float64 arrays first.
print_results(np.asarray(test_labels, dtype='float64'), np.asarray(predictions, dtype='float64'))

# Pair every input column with its importance from the fitted forest.
importances = list(rf.feature_importances_)

# Rank on the unrounded importances and round only for display. Sorting the
# already-rounded values would leave features that differ by less than 0.01
# in column order instead of importance order.
feature_importances = [
    (feature, round(importance, 2))
    for feature, importance in sorted(
        zip(features.columns, importances), key=lambda pair: pair[1], reverse=True
    )
]

# A plain loop instead of a list comprehension used only for its print side effect.
for pair in feature_importances:
    print('Variable: {:20} Importance: {}'.format(*pair))

explained_variance_score: 0.718118
r2_score:                 0.717005
mean_squared_error:       0.084584
mean_absolute_error:      0.207803

Variable: lagged_wind_speed    Importance: 0.16
Variable: solar_radiation      Importance: 0.13
Variable: wind_speed           Importance: 0.11
Variable: angle_of_sun         Importance: 0.11
Variable: humidity             Importance: 0.1
Variable: time_of_day_y        Importance: 0.08
Variable: weekday_x            Importance: 0.06
Variable: pressure             Importance: 0.06
Variable: time_of_day_x        Importance: 0.05
Variable: month_x              Importance: 0.05
Variable: weekday_y            Importance: 0.03
Variable: month_y              Importance: 0.03
Variable: chp_online           Importance: 0.01
