# Experiments

<br>

### Imports

In [15]:
import numpy as np
import pandas as pd

import copy
from tqdm import tqdm

from sklearn.inspection import permutation_importance

from evtdemand import suite, data, feature

In [2]:
from IPython.display import JSON

In [3]:
# Baseline configuration shared by every experiment below. The whole mapping is
# unpacked into `suite.run_parameterised_model(**default_params)`, and individual
# experiments deep-copy it before overriding single entries.
default_params = dict(
    # Directory handed to the data loader, relative to this notebook.
    data_dir='../data',
    # Dotted import paths of the two regressors, with their constructor kwargs.
    model_1='lightgbm.LGBMRegressor',
    model_2='lightgbm.LGBMRegressor',
    model_1_kwargs=dict(
        num_leaves=50,
        n_estimators=100,
        reg_lambda=0.1,
    ),
    model_2_kwargs=dict(
        num_leaves=50,
        n_estimators=100,
        reg_lambda=0.1,
    ),
    # Options forwarded to the baseline feature/target construction.
    data_kwargs=dict(
        real_power_sub_dir='real_power',
        weather_sub_dir='weather',
        real_power_time_period='_pre_august',
        real_power_site='Staplegrove_CB905',
        weather_grid_point='staplegrove_1',
        weather_interpolate_method='interpolate',
    ),
    # Target columns (presumably y1 pairs with model_1 and y2 with model_2 --
    # confirm against `suite.run_parameterised_model`).
    y1_col='value_max',
    y2_col='value_min',
    # Unshuffled 5-way split settings.
    split_kwargs=dict(
        n_splits=5,
        shuffle=False,
    ),
    # Feature columns kept for modelling. Candidates currently left out:
    # 'prev_month_max_avg', 'prev_month_max_max',
    # 'prev_month_min_avg', 'prev_month_min_min'.
    cols_subset=[
        'value', 'temperature', 'solar_irradiance', 'pressure',
        'spec_humidity', 'hour', 'windspeed_north', 'windspeed_east',
        'doy', 'speed', 'direction', 'weekend', 'hcdh',
    ],
    # Groups of additional engineered features to create.
    features_kwargs=dict(
        features=['temporal', 'dir_speed', 'hcdh'],
    ),
)

In [4]:
%%time

model_suite, error_metrics, df_pred, input_data = suite.run_parameterised_model(**default_params)

error_metrics

Wall time: 5.71 s


{'y1_rmse': 10.389345154377155,
 'y2_rmse': 11.177505324655117,
 'combined_rmse': 15.305702367282791,
 'skill_score': 0.5152324587016828}

In [5]:
%%time 

# comparing the two wind speed encodings

params = copy.deepcopy(default_params)

params['cols_subset'] = ['value', 'temperature', 'solar_irradiance', 'pressure',
                         'spec_humidity', 'hour', 'doy', 'speed', 'direction', 'weekend', 'hcdh']
model_suite, error_metrics, df_pred, input_data = suite.run_parameterised_model(**params)
vectors_error = error_metrics['skill_score']

params['cols_subset'] = ['value', 'temperature', 'solar_irradiance', 'pressure', 'spec_humidity', 
                         'hour', 'windspeed_north', 'windspeed_east', 'doy', 'weekend', 'hcdh']
model_suite, error_metrics, df_pred, input_data = suite.run_parameterised_model(**params)
speed_dir_error = error_metrics['skill_score']


vectors_error, speed_dir_error

Wall time: 9.71 s


(0.5173457556059888, 0.5149958383696378)

In [6]:
# investigating the impact of reduced training size
#
# The same configuration is re-run with `n_splits` swept from 2 to 6 and the
# error metrics of each run are collected into one table indexed by `run_id`.
# Metrics are gathered in a list and turned into a DataFrame once at the end:
# `DataFrame.append` was removed in pandas 2.0 and re-copies the frame on
# every iteration.

run_metrics = []
all_input_data = []
test_splits = list(range(2, 7))
params = copy.deepcopy(default_params)

for run_id, test_split in tqdm(enumerate(test_splits), total=len(test_splits)):
    params['split_kwargs'] = {'n_splits': test_split, 'shuffle': False}
    model_suite, error_metrics, df_pred, input_data = suite.run_parameterised_model(**params)

    # Snapshot the metrics so a later run cannot alias an earlier record.
    run_metrics.append(dict(error_metrics))
    all_input_data.append(input_data)

# One row per run; the default 0..n-1 index doubles as the run id.
df_results = pd.DataFrame(run_metrics).rename_axis('run_id')

df_results

100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:23<00:00,  4.68s/it]


Unnamed: 0_level_0,y1_rmse,y2_rmse,combined_rmse,skill_score
run_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,17.657929,17.963724,25.195639,0.522551
1,14.212972,14.888986,20.606912,0.52667
2,11.898792,12.542129,17.324525,0.522463
3,10.389345,11.177505,15.305702,0.515232
4,9.622927,10.113562,14.011024,0.522844


In [7]:
%%time

# tuning lightgbm params

params = copy.deepcopy(default_params)

params['model_1'] = 'lightgbm.LGBMRegressor'
params['model_2'] = 'lightgbm.LGBMRegressor'
params['model_1_kwargs'] = {
    'num_leaves': 50,
    'n_estimators': 100,
    'reg_lambda': 0.1
}
params['model_2_kwargs'] = {
    'num_leaves': 50,
    'n_estimators': 100,
    'reg_lambda': 0.1
}

model_suite, error_metrics, df_pred, input_data = suite.run_parameterised_model(**params)

error_metrics

Wall time: 6.16 s


{'y1_rmse': 10.389345154377155,
 'y2_rmse': 11.177505324655117,
 'combined_rmse': 15.305702367282791,
 'skill_score': 0.5152324587016828}

In [44]:
# assessing feature importance
#
# Rebuild the feature/target frames and two unfitted models directly from
# `default_params`, so they can be fitted and inspected outside the suite.

df_features, df_target = data.construct_baseline_features_target_dfs(
    default_params['data_dir'],
    **default_params['data_kwargs'],
)

# Add the engineered features, then restrict to the configured column subset.
df_features = feature.process_features(
    feature.create_additional_features(
        df_features, df_target, **default_params['features_kwargs']
    ),
    cols_subset=default_params['cols_subset'],
)

# Resolve each dotted model path to its class and instantiate it with its kwargs.
model_1, model_2 = (
    suite.load_module_attr(default_params[model_key])(**default_params[f'{model_key}_kwargs'])
    for model_key in ('model_1', 'model_2')
)

model_1

LGBMRegressor(num_leaves=50, reg_lambda=0.1)

In [42]:
%%time

X, y = df_features.values, df_target['value_max'].values
model_1.fit(X, y)

result = permutation_importance(
    model_1, X, y, n_repeats=10, random_state=42, n_jobs=2
)

forest_importances = pd.Series(result.importances_mean, index=df_features.columns)

forest_importances.sort_values(ascending=False)

Wall time: 6.45 s


value               1.931557
hour                0.020073
doy                 0.016353
pressure            0.012139
spec_humidity       0.011056
solar_irradiance    0.010787
windspeed_east      0.010363
temperature         0.007676
windspeed_north     0.004392
speed               0.003682
hcdh                0.003653
direction           0.002466
weekend             0.001439
dtype: float64

In [43]:
%%time

X, y = df_features.values, df_target['value_min'].values
model_2.fit(X, y)

result = permutation_importance(
    model_2, X, y, n_repeats=10, random_state=42, n_jobs=2
)

forest_importances = pd.Series(result.importances_mean, index=df_features.columns)

forest_importances.sort_values(ascending=False)

Wall time: 9.53 s


value               1.620178
hour                0.018918
solar_irradiance    0.018652
doy                 0.014086
windspeed_east      0.005720
spec_humidity       0.005241
pressure            0.005102
speed               0.002317
windspeed_north     0.001869
temperature         0.001845
direction           0.001500
hcdh                0.000791
weekend             0.000635
dtype: float64