# Submission

<br>

### Imports

In [1]:
import numpy as np
import pandas as pd

from evtdemand import data, feature, suite

In [14]:
# Complete configuration for one modelling run. The mapping is unpacked
# directly into `suite.run_parameterised_model(**params)`, and individual
# entries are reused by later cells when building the submission features.
params = dict(
    # Root directory that holds the input datasets
    data_dir='../data',
    # Dotted import paths of the two regressors
    # (presumably model_1 is fitted to `y1_col` and model_2 to `y2_col` - confirm in `suite`)
    model_1='lightgbm.LGBMRegressor',
    model_2='lightgbm.LGBMRegressor',
    # Keyword arguments for each regressor; kept as two separate dicts
    model_1_kwargs=dict(num_leaves=50, n_estimators=100),
    model_2_kwargs=dict(num_leaves=50, n_estimators=100),
    # Options forwarded to `data.construct_baseline_features_target_dfs`
    data_kwargs=dict(
        real_power_sub_dir='real_power',
        weather_sub_dir='weather',
        real_power_time_period='_pre_august',
        real_power_site='Staplegrove_CB905',
        weather_grid_point='staplegrove_1',
        weather_interpolate_method='interpolate',
    ),
    # Names of the two target columns
    y1_col='value_max',
    y2_col='value_min',
    # Cross-validation split settings
    split_kwargs=dict(n_splits=5, shuffle=False),
    # Feature columns retained by `feature.process_features`
    cols_subset=[
        'value', 'temperature', 'solar_irradiance', 'pressure',
        'spec_humidity', 'hour', 'windspeed_north', 'windspeed_east',
        'doy', 'speed', 'direction', 'weekend', 'hcdh',
    ],
    # Options forwarded to `feature.create_additional_features`
    features_kwargs=dict(features=['temporal', 'dir_speed', 'hcdh']),
)

In [15]:
%%time

# Run the parameterised pipeline with the `params` configuration and unpack
# the four objects it returns.
(
    model_suite,
    error_metrics,
    df_pred,
    input_data,
) = suite.run_parameterised_model(**params)

# Show the evaluation metrics as the cell output
error_metrics

Wall time: 7.05 s


{'y1_rmse': 10.403771748179379,
 'y2_rmse': 11.154202014484861,
 'combined_rmse': 15.303688775083884,
 'skill_score': 0.5152711676062375}

In [9]:
# Scratch value: equals the `skill_score` shown in the metrics output above,
# rounded to six decimal places (presumably kept for comparing runs - confirm
# or remove; the cell's execution count is out of sequence).
0.515271

0.515271

In [16]:
data_dir = params['data_dir']

# Load the baseline training features and targets using the same data options
# that were passed to the parameterised run.
df_train_features, df_train_target = data.construct_baseline_features_target_dfs(
    data_dir,
    **params['data_kwargs'],
)

# Add the configured extra features, then keep only the configured columns.
df_train_features = feature.process_features(
    feature.create_additional_features(df_train_features, **params['features_kwargs']),
    cols_subset=params['cols_subset'],
)

In [17]:
# Build the feature matrix for the submission window.
#
# The sub-directories, site and weather grid point are read from
# `params['data_kwargs']` instead of being repeated as literals, so the
# submission features always come from the same site/location as the training
# features built from those same parameters.
data_kwargs = params['data_kwargs']

df_observation_submission = data.load_real_power_dataset(
    f"{data_dir}/{data_kwargs['real_power_sub_dir']}",
    site=data_kwargs['real_power_site'],
    real_power_variable='observation_variable_half_hourly',
    time_period='_august',  # submission period; the training data uses '_pre_august'
)
# NOTE(review): training passes `weather_interpolate_method` through
# `data_kwargs`; here `load_weather_df` is called with its defaults - confirm
# that both paths treat the weather data the same way.
df_weather = data.load_weather_df(
    f"{data_dir}/{data_kwargs['weather_sub_dir']}",
    data_kwargs['weather_grid_point'],
)

# Keep only the timestamps present in both the observations and the weather data
common_idxs = df_observation_submission.index.intersection(df_weather.index)

# Start from the observation columns and attach the weather columns row-aligned
df_submission_features = df_observation_submission.loc[common_idxs].copy()
df_submission_features[df_weather.columns] = df_weather.loc[common_idxs].copy()

# Apply the same feature engineering and column selection as for training
df_submission_features = feature.create_additional_features(df_submission_features, **params['features_kwargs'])
df_submission_features = feature.process_features(df_submission_features, cols_subset=params['cols_subset'])

X_submission = df_submission_features.values

# Sanity check: (number of submission timestamps, number of feature columns)
X_submission.shape

(1488, 13)

In [18]:
# Generate predictions for the submission window from the training targets,
# training features and submission features.
# Note: this rebinds `df_pred`, replacing the value unpacked from
# `suite.run_parameterised_model`.
# `save_submission=True` presumably also writes the submission to disk - confirm in `suite`.
df_pred = model_suite.run_submission(
    df_train_target, df_train_features, df_submission_features,
    save_submission=True,
)

# Preview the first few predicted rows
df_pred.head()

Unnamed: 0_level_0,value_max,value_min
time,Unnamed: 1_level_1,Unnamed: 2_level_1
2021-08-01 00:00:00,0.643898,0.585375
2021-08-01 00:30:00,0.592978,0.541695
2021-08-01 01:00:00,0.568907,0.510669
2021-08-01 01:30:00,0.519232,0.478118
2021-08-01 02:00:00,0.498093,0.458542


In [20]:
# Toggle: set to True to archive the parameters used for this run.
save = False

# Archive path stamped with the current time (to the second); '-' separates
# the time fields in place of ':'.
fp = f'../data/params/archive/params - {pd.Timestamp.now().strftime("%Y-%m-%d %H-%M-%S")}.yml'

# Plain truthiness check instead of comparing against `True` with `==`
if save:
    suite.save_params(input_data, fp)

In [None]:
# TODO - ideas for future work:
# compute feature permutation importance
# shuffle the indexes before training in the `run_submission` method of the suite class
# add in a long-term trend term
# get the settlement period feature working and compare it with time-of-day

# give the spread distribution params with a 12 month lag
# look into resampling later dates more heavily - weighted bootstrapping?
# start visualising the model tuning/decisions (LIME?) - https://www.scikit-yb.org/en/latest/api/model_selection/validation_curve.html
# can we create a `feels like` temperature?
# try a stacked regressor - http://rasbt.github.io/mlxtend/user_guide/regressor/StackingRegressor/
# try xgboost
# create an example pytorch model with skorch - https://github.com/skorch-dev/skorch
# get some automated model tuning running with skopt or sklearn-deap