In [1]:
#default_exp models

# Custom Models

<br>

### Inputs

In [2]:
#exports
import numpy as np
import pandas as pd

from evtdemand import data, feature

In [3]:
# Root folder of the input data, relative to this notebook
data_dir = '../data'

# Keyword arguments unpacked into `data.construct_baseline_features_target_dfs`
data_kwargs = dict(
    real_power_sub_dir='real_power',
    weather_sub_dir='weather',
    real_power_time_period='_pre_august',
    real_power_site='Staplegrove_CB905',
    weather_sites=['staplegrove', 'mousehole'],
    weather_grid_points=None,
    weather_interpolate_method='interpolate',
    use_target_delta=False,
)

# Keyword arguments unpacked into `feature.create_additional_features`
features_kwargs = {
    'features': ['temporal', 'dir_speed', 'hcdh', 'lagged'],
    'feature_lags': {
        'value': [1, 2, 3, 4, 5, 6, 48, 96, 336],
        # Lags [1, 2] for grid points 1-5 of each (variable, site) pair, in this key order
        **{
            f'{variable}_{site}_{grid_point}': [1, 2]
            for variable, site in [
                ('solar_irradiance', 'staplegrove'),
                ('solar_irradiance', 'mousehole'),
                ('temperature', 'mousehole'),
                ('temperature', 'staplegrove'),
            ]
            for grid_point in range(1, 6)
        },
    },
}

# Passed as `cols_subset` to `feature.process_features`.
# A previously used explicit subset, kept for reference:
#   ['value', 'temperature', 'solar_irradiance', 'pressure',
#    'spec_humidity', 'hour', 'windspeed_north', 'windspeed_east',
#    'doy', 'speed', 'direction', 'weekend', 'hcdh']
cols_subset = None

In [4]:
# Build the baseline feature/target frames, add the configured extra features, then
# run the result through `feature.process_features` with the chosen column subset
df_features, df_target = data.construct_baseline_features_target_dfs(data_dir, **data_kwargs)
df_features = feature.process_features(
    feature.create_additional_features(df_features, df_target, **features_kwargs),
    cols_subset=cols_subset,
)

# Preview the first rows of the processed features
df_features.head()

Unnamed: 0_level_0,value,temperature_mousehole_1,solar_irradiance_mousehole_1,windspeed_north_mousehole_1,windspeed_east_mousehole_1,pressure_mousehole_1,spec_humidity_mousehole_1,temperature_mousehole_2,solar_irradiance_mousehole_2,windspeed_north_mousehole_2,...,temperature_staplegrove_1_lag_1,temperature_staplegrove_1_lag_2,temperature_staplegrove_2_lag_1,temperature_staplegrove_2_lag_2,temperature_staplegrove_3_lag_1,temperature_staplegrove_3_lag_2,temperature_staplegrove_4_lag_1,temperature_staplegrove_4_lag_2,temperature_staplegrove_5_lag_1,temperature_staplegrove_5_lag_2
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2019-11-08 00:00:00+00:00,1.161581,10.573663,0.0,-6.217352,-0.673667,99839.76,0.00642,9.768976,0.0,-4.420477,...,3.958994,4.363855,1.845712,2.469324,5.441415,5.719324,3.699228,4.242761,5.95704,6.149011
2019-11-08 00:30:00+00:00,1.15358,10.596094,0.0,-6.478864,-0.838259,99893.173,0.006402,9.808984,0.0,-4.805036,...,3.554132,3.958994,1.222101,1.845712,5.163507,5.441415,3.155695,3.699228,5.76507,5.95704
2019-11-08 01:00:00+00:00,1.11379,10.618524,0.0,-6.740375,-1.002851,99946.586,0.006385,9.848993,0.0,-5.189594,...,3.363672,3.554132,0.870508,1.222101,5.072656,5.163507,2.945703,3.155695,5.670313,5.76507
2019-11-08 01:30:00+00:00,1.038219,10.613245,0.0,-7.00716,-1.147348,100006.408,0.006375,9.912073,0.0,-5.540363,...,3.173212,3.363672,0.518915,0.870508,4.981805,5.072656,2.735712,2.945703,5.575555,5.670313
2019-11-08 02:00:00+00:00,0.988177,10.607965,0.0,-7.273944,-1.291845,100066.23,0.006366,9.975153,0.0,-5.891131,...,3.081995,3.173212,0.526331,0.518915,4.869104,4.981805,2.787073,2.735712,5.480432,5.575555


In [5]:
def clean_and_normalise_data(df_features, df_target=None):
    """
    Cast the features (and optionally the target) to float32 and standardise them.

    Each input is converted with `np.asanyarray(...).astype('float32')` and then
    standardised along axis 0 as `(values - mean) / std`.

    Parameters
    ----------
    df_features : array-like
        Feature values to standardise.
    df_target : array-like, optional
        Target values to standardise. The previous version read `df_target`
        without accepting it as an argument, so every call raised
        `UnboundLocalError`; it is now an explicit, optional parameter.

    Returns
    -------
    tuple
        `(features_cleaned, target_cleaned)` as float32 arrays. `target_cleaned`
        is `None` when `df_target` is not supplied.
    """
    def _standardise(values):
        # Work on a float32 array so the caller's object is never rebound or mutated
        arr = np.asanyarray(values).astype('float32')
        mean = np.mean(arr, axis=0)
        std = np.std(arr, axis=0)
        # A constant column has zero spread; dividing by 1 maps it to 0 instead of NaN/inf
        std = np.where(std == 0, np.float32(1), std)
        return (arr - mean) / std

    df_features_cleaned = _standardise(df_features)
    df_target_cleaned = None if df_target is None else _standardise(df_target)

    return df_features_cleaned, df_target_cleaned

In [6]:
# update process and suite defaults

In [7]:
# Column-wise (axis 0) mean and standard deviation of the features and of the target
x_mean, x_std = np.mean(df_features, axis=0), np.std(df_features, axis=0)
y_mean, y_std = np.mean(df_target, axis=0), np.std(df_target, axis=0)

# Display all four statistics together
x_mean, x_std, y_mean, y_std

(value                                0.649425
 temperature_mousehole_1             12.013905
 solar_irradiance_mousehole_1       149.886804
 windspeed_north_mousehole_1          0.472769
 windspeed_east_mousehole_1           1.966751
                                       ...    
 temperature_staplegrove_3_lag_2      9.994442
 temperature_staplegrove_4_lag_1      9.883219
 temperature_staplegrove_4_lag_2      9.882923
 temperature_staplegrove_5_lag_1      9.969454
 temperature_staplegrove_5_lag_2      9.969211
 Length: 119, dtype: float64,
 value                                0.702942
 temperature_mousehole_1              3.056129
 solar_irradiance_mousehole_1       230.084927
 windspeed_north_mousehole_1          4.849437
 windspeed_east_mousehole_1           5.248014
                                       ...    
 temperature_staplegrove_3_lag_2      5.336648
 temperature_staplegrove_4_lag_1      5.858784
 temperature_staplegrove_4_lag_2      5.858842
 temperature_staplegrove_5_lag

In [26]:
import copy

# Baseline configuration: two LightGBM regressors on a single staplegrove grid point
default_params = {
    'data_dir': '../data',
    'model_1': 'lightgbm.LGBMRegressor',
    'model_2': 'lightgbm.LGBMRegressor',
    'model_1_kwargs': {'num_leaves': 50, 'n_estimators': 100, 'reg_lambda': 0.1},
    'model_2_kwargs': {'num_leaves': 50, 'n_estimators': 100, 'reg_lambda': 0.1},
    'data_kwargs': {
        'real_power_sub_dir': 'real_power',
        'weather_sub_dir': 'weather',
        'real_power_time_period': '_pre_august',
        'real_power_site': 'Staplegrove_CB905',
        'weather_sites': ['staplegrove'],
        'weather_grid_points': [1],
        'weather_interpolate_method': 'interpolate',
        'use_target_delta': False,
    },
    'y1_col': 'value_max',
    'y2_col': 'value_min',
    'split_kwargs': {'n_splits': 5, 'shuffle': False},
    # Columns previously listed here but disabled: 'prev_month_max_avg',
    # 'prev_month_max_max', 'prev_month_min_avg', 'prev_month_min_min'
    'cols_subset': [
        'value',
        'temperature_staplegrove_1',
        'solar_irradiance_staplegrove_1',
        'pressure_staplegrove_1',
        'spec_humidity_staplegrove_1',
        'hour',
        'local_hour',
        'windspeed_north_staplegrove_1',
        'windspeed_east_staplegrove_1',
        'doy',
        'speed_staplegrove_1',
        'direction_staplegrove_1',
        'weekend',
        'hcdh_staplegrove_1',
    ],
    'features_kwargs': {'features': ['temporal', 'dir_speed', 'hcdh']},
}

# Work on an independent deep copy so the overrides below never touch `default_params`
params = copy.deepcopy(default_params)

# Overrides for this experiment: a single random forest fed by both weather sites
params.update({
    'model_1': 'sklearn.ensemble.RandomForestRegressor',
    'model_2': None,
    'model_1_kwargs': {'n_estimators': 500},
    # Same data settings as the defaults except for the weather sites / grid points
    'data_kwargs': {
        **params['data_kwargs'],
        'weather_sites': ['staplegrove', 'mousehole'],
        'weather_grid_points': None,
    },
    'features_kwargs': {
        'features': ['temporal', 'dir_speed', 'lagged', 'solar', 'ts_pcs', 'roc'],
        'feature_lags': {
            'value': [1, 2, 3, 4, 5, 6, 48, 96, 336],
            # Lags [1, 2] for grid points 1-5 of each (variable, site) pair, in this key order
            **{
                f'{variable}_{site}_{grid_point}': [1, 2]
                for variable, site in [
                    ('solar_irradiance', 'staplegrove'),
                    ('solar_irradiance', 'mousehole'),
                    ('temperature', 'mousehole'),
                    ('temperature', 'staplegrove'),
                ]
                for grid_point in range(1, 6)
            },
        },
        'roc_features': {'value': 3},
        'sites': ['staplegrove', 'mousehole'],
        'grid_points': None,
    },
    'cols_subset': [
        'value',
        # <variable>_<site>_<grid point>: mousehole grid points 1-5, then staplegrove 1-5
        *[
            f'{variable}_{site}_{grid_point}'
            for site in ('mousehole', 'staplegrove')
            for grid_point in range(1, 6)
            for variable in ('temperature', 'solar_irradiance', 'pressure', 'spec_humidity')
        ],
        # solar_irradiance_<site>_<suffix>: staplegrove first, then mousehole
        *[
            f'solar_irradiance_{site}_{suffix}'
            for site in ('staplegrove', 'mousehole')
            for suffix in (
                'mean',
                'max',
                'min',
                'max___diff',
                'min___diff',
                'max___ewm__alpha_0.9__mean',
                'min___ewm__alpha_0.9__mean',
            )
        ],
        'value___ewm__alpha_0.9__std',
        'value___ewm__alpha_0.9__mean',
        'value___ewm__alpha_0.01__mean',
        'value___rolling__window_12__max',
        'value___rolling__window_24__min',
        'hour',
        'weekday',
        'doy',
        'month',
        # direction/speed pairs per grid point: staplegrove 1-5, then mousehole 1-5
        *[
            f'{quantity}_{site}_{grid_point}'
            for site in ('staplegrove', 'mousehole')
            for grid_point in range(1, 6)
            for quantity in ('direction', 'speed')
        ],
        *[f'demand_pc{pc}' for pc in range(1, 21)],
        *[f'value_lag_{lag}' for lag in (1, 2, 3, 4, 5, 6, 48, 96, 336)],
        'solar_irradiance_lag_1',
        'solar_irradiance_lag_2',
        'temperature_lag_1',
        'temperature_lag_2',
        *[f'value_diff_{step}' for step in range(1, 4)],
    ],
})

In [17]:
%%time

# Rebuild the feature/target frames from the experiment `params`: baseline construction,
# extra feature generation, then `feature.process_features` with the params' column subset
df_features, df_target = data.construct_baseline_features_target_dfs(
    params['data_dir'],
    **params['data_kwargs'],
)
df_features = feature.process_features(
    feature.create_additional_features(df_features, df_target, **params['features_kwargs']),
    cols_subset=params['cols_subset'],
)

# Preview the first rows of the processed features
df_features.head()

  df_trajectory_mat['value-'+str(lag)] = df_trajectory_mat['value'].shift(lag)


Wall time: 4.67 s


Unnamed: 0_level_0,value,temperature_mousehole_1,solar_irradiance_mousehole_1,pressure_mousehole_1,spec_humidity_mousehole_1,temperature_mousehole_2,solar_irradiance_mousehole_2,pressure_mousehole_2,spec_humidity_mousehole_2,temperature_mousehole_3,...,value_lag_3,value_lag_4,value_lag_5,value_lag_6,value_lag_48,value_lag_96,value_lag_336,value_diff_1,value_diff_2,value_diff_3
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2019-11-15 00:00:00+00:00,1.293602,8.670587,0.0,100492.2,0.005844,6.998712,0.0,100228.2,0.005545,9.010431,...,1.02382,1.190803,1.317124,1.440534,1.265761,1.17412,1.161581,0.309181,0.319189,0.299805
2019-11-15 00:30:00+00:00,1.311762,8.646768,0.0,100498.575,0.005857,7.015909,0.0,100238.575,0.005566,9.041299,...,0.994428,1.02382,1.190803,1.317124,1.268779,1.177694,1.15358,0.01816,-0.291021,-0.610209
2019-11-15 01:00:00+00:00,1.243869,8.622949,0.0,100504.95,0.005871,7.033105,0.0,100248.95,0.005587,9.072168,...,0.984421,0.994428,1.02382,1.190803,1.232437,1.129524,1.11379,-0.067893,-0.086053,0.204967
2019-11-15 01:30:00+00:00,1.169707,8.674875,0.0,100516.557,0.005891,7.135812,0.0,100256.557,0.005603,9.102609,...,1.293602,0.984421,0.994428,1.02382,1.167723,1.06054,1.038219,-0.074162,-0.006269,0.079784
2019-11-15 02:00:00+00:00,1.150759,8.726801,0.0,100528.164,0.005911,7.238519,0.0,100264.164,0.005619,9.133051,...,1.311762,1.293602,0.984421,0.994428,1.129886,1.032406,0.988177,-0.018948,0.055214,0.061484


In [18]:
import pandas as pd

# Load the akylas feature table and index it by its UTC-parsed `time` column
df_ak_features = (
    pd.read_csv('../data/submission/akylas/features.csv')
    .assign(time=lambda frame: pd.to_datetime(frame['time'], utc=True))
    .set_index('time')
)

# Restrict both feature tables to the timestamps they share, in sorted order
common_idxs = sorted(df_features.index.intersection(df_ak_features.index))
df_ak_features, df_features = df_ak_features.loc[common_idxs], df_features.loc[common_idxs]

df_ak_features.head()

Unnamed: 0_level_0,value,"('staplegrove', 'temperature_1')","('staplegrove', 'solar_irradiance_1')","('staplegrove', 'pressure_1')","('staplegrove', 'spec_humidity_1')","('staplegrove', 'speed_1')","('staplegrove', 'direction_1')","('staplegrove', 'temperature_2')","('staplegrove', 'solar_irradiance_2')","('staplegrove', 'pressure_2')",...,"('mousehole', 'solar_irradiance_4_lag_2')","('mousehole', 'temperature_5_lag_1')","('mousehole', 'temperature_5_lag_2')","('mousehole', 'solar_irradiance_5_lag_1')","('mousehole', 'solar_irradiance_5_lag_2')",value_roll_std,value_roll_mean_fast,value_roll_mean_slow,value_roll_min,value_roll_max
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2019-11-15 00:00:00+00:00,1.293602,3.385431,0.0,99100.2,0.004808,4.081852,0.982325,3.592462,0.0,99364.2,...,0.0,4.513834,4.521143,0.0,0.0,0.216581,1.262831,1.123481,0.984421,1.867565
2019-11-15 00:30:00+00:00,1.311762,3.400674,0.0,99110.575,0.004823,4.049076,0.93443,3.567667,0.0,99378.575,...,0.0,4.506525,4.513834,0.0,0.0,0.076654,1.306869,1.125366,0.984421,1.843143
2019-11-15 01:00:00+00:00,1.243869,3.415918,0.0,99120.95,0.004837,4.016301,0.886535,3.542871,0.0,99392.95,...,0.0,4.539346,4.506525,0.0,0.0,0.05052,1.250169,1.126552,0.984421,1.710702
2019-11-15 01:30:00+00:00,1.169707,3.466867,0.0,99132.557,0.004855,4.017713,0.808951,3.555734,0.0,99404.557,...,0.0,4.572168,4.539346,0.0,0.0,0.058821,1.177753,1.126984,0.984421,1.646056
2019-11-15 02:00:00+00:00,1.150759,3.517816,0.0,99144.164,0.004872,4.019125,0.731367,3.568597,0.0,99416.164,...,0.0,4.583078,4.572168,0.0,0.0,0.026584,1.153458,1.127222,0.984421,1.551003


In [46]:
# Positional row compared between the two feature tables to pair up their columns by value
idx = 20
std_to_ak_feature_map = {}

feature_cols, ak_feature_cols = df_features.columns, df_ak_features.columns
s_feature_sample, s_ak_feature_sample = df_features.iloc[idx], df_ak_features.iloc[idx]

# Columns whose sampled value also appears in another column are set aside as unmatched up front
feature_cols_with_dupe_vals = feature_cols[s_feature_sample.duplicated(keep=False)].tolist()
feature_cols_non_dupe = sorted(set(feature_cols).difference(feature_cols_with_dupe_vals))
unmatched = list(feature_cols_with_dupe_vals)

for feature_col in feature_cols_non_dupe:
    s_matched_feature = s_ak_feature_sample[s_ak_feature_sample == s_feature_sample.loc[feature_col]]

    # Only accept a match when exactly one akylas column holds the same value
    if s_matched_feature.size != 1:
        unmatched.append(feature_col)
        continue

    std_to_ak_feature_map[feature_col] = s_matched_feature.index[0]

# Hand-written pairings, applied on top of the value-based matches
std_to_ak_feature_map.update({
    'solar_irradiance_mousehole_2': "('mousehole', 'solar_irradiance_2')",
    'solar_irradiance_mousehole_3': "('mousehole', 'solar_irradiance_3')",
    'solar_irradiance_staplegrove_2': "('staplegrove', 'solar_irradiance_2')",
    'solar_irradiance_staplegrove_3': "('staplegrove', 'solar_irradiance_3')",
    'solar_irradiance_staplegrove_5': "('staplegrove', 'solar_irradiance_5')",
    'solar_irradiance_staplegrove_max': "('staplegrove', 'solar_max')",
    'solar_irradiance_staplegrove_min': "('staplegrove', 'solar_min')",
    'solar_irradiance_mousehole_max': "('mousehole', 'solar_max')",
    'solar_irradiance_mousehole_min': "('mousehole', 'solar_min')",
    'value_diff_1': 'val_grad',
    'value_diff_2': 'val_grad_2',
    'value_diff_3': 'val_grad_3',
    **{f'value_lag_{lag}': f'val_lag_{lag}' for lag in (1, 2, 3, 4, 5, 6, 48, 336)},
    **{f'demand_pc{pc}': f'PC{pc}' for pc in range(1, 21)},
})

# Name-pattern pairings for grid points 1-5 of every configured site
for site in params['features_kwargs']['sites']:
    for grid_point in range(1, 6):
        std_to_ak_feature_map.update({
            **{
                f'solar_irradiance_{site}_{grid_point}_lag_{lag}': f"('{site}', 'solar_irradiance_{grid_point}_lag_{lag}')"
                for lag in (1, 2)
            },
            **{
                f'temperature_{site}_{grid_point}_lag_{lag}': f"('{site}', 'temperature_{grid_point}_lag_{lag}')"
                for lag in (1, 2)
            },
            f'direction_{site}_{grid_point}': f"('{site}', 'direction_{grid_point}')",
            f'speed_{site}_{grid_point}': f"('{site}', 'speed_{grid_point}')",
        })

# Whatever is still unpaired on either side after all three passes
unmatched_ak = sorted(set(ak_feature_cols).difference(std_to_ak_feature_map.values()))
unmatched = sorted(set(unmatched).difference(std_to_ak_feature_map))

len(std_to_ak_feature_map)

142

In [47]:
from IPython.display import JSON

# Display the column mapping (wrapped in a one-element list) through IPython's JSON display object
JSON([std_to_ak_feature_map])

<IPython.core.display.JSON object>

In [48]:
# Feature columns that still have no entry in the mapping
sorted(set(feature_cols).difference(std_to_ak_feature_map))

['solar_irradiance_mousehole_max___diff',
 'solar_irradiance_mousehole_max___ewm__alpha_0.9__mean',
 'solar_irradiance_mousehole_min___diff',
 'solar_irradiance_mousehole_min___ewm__alpha_0.9__mean',
 'solar_irradiance_staplegrove_max___diff',
 'solar_irradiance_staplegrove_max___ewm__alpha_0.9__mean',
 'solar_irradiance_staplegrove_min___diff',
 'solar_irradiance_staplegrove_min___ewm__alpha_0.9__mean',
 'value']

In [49]:
# Work-in-progress pairings for the feature columns that are still unmapped.
# NOTE(review): this literal is not assigned to a name or merged into
# `std_to_ak_feature_map`, so evaluating the cell changes no state; every value
# except the first is an empty placeholder still to be filled in.
{
    'solar_irradiance_mousehole_max___diff': "('mousehole', 'solar_max_grad')",
    'solar_irradiance_mousehole_max___ewm__alpha_0.9__mean': '',
    'solar_irradiance_mousehole_min___diff': '',
    'solar_irradiance_mousehole_min___ewm__alpha_0.9__mean': '',
    'solar_irradiance_staplegrove_max___diff': '',
    'solar_irradiance_staplegrove_max___ewm__alpha_0.9__mean': '',
    'solar_irradiance_staplegrove_min___diff': '',
    'solar_irradiance_staplegrove_min___ewm__alpha_0.9__mean': '',
    'value': ''
}

['solar_irradiance_mousehole_max___diff',
 'solar_irradiance_mousehole_max___ewm__alpha_0.9__mean',
 'solar_irradiance_mousehole_min___diff',
 'solar_irradiance_mousehole_min___ewm__alpha_0.9__mean',
 'solar_irradiance_staplegrove_max___diff',
 'solar_irradiance_staplegrove_max___ewm__alpha_0.9__mean',
 'solar_irradiance_staplegrove_min___diff',
 'solar_irradiance_staplegrove_min___ewm__alpha_0.9__mean',
 'value']

In [50]:
# akylas columns that no entry of `std_to_ak_feature_map` points at
unmatched_ak

["('mousehole', 'solar_max_grad')",
 "('mousehole', 'solar_max_roll')",
 "('mousehole', 'solar_mean_grad')",
 "('mousehole', 'solar_mean_roll')",
 "('mousehole', 'solar_range')",
 "('staplegrove', 'solar_max_grad')",
 "('staplegrove', 'solar_max_roll')",
 "('staplegrove', 'solar_mean_grad')",
 "('staplegrove', 'solar_mean_roll')",
 "('staplegrove', 'solar_range')",
 'Minute',
 'value',
 'value_roll_max',
 'value_roll_mean_fast',
 'value_roll_mean_slow',
 'value_roll_min',
 'value_roll_std']

In [14]:
#hide
from nbdev.export import notebook2script
    
# Run nbdev's notebook-to-script export (the printed log lists each converted notebook)
notebook2script()

Converted 01-data-processing.ipynb.
Converted 02-spatial-analysis.ipynb.
Converted 03a-feature-generation.ipynb.
Converted 03b-eda.ipynb.
Converted 04-model-suite.ipynb.
Converted 05-custom-models.ipynb.
Converted 06-submission.ipynb.
Converted 07-experiments.ipynb.
Converted 08-testing.ipynb.
