# Import libraries

In [None]:
import pandas as pd
import numpy as np

# Raise the pandas display limits to 100 columns / 250 rows for notebook output.
pd.set_option('display.max_columns', 100)
pd.set_option('display.max_rows', 250)

import warnings
# NOTE(review): this silences every warning for the rest of the session,
# including pandas deprecation and performance warnings raised by the cells
# below. Consider narrowing the filter to specific categories.
warnings.filterwarnings("ignore")

# Define feature engineering pipeline

In [2]:
# Column-role constants. Only ``working_hours`` is read by the code in this
# cell; the others are presumably consumed by later modelling code (verify
# against the downstream notebooks).
non_feature_cols = [
    'site', 'timestamp', 'demand_response', 'demand_response_capacity',
    'date', 'busday', 'time', 'minute',
]
base_features = ['temp', 'irr', 'power']
target_cls = 'demand_response'
target_reg = 'demand_response_capacity'
# Hours of the day (10:00-17:59) flagged by the ``working_hours`` column.
working_hours = [10, 11, 12, 13, 14, 15, 16, 17]

# Added to denominators so a zero total / zero std never divides by zero.
_EPS = 1e-8
# Grouping used for the weather/power correlation and median maps.
_CORR_KEYS = ['site', 'month', 'time', 'year']
# Column suffix -> grouping keys for the mean/std/z-score statistics.
# Insertion order defines the output column order.
_GROUP_STAT_KEYS = {
    'mt': ['site', 'month', 'day_of_week', 'time', 'year'],
    'mh': ['site', 'month', 'day_of_week', 'hour', 'year'],
    'st': ['site', 'season', 'day_of_week', 'time', 'year'],
    'sh': ['site', 'season', 'day_of_week', 'hour', 'year'],
}
_STAT_FEATURES = ['power', 'power_share', 'mean_usg_residual']


def _add_calendar_columns(df):
    """Add calendar and time-of-day columns derived from ``timestamp`` (in place)."""
    ts = pd.to_datetime(df['timestamp'])
    df['timestamp'] = ts
    df['busday'] = np.is_busday(ts.to_numpy().astype("datetime64[D]")).astype(int)
    df['date'] = pd.to_datetime(ts.dt.date)
    df['day_of_week'] = df['date'].dt.weekday
    df['week'] = ts.dt.isocalendar().week.astype(int)
    df['hour'] = ts.dt.hour
    df['time'] = ts.dt.time
    df['quarter_hour'] = ts.dt.minute // 15
    df['month'] = ts.dt.month
    df['year'] = ts.dt.year
    # Dec-Feb -> 0, Mar-May -> 1, Jun-Aug -> 2, Sep-Nov -> 3.
    df['season'] = ((df['month'] % 12) // 3).astype('int64')
    df['working_hours'] = df['hour'].isin(working_hours).astype('int64')


def _add_power_share(df):
    """Add each working-hours row's share of its site/day working-hours power (in place)."""
    in_hours = df['working_hours'] == 1
    daily_totals = df.loc[in_hours].groupby(['site', 'date'])['power'].transform('sum')
    # Start from a float column: the shares are floats, and writing floats
    # into an integer column is a deprecated upcast in recent pandas.
    df['power_share'] = 0.0
    df.loc[in_hours, 'power_share'] = df.loc[in_hours, 'power'] / (daily_totals + _EPS)


def _add_weather_interactions(df):
    """Return ``df`` with ``<var>_power_corr`` and ``<var>_corr_dev`` for temp and irr."""
    for var in ['temp', 'irr']:
        # Correlation of power with var inside each site/month/time/year group.
        # Only the two needed columns are handed to apply().
        corr_map = (
            df.groupby(_CORR_KEYS)[['power', var]]
            .apply(lambda g: g['power'].corr(g[var]))
            .rename(f'{var}_power_corr')
        )
        df = df.merge(corr_map, on=_CORR_KEYS, how='left')

        # Interaction term: correlation times the deviation from the group median.
        group_median = df.groupby(_CORR_KEYS)[var].transform('median')
        df[f'{var}_corr_dev'] = df[f'{var}_power_corr'] * (df[var] - group_median)
    return df


def _add_group_stats(df):
    """Add group mean/std/z-score columns (in place); return the z-score column names."""
    zscore_cols = []
    for feature in _STAT_FEATURES:
        for suffix, keys in _GROUP_STAT_KEYS.items():
            grouped = df.groupby(keys)[feature]
            group_mean = grouped.transform('mean')
            group_std = grouped.transform('std')
            df[f'{feature}_mean_{suffix}'] = group_mean
            df[f'{feature}_std_{suffix}'] = group_std
            df[f'{feature}_zscore_{suffix}'] = (df[feature] - group_mean) / (group_std + _EPS)
            zscore_cols.append(f'{feature}_zscore_{suffix}')
    return zscore_cols


def _temporal_differences(df, zscore_cols):
    """Build the diff / lag / peek / hourly-std columns for every z-score column."""
    by_site = df.groupby(['site', 'year'])
    by_slot = df.groupby(['site', 'time', 'year'])
    by_weekday_slot = df.groupby(['site', 'time', 'day_of_week', 'year'])
    by_hour = df.groupby(['site', 'date', 'hour', 'year'])

    # Collected in a dict and materialised once, instead of inserting
    # ~150 columns one at a time into the frame.
    derived = {}
    for feature in zscore_cols:
        values = df[feature]
        site_values = by_site[feature]
        slot_values = by_slot[feature]

        derived[f"{feature}_diff"] = site_values.diff().fillna(0)
        derived[f"{feature}_diff_t"] = slot_values.diff().fillna(0)
        derived[f"{feature}_diff_wdt"] = by_weekday_slot[feature].diff().fillna(0)
        derived[f"{feature}_hourly_std"] = by_hour[feature].transform('std')

        # Peek: following row minus current row within the group.
        derived[f"{feature}_peek_diff"] = (site_values.shift(-1) - values).fillna(0)
        derived[f"{feature}_peek_diff_t"] = (slot_values.shift(-1) - values).fillna(0)

        # Lag: preceding row minus current row within the group.
        derived[f"{feature}_lag_diff"] = (site_values.shift(1) - values).fillna(0)
        derived[f"{feature}_lag_diff_t"] = (slot_values.shift(1) - values).fillna(0)

        for lag_val in [4]:
            derived[f"{feature}_lag{lag_val}_diff"] = (site_values.shift(lag_val) - values).fillna(0)
            derived[f"{feature}_lag{lag_val}_diff_t"] = (slot_values.shift(lag_val) - values).fillna(0)
            derived[f"{feature}_peek{lag_val}_diff"] = (site_values.shift(-lag_val) - values).fillna(0)
            derived[f"{feature}_peek{lag_val}_diff_t"] = (slot_values.shift(-lag_val) - values).fillna(0)

    return pd.DataFrame(derived, index=df.index)


def engineer_features(df):
    """Return a new DataFrame with the engineered feature columns appended.

    The input must provide ``site``, ``timestamp``, ``temp``, ``irr`` and
    ``power`` columns; any other columns are carried through. The caller's
    frame is not modified.

    Columns are added in this order:

    * calendar columns derived from ``timestamp`` (``busday``, ``date``,
      ``day_of_week``, ``week``, ``hour``, ``time``, ``quarter_hour``,
      ``month``, ``year``, ``season``, ``working_hours``);
    * ``power_share``: a row's power divided by the working-hours power total
      of its site and date (0 outside working hours);
    * ``<var>_power_corr`` and ``<var>_corr_dev`` for ``temp`` and ``irr``;
    * ``mean_usg_residual``: power minus its site/season/weekday/time/year mean;
    * mean, std and z-score of power, power_share and mean_usg_residual over
      the four groupings in ``_GROUP_STAT_KEYS``;
    * row-to-row differences (``diff``, ``lag``, ``peek``) and within-hour
      std of every z-score column.

    The difference columns compare adjacent rows in the order they appear in
    ``df``; the function does not sort. NOTE(review): this presumably expects
    rows to be chronological within each site - confirm for new data sources.
    The ``peek`` columns read rows that come after the current one.

    Every remaining NaN in the result (undefined correlations, std of a
    single-row group, ...) is replaced by 0.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw measurements, one row per site and timestamp.

    Returns
    -------
    pandas.DataFrame
        A new frame with a fresh 0..n-1 index, in the input's row order.
    """
    # Work on a private, uniquely indexed copy: the caller's frame stays
    # untouched and label-aligned assignments cannot trip over duplicate
    # index labels (e.g. after pd.concat without ignore_index).
    df = df.reset_index(drop=True)

    _add_calendar_columns(df)
    _add_power_share(df)
    df = _add_weather_interactions(df)

    df['mean_usg_residual'] = (
        df['power'] - df.groupby(_GROUP_STAT_KEYS['st'])['power'].transform('mean')
    )

    zscore_cols = _add_group_stats(df)
    df = pd.concat([df, _temporal_differences(df, zscore_cols)], axis=1)

    return df.fillna(0)

# Feature engineer sites A,B,C,D for training the classifier later

In [3]:
# Sites A-C: the export includes both the demand_response flag and its capacity.
df_ABC = pd.read_csv('./data/sites_ABC.csv')
df_ABC.columns = ['site','timestamp','temp','irr','power','demand_response','demand_response_capacity']

# Sites D-F: this export has the demand_response flag but no capacity column.
df_DEF = pd.read_csv('./data/sites_DEF.csv')
df_DEF.columns = ['site','timestamp','temp','irr','power','demand_response']

### Combine ABC + D
# ignore_index=True gives the combined frame a fresh 0..n-1 index. Without it
# the siteD rows keep their row labels from df_DEF, which can collide with
# labels from df_ABC and make later label-aligned assignments ambiguous.
# Only the columns shared by both sources are kept.
df_ABCD = pd.concat(
    [df_ABC, df_DEF[df_DEF['site']=='siteD']],
    ignore_index=True,
)[['site','timestamp','temp','irr','power','demand_response']]

In [4]:
# Run the feature pipeline on the combined A-D training frame.
df_ABCD_new = engineer_features(df_ABCD)

# Bare expression: the notebook renders the first rows as the cell output.
df_ABCD_new.head()

Unnamed: 0,site,timestamp,temp,irr,power,demand_response,busday,date,day_of_week,week,hour,time,quarter_hour,month,year,season,working_hours,power_share,temp_power_corr,temp_corr_dev,irr_power_corr,irr_corr_dev,mean_usg_residual,power_mean_mt,power_std_mt,power_zscore_mt,power_mean_mh,power_std_mh,power_zscore_mh,power_mean_st,power_std_st,power_zscore_st,power_mean_sh,power_std_sh,power_zscore_sh,power_share_mean_mt,power_share_std_mt,power_share_zscore_mt,power_share_mean_mh,power_share_std_mh,power_share_zscore_mh,power_share_mean_st,power_share_std_st,power_share_zscore_st,power_share_mean_sh,power_share_std_sh,power_share_zscore_sh,mean_usg_residual_mean_mt,mean_usg_residual_std_mt,mean_usg_residual_zscore_mt,...,power_share_zscore_sh_peek4_diff,power_share_zscore_sh_peek4_diff_t,mean_usg_residual_zscore_mt_diff,mean_usg_residual_zscore_mt_diff_t,mean_usg_residual_zscore_mt_diff_wdt,mean_usg_residual_zscore_mt_hourly_std,mean_usg_residual_zscore_mt_peek_diff,mean_usg_residual_zscore_mt_peek_diff_t,mean_usg_residual_zscore_mt_lag_diff,mean_usg_residual_zscore_mt_lag_diff_t,mean_usg_residual_zscore_mt_lag4_diff,mean_usg_residual_zscore_mt_lag4_diff_t,mean_usg_residual_zscore_mt_peek4_diff,mean_usg_residual_zscore_mt_peek4_diff_t,mean_usg_residual_zscore_mh_diff,mean_usg_residual_zscore_mh_diff_t,mean_usg_residual_zscore_mh_diff_wdt,mean_usg_residual_zscore_mh_hourly_std,mean_usg_residual_zscore_mh_peek_diff,mean_usg_residual_zscore_mh_peek_diff_t,mean_usg_residual_zscore_mh_lag_diff,mean_usg_residual_zscore_mh_lag_diff_t,mean_usg_residual_zscore_mh_lag4_diff,mean_usg_residual_zscore_mh_lag4_diff_t,mean_usg_residual_zscore_mh_peek4_diff,mean_usg_residual_zscore_mh_peek4_diff_t,mean_usg_residual_zscore_st_diff,mean_usg_residual_zscore_st_diff_t,mean_usg_residual_zscore_st_diff_wdt,mean_usg_residual_zscore_st_hourly_std,mean_usg_residual_zscore_st_peek_diff,mean_usg_residual_zscore_st_peek_diff_t,mean_usg_residual_zscore_st_lag_diff,mean_usg_residual_zscore_st_lag_d
iff_t,mean_usg_residual_zscore_st_lag4_diff,mean_usg_residual_zscore_st_lag4_diff_t,mean_usg_residual_zscore_st_peek4_diff,mean_usg_residual_zscore_st_peek4_diff_t,mean_usg_residual_zscore_sh_diff,mean_usg_residual_zscore_sh_diff_t,mean_usg_residual_zscore_sh_diff_wdt,mean_usg_residual_zscore_sh_hourly_std,mean_usg_residual_zscore_sh_peek_diff,mean_usg_residual_zscore_sh_peek_diff_t,mean_usg_residual_zscore_sh_lag_diff,mean_usg_residual_zscore_sh_lag_diff_t,mean_usg_residual_zscore_sh_lag4_diff,mean_usg_residual_zscore_sh_lag4_diff_t,mean_usg_residual_zscore_sh_peek4_diff,mean_usg_residual_zscore_sh_peek4_diff_t
0,siteA,2019-01-01 00:00:00,22.2,0.0,4.8,0.0,1,2019-01-01,1,1,0,00:00:00,0,1,2019,0,0,0.0,-0.004113,0.003701,0.0,0.0,4.085714,1.28,1.96774,1.788854,1.28,1.805722,1.949359,0.714286,1.175949,3.474396,0.714286,1.143428,3.573214,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.565714,1.96774,1.788854,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.788854,0.0,0.0,0.0,0.0,0.0,-1.788854,0.0,0.0,0.0,0.0,0.0,-1.949359,0.0,0.0,0.0,0.0,0.0,-1.949359,0.0,0.0,0.0,0.0,0.0,-3.474396,0.0,0.0,0.0,0.0,0.0,-3.474396,0.0,0.0,0.0,0.0,0.0,-3.573214,0.0,0.0,0.0,0.0,0.0,-3.573214
1,siteA,2019-01-01 00:15:00,22.27,0.0,4.8,0.0,1,2019-01-01,1,1,0,00:15:00,1,1,2019,0,0,0.0,-0.021473,0.017178,0.0,0.0,4.085714,1.28,1.96774,1.788854,1.28,1.805722,1.949359,0.714286,1.175949,3.474396,0.714286,1.143428,3.573214,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.565714,1.96774,1.788854,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.788854,0.0,0.0,0.0,0.0,0.0,-1.788854,0.0,0.0,0.0,0.0,0.0,-1.949359,0.0,0.0,0.0,0.0,0.0,-1.949359,0.0,0.0,0.0,0.0,0.0,-3.474396,0.0,0.0,0.0,0.0,0.0,-3.474396,0.0,0.0,0.0,0.0,0.0,-3.573214,0.0,0.0,0.0,0.0,0.0,-3.573214
2,siteA,2019-01-01 00:30:00,22.35,0.0,4.8,0.0,1,2019-01-01,1,1,0,00:30:00,2,1,2019,0,0,0.0,-0.040791,0.028553,0.0,0.0,4.085714,1.28,1.96774,1.788854,1.28,1.805722,1.949359,0.714286,1.175949,3.474396,0.714286,1.143428,3.573214,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.565714,1.96774,1.788854,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.788854,0.0,0.0,0.0,0.0,0.0,-1.788854,0.0,0.0,0.0,0.0,0.0,-1.949359,0.0,0.0,0.0,0.0,0.0,-1.949359,0.0,0.0,0.0,0.0,0.0,-3.474396,0.0,0.0,0.0,0.0,0.0,-3.474396,0.0,0.0,0.0,0.0,0.0,-3.573214,0.0,0.0,0.0,0.0,0.0,-3.573214
3,siteA,2019-01-01 00:45:00,22.42,0.0,4.8,0.0,1,2019-01-01,1,1,0,00:45:00,3,1,2019,0,0,0.0,-0.058662,0.034024,0.0,0.0,4.085714,1.28,1.96774,1.788854,1.28,1.805722,1.949359,0.714286,1.175949,3.474396,0.714286,1.143428,3.573214,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.565714,1.96774,1.788854,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.788854,0.0,0.0,0.0,0.0,0.0,-1.788854,0.0,0.0,0.0,0.0,0.0,-1.949359,0.0,0.0,0.0,0.0,0.0,-1.949359,0.0,0.0,0.0,0.0,0.0,-3.474396,0.0,0.0,0.0,0.0,0.0,-3.474396,0.0,0.0,0.0,0.0,0.0,-3.573214,0.0,0.0,0.0,0.0,0.0,-3.573214
4,siteA,2019-01-01 01:00:00,22.5,0.0,4.8,0.0,1,2019-01-01,1,1,1,01:00:00,0,1,2019,0,0,0.0,-0.078496,0.031398,0.0,0.0,4.085714,1.28,1.96774,1.788854,1.28,1.805722,1.949359,0.714286,1.175949,3.474396,0.714286,1.143428,3.573214,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.565714,1.96774,1.788854,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.788854,0.0,0.0,0.0,0.0,0.0,-1.788854,0.0,0.0,0.0,0.0,0.0,-1.949359,0.0,0.0,0.0,0.0,0.0,-1.949359,0.0,0.0,0.0,0.0,0.0,-3.474396,0.0,0.0,0.0,0.0,0.0,-3.474396,0.0,0.0,0.0,0.0,0.0,-3.573214,0.0,0.0,0.0,0.0,0.0,-3.573214


In [5]:
# Persist the engineered training features. The DataFrame index is written as
# well (no index=False), so the CSV starts with an unnamed index column.
df_ABCD_new.to_csv('./data/sites_ABCD_NewFeatures.csv')

# Feature engineer the test sites

In [6]:
# Remove the training frames from the namespace before the test data is
# loaded (presumably to release their memory).
del df_ABCD, df_ABCD_new

In [7]:
# The test export carries measurements only: no demand_response columns.
test_columns = ['site','timestamp','temp','irr','power']
test_df = pd.read_csv('./data/test_data_v3.csv')
test_df.columns = test_columns

# Same feature pipeline as the training sites; the raw frame is replaced by
# the engineered one.
test_df = engineer_features(df=test_df)

# Bare expression: the notebook renders the first rows as the cell output.
test_df.head()

Unnamed: 0,site,timestamp,temp,irr,power,busday,date,day_of_week,week,hour,time,quarter_hour,month,year,season,working_hours,power_share,temp_power_corr,temp_corr_dev,irr_power_corr,irr_corr_dev,mean_usg_residual,power_mean_mt,power_std_mt,power_zscore_mt,power_mean_mh,power_std_mh,power_zscore_mh,power_mean_st,power_std_st,power_zscore_st,power_mean_sh,power_std_sh,power_zscore_sh,power_share_mean_mt,power_share_std_mt,power_share_zscore_mt,power_share_mean_mh,power_share_std_mh,power_share_zscore_mh,power_share_mean_st,power_share_std_st,power_share_zscore_st,power_share_mean_sh,power_share_std_sh,power_share_zscore_sh,mean_usg_residual_mean_mt,mean_usg_residual_std_mt,mean_usg_residual_zscore_mt,mean_usg_residual_mean_mh,...,power_share_zscore_sh_peek4_diff,power_share_zscore_sh_peek4_diff_t,mean_usg_residual_zscore_mt_diff,mean_usg_residual_zscore_mt_diff_t,mean_usg_residual_zscore_mt_diff_wdt,mean_usg_residual_zscore_mt_hourly_std,mean_usg_residual_zscore_mt_peek_diff,mean_usg_residual_zscore_mt_peek_diff_t,mean_usg_residual_zscore_mt_lag_diff,mean_usg_residual_zscore_mt_lag_diff_t,mean_usg_residual_zscore_mt_lag4_diff,mean_usg_residual_zscore_mt_lag4_diff_t,mean_usg_residual_zscore_mt_peek4_diff,mean_usg_residual_zscore_mt_peek4_diff_t,mean_usg_residual_zscore_mh_diff,mean_usg_residual_zscore_mh_diff_t,mean_usg_residual_zscore_mh_diff_wdt,mean_usg_residual_zscore_mh_hourly_std,mean_usg_residual_zscore_mh_peek_diff,mean_usg_residual_zscore_mh_peek_diff_t,mean_usg_residual_zscore_mh_lag_diff,mean_usg_residual_zscore_mh_lag_diff_t,mean_usg_residual_zscore_mh_lag4_diff,mean_usg_residual_zscore_mh_lag4_diff_t,mean_usg_residual_zscore_mh_peek4_diff,mean_usg_residual_zscore_mh_peek4_diff_t,mean_usg_residual_zscore_st_diff,mean_usg_residual_zscore_st_diff_t,mean_usg_residual_zscore_st_diff_wdt,mean_usg_residual_zscore_st_hourly_std,mean_usg_residual_zscore_st_peek_diff,mean_usg_residual_zscore_st_peek_diff_t,mean_usg_residual_zscore_st_lag_diff,mean_usg_residual_zscor
e_st_lag_diff_t,mean_usg_residual_zscore_st_lag4_diff,mean_usg_residual_zscore_st_lag4_diff_t,mean_usg_residual_zscore_st_peek4_diff,mean_usg_residual_zscore_st_peek4_diff_t,mean_usg_residual_zscore_sh_diff,mean_usg_residual_zscore_sh_diff_t,mean_usg_residual_zscore_sh_diff_wdt,mean_usg_residual_zscore_sh_hourly_std,mean_usg_residual_zscore_sh_peek_diff,mean_usg_residual_zscore_sh_peek_diff_t,mean_usg_residual_zscore_sh_lag_diff,mean_usg_residual_zscore_sh_lag_diff_t,mean_usg_residual_zscore_sh_lag4_diff,mean_usg_residual_zscore_sh_lag4_diff_t,mean_usg_residual_zscore_sh_peek4_diff,mean_usg_residual_zscore_sh_peek4_diff_t
0,siteA,2020-01-01 00:00:00,19.89,0.0,12.0,1,2020-01-01,2,1,0,00:00:00,0,1,2020,0,0,0.0,0.056355,-0.152722,0.0,0.0,9.777778,3.2,4.91935,1.788854,3.2,4.514305,1.949359,2.222222,3.666667,2.666667,2.222222,3.506004,2.788867,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.977778,4.91935,1.788854,0.977778,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.788854,0.0,0.0,0.0,0.0,0.0,-1.788854,0.0,0.0,0.0,0.0,0.0,-1.949359,0.0,0.0,0.0,0.0,0.0,-1.949359,0.0,0.0,0.0,0.0,0.0,-2.666667,0.0,0.0,0.0,0.0,0.0,-2.666667,0.0,0.0,0.0,0.0,0.0,-2.788867,0.0,0.0,0.0,0.0,0.0,-2.788867
1,siteA,2020-01-01 00:15:00,19.79,0.0,12.0,1,2020-01-01,2,1,0,00:15:00,1,1,2020,0,0,0.0,0.051247,-0.142466,0.0,0.0,9.777778,3.2,4.91935,1.788854,3.2,4.514305,1.949359,2.222222,3.666667,2.666667,2.222222,3.506004,2.788867,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.977778,4.91935,1.788854,0.977778,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.788854,0.0,0.0,0.0,0.0,0.0,-1.788854,0.0,0.0,0.0,0.0,0.0,-1.949359,0.0,0.0,0.0,0.0,0.0,-1.949359,0.0,0.0,0.0,0.0,0.0,-2.666667,0.0,0.0,0.0,0.0,0.0,-2.666667,0.0,0.0,0.0,0.0,0.0,-2.788867,0.0,0.0,0.0,0.0,0.0,-2.788867
2,siteA,2020-01-01 00:30:00,19.7,0.0,12.0,1,2020-01-01,2,1,0,00:30:00,2,1,2020,0,0,0.0,0.045397,-0.129382,0.0,0.0,9.777778,3.2,4.91935,1.788854,3.2,4.514305,1.949359,2.222222,3.666667,2.666667,2.222222,3.506004,2.788867,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.977778,4.91935,1.788854,0.977778,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.788854,0.0,0.0,0.0,0.0,0.0,-1.788854,0.0,0.0,0.0,0.0,0.0,-1.949359,0.0,0.0,0.0,0.0,0.0,-1.949359,0.0,0.0,0.0,0.0,0.0,-2.666667,0.0,0.0,0.0,0.0,0.0,-2.666667,0.0,0.0,0.0,0.0,0.0,-2.788867,0.0,0.0,0.0,0.0,0.0,-2.788867
3,siteA,2020-01-01 00:45:00,19.6,0.0,12.0,1,2020-01-01,2,1,0,00:45:00,3,1,2020,0,0,0.0,0.039868,-0.114421,0.0,0.0,9.777778,3.2,4.91935,1.788854,3.2,4.514305,1.949359,2.222222,3.666667,2.666667,2.222222,3.506004,2.788867,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.977778,4.91935,1.788854,0.977778,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.788854,0.0,0.0,0.0,0.0,0.0,-1.788854,0.0,0.0,0.0,0.0,0.0,-1.949359,0.0,0.0,0.0,0.0,0.0,-1.949359,0.0,0.0,0.0,0.0,0.0,-2.666667,0.0,0.0,0.0,0.0,0.0,-2.666667,0.0,0.0,0.0,0.0,0.0,-2.788867,0.0,0.0,0.0,0.0,0.0,-2.788867
4,siteA,2020-01-01 01:00:00,19.5,0.0,12.0,1,2020-01-01,2,1,1,01:00:00,0,1,2020,0,0,0.0,0.033646,-0.09421,0.0,0.0,9.777778,3.2,4.91935,1.788854,3.2,4.514305,1.949359,2.222222,3.666667,2.666667,2.222222,3.506004,2.788867,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.977778,4.91935,1.788854,0.977778,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.788854,0.0,0.0,0.0,0.0,0.0,-1.788854,0.0,0.0,0.0,0.0,0.0,-1.949359,0.0,0.0,0.0,0.0,0.0,-1.949359,0.0,0.0,0.0,0.0,0.0,-2.666667,0.0,0.0,0.0,0.0,0.0,-2.666667,0.0,0.0,0.0,0.0,0.0,-2.788867,0.0,0.0,0.0,0.0,0.0,-2.788867


In [8]:
# Persist the engineered test features. The DataFrame index is written as
# well (no index=False), so the CSV starts with an unnamed index column.
test_df.to_csv('./data/test_sites_NewFeatures.csv')