In [1]:
import os
import sys
import time
import random
import warnings
import collections
from dateutil.relativedelta import relativedelta
from tqdm import tqdm
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import seaborn as sns
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, BaggingRegressor, StackingRegressor, HistGradientBoostingRegressor 
from sklearn.linear_model import Ridge, Lasso, BayesianRidge, ElasticNet
from sklearn.preprocessing import OneHotEncoder

sys.path.append('../src')
import cb_utils

sns.set(style="darkgrid")
pd.options.display.max_columns = 500

%load_ext autoreload
%autoreload 2

In [2]:
# configuration
use_cache = True
seed = random.randint(0, 100)

print(f'Seed: {seed}')

Seed: 89


In [3]:
query = f"select * from junk.ip_features_all;"
ip_features_all = cb_utils.sql_query_to_df(query, use_cache=use_cache)

Pulled query from cache


In [4]:
ip_features_all.shape

(1444147, 140)

In [5]:
member_key = cb_utils.sql_query_to_df(f"select * from junk.ip_member_key;", use_cache=use_cache)

Pulled query from cache


### Mark good member periods

eligible members on day of id and first day of post

iterate from first month to total_months - pre + post_period, create pre/post for member if elg

In [6]:
pre_months = 12
post_months = 6
pre_post_months = pre_months + post_months

In [7]:
months = sorted(ip_features_all.eom.unique())
n_months = len(months)
last_valid_pre_start = n_months - pre_post_months # 42
months[:3], months[-3:], n_months, last_valid_pre_start 

([datetime.date(2017, 1, 31),
  datetime.date(2017, 2, 28),
  datetime.date(2017, 3, 31)],
 [datetime.date(2021, 10, 31),
  datetime.date(2021, 11, 30),
  datetime.date(2021, 12, 31)],
 60,
 42)

In [8]:
# create bool column flags to easily query what batches this can be in
# pres = {f'pre_{i}': False for i, _ in enumerate(months) if i < last_valid_pre_start}
# posts = {f'post_{i}': False for i, _ in enumerate(months) if i < last_valid_pre_start}
flags = {f'{prefix}_{i}': False for prefix in ['pre', 'post', 'pre_post_elg'] for i in range(n_months) if i < last_valid_pre_start}
ip_features_all = ip_features_all.assign(**flags);

  ip_features_all = ip_features_all.assign(**flags);
  ip_features_all = ip_features_all.assign(**flags);
  ip_features_all = ip_features_all.assign(**flags);
  ip_features_all = ip_features_all.assign(**flags);
  ip_features_all = ip_features_all.assign(**flags);
  ip_features_all = ip_features_all.assign(**flags);
  ip_features_all = ip_features_all.assign(**flags);
  ip_features_all = ip_features_all.assign(**flags);
  ip_features_all = ip_features_all.assign(**flags);
  ip_features_all = ip_features_all.assign(**flags);
  ip_features_all = ip_features_all.assign(**flags);
  ip_features_all = ip_features_all.assign(**flags);
  ip_features_all = ip_features_all.assign(**flags);
  ip_features_all = ip_features_all.assign(**flags);
  ip_features_all = ip_features_all.assign(**flags);
  ip_features_all = ip_features_all.assign(**flags);
  ip_features_all = ip_features_all.assign(**flags);
  ip_features_all = ip_features_all.assign(**flags);
  ip_features_all = ip_features_all.assign(**f

In [9]:
ifa = ip_features_all

In [10]:
# assign bool flags for each potential period
periods = []
for i in tqdm(range(last_valid_pre_start)):
    # Build date anchor points relative to start month
    pre_start = months[i]
    pre_end = months[i+11]
    # id_date = pre_end + relativedelta(days=1)
    
    post_start = None
    post_end = None
    if i + 17 < n_months:
        post_start = months[i+12]
        post_end = months[i+17]
        
    periods.append([i, pre_start, pre_end, post_start, post_end])

    # Determine elg members
    pre_elg = ifa.loc[(ifa.eom == pre_end) & (ifa.is_cb_eligible)].member_id.unique()
    post_elg = ifa.loc[(ifa.eom == post_start) & (ifa.is_cb_eligible)].member_id.unique()

    pre_post_elg_mems = np.intersect1d(pre_elg, post_elg)

    # Flag elg members for period i
    ifa.loc[(ifa.eom >= pre_start) & (ifa.eom <= pre_end) & (ifa.member_id.isin(pre_elg)), f'pre_{i}'] = True 
    ifa.loc[(ifa.eom >= post_start) & (ifa.eom <= post_end) & (ifa.member_id.isin(post_elg)), f'post_{i}'] = True 

    ifa.loc[(ifa.eom >= pre_start) & (ifa.eom <= post_end) & (ifa.member_id.isin(pre_post_elg_mems)), f'pre_post_elg_{i}'] = True 

100%|██████████████████████████████████████████████████████████████████████████████████████████████████| 42/42 [00:18<00:00,  2.23it/s]


In [11]:
periods_df = pd.DataFrame(periods, columns=['period', 'pre_start', 'pre_end', 'post_start', 'post_end'])

In [12]:
months_df = pd.DataFrame(months, columns=['eom'])

In [13]:
ifa.is_cb_eligible = ifa.is_cb_eligible.fillna(False)
ifa.is_unaligned = ifa.is_unaligned.fillna(False)
ifa = ifa.fillna(0)

In [14]:
ifa = ifa.assign(is_male=np.where(ifa.gender=='m',1,0))

In [15]:
# assign state
ifa = ifa.assign(state=ifa.mco_name.str.split(' ').apply(lambda x: x[1]).replace({'Centene': 'IA'}))

In [16]:
ifa.to_parquet('./data/member_periods_v4.parquet')

In [17]:
# ifa.to_csv('./data/member_periods.csv')

### Build features + targets

In [18]:
member_periods = pd.read_parquet('./data/member_periods_v4.parquet')

In [19]:
target_cols = ['ip_tc', 'er_tc', 'snf_tc', 'amb_tc']
##
tc_feats = [c for c in ifa.columns if '_tc' in c]
ddos_cols = ['ip_ddos', 'er_ddos', 'out_ddos', 'snf_ddos', 'icf_ddos', 'hh_ddos', 'amb_ddos', 'hsp_ddos', 'pro_ddos', 'spc_fac_ddos', 'dme_ddos', 'cls_ddos', 'hha_ddos']
top_level_feats = ['age', 'is_male', 'state', 'ggroup', 'line_of_business_id']

In [20]:
tc_dx_feats = [
 # 'rx_tc',
 # 'other_tc',
 # 'ip_tc',
 # 'er_tc',
 # 'out_tc',
 # 'snf_tc',
 # 'icf_tc',
 # 'hh_tc',
 # 'amb_tc',
 # 'hsp_tc',
 # 'pro_tc',
 # 'spc_fac_tc',
 # 'dme_tc',
 # 'cls_tc',
 # 'hha_tc',
 'hcbs_attdpcs_tc',
 'hcbs_other_tc',
 'hcbs_support_house_tc',
 'hcbs_adult_day_tc',
 'hcbs_pers_tc',
 'hcbs_assist_tech_tc',
 'oxygen_tc',
 'hosp_bed_tc',
 'chf_tc',
 'heart_tc',
 'copd_tc',
 'pulmonar_tc',
 'cancer_tc',
 'ckd_tc',
 'esrd_tc',
 'lipidy_tc',
 'diab_tc',
 'alzh_tc',
 'demented_tc',
 'stroke_tc',
 'hyper_tc',
 'fall_tc',
 'trans_tc',
 'liver_tc',
 'hippy_tc',
 'depressed_tc',
 'psycho_tc',
 'druggy_tc',
 'boozy_tc',
 'paralyzed_tc',
 'mono_tc',
 'mono_dom_tc',
 'hemi_tc',
 'hemi_dom_tc',
 'para_tc',
 'quad_tc',
 'tbi_tc',
 'obese_tc',
 'pressure_ulcer_tc',
 'hemophilia_tc']

In [21]:
# def build_member_features(mdf): 
mcos = member_periods.mco_name.unique().tolist()
mco_cols = [f'is_{m.lower().replace(" ", "_")}' for m in mcos]
n_mcos = len(mcos)
def encode_mco(mco_str):
    one_hot = np.zeros(n_mcos, dtype=int)
    one_hot[mcos.index(mco_str)] = 1 
    return one_hot

In [22]:
lobs = member_periods.line_of_business_id.unique().tolist()
lob_cols = [f'is_lob_{l}' for l in lobs]
n_lobs = len(lobs)
def encode_lob(lob):
    one_hot = np.zeros(n_lobs, dtype=int)
    one_hot[lobs.index(lob)] = 1 
    return one_hot

In [23]:
groups = member_periods.ggroup.unique().tolist()
group_cols = [f'is_group_{l}' for l in groups]
n_groups = len(groups)
def encode_group(group):
    one_hot = np.zeros(n_groups, dtype=int)
    one_hot[groups.index(group)] = 1 
    return one_hot

In [24]:
states = member_periods.state.unique().tolist()
state_cols = [f'is_state_{l}' for l in states]
n_states = len(states)
def encode_state(state):
    one_hot = np.zeros(n_states, dtype=int)
    one_hot[states.index(state)] = 1 
    return one_hot

In [25]:
wide_ddos_cols = [f'{c}_{i}' for i in range(pre_months) for c in ddos_cols]
wide_tc_dx_cols = [f'{c}_{i}' for i in range(pre_months) for c in tc_dx_feats]

In [26]:
def build_member_features(mdf, months_range):
# mdf = member_periods.loc[(member_periods.pre_0) & (member_periods.pre_full_0) & (member_periods.member_id == 102)].sort_values('eom')
    # print(months_range)

    if len(mdf) == 0:
        return mdf
        
    demographic_data = mdf[top_level_feats + ['member_id']].iloc[-1]
    
    mdf = months_range.merge(mdf, on='eom', how='left')
    mdf = mdf.sort_values('eom')[ddos_cols + tc_dx_feats]
    mdf = mdf.fillna(0)
    
    ddos_data = mdf.to_numpy().reshape([1, -1])

    state_data = encode_state(demographic_data.state)
    lob_data = encode_lob(demographic_data.line_of_business_id)
    group_data = encode_group(demographic_data.ggroup)
    data = np.concatenate((ddos_data[0], state_data, lob_data, group_data, np.array([demographic_data.is_male, demographic_data.age, demographic_data.member_id])), axis=0, dtype=float)
    cols = wide_ddos_cols + wide_tc_dx_cols + state_cols + lob_cols + group_cols + ['is_male', 'age', 'member_id']

    return pd.DataFrame([data], columns=cols)

In [27]:
# mdf = member_periods.loc[(member_periods.pre_0) & (member_periods.pre_full_0 == False) & (member_periods.member_id == 26)].sort_values('eom')

In [28]:
def build_member_targets(mdf):
    if len(mdf) == 0:
        return pd.DataFrame([], columns=['member_id', 'target'])
    tc = mdf[target_cols].sum().sum()
#     pmpm = tc / mdf.cpmm.sum()
    return pd.DataFrame([[mdf.iloc[0].member_id, tc]], columns=['member_id', 'target'])

In [29]:
def build_targets(post_df):
    return post_df.groupby('member_id', as_index=False).apply(build_member_targets)

In [30]:
def build_features(pre_df, months_range):
    return pre_df.groupby('member_id', as_index=False).apply(lambda x: build_member_features(x, months_range))

In [33]:
# build features and targets for each period
period_dfs = []
for i in tqdm(range(last_valid_pre_start)):
    elg = member_periods.loc[member_periods[f'pre_post_elg_{i}']] 
    pre = elg.loc[elg[f'pre_{i}']] 

    post = elg.loc[elg[f'post_{i}']] 
    x = build_features(pre, months_df.loc[i:i+11])
    # if i < 42:
    y = build_targets(post)
    final = x.merge(y, how='left', left_on='member_id', right_on='member_id').assign(period=i)
    # else:
        # final = x.assign(period=i)
    period_dfs.append(final)

100%|███████████████████████████████████████████████████████████████████████████████████████████████| 42/42 [2:13:56<00:00, 191.36s/it]


In [34]:
master_df = pd.concat(period_dfs)
master_df.shape

(974574, 671)

In [36]:
# master_df.to_parquet('./data/master_df.parquet')
# master_df.to_parquet('./data/master_ddos_df.parquet')
master_df.to_parquet('./data/master_wide_df_v4.parquet')

### Train/Val/Test split
Avoid any leakage by doing the splits at the member level

In [37]:
# master_df = pd.read_parquet('./data/master_df.parquet')
# master_df = pd.read_parquet('./data/master_ddos_df.parquet')
master_df = pd.read_parquet('./data/master_wide_df_v4.parquet')
# master_df = master_df.loc[master_df.period > 24]
# make dtype str for these categorical features
# master_df.ggroup = master_df.ggroup.astype(str)
# master_df.line_of_business_id = master_df.line_of_business_id.astype(str)

In [38]:
master_df.head()

Unnamed: 0,ip_ddos_0,er_ddos_0,out_ddos_0,snf_ddos_0,icf_ddos_0,hh_ddos_0,amb_ddos_0,hsp_ddos_0,pro_ddos_0,spc_fac_ddos_0,dme_ddos_0,cls_ddos_0,hha_ddos_0,ip_ddos_1,er_ddos_1,out_ddos_1,snf_ddos_1,icf_ddos_1,hh_ddos_1,amb_ddos_1,hsp_ddos_1,pro_ddos_1,spc_fac_ddos_1,dme_ddos_1,cls_ddos_1,hha_ddos_1,ip_ddos_2,er_ddos_2,out_ddos_2,snf_ddos_2,icf_ddos_2,hh_ddos_2,amb_ddos_2,hsp_ddos_2,pro_ddos_2,spc_fac_ddos_2,dme_ddos_2,cls_ddos_2,hha_ddos_2,ip_ddos_3,er_ddos_3,out_ddos_3,snf_ddos_3,icf_ddos_3,hh_ddos_3,amb_ddos_3,hsp_ddos_3,pro_ddos_3,spc_fac_ddos_3,dme_ddos_3,cls_ddos_3,hha_ddos_3,ip_ddos_4,er_ddos_4,out_ddos_4,snf_ddos_4,icf_ddos_4,hh_ddos_4,amb_ddos_4,hsp_ddos_4,pro_ddos_4,spc_fac_ddos_4,dme_ddos_4,cls_ddos_4,hha_ddos_4,ip_ddos_5,er_ddos_5,out_ddos_5,snf_ddos_5,icf_ddos_5,hh_ddos_5,amb_ddos_5,hsp_ddos_5,pro_ddos_5,spc_fac_ddos_5,dme_ddos_5,cls_ddos_5,hha_ddos_5,ip_ddos_6,er_ddos_6,out_ddos_6,snf_ddos_6,icf_ddos_6,hh_ddos_6,amb_ddos_6,hsp_ddos_6,pro_ddos_6,spc_fac_ddos_6,dme_ddos_6,cls_ddos_6,hha_ddos_6,ip_ddos_7,er_ddos_7,out_ddos_7,snf_ddos_7,icf_ddos_7,hh_ddos_7,amb_ddos_7,hsp_ddos_7,pro_ddos_7,spc_fac_ddos_7,dme_ddos_7,cls_ddos_7,hha_ddos_7,ip_ddos_8,er_ddos_8,out_ddos_8,snf_ddos_8,icf_ddos_8,hh_ddos_8,amb_ddos_8,hsp_ddos_8,pro_ddos_8,spc_fac_ddos_8,dme_ddos_8,cls_ddos_8,hha_ddos_8,ip_ddos_9,er_ddos_9,out_ddos_9,snf_ddos_9,icf_ddos_9,hh_ddos_9,amb_ddos_9,hsp_ddos_9,pro_ddos_9,spc_fac_ddos_9,dme_ddos_9,cls_ddos_9,hha_ddos_9,ip_ddos_10,er_ddos_10,out_ddos_10,snf_ddos_10,icf_ddos_10,hh_ddos_10,amb_ddos_10,hsp_ddos_10,pro_ddos_10,spc_fac_ddos_10,dme_ddos_10,cls_ddos_10,hha_ddos_10,ip_ddos_11,er_ddos_11,out_ddos_11,snf_ddos_11,icf_ddos_11,hh_ddos_11,amb_ddos_11,hsp_ddos_11,pro_ddos_11,spc_fac_ddos_11,dme_ddos_11,cls_ddos_11,hha_ddos_11,hcbs_attdpcs_tc_0,hcbs_other_tc_0,hcbs_support_house_tc_0,hcbs_adult_day_tc_0,hcbs_pers_tc_0,hcbs_assist_tech_tc_0,oxygen_tc_0,hosp_bed_tc_0,chf_tc_0,heart_tc_0,copd_tc_0,pulmonar_tc_0,cancer_tc_0,ckd_tc_0,esrd_tc_0,lipidy_tc_0,diab_tc_0,alzh_tc_0,demented_tc_0,stroke_tc_0,hyper_tc_0,fall_tc_0,trans_tc_0,liver_tc_0,hippy_tc_0,depressed_tc_0,psycho_tc_0,druggy_tc_0,boozy_tc_0,paralyzed_tc_0,mono_tc_0,mono_dom_tc_0,hemi_tc_0,hemi_dom_tc_0,para_tc_0,quad_tc_0,tbi_tc_0,obese_tc_0,pressure_ulcer_tc_0,hemophilia_tc_0,hcbs_attdpcs_tc_1,hcbs_other_tc_1,hcbs_support_house_tc_1,hcbs_adult_day_tc_1,hcbs_pers_tc_1,hcbs_assist_tech_tc_1,oxygen_tc_1,hosp_bed_tc_1,chf_tc_1,heart_tc_1,copd_tc_1,pulmonar_tc_1,cancer_tc_1,ckd_tc_1,esrd_tc_1,lipidy_tc_1,diab_tc_1,alzh_tc_1,demented_tc_1,stroke_tc_1,hyper_tc_1,fall_tc_1,trans_tc_1,liver_tc_1,hippy_tc_1,depressed_tc_1,psycho_tc_1,druggy_tc_1,boozy_tc_1,paralyzed_tc_1,mono_tc_1,mono_dom_tc_1,hemi_tc_1,hemi_dom_tc_1,para_tc_1,quad_tc_1,tbi_tc_1,obese_tc_1,pressure_ulcer_tc_1,hemophilia_tc_1,hcbs_attdpcs_tc_2,hcbs_other_tc_2,hcbs_support_house_tc_2,hcbs_adult_day_tc_2,hcbs_pers_tc_2,hcbs_assist_tech_tc_2,oxygen_tc_2,hosp_bed_tc_2,chf_tc_2,heart_tc_2,copd_tc_2,pulmonar_tc_2,cancer_tc_2,ckd_tc_2,...,depressed_tc_6,psycho_tc_6,druggy_tc_6,boozy_tc_6,paralyzed_tc_6,mono_tc_6,mono_dom_tc_6,hemi_tc_6,hemi_dom_tc_6,para_tc_6,quad_tc_6,tbi_tc_6,obese_tc_6,pressure_ulcer_tc_6,hemophilia_tc_6,hcbs_attdpcs_tc_7,hcbs_other_tc_7,hcbs_support_house_tc_7,hcbs_adult_day_tc_7,hcbs_pers_tc_7,hcbs_assist_tech_tc_7,oxygen_tc_7,hosp_bed_tc_7,chf_tc_7,heart_tc_7,copd_tc_7,pulmonar_tc_7,cancer_tc_7,ckd_tc_7,esrd_tc_7,lipidy_tc_7,diab_tc_7,alzh_tc_7,demented_tc_7,stroke_tc_7,hyper_tc_7,fall_tc_7,trans_tc_7,liver_tc_7,hippy_tc_7,depressed_tc_7,psycho_tc_7,druggy_tc_7,boozy_tc_7,paralyzed_tc_7,mono_tc_7,mono_dom_tc_7,hemi_tc_7,hemi_dom_tc_7,para_tc_7,quad_tc_7,tbi_tc_7,obese_tc_7,pressure_ulcer_tc_7,hemophilia_tc_7,hcbs_attdpcs_tc_8,hcbs_other_tc_8,hcbs_support_house_tc_8,hcbs_adult_day_tc_8,hcbs_pers_tc_8,hcbs_assist_tech_tc_8,oxygen_tc_8,hosp_bed_tc_8,chf_tc_8,heart_tc_8,copd_tc_8,pulmonar_tc_8,cancer_tc_8,ckd_tc_8,esrd_tc_8,lipidy_tc_8,diab_tc_8,alzh_tc_8,demented_tc_8,stroke_tc_8,hyper_tc_8,fall_tc_8,trans_tc_8,liver_tc_8,hippy_tc_8,depressed_tc_8,psycho_tc_8,druggy_tc_8,boozy_tc_8,paralyzed_tc_8,mono_tc_8,mono_dom_tc_8,hemi_tc_8,hemi_dom_tc_8,para_tc_8,quad_tc_8,tbi_tc_8,obese_tc_8,pressure_ulcer_tc_8,hemophilia_tc_8,hcbs_attdpcs_tc_9,hcbs_other_tc_9,hcbs_support_house_tc_9,hcbs_adult_day_tc_9,hcbs_pers_tc_9,hcbs_assist_tech_tc_9,oxygen_tc_9,hosp_bed_tc_9,chf_tc_9,heart_tc_9,copd_tc_9,pulmonar_tc_9,cancer_tc_9,ckd_tc_9,esrd_tc_9,lipidy_tc_9,diab_tc_9,alzh_tc_9,demented_tc_9,stroke_tc_9,hyper_tc_9,fall_tc_9,trans_tc_9,liver_tc_9,hippy_tc_9,depressed_tc_9,psycho_tc_9,druggy_tc_9,boozy_tc_9,paralyzed_tc_9,mono_tc_9,mono_dom_tc_9,hemi_tc_9,hemi_dom_tc_9,para_tc_9,quad_tc_9,tbi_tc_9,obese_tc_9,pressure_ulcer_tc_9,hemophilia_tc_9,hcbs_attdpcs_tc_10,hcbs_other_tc_10,hcbs_support_house_tc_10,hcbs_adult_day_tc_10,hcbs_pers_tc_10,hcbs_assist_tech_tc_10,oxygen_tc_10,hosp_bed_tc_10,chf_tc_10,heart_tc_10,copd_tc_10,pulmonar_tc_10,cancer_tc_10,ckd_tc_10,esrd_tc_10,lipidy_tc_10,diab_tc_10,alzh_tc_10,demented_tc_10,stroke_tc_10,hyper_tc_10,fall_tc_10,trans_tc_10,liver_tc_10,hippy_tc_10,depressed_tc_10,psycho_tc_10,druggy_tc_10,boozy_tc_10,paralyzed_tc_10,mono_tc_10,mono_dom_tc_10,hemi_tc_10,hemi_dom_tc_10,para_tc_10,quad_tc_10,tbi_tc_10,obese_tc_10,pressure_ulcer_tc_10,hemophilia_tc_10,hcbs_attdpcs_tc_11,hcbs_other_tc_11,hcbs_support_house_tc_11,hcbs_adult_day_tc_11,hcbs_pers_tc_11,hcbs_assist_tech_tc_11,oxygen_tc_11,hosp_bed_tc_11,chf_tc_11,heart_tc_11,copd_tc_11,pulmonar_tc_11,cancer_tc_11,ckd_tc_11,esrd_tc_11,lipidy_tc_11,diab_tc_11,alzh_tc_11,demented_tc_11,stroke_tc_11,hyper_tc_11,fall_tc_11,trans_tc_11,liver_tc_11,hippy_tc_11,depressed_tc_11,psycho_tc_11,druggy_tc_11,boozy_tc_11,paralyzed_tc_11,mono_tc_11,mono_dom_tc_11,hemi_tc_11,hemi_dom_tc_11,para_tc_11,quad_tc_11,tbi_tc_11,obese_tc_11,pressure_ulcer_tc_11,hemophilia_tc_11,is_state_TN,is_state_FL,is_state_TX,is_state_KS,is_state_IA,is_lob_1,is_lob_3,is_lob_2,is_lob_8,is_group_0.0,is_group_3.0,is_group_2.0,is_group_1.0,is_group_-1.0,is_group_5.0,is_group_6.0,is_group_4.0,is_group_8.0,is_group_14.0,is_group_16.0,is_group_7.0,is_group_11.0,is_group_15.0,is_group_20.0,is_group_12.0,is_group_13.0,is_group_9.0,is_group_21.0,is_group_18.0,is_group_10.0,is_male,age,member_id,target,period
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,482.6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,2.0,0.0,0.0,614.68,29.65,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,356.68,0.0,0.0,0.0,0.0,356.68,0.0,0.0,0.0,170.86,0.0,0.0,0.0,3.0,0.0,0.0,7.0,0.0,0.0,3.0,0.0,1.0,0.0,0.0,436.88,9970.08,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,41.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,197.62,0.0,0.0,0.0,0.0,197.62,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,10.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,82.08,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,82.08,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,9.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,30.78,29.65,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,217.6,0.0,0.0,0.0,0.0,217.6,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,35.0,26.0,0.0,0
1,0.0,0.0,2.0,0.0,0.0,0.0,13.0,0.0,2.0,0.0,11.0,0.0,0.0,622.72,29.65,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,393.88,0.0,0.0,4413.42,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,7.0,0.0,3.0,0.0,12.0,0.0,0.0,578.24,29.65,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1275.7,0.0,0.0,5813.68,1189.06,1189.06,1279.24,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,13.0,0.0,0.0,0.0,9.0,0.0,8.0,0.0,7.0,0.0,0.0,644.96,118.61,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,86.64,0.0,0.0,4930.47,0.0,0.0,1448.37,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,15.0,0.0,0.0,0.0,13.0,0.0,3.0,0.0,1.0,0.0,0.0,611.6,96.37,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,288.52,0.0,0.0,5104.99,0.0,0.0,463.35,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9.0,13.0,0.0,0.0,0.0,4.0,0.0,10.0,0.0,1.0,0.0,0.0,500.4,163.09,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1412.75,1011.22,0.0,0.0,7496.75,3921.38,0.0,4323.76,0.0,0.0,0.0,1412.75,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,15.0,0.0,0.0,0.0,1.0,0.0,5.0,0.0,2.0,0.0,0.0,633.84,29.95,0.0,0.0,0.0,0.0,0.0,0.0,301.08,0.0,1543.61,0.0,0.0,4948.99,0.0,0.0,1683.01,0.0,0.0,0.0,1214.73,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,15.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,2.0,0.0,0.0,600.48,29.95,0.0,0.0,0.0,0.0,0.0,0.0,417.21,417.21,328.88,0.0,0.0,4822.87,417.21,0.0,587.64,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,16.0,0.0,0.0,0.0,2.0,0.0,5.0,0.0,2.0,0.0,0.0,622.72,29.95,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,87.52,0.0,0.0,3513.2,0.0,0.0,1303.47,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,15.0,0.0,0.0,0.0,2.0,0.0,3.0,0.0,3.0,0.0,0.0,633.84,29.65,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,129.1,0.0,0.0,3144.16,0.0,0.0,715.11,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,64.0,102.0,22807.25,0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,2.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,121.62,0.0,0.0,0.0,0.0,0.0,0.0,0.0,154.78,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,71.0,115.0,0.0,0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,1214.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,1.0,0.0,0.0,0.0,0.0,1250.7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,116.82,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,1.0,0.0,0.0,0.0,0.0,1151.75,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,125.44,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,1255.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,1.0,0.0,0.0,0.0,0.0,1375.16,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.2,0.0,0.0,86.82,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1189.09,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1225.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8.0,0.0,0.0,5.0,0.0,1.0,0.0,0.0,0.0,0.0,1492.08,0.0,0.0,0.0,0.0,0.0,0.0,0.0,183.15,0.0,0.0,0.0,0.0,0.0,118.01,0.0,0.0,118.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.0,0.0,0.0,2.0,0.0,2.0,0.0,0.0,0.0,0.0,1230.82,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.97,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,63.0,185.0,4085.0,0
4,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,335.28,214.83,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,131.28,0.0,0.0,0.0,131.28,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,330.2,194.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,563.88,214.83,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,650.24,207.9,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3220.14,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3220.14,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,751.84,214.83,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,183.06,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,143.64,207.9,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,430.92,200.97,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,404.52,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,471.96,207.9,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,243.26,0.0,0.0,0.0,0.0,0.0,463.26,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,553.44,215.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,323.54,106.61,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,64.0,200.0,0.0,0


### One hot enc

In [39]:
# cat_feats = ['gender', 'mco_name', 'ggroup', 'line_of_business_id']
# one_hots = pd.get_dummies(master_df[cat_feats])
# master_df = pd.concat([master_df, one_hots], axis=1).drop(columns=cat_feats)

In [40]:
member_ids = master_df.member_id.unique()
n_members = len(member_ids)
n_members

42219

In [41]:
train_n = int(n_members * .7)
val_n = int(n_members * .15)
test_n = n_members - train_n - val_n
train_n, val_n, test_n

(29553, 6332, 6334)

In [42]:
np.random.shuffle(member_ids)

In [43]:
train_mems, val_mems, test_mems = np.split(member_ids, [train_n, train_n + val_n])
assert train_mems.shape[0] == train_n
assert val_mems.shape[0] == val_n
assert test_mems.shape[0] == test_n

In [44]:
training_df = master_df.loc[master_df.member_id.isin(train_mems)]
val_df = master_df.loc[master_df.member_id.isin(val_mems)]
test_df = master_df.loc[master_df.member_id.isin(test_mems)]

### Normalize/encode features if needed
not needed for trees, most linear models will do it for you if you pass the param

### Training

In [45]:
x_cols = [c for c in training_df.columns if c not in ['member_id', 'target', 'period']]
# x_cols = [c for c in training_df.columns if c not in ['member_id', 'target', 'period'] + cat_feats]
x = training_df[x_cols]
y = training_df.target

In [46]:
ridge = Ridge(alpha=1, normalize=True)

In [47]:
ridge.fit(x, y)

If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), Ridge())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)

Set parameter alpha to: original_alpha * n_samples. 


Ridge(alpha=1, normalize=True)

In [48]:
ridge.score(x,y)

0.1484452975138304

In [49]:
val_x = val_df[x_cols]
val_y = val_df.target
ridge.score(val_x,val_y)

0.14251581427374604

In [51]:
histr = HistGradientBoostingRegressor()
histr.fit(x, y)

HistGradientBoostingRegressor()

In [52]:
histr.score(x, y)

0.2574800220536849

In [53]:
histr.score(val_x, val_y)

0.15944613612012726

In [None]:
val_df.head()

In [None]:
train_preds = histr.predict(x)
val_preds = histr.predict(val_x)

In [None]:
train_w_preds = training_df.assign(pred=train_preds, sample='train').merge(periods_df, on='period')
val_w_preds = val_df.assign(pred=val_preds, sample='validation').merge(periods_df, on='period')

In [None]:
out_cols = ['member_id', 'sample', 'target', 'pred', 'period', 'pre_start', 'pre_end', 'post_start', 'post_end']

In [None]:
pd.concat([train_w_preds[out_cols], val_w_preds[out_cols]]).to_csv('hgbr_12_mom_ddos.csv', index=False)

### Feature importance

In [50]:
sorted(zip(ridge.coef_, x.columns))

[(-744.3443821147873, 'is_group_14.0'),
 (-535.2269592142112, 'is_group_16.0'),
 (-493.3166853500218, 'is_group_7.0'),
 (-471.47293173355564, 'is_group_9.0'),
 (-425.8762239209822, 'is_group_18.0'),
 (-423.87695606167006, 'is_lob_1'),
 (-422.8530853784313, 'is_group_12.0'),
 (-373.8904176673686, 'mono_dom_tc_10'),
 (-352.7727375992785, 'is_group_4.0'),
 (-335.504459064428, 'is_state_IA'),
 (-297.4944238340344, 'is_group_6.0'),
 (-250.43629316514293, 'is_group_13.0'),
 (-237.3540535612524, 'hemi_dom_tc_2'),
 (-225.71405298750608, 'hyper_tc_1'),
 (-215.39051765550442, 'hosp_bed_tc_0'),
 (-214.33157838140224, 'oxygen_tc_4'),
 (-205.83750833139766, 'is_state_TN'),
 (-196.95483690942024, 'is_group_10.0'),
 (-188.90866167043063, 'demented_tc_9'),
 (-171.30118835783256, 'amb_ddos_8'),
 (-167.57347068886622, 'is_group_5.0'),
 (-160.96992437933153, 'stroke_tc_5'),
 (-158.02840512552805, 'hh_ddos_4'),
 (-128.33080691147197, 'is_group_15.0'),
 (-117.70336295260445, 'hemi_tc_6'),
 (-67.34123048952

In [None]:
from sklearn.inspection import permutation_importance

In [None]:
result = permutation_importance(histr, val_x, val_y, n_repeats=10,random_state=0, n_jobs=-1)

In [None]:
sorted(zip(result.importances_mean, val_x.columns))

### Write datasets for sagemaker

In [62]:
export_cols = ['target'] + [c for c in training_df.columns if c not in ['member_id', 'target', 'period']]
training_df[export_cols].head()

Unnamed: 0,target,ip_ddos_0,er_ddos_0,out_ddos_0,snf_ddos_0,icf_ddos_0,hh_ddos_0,amb_ddos_0,hsp_ddos_0,pro_ddos_0,spc_fac_ddos_0,dme_ddos_0,cls_ddos_0,hha_ddos_0,ip_ddos_1,er_ddos_1,out_ddos_1,snf_ddos_1,icf_ddos_1,hh_ddos_1,amb_ddos_1,hsp_ddos_1,pro_ddos_1,spc_fac_ddos_1,dme_ddos_1,cls_ddos_1,hha_ddos_1,ip_ddos_2,er_ddos_2,out_ddos_2,snf_ddos_2,icf_ddos_2,hh_ddos_2,amb_ddos_2,hsp_ddos_2,pro_ddos_2,spc_fac_ddos_2,dme_ddos_2,cls_ddos_2,hha_ddos_2,ip_ddos_3,er_ddos_3,out_ddos_3,snf_ddos_3,icf_ddos_3,hh_ddos_3,amb_ddos_3,hsp_ddos_3,pro_ddos_3,spc_fac_ddos_3,dme_ddos_3,cls_ddos_3,hha_ddos_3,ip_ddos_4,er_ddos_4,out_ddos_4,snf_ddos_4,icf_ddos_4,hh_ddos_4,amb_ddos_4,hsp_ddos_4,pro_ddos_4,spc_fac_ddos_4,dme_ddos_4,cls_ddos_4,hha_ddos_4,ip_ddos_5,er_ddos_5,out_ddos_5,snf_ddos_5,icf_ddos_5,hh_ddos_5,amb_ddos_5,hsp_ddos_5,pro_ddos_5,spc_fac_ddos_5,dme_ddos_5,cls_ddos_5,hha_ddos_5,ip_ddos_6,er_ddos_6,out_ddos_6,snf_ddos_6,icf_ddos_6,hh_ddos_6,amb_ddos_6,hsp_ddos_6,pro_ddos_6,spc_fac_ddos_6,dme_ddos_6,cls_ddos_6,hha_ddos_6,ip_ddos_7,er_ddos_7,out_ddos_7,snf_ddos_7,icf_ddos_7,hh_ddos_7,amb_ddos_7,hsp_ddos_7,pro_ddos_7,spc_fac_ddos_7,dme_ddos_7,cls_ddos_7,hha_ddos_7,ip_ddos_8,er_ddos_8,out_ddos_8,snf_ddos_8,icf_ddos_8,hh_ddos_8,amb_ddos_8,hsp_ddos_8,pro_ddos_8,spc_fac_ddos_8,dme_ddos_8,cls_ddos_8,hha_ddos_8,ip_ddos_9,er_ddos_9,out_ddos_9,snf_ddos_9,icf_ddos_9,hh_ddos_9,amb_ddos_9,hsp_ddos_9,pro_ddos_9,spc_fac_ddos_9,dme_ddos_9,cls_ddos_9,hha_ddos_9,ip_ddos_10,er_ddos_10,out_ddos_10,snf_ddos_10,icf_ddos_10,hh_ddos_10,amb_ddos_10,hsp_ddos_10,pro_ddos_10,spc_fac_ddos_10,dme_ddos_10,cls_ddos_10,hha_ddos_10,ip_ddos_11,er_ddos_11,out_ddos_11,snf_ddos_11,icf_ddos_11,hh_ddos_11,amb_ddos_11,hsp_ddos_11,pro_ddos_11,spc_fac_ddos_11,dme_ddos_11,cls_ddos_11,hha_ddos_11,hcbs_attdpcs_tc_0,hcbs_other_tc_0,hcbs_support_house_tc_0,hcbs_adult_day_tc_0,hcbs_pers_tc_0,hcbs_assist_tech_tc_0,oxygen_tc_0,hosp_bed_tc_0,chf_tc_0,heart_tc_0,copd_tc_0,pulmonar_tc_0,cancer_tc_0,ckd_tc_0,esrd_tc_0,lipidy_tc_0,diab_tc_0,alzh_tc_0,demented_tc_0,stroke_tc_0,hyper_tc_0,fall_tc_0,trans_tc_0,liver_tc_0,hippy_tc_0,depressed_tc_0,psycho_tc_0,druggy_tc_0,boozy_tc_0,paralyzed_tc_0,mono_tc_0,mono_dom_tc_0,hemi_tc_0,hemi_dom_tc_0,para_tc_0,quad_tc_0,tbi_tc_0,obese_tc_0,pressure_ulcer_tc_0,hemophilia_tc_0,hcbs_attdpcs_tc_1,hcbs_other_tc_1,hcbs_support_house_tc_1,hcbs_adult_day_tc_1,hcbs_pers_tc_1,hcbs_assist_tech_tc_1,oxygen_tc_1,hosp_bed_tc_1,chf_tc_1,heart_tc_1,copd_tc_1,pulmonar_tc_1,cancer_tc_1,ckd_tc_1,esrd_tc_1,lipidy_tc_1,diab_tc_1,alzh_tc_1,demented_tc_1,stroke_tc_1,hyper_tc_1,fall_tc_1,trans_tc_1,liver_tc_1,hippy_tc_1,depressed_tc_1,psycho_tc_1,druggy_tc_1,boozy_tc_1,paralyzed_tc_1,mono_tc_1,mono_dom_tc_1,hemi_tc_1,hemi_dom_tc_1,para_tc_1,quad_tc_1,tbi_tc_1,obese_tc_1,pressure_ulcer_tc_1,hemophilia_tc_1,hcbs_attdpcs_tc_2,hcbs_other_tc_2,hcbs_support_house_tc_2,hcbs_adult_day_tc_2,hcbs_pers_tc_2,hcbs_assist_tech_tc_2,oxygen_tc_2,hosp_bed_tc_2,chf_tc_2,heart_tc_2,copd_tc_2,pulmonar_tc_2,cancer_tc_2,...,trans_tc_6,liver_tc_6,hippy_tc_6,depressed_tc_6,psycho_tc_6,druggy_tc_6,boozy_tc_6,paralyzed_tc_6,mono_tc_6,mono_dom_tc_6,hemi_tc_6,hemi_dom_tc_6,para_tc_6,quad_tc_6,tbi_tc_6,obese_tc_6,pressure_ulcer_tc_6,hemophilia_tc_6,hcbs_attdpcs_tc_7,hcbs_other_tc_7,hcbs_support_house_tc_7,hcbs_adult_day_tc_7,hcbs_pers_tc_7,hcbs_assist_tech_tc_7,oxygen_tc_7,hosp_bed_tc_7,chf_tc_7,heart_tc_7,copd_tc_7,pulmonar_tc_7,cancer_tc_7,ckd_tc_7,esrd_tc_7,lipidy_tc_7,diab_tc_7,alzh_tc_7,demented_tc_7,stroke_tc_7,hyper_tc_7,fall_tc_7,trans_tc_7,liver_tc_7,hippy_tc_7,depressed_tc_7,psycho_tc_7,druggy_tc_7,boozy_tc_7,paralyzed_tc_7,mono_tc_7,mono_dom_tc_7,hemi_tc_7,hemi_dom_tc_7,para_tc_7,quad_tc_7,tbi_tc_7,obese_tc_7,pressure_ulcer_tc_7,hemophilia_tc_7,hcbs_attdpcs_tc_8,hcbs_other_tc_8,hcbs_support_house_tc_8,hcbs_adult_day_tc_8,hcbs_pers_tc_8,hcbs_assist_tech_tc_8,oxygen_tc_8,hosp_bed_tc_8,chf_tc_8,heart_tc_8,copd_tc_8,pulmonar_tc_8,cancer_tc_8,ckd_tc_8,esrd_tc_8,lipidy_tc_8,diab_tc_8,alzh_tc_8,demented_tc_8,stroke_tc_8,hyper_tc_8,fall_tc_8,trans_tc_8,liver_tc_8,hippy_tc_8,depressed_tc_8,psycho_tc_8,druggy_tc_8,boozy_tc_8,paralyzed_tc_8,mono_tc_8,mono_dom_tc_8,hemi_tc_8,hemi_dom_tc_8,para_tc_8,quad_tc_8,tbi_tc_8,obese_tc_8,pressure_ulcer_tc_8,hemophilia_tc_8,hcbs_attdpcs_tc_9,hcbs_other_tc_9,hcbs_support_house_tc_9,hcbs_adult_day_tc_9,hcbs_pers_tc_9,hcbs_assist_tech_tc_9,oxygen_tc_9,hosp_bed_tc_9,chf_tc_9,heart_tc_9,copd_tc_9,pulmonar_tc_9,cancer_tc_9,ckd_tc_9,esrd_tc_9,lipidy_tc_9,diab_tc_9,alzh_tc_9,demented_tc_9,stroke_tc_9,hyper_tc_9,fall_tc_9,trans_tc_9,liver_tc_9,hippy_tc_9,depressed_tc_9,psycho_tc_9,druggy_tc_9,boozy_tc_9,paralyzed_tc_9,mono_tc_9,mono_dom_tc_9,hemi_tc_9,hemi_dom_tc_9,para_tc_9,quad_tc_9,tbi_tc_9,obese_tc_9,pressure_ulcer_tc_9,hemophilia_tc_9,hcbs_attdpcs_tc_10,hcbs_other_tc_10,hcbs_support_house_tc_10,hcbs_adult_day_tc_10,hcbs_pers_tc_10,hcbs_assist_tech_tc_10,oxygen_tc_10,hosp_bed_tc_10,chf_tc_10,heart_tc_10,copd_tc_10,pulmonar_tc_10,cancer_tc_10,ckd_tc_10,esrd_tc_10,lipidy_tc_10,diab_tc_10,alzh_tc_10,demented_tc_10,stroke_tc_10,hyper_tc_10,fall_tc_10,trans_tc_10,liver_tc_10,hippy_tc_10,depressed_tc_10,psycho_tc_10,druggy_tc_10,boozy_tc_10,paralyzed_tc_10,mono_tc_10,mono_dom_tc_10,hemi_tc_10,hemi_dom_tc_10,para_tc_10,quad_tc_10,tbi_tc_10,obese_tc_10,pressure_ulcer_tc_10,hemophilia_tc_10,hcbs_attdpcs_tc_11,hcbs_other_tc_11,hcbs_support_house_tc_11,hcbs_adult_day_tc_11,hcbs_pers_tc_11,hcbs_assist_tech_tc_11,oxygen_tc_11,hosp_bed_tc_11,chf_tc_11,heart_tc_11,copd_tc_11,pulmonar_tc_11,cancer_tc_11,ckd_tc_11,esrd_tc_11,lipidy_tc_11,diab_tc_11,alzh_tc_11,demented_tc_11,stroke_tc_11,hyper_tc_11,fall_tc_11,trans_tc_11,liver_tc_11,hippy_tc_11,depressed_tc_11,psycho_tc_11,druggy_tc_11,boozy_tc_11,paralyzed_tc_11,mono_tc_11,mono_dom_tc_11,hemi_tc_11,hemi_dom_tc_11,para_tc_11,quad_tc_11,tbi_tc_11,obese_tc_11,pressure_ulcer_tc_11,hemophilia_tc_11,is_state_TN,is_state_FL,is_state_TX,is_state_KS,is_state_IA,is_lob_1,is_lob_3,is_lob_2,is_lob_8,is_group_0.0,is_group_3.0,is_group_2.0,is_group_1.0,is_group_-1.0,is_group_5.0,is_group_6.0,is_group_4.0,is_group_8.0,is_group_14.0,is_group_16.0,is_group_7.0,is_group_11.0,is_group_15.0,is_group_20.0,is_group_12.0,is_group_13.0,is_group_9.0,is_group_21.0,is_group_18.0,is_group_10.0,is_male,age
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,482.6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,2.0,0.0,0.0,614.68,29.65,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,356.68,0.0,0.0,0.0,0.0,356.68,0.0,0.0,0.0,170.86,0.0,0.0,0.0,3.0,0.0,0.0,7.0,0.0,0.0,3.0,0.0,1.0,0.0,0.0,436.88,9970.08,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,41.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,197.62,0.0,0.0,0.0,0.0,197.62,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,10.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,82.08,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,82.08,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,9.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,30.78,29.65,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,217.6,0.0,0.0,0.0,0.0,217.6,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,35.0
1,22807.25,0.0,0.0,2.0,0.0,0.0,0.0,13.0,0.0,2.0,0.0,11.0,0.0,0.0,622.72,29.65,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,393.88,0.0,0.0,4413.42,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,7.0,0.0,3.0,0.0,12.0,0.0,0.0,578.24,29.65,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1275.7,0.0,0.0,5813.68,1189.06,1189.06,1279.24,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,13.0,0.0,0.0,0.0,9.0,0.0,8.0,0.0,7.0,0.0,0.0,644.96,118.61,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,86.64,0.0,0.0,4930.47,0.0,0.0,1448.37,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,15.0,0.0,0.0,0.0,13.0,0.0,3.0,0.0,1.0,0.0,0.0,611.6,96.37,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,288.52,0.0,0.0,5104.99,0.0,0.0,463.35,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9.0,13.0,0.0,0.0,0.0,4.0,0.0,10.0,0.0,1.0,0.0,0.0,500.4,163.09,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1412.75,1011.22,0.0,0.0,7496.75,3921.38,0.0,4323.76,0.0,0.0,0.0,1412.75,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,15.0,0.0,0.0,0.0,1.0,0.0,5.0,0.0,2.0,0.0,0.0,633.84,29.95,0.0,0.0,0.0,0.0,0.0,0.0,301.08,0.0,1543.61,0.0,0.0,4948.99,0.0,0.0,1683.01,0.0,0.0,0.0,1214.73,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,15.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,2.0,0.0,0.0,600.48,29.95,0.0,0.0,0.0,0.0,0.0,0.0,417.21,417.21,328.88,0.0,0.0,4822.87,417.21,0.0,587.64,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,16.0,0.0,0.0,0.0,2.0,0.0,5.0,0.0,2.0,0.0,0.0,622.72,29.95,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,87.52,0.0,0.0,3513.2,0.0,0.0,1303.47,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,15.0,0.0,0.0,0.0,2.0,0.0,3.0,0.0,3.0,0.0,0.0,633.84,29.65,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,129.1,0.0,0.0,3144.16,0.0,0.0,715.11,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,64.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,2.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,121.62,0.0,0.0,0.0,0.0,0.0,0.0,0.0,154.78,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,71.0
3,4085.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,1214.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,1.0,0.0,0.0,0.0,0.0,1250.7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,116.82,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,1.0,0.0,0.0,0.0,0.0,1151.75,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,125.44,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,1255.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,1.0,0.0,0.0,0.0,0.0,1375.16,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.2,0.0,0.0,86.82,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1189.09,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1225.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8.0,0.0,0.0,5.0,0.0,1.0,0.0,0.0,0.0,0.0,1492.08,0.0,0.0,0.0,0.0,0.0,0.0,0.0,183.15,0.0,0.0,0.0,0.0,0.0,118.01,0.0,0.0,118.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.0,0.0,0.0,2.0,0.0,2.0,0.0,0.0,0.0,0.0,1230.82,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.97,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,63.0
4,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,335.28,214.83,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,131.28,0.0,0.0,0.0,131.28,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,330.2,194.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,563.88,214.83,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,650.24,207.9,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3220.14,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3220.14,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,751.84,214.83,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,183.06,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,143.64,207.9,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,430.92,200.97,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,404.52,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,471.96,207.9,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,243.26,0.0,0.0,0.0,0.0,0.0,463.26,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,553.44,215.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,323.54,106.61,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,64.0


In [63]:
training_df[export_cols].to_csv('./data/train_df.csv', index=False, header=False)

In [64]:
val_df[export_cols].to_csv('./data/val_df.csv', index=False, header=False)

In [65]:
test_df[export_cols].to_csv('./data/test_df.csv', index=False, header=False)