In [1]:
import os

import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
from tqdm import tqdm; tqdm.pandas()
# Widen the notebook's DataFrame display limits so wide/long previews are not truncated.
pd.set_option("display.max_columns", 202)
pd.set_option("display.max_rows", 300)

In [2]:
# Load the pickled train/test frames.
train = pd.read_pickle('data/train_original.pkl')
test = pd.read_pickle('data/test_original.pkl')

# Split the train columns by dtype: float64 columns are the features to encode,
# everything else is treated as a special (non-feature) column.
feature_cols = [name for name in train.columns if train[name].dtype == np.float64]
special_cols = [name for name in train.columns if name not in feature_cols]

In [3]:
def add_noise(series, noise_level):
    """Scale every element of `series` by an independent factor 1 + noise_level * N(0, 1).

    The Gaussian draws come from NumPy's global random state, so the result
    depends on the current seed. With `noise_level == 0` the values come back
    unchanged.
    """
    gaussian_draws = np.random.randn(len(series))
    scale = 1 + noise_level * gaussian_draws
    return series * scale
                     
def _merge_encoding(series, encoding, out_name, prior):
    """Look up the smoothed target mean for every value of `series`.

    `encoding` is a two-column frame: the key column (named like `series`) and
    an 'average' column. Values with no match in `encoding` receive `prior`.
    The result is named `out_name` and carries the index of `series`.
    """
    encoded = pd.merge(
        series.to_frame(series.name),
        encoding,
        on=series.name,
        how='left')['average'].rename(out_name).fillna(prior)
    # pd.merge does not keep the index so restore it
    encoded.index = series.index
    return encoded


def target_encode(trn_series=None,
                  tst_series=None,
                  target=None,
                  min_samples_leaf=1,
                  smoothing=1,
                  noise_level=0):
    """Smoothed mean-target encoding of one feature for train and test.

    For every distinct value of `trn_series` the encoding is a blend of the
    global target mean (the prior) and the target mean observed for that value:

        weight   = 1 / (1 + exp(-(count - min_samples_leaf) / smoothing))
        encoding = prior * (1 - weight) + value_mean * weight

    so the more rows a value has, the less the prior is taken into account.

    Parameters
    ----------
    trn_series : pd.Series
        Training feature; must be named and have the same length as `target`.
    tst_series : pd.Series
        Test feature; must have the same name as `trn_series`.
    target : pd.Series
        Named training target.
    min_samples_leaf : int
        Count at which a value's own mean and the prior are weighted equally.
    smoothing : float
        Scale of the sigmoid that moves the weight from the prior to the value mean.
    noise_level : float
        Relative Gaussian noise applied by `add_noise` to BOTH returned series.

    Returns
    -------
    (pd.Series, pd.Series)
        Encoded train and test series, both named ``<feature name>_mean`` and
        indexed like their inputs. Values unseen in training get the prior.

    Raises
    ------
    AssertionError
        If the lengths of `trn_series` and `target` differ, or the train/test
        series names differ.
    """
    assert len(trn_series) == len(target)
    assert trn_series.name == tst_series.name
    temp = pd.concat([trn_series, target], axis=1)
    # Per-value target mean and number of training rows
    stats = temp.groupby(by=trn_series.name)[target.name].agg(["mean", "count"])
    # Sigmoid weight; kept in its own variable so the `smoothing` argument is not overwritten
    weight = 1 / (1 + np.exp(-(stats["count"] - min_samples_leaf) / smoothing))
    prior = target.mean()
    # The bigger the count the less the prior is taken into account.
    # Building the lookup as [<feature name>, 'average'] directly avoids the old
    # rename of an 'index' column, which broke the merge for a feature named 'index'.
    encoding = (prior * (1 - weight) + stats["mean"] * weight).rename('average').reset_index()

    out_name = trn_series.name + '_mean'
    ft_trn_series = _merge_encoding(trn_series, encoding, out_name, prior)
    ft_tst_series = _merge_encoding(tst_series, encoding, out_name, prior)

    # Train noise is drawn first, then test noise (same order as before)
    return add_noise(ft_trn_series, noise_level), add_noise(ft_tst_series, noise_level)

In [4]:
# Seed the global NumPy RNG so the noise drawn inside target_encode -- and
# therefore the saved features -- are reproducible after a kernel restart.
NOISE_SEED = 42
np.random.seed(NOISE_SEED)

# Loop invariants: extract the raw arrays once instead of on every iteration.
train_values = train[feature_cols].values
test_values = test[feature_cols].values
train_target = train.target.values

# Collect the encoded columns in dicts and build the frames once at the end;
# inserting columns one by one into a DataFrame fragments it.
train_encoded = {}
test_encoded = {}

# total= gives tqdm a real progress bar / ETA for this multi-hour loop.
for i, col in tqdm(enumerate(feature_cols), total=len(feature_cols)):
    n_selected = i + 1  # pool the values of the first i+1 features

    # Row-major flattening: each row contributes its n_selected values in a run,
    # so the row's target is repeated n_selected times to stay aligned.
    flatten_train = pd.DataFrame({
        'original_values': train_values[:, :n_selected].ravel(),
        'target': np.repeat(train_target, n_selected),
    })
    flatten_test = pd.DataFrame({
        'original_values': test_values[:, :n_selected].ravel(),
    })

    trn_me, tst_me = target_encode(flatten_train['original_values'],
                                    flatten_test['original_values'],
                                    target=flatten_train.target,
                                    min_samples_leaf=100,
                                    smoothing=10,
                                    noise_level=0.01)

    flatten_train['target_enc'] = trn_me.values
    flatten_test['target_enc'] = tst_me.values
    # Noise is drawn per row, so average the noisy encodings back to one number per value
    trn_me_mapping = flatten_train.groupby('original_values')['target_enc'].mean()
    tst_me_mapping = flatten_test.groupby('original_values')['target_enc'].mean()

    train_encoded[col+'_cumulative_me'] = train[col].map(trn_me_mapping)
    test_encoded[col+'_cumulative_me'] = test[col].map(tst_me_mapping)

cum_target_encode_train = pd.DataFrame(train_encoded)
cum_target_encode_test = pd.DataFrame(test_encoded)

cum_target_encode_train.head(3)

200it [4:39:26, 166.37s/it]


Unnamed: 0,var_0_cumulative_me,var_1_cumulative_me,var_2_cumulative_me,var_3_cumulative_me,var_4_cumulative_me,var_5_cumulative_me,var_6_cumulative_me,var_7_cumulative_me,var_8_cumulative_me,var_9_cumulative_me,var_10_cumulative_me,var_11_cumulative_me,var_12_cumulative_me,var_13_cumulative_me,var_14_cumulative_me,var_15_cumulative_me,var_16_cumulative_me,var_17_cumulative_me,var_18_cumulative_me,var_19_cumulative_me,var_20_cumulative_me,var_21_cumulative_me,var_22_cumulative_me,var_23_cumulative_me,var_24_cumulative_me,var_25_cumulative_me,var_26_cumulative_me,var_27_cumulative_me,var_28_cumulative_me,var_29_cumulative_me,var_30_cumulative_me,var_31_cumulative_me,var_32_cumulative_me,var_33_cumulative_me,var_34_cumulative_me,var_35_cumulative_me,var_36_cumulative_me,var_37_cumulative_me,var_38_cumulative_me,var_39_cumulative_me,var_40_cumulative_me,var_41_cumulative_me,var_42_cumulative_me,var_43_cumulative_me,var_44_cumulative_me,var_45_cumulative_me,var_46_cumulative_me,var_47_cumulative_me,var_48_cumulative_me,var_49_cumulative_me,var_50_cumulative_me,var_51_cumulative_me,var_52_cumulative_me,var_53_cumulative_me,var_54_cumulative_me,var_55_cumulative_me,var_56_cumulative_me,var_57_cumulative_me,var_58_cumulative_me,var_59_cumulative_me,var_60_cumulative_me,var_61_cumulative_me,var_62_cumulative_me,var_63_cumulative_me,var_64_cumulative_me,var_65_cumulative_me,var_66_cumulative_me,var_67_cumulative_me,var_68_cumulative_me,var_69_cumulative_me,var_70_cumulative_me,var_71_cumulative_me,var_72_cumulative_me,var_73_cumulative_me,var_74_cumulative_me,var_75_cumulative_me,var_76_cumulative_me,var_77_cumulative_me,var_78_cumulative_me,var_79_cumulative_me,var_80_cumulative_me,var_81_cumulative_me,var_82_cumulative_me,var_83_cumulative_me,var_84_cumulative_me,var_85_cumulative_me,var_86_cumulative_me,var_87_cumulative_me,var_88_cumulative_me,var_89_cumulative_me,var_90_cumulative_me,var_91_cumulative_me,var_92_cumulative_me,var_93_cumulative_me,var_94_cumulative_me,var_
95_cumulative_me,var_96_cumulative_me,var_97_cumulative_me,var_98_cumulative_me,var_99_cumulative_me,var_100_cumulative_me,var_101_cumulative_me,var_102_cumulative_me,var_103_cumulative_me,var_104_cumulative_me,var_105_cumulative_me,var_106_cumulative_me,var_107_cumulative_me,var_108_cumulative_me,var_109_cumulative_me,var_110_cumulative_me,var_111_cumulative_me,var_112_cumulative_me,var_113_cumulative_me,var_114_cumulative_me,var_115_cumulative_me,var_116_cumulative_me,var_117_cumulative_me,var_118_cumulative_me,var_119_cumulative_me,var_120_cumulative_me,var_121_cumulative_me,var_122_cumulative_me,var_123_cumulative_me,var_124_cumulative_me,var_125_cumulative_me,var_126_cumulative_me,var_127_cumulative_me,var_128_cumulative_me,var_129_cumulative_me,var_130_cumulative_me,var_131_cumulative_me,var_132_cumulative_me,var_133_cumulative_me,var_134_cumulative_me,var_135_cumulative_me,var_136_cumulative_me,var_137_cumulative_me,var_138_cumulative_me,var_139_cumulative_me,var_140_cumulative_me,var_141_cumulative_me,var_142_cumulative_me,var_143_cumulative_me,var_144_cumulative_me,var_145_cumulative_me,var_146_cumulative_me,var_147_cumulative_me,var_148_cumulative_me,var_149_cumulative_me,var_150_cumulative_me,var_151_cumulative_me,var_152_cumulative_me,var_153_cumulative_me,var_154_cumulative_me,var_155_cumulative_me,var_156_cumulative_me,var_157_cumulative_me,var_158_cumulative_me,var_159_cumulative_me,var_160_cumulative_me,var_161_cumulative_me,var_162_cumulative_me,var_163_cumulative_me,var_164_cumulative_me,var_165_cumulative_me,var_166_cumulative_me,var_167_cumulative_me,var_168_cumulative_me,var_169_cumulative_me,var_170_cumulative_me,var_171_cumulative_me,var_172_cumulative_me,var_173_cumulative_me,var_174_cumulative_me,var_175_cumulative_me,var_176_cumulative_me,var_177_cumulative_me,var_178_cumulative_me,var_179_cumulative_me,var_180_cumulative_me,var_181_cumulative_me,var_182_cumulative_me,var_183_cumulative_me,var_184_cumulative_me,var_185_cumulative_me,var_186
_cumulative_me,var_187_cumulative_me,var_188_cumulative_me,var_189_cumulative_me,var_190_cumulative_me,var_191_cumulative_me,var_192_cumulative_me,var_193_cumulative_me,var_194_cumulative_me,var_195_cumulative_me,var_196_cumulative_me,var_197_cumulative_me,var_198_cumulative_me,var_199_cumulative_me
0,0.099943,0.100697,0.100198,0.100597,0.099891,0.10048,0.100309,0.100461,0.099851,0.100194,0.100935,0.100414,0.100369,0.100188,0.100568,0.100717,0.100786,0.101882,0.100058,0.100222,0.10062,0.10059,0.100864,0.100406,0.100584,0.100652,0.100703,0.100577,0.10006,0.100429,0.100882,0.10056,0.100591,0.100544,0.100564,0.100266,0.100297,0.100576,0.10065,0.100483,0.100672,0.099816,0.09997,0.100454,0.096191,0.10055,0.100578,0.100112,0.100823,0.100928,0.100464,0.100788,0.100624,0.101227,0.100387,0.100886,0.093583,0.094558,0.100378,0.100344,0.100842,0.101889,0.100604,0.100484,0.100157,0.10063,0.094106,0.101021,0.094897,0.100395,0.099945,0.09481,0.09548,0.100008,0.100191,0.100361,0.100643,0.100597,0.111273,0.100262,0.099882,0.104947,0.100711,0.099713,0.100776,0.100765,0.10088,0.100577,0.101908,0.100703,0.10031,0.085671,0.114259,0.094402,0.0998,0.111254,0.097285,0.100949,0.10327,0.100402,0.104192,0.100751,0.100615,0.103511,0.099578,0.113399,0.09869,0.100516,0.1027,0.100473,0.099939,0.095707,0.082744,0.100296,0.112247,0.103222,0.070575,0.100367,0.100548,0.104205,0.1009,0.10079,0.132452,0.087465,0.097157,0.122051,0.086754,0.101944,0.100493,0.099737,0.106656,0.114932,0.105192,0.118697,0.156138,0.104855,0.100531,0.100363,0.078241,0.100853,0.078872,0.100796,0.100339,0.097102,0.107404,0.127317,0.078962,0.100825,0.106908,0.100654,0.100465,0.070265,0.125812,0.100187,0.078046,0.059987,0.092652,0.100457,0.100136,0.11938,0.104405,0.064698,0.114897,0.09943,0.115155,0.117018,0.055426,0.115439,0.141358,0.072398,0.100587,0.100588,0.100391,0.068937,0.100517,0.096731,0.100488,0.098202,0.099453,0.114529,0.100364,0.107063,0.109118,0.083437,0.100077,0.067248,0.086701,0.100589,0.099134,0.12363,0.098141,0.082123,0.087506,0.074237,0.100408,0.103475,0.149839,0.13129,0.090398,0.117703
1,0.100409,0.100554,0.099951,0.10046,0.100649,0.10067,0.099967,0.100122,0.100268,0.100838,0.100498,0.100562,0.100162,0.100027,0.100721,0.100578,0.100741,0.100409,0.100551,0.102307,0.100885,0.100732,0.100764,0.10064,0.100438,0.104343,0.100871,0.100211,0.100458,0.100699,0.099708,0.100574,0.10024,0.100487,0.100237,0.100648,0.100658,0.100216,0.100136,0.100618,0.100643,0.100854,0.100358,0.098757,0.100377,0.101318,0.101095,0.100431,0.101951,0.100864,0.092944,0.100494,0.100019,0.100962,0.100987,0.100537,0.100275,0.100416,0.100662,0.100268,0.099985,0.100804,0.10075,0.100751,0.09903,0.100307,0.103579,0.100559,0.092484,0.100494,0.100831,0.10001,0.100277,0.100366,0.101233,0.101951,0.099525,0.100629,0.10342,0.10373,0.099177,0.10071,0.102581,0.10017,0.100401,0.133315,0.099621,0.099361,0.106258,0.09985,0.099551,0.110828,0.101197,0.10977,0.081071,0.100737,0.100436,0.100406,0.100385,0.055778,0.100644,0.098959,0.100157,0.091988,0.100526,0.099081,0.080153,0.101084,0.111981,0.097761,0.085428,0.134118,0.077153,0.104145,0.113406,0.079628,0.085178,0.101126,0.100577,0.079483,0.100247,0.110374,0.096313,0.09138,0.106423,0.08611,0.104207,0.101442,0.095763,0.100416,0.149598,0.107349,0.078988,0.101798,0.075288,0.100028,0.101645,0.101104,0.100507,0.100239,0.139411,0.081341,0.103115,0.105235,0.091025,0.088854,0.125127,0.053288,0.097186,0.100658,0.100355,0.100617,0.067518,0.100475,0.086361,0.084168,0.125128,0.100333,0.100987,0.065561,0.100387,0.103817,0.137993,0.100047,0.100401,0.097471,0.149478,0.088776,0.102933,0.085683,0.105757,0.128052,0.100737,0.081392,0.100286,0.065196,0.101062,0.101564,0.100677,0.08122,0.100216,0.074139,0.100562,0.078775,0.100447,0.099675,0.123531,0.100828,0.119602,0.10221,0.090327,0.083012,0.095561,0.08621,0.101878,0.098841,0.072518,0.102596,0.100858,0.073545
2,0.100876,0.101999,0.100859,0.100239,0.101386,0.10063,0.100672,0.101056,0.099819,0.100412,0.101045,0.100189,0.100416,0.100476,0.100332,0.100193,0.100078,0.101194,0.100287,0.101271,0.101167,0.100698,0.100558,0.100797,0.10053,0.100484,0.100699,0.100791,0.100376,0.100521,0.101373,0.100625,0.100698,0.100749,0.100453,0.100381,0.100856,0.10029,0.100447,0.100179,0.099733,0.100374,0.100542,0.100001,0.100137,0.100343,0.101799,0.100891,0.100309,0.102731,0.100597,0.100864,0.100673,0.103066,0.099905,0.099363,0.100206,0.098619,0.101105,0.100242,0.10062,0.10078,0.10081,0.102077,0.100819,0.100437,0.099881,0.100653,0.074776,0.100278,0.101534,0.101713,0.100276,0.103479,0.100433,0.100119,0.100324,0.100865,0.095063,0.100783,0.093453,0.100613,0.098475,0.100328,0.09591,0.100644,0.100764,0.100577,0.080069,0.100493,0.100216,0.106304,0.080513,0.099124,0.115036,0.099591,0.10051,0.100382,0.102981,0.099692,0.098443,0.101228,0.099752,0.119558,0.100431,0.101023,0.077139,0.097303,0.044762,0.10045,0.098411,0.102802,0.068341,0.092403,0.083938,0.1221,0.102631,0.100095,0.100281,0.105104,0.10022,0.105614,0.102577,0.100517,0.115057,0.106167,0.124608,0.100474,0.097848,0.100818,0.106691,0.089629,0.090167,0.056314,0.076846,0.100387,0.10075,0.09995,0.060178,0.081211,0.089309,0.099857,0.114666,0.083304,0.078068,0.108772,0.149106,0.110835,0.089423,0.100483,0.128886,0.140528,0.075399,0.101361,0.100619,0.114869,0.067527,0.097929,0.100571,0.103073,0.071535,0.101406,0.09773,0.118342,0.096721,0.100336,0.10313,0.100366,0.14113,0.152688,0.100919,0.097649,0.083437,0.100663,0.100997,0.061443,0.123463,0.083331,0.10065,0.091086,0.099779,0.114854,0.100828,0.081334,0.100445,0.099909,0.102391,0.100273,0.126374,0.110678,0.118647,0.108019,0.091814,0.085786,0.100565,0.142494,0.100795,0.117915,0.098048,0.137029


In [5]:
# Select the per-variable encodings by suffix rather than taking every column:
# once the target_encode_* aggregates have been added to the frame, re-running
# this cell would otherwise feed those aggregates back into themselves.
feature_cols = [c for c in cum_target_encode_train.columns if c.endswith('_cumulative_me')]

In [6]:
# Row-wise summary statistics over the per-variable encodings, added to both frames.
for df in (cum_target_encode_train, cum_target_encode_test):
    encoded = df[feature_cols]  # slice once; the new columns are not part of feature_cols
    df['target_encode_max'] = encoded.max(axis=1)
    df['target_encode_min'] = encoded.min(axis=1)
    df['target_encode_sum'] = encoded.sum(axis=1)
    df['target_encode_std'] = encoded.std(axis=1)

In [7]:
# With the four aggregates the frame has 204 columns, more than the
# display.max_columns=202 limit, so pandas elides the middle columns with "...".
# Lift the limit for this preview only.
with pd.option_context('display.max_columns', None):
    display(cum_target_encode_train.head(3))

Unnamed: 0,var_0_cumulative_me,var_1_cumulative_me,var_2_cumulative_me,var_3_cumulative_me,var_4_cumulative_me,var_5_cumulative_me,var_6_cumulative_me,var_7_cumulative_me,var_8_cumulative_me,var_9_cumulative_me,var_10_cumulative_me,var_11_cumulative_me,var_12_cumulative_me,var_13_cumulative_me,var_14_cumulative_me,var_15_cumulative_me,var_16_cumulative_me,var_17_cumulative_me,var_18_cumulative_me,var_19_cumulative_me,var_20_cumulative_me,var_21_cumulative_me,var_22_cumulative_me,var_23_cumulative_me,var_24_cumulative_me,var_25_cumulative_me,var_26_cumulative_me,var_27_cumulative_me,var_28_cumulative_me,var_29_cumulative_me,var_30_cumulative_me,var_31_cumulative_me,var_32_cumulative_me,var_33_cumulative_me,var_34_cumulative_me,var_35_cumulative_me,var_36_cumulative_me,var_37_cumulative_me,var_38_cumulative_me,var_39_cumulative_me,var_40_cumulative_me,var_41_cumulative_me,var_42_cumulative_me,var_43_cumulative_me,var_44_cumulative_me,var_45_cumulative_me,var_46_cumulative_me,var_47_cumulative_me,var_48_cumulative_me,var_49_cumulative_me,var_50_cumulative_me,var_51_cumulative_me,var_52_cumulative_me,var_53_cumulative_me,var_54_cumulative_me,var_55_cumulative_me,var_56_cumulative_me,var_57_cumulative_me,var_58_cumulative_me,var_59_cumulative_me,var_60_cumulative_me,var_61_cumulative_me,var_62_cumulative_me,var_63_cumulative_me,var_64_cumulative_me,var_65_cumulative_me,var_66_cumulative_me,var_67_cumulative_me,var_68_cumulative_me,var_69_cumulative_me,var_70_cumulative_me,var_71_cumulative_me,var_72_cumulative_me,var_73_cumulative_me,var_74_cumulative_me,var_75_cumulative_me,var_76_cumulative_me,var_77_cumulative_me,var_78_cumulative_me,var_79_cumulative_me,var_80_cumulative_me,var_81_cumulative_me,var_82_cumulative_me,var_83_cumulative_me,var_84_cumulative_me,var_85_cumulative_me,var_86_cumulative_me,var_87_cumulative_me,var_88_cumulative_me,var_89_cumulative_me,var_90_cumulative_me,var_91_cumulative_me,var_92_cumulative_me,var_93_cumulative_me,var_94_cumulative_me,var_
95_cumulative_me,var_96_cumulative_me,var_97_cumulative_me,var_98_cumulative_me,var_99_cumulative_me,var_100_cumulative_me,...,var_103_cumulative_me,var_104_cumulative_me,var_105_cumulative_me,var_106_cumulative_me,var_107_cumulative_me,var_108_cumulative_me,var_109_cumulative_me,var_110_cumulative_me,var_111_cumulative_me,var_112_cumulative_me,var_113_cumulative_me,var_114_cumulative_me,var_115_cumulative_me,var_116_cumulative_me,var_117_cumulative_me,var_118_cumulative_me,var_119_cumulative_me,var_120_cumulative_me,var_121_cumulative_me,var_122_cumulative_me,var_123_cumulative_me,var_124_cumulative_me,var_125_cumulative_me,var_126_cumulative_me,var_127_cumulative_me,var_128_cumulative_me,var_129_cumulative_me,var_130_cumulative_me,var_131_cumulative_me,var_132_cumulative_me,var_133_cumulative_me,var_134_cumulative_me,var_135_cumulative_me,var_136_cumulative_me,var_137_cumulative_me,var_138_cumulative_me,var_139_cumulative_me,var_140_cumulative_me,var_141_cumulative_me,var_142_cumulative_me,var_143_cumulative_me,var_144_cumulative_me,var_145_cumulative_me,var_146_cumulative_me,var_147_cumulative_me,var_148_cumulative_me,var_149_cumulative_me,var_150_cumulative_me,var_151_cumulative_me,var_152_cumulative_me,var_153_cumulative_me,var_154_cumulative_me,var_155_cumulative_me,var_156_cumulative_me,var_157_cumulative_me,var_158_cumulative_me,var_159_cumulative_me,var_160_cumulative_me,var_161_cumulative_me,var_162_cumulative_me,var_163_cumulative_me,var_164_cumulative_me,var_165_cumulative_me,var_166_cumulative_me,var_167_cumulative_me,var_168_cumulative_me,var_169_cumulative_me,var_170_cumulative_me,var_171_cumulative_me,var_172_cumulative_me,var_173_cumulative_me,var_174_cumulative_me,var_175_cumulative_me,var_176_cumulative_me,var_177_cumulative_me,var_178_cumulative_me,var_179_cumulative_me,var_180_cumulative_me,var_181_cumulative_me,var_182_cumulative_me,var_183_cumulative_me,var_184_cumulative_me,var_185_cumulative_me,var_186_cumulative_me,var_187_cumulative_me,var
_188_cumulative_me,var_189_cumulative_me,var_190_cumulative_me,var_191_cumulative_me,var_192_cumulative_me,var_193_cumulative_me,var_194_cumulative_me,var_195_cumulative_me,var_196_cumulative_me,var_197_cumulative_me,var_198_cumulative_me,var_199_cumulative_me,target_encode_max,target_encode_min,target_encode_sum,target_encode_std
0,0.099943,0.100697,0.100198,0.100597,0.099891,0.10048,0.100309,0.100461,0.099851,0.100194,0.100935,0.100414,0.100369,0.100188,0.100568,0.100717,0.100786,0.101882,0.100058,0.100222,0.10062,0.10059,0.100864,0.100406,0.100584,0.100652,0.100703,0.100577,0.10006,0.100429,0.100882,0.10056,0.100591,0.100544,0.100564,0.100266,0.100297,0.100576,0.10065,0.100483,0.100672,0.099816,0.09997,0.100454,0.096191,0.10055,0.100578,0.100112,0.100823,0.100928,0.100464,0.100788,0.100624,0.101227,0.100387,0.100886,0.093583,0.094558,0.100378,0.100344,0.100842,0.101889,0.100604,0.100484,0.100157,0.10063,0.094106,0.101021,0.094897,0.100395,0.099945,0.09481,0.09548,0.100008,0.100191,0.100361,0.100643,0.100597,0.111273,0.100262,0.099882,0.104947,0.100711,0.099713,0.100776,0.100765,0.10088,0.100577,0.101908,0.100703,0.10031,0.085671,0.114259,0.094402,0.0998,0.111254,0.097285,0.100949,0.10327,0.100402,0.104192,...,0.103511,0.099578,0.113399,0.09869,0.100516,0.1027,0.100473,0.099939,0.095707,0.082744,0.100296,0.112247,0.103222,0.070575,0.100367,0.100548,0.104205,0.1009,0.10079,0.132452,0.087465,0.097157,0.122051,0.086754,0.101944,0.100493,0.099737,0.106656,0.114932,0.105192,0.118697,0.156138,0.104855,0.100531,0.100363,0.078241,0.100853,0.078872,0.100796,0.100339,0.097102,0.107404,0.127317,0.078962,0.100825,0.106908,0.100654,0.100465,0.070265,0.125812,0.100187,0.078046,0.059987,0.092652,0.100457,0.100136,0.11938,0.104405,0.064698,0.114897,0.09943,0.115155,0.117018,0.055426,0.115439,0.141358,0.072398,0.100587,0.100588,0.100391,0.068937,0.100517,0.096731,0.100488,0.098202,0.099453,0.114529,0.100364,0.107063,0.109118,0.083437,0.100077,0.067248,0.086701,0.100589,0.099134,0.12363,0.098141,0.082123,0.087506,0.074237,0.100408,0.103475,0.149839,0.13129,0.090398,0.117703,0.156138,0.055426,20.08022,0.012205
1,0.100409,0.100554,0.099951,0.10046,0.100649,0.10067,0.099967,0.100122,0.100268,0.100838,0.100498,0.100562,0.100162,0.100027,0.100721,0.100578,0.100741,0.100409,0.100551,0.102307,0.100885,0.100732,0.100764,0.10064,0.100438,0.104343,0.100871,0.100211,0.100458,0.100699,0.099708,0.100574,0.10024,0.100487,0.100237,0.100648,0.100658,0.100216,0.100136,0.100618,0.100643,0.100854,0.100358,0.098757,0.100377,0.101318,0.101095,0.100431,0.101951,0.100864,0.092944,0.100494,0.100019,0.100962,0.100987,0.100537,0.100275,0.100416,0.100662,0.100268,0.099985,0.100804,0.10075,0.100751,0.09903,0.100307,0.103579,0.100559,0.092484,0.100494,0.100831,0.10001,0.100277,0.100366,0.101233,0.101951,0.099525,0.100629,0.10342,0.10373,0.099177,0.10071,0.102581,0.10017,0.100401,0.133315,0.099621,0.099361,0.106258,0.09985,0.099551,0.110828,0.101197,0.10977,0.081071,0.100737,0.100436,0.100406,0.100385,0.055778,0.100644,...,0.091988,0.100526,0.099081,0.080153,0.101084,0.111981,0.097761,0.085428,0.134118,0.077153,0.104145,0.113406,0.079628,0.085178,0.101126,0.100577,0.079483,0.100247,0.110374,0.096313,0.09138,0.106423,0.08611,0.104207,0.101442,0.095763,0.100416,0.149598,0.107349,0.078988,0.101798,0.075288,0.100028,0.101645,0.101104,0.100507,0.100239,0.139411,0.081341,0.103115,0.105235,0.091025,0.088854,0.125127,0.053288,0.097186,0.100658,0.100355,0.100617,0.067518,0.100475,0.086361,0.084168,0.125128,0.100333,0.100987,0.065561,0.100387,0.103817,0.137993,0.100047,0.100401,0.097471,0.149478,0.088776,0.102933,0.085683,0.105757,0.128052,0.100737,0.081392,0.100286,0.065196,0.101062,0.101564,0.100677,0.08122,0.100216,0.074139,0.100562,0.078775,0.100447,0.099675,0.123531,0.100828,0.119602,0.10221,0.090327,0.083012,0.095561,0.08621,0.101878,0.098841,0.072518,0.102596,0.100858,0.073545,0.149598,0.053288,19.847345,0.012474
2,0.100876,0.101999,0.100859,0.100239,0.101386,0.10063,0.100672,0.101056,0.099819,0.100412,0.101045,0.100189,0.100416,0.100476,0.100332,0.100193,0.100078,0.101194,0.100287,0.101271,0.101167,0.100698,0.100558,0.100797,0.10053,0.100484,0.100699,0.100791,0.100376,0.100521,0.101373,0.100625,0.100698,0.100749,0.100453,0.100381,0.100856,0.10029,0.100447,0.100179,0.099733,0.100374,0.100542,0.100001,0.100137,0.100343,0.101799,0.100891,0.100309,0.102731,0.100597,0.100864,0.100673,0.103066,0.099905,0.099363,0.100206,0.098619,0.101105,0.100242,0.10062,0.10078,0.10081,0.102077,0.100819,0.100437,0.099881,0.100653,0.074776,0.100278,0.101534,0.101713,0.100276,0.103479,0.100433,0.100119,0.100324,0.100865,0.095063,0.100783,0.093453,0.100613,0.098475,0.100328,0.09591,0.100644,0.100764,0.100577,0.080069,0.100493,0.100216,0.106304,0.080513,0.099124,0.115036,0.099591,0.10051,0.100382,0.102981,0.099692,0.098443,...,0.119558,0.100431,0.101023,0.077139,0.097303,0.044762,0.10045,0.098411,0.102802,0.068341,0.092403,0.083938,0.1221,0.102631,0.100095,0.100281,0.105104,0.10022,0.105614,0.102577,0.100517,0.115057,0.106167,0.124608,0.100474,0.097848,0.100818,0.106691,0.089629,0.090167,0.056314,0.076846,0.100387,0.10075,0.09995,0.060178,0.081211,0.089309,0.099857,0.114666,0.083304,0.078068,0.108772,0.149106,0.110835,0.089423,0.100483,0.128886,0.140528,0.075399,0.101361,0.100619,0.114869,0.067527,0.097929,0.100571,0.103073,0.071535,0.101406,0.09773,0.118342,0.096721,0.100336,0.10313,0.100366,0.14113,0.152688,0.100919,0.097649,0.083437,0.100663,0.100997,0.061443,0.123463,0.083331,0.10065,0.091086,0.099779,0.114854,0.100828,0.081334,0.100445,0.099909,0.102391,0.100273,0.126374,0.110678,0.118647,0.108019,0.091814,0.085786,0.100565,0.142494,0.100795,0.117915,0.098048,0.137029,0.152688,0.044762,20.050791,0.013315


In [8]:
# Sanity check: rank correlation between the target and the summed encodings.
tmp = train[['target']].assign(
    target_encode_sum=cum_target_encode_train['target_encode_sum'])
tmp.corr('spearman')

Unnamed: 0,target,target_encode_sum
target,1.0,0.481714
target_encode_sum,0.481714,1.0


In [10]:
# Make sure the output directory exists before writing: a missing folder would
# raise here and throw away the result of the long encoding loop.
os.makedirs('features', exist_ok=True)
cum_target_encode_train.to_pickle('features/cum_target_encode_train.pkl')
cum_target_encode_test.to_pickle('features/cum_target_encode_test.pkl')