# 모듈 로드

In [2]:
import numpy as np
import pandas as pd
import warnings
import gc
from tqdm import tqdm_notebook as tqdm
import lightgbm as lgb
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
from sklearn.metrics import roc_auc_score
warnings.filterwarnings("ignore")
gc.enable()

In [3]:
# Widen pandas' display limits so long/wide frames are rendered in full.
# Fully-qualified option names are used on purpose: the short forms
# ('max_rows', 'max_columns') are resolved by pattern matching and can match
# more than one option (e.g. 'styler.render.max_rows'), which raises
# OptionError on pandas versions that define those options.
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_colwidth', 500)
pd.set_option('display.max_columns', 500)

# 데이터 로드

In [4]:
# Read the train and test tables from ./data.
train = pd.read_csv('./data/train.csv')
test = pd.read_csv('./data/test.csv')
# Last expression of the cell: display both shapes.
train.shape, test.shape

((200000, 202), (200000, 201))

In [5]:
# Feature columns: everything after the first two columns of `train`
# (presumably ID_code and target — confirm against the CSV header).
col_list = train.columns[2:]

In [6]:
# Split the training rows by label: train_0 holds target == 0, train_1 holds target == 1.
train_0, train_1 = (train[train['target'] == label] for label in (0, 1))

In [7]:
# Load two saved index arrays; they are used below as positional (iloc)
# indices into `test`. Presumably they mark the public / private leaderboard
# rows — confirm against whatever produced these files.
pb_idx = np.load('./data_temp/public_LB.npy')
pv_idx = np.load('./data_temp/private_LB.npy')

In [8]:
# Select the test rows at the saved positional indices, ordered by index label.
test_pb = test.iloc[pb_idx].sort_index().copy()
test_pv = test.iloc[pv_idx].sort_index().copy()

# Stack the two subsets row-wise, keeping their original index labels.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
# pd.concat with default arguments produces the same result.
test_real = pd.concat([test_pb, test_pv])

# Unique Data

In [8]:
# Per-feature uniqueness flags: for every feature column, 1 where the row's
# value occurs exactly once in that column of `train`, 0 where it occurs more
# than once.
# The flag columns are collected first and attached in a single concat, so no
# column is assigned into a frame sliced from `train` (SettingWithCopyWarning)
# and the frame is not grown one column at a time (fragmentation warning).
unique_flags = {}
for col in tqdm(col_list):
    value_freq = train[col].value_counts()
    unique_flags[col] = train[col].map(((value_freq == 1) * 1).to_dict())

unique_df = pd.concat([train[['ID_code']], pd.DataFrame(unique_flags)], axis=1)

HBox(children=(IntProgress(value=0, max=200), HTML(value='')))




In [14]:
# Feature values masked by the flags in unique_df: each value is multiplied by
# its flag and rounded to 4 decimals, so rows flagged 0 become 0 and rows
# flagged 1 keep their (rounded) value.
# Columns are collected first and attached in a single concat, so no column is
# assigned into a frame sliced from `train` and the frame is not grown one
# column at a time.
masked_values = {}
for col in tqdm(col_list):
    masked_values[col] = np.around(train[col] * unique_df[col], 4)

unique_train = pd.concat([train[['ID_code', 'target']], pd.DataFrame(masked_values)], axis=1)

HBox(children=(IntProgress(value=0, max=200), HTML(value='')))




# 삐꾸 데이터 비교

In [97]:
# Load a saved table; the filters in the next cell read its ID_code, target
# and pred columns.
bbiggu = pd.read_csv('./data_temp/new_bbiggu.csv')

In [98]:
# IDs of the rows whose prediction exceeds 0.1, split by true label:
#   bbiggu_ID_code -> target == 0, normal_ID_code -> target == 1,
#   new_ID_code    -> both labels together.
high_pred = bbiggu['pred'] > 0.1
bbiggu_ID_code = bbiggu.loc[(bbiggu['target'] == 0) & high_pred, 'ID_code']
normal_ID_code = bbiggu.loc[(bbiggu['target'] == 1) & high_pred, 'ID_code']
new_ID_code = bbiggu.loc[high_pred, 'ID_code']

In [99]:
# Pull the full rows of `train` for each ID group (independent copies).
# Note: this rebinds `bbiggu` from the loaded table to a subset of `train`.
bbiggu, normal, new_train = (
    train[train['ID_code'].isin(id_codes)].copy()
    for id_codes in (bbiggu_ID_code, normal_ID_code, new_ID_code)
)

In [94]:
# For each feature, collect the set of values that occur exactly three times
# in new_train.
unique_root = {}
for col in tqdm(col_list):
    # Count once and reuse; value_counts() was previously computed twice per column.
    value_freq = new_train[col].value_counts()
    unique_root[col] = set(value_freq[value_freq == 3].index)

HBox(children=(IntProgress(value=0, max=200), HTML(value='')))




In [95]:
# For each feature, intersect the values in unique_root with the values seen
# in each label group.
# `bbiggu` is built from rows with target == 0 and `normal` from rows with
# target == 1 (see the filters that produce their ID lists), so each group is
# stored under the key of its true label. Previously the two keys were
# swapped, which mislabelled every downstream plot.
# NOTE(review): confirm the swap was not intentional before relying on it.
unique_set = {}

for col in tqdm(col_list):
    root_values = unique_root[col]
    target_0 = root_values.intersection(set(bbiggu[col].unique()))
    target_1 = root_values.intersection(set(normal[col].unique()))

    unique_set[col] = {'target_0': target_0, 'target_1': target_1}

HBox(children=(IntProgress(value=0, max=200), HTML(value='')))




In [None]:
# Plot, four features per figure (2x2 grid), the distribution of the values
# stored in unique_set under 'target_0' (blue) and 'target_1' (red).
# The loop header had been commented out while its body stayed indented, which
# makes the cell fail with IndentationError; the loop is restored here as a
# direct iteration over complete groups of four columns (a trailing partial
# group is skipped, as before).
for start in range(0, len(col_list) - len(col_list) % 4, 4):
    print("showing var_{} ~ var_{}".format(start, start + 3))
    plt.figure(figsize=(15, 8))

    for j, col in enumerate(col_list[start:start + 4], start=1):
        plt.subplot(2, 2, j)
        sns.distplot(list(unique_set[col]['target_0']), label='t=0', color='b', bins=100)
        sns.distplot(list(unique_set[col]['target_1']), label='t=1', color='r', bins=100)
        plt.legend()
        plt.title(col)

    plt.show()

In [79]:
# AUC of the saved predictions against the true labels.
# In top-to-bottom order `bbiggu` has already been rebound to rows of `train`,
# which has no `pred` column, so `bbiggu.pred` would raise AttributeError.
# The predictions table is therefore re-read under its own name.
# NOTE(review): assumes new_bbiggu.csv is the file this score was originally
# computed from — confirm.
bbiggu_pred = pd.read_csv('./data_temp/new_bbiggu.csv')
roc_auc_score(bbiggu_pred.target, bbiggu_pred.pred)

0.9116527218981605

In [82]:
# Repeats the AUC computation of the preceding cell; the saved output differs
# slightly, so it was presumably run against a different version of the
# predictions.
# NOTE(review): in top-to-bottom order `bbiggu` was rebound to rows of `train`
# a few cells earlier — confirm a `pred` column is still available here.
roc_auc_score(bbiggu.target, bbiggu.pred)

0.9116595156590153

In [100]:
# Preview only the first rows instead of rendering the whole frame, which
# keeps the saved notebook small (display.max_rows is raised to 500 above).
bbiggu.head(10)

Unnamed: 0,ID_code,target,var_0,var_1,var_2,var_3,var_4,var_5,var_6,var_7,var_8,var_9,var_10,var_11,var_12,var_13,var_14,var_15,var_16,var_17,var_18,var_19,var_20,var_21,var_22,var_23,var_24,var_25,var_26,var_27,var_28,var_29,var_30,var_31,var_32,var_33,var_34,var_35,var_36,var_37,var_38,var_39,var_40,var_41,var_42,var_43,var_44,var_45,var_46,var_47,var_48,var_49,var_50,var_51,var_52,var_53,var_54,var_55,var_56,var_57,var_58,var_59,var_60,var_61,var_62,var_63,var_64,var_65,var_66,var_67,var_68,var_69,var_70,var_71,var_72,var_73,var_74,var_75,var_76,var_77,var_78,var_79,var_80,var_81,var_82,var_83,var_84,var_85,var_86,var_87,var_88,var_89,var_90,var_91,var_92,var_93,var_94,var_95,var_96,var_97,var_98,var_99,var_100,var_101,var_102,var_103,var_104,var_105,var_106,var_107,var_108,var_109,var_110,var_111,var_112,var_113,var_114,var_115,var_116,var_117,var_118,var_119,var_120,var_121,var_122,var_123,var_124,var_125,var_126,var_127,var_128,var_129,var_130,var_131,var_132,var_133,var_134,var_135,var_136,var_137,var_138,var_139,var_140,var_141,var_142,var_143,var_144,var_145,var_146,var_147,var_148,var_149,var_150,var_151,var_152,var_153,var_154,var_155,var_156,var_157,var_158,var_159,var_160,var_161,var_162,var_163,var_164,var_165,var_166,var_167,var_168,var_169,var_170,var_171,var_172,var_173,var_174,var_175,var_176,var_177,var_178,var_179,var_180,var_181,var_182,var_183,var_184,var_185,var_186,var_187,var_188,var_189,var_190,var_191,var_192,var_193,var_194,var_195,var_196,var_197,var_198,var_199
1,train_1,0,11.5006,-4.1473,13.8588,5.3890,12.3622,7.0433,5.6208,16.5338,3.1468,8.0851,-0.4032,8.0585,14.0239,8.4135,5.4345,13.7003,13.8275,-15.5849,7.8000,28.5708,3.4287,2.7407,8.5524,3.3716,6.9779,13.8910,-11.7684,-2.5586,5.0464,0.5481,-9.2987,7.8755,1.2859,19.3710,11.3702,0.7399,2.7995,5.8434,10.8160,3.6783,-11.1147,1.8730,9.8775,11.7842,1.2444,-47.3797,7.3718,0.1948,34.4014,25.7037,11.8343,13.2256,-4.1083,6.6885,-8.0946,18.5995,19.3219,7.0118,1.9210,8.8682,8.0109,-7.2417,1.7944,-1.3147,8.1042,1.5365,5.4007,7.9344,5.0220,2.2302,40.5632,0.5134,3.1701,20.1068,7.7841,7.0529,3.2709,23.4822,5.5075,13.7814,2.5462,18.1782,0.3683,-4.8210,-5.4850,13.7867,-13.5901,11.0993,7.9022,12.2301,0.4768,6.8852,8.0905,10.9631,11.7569,-1.2722,24.7876,26.6881,1.8944,0.6939,-13.6950,8.4068,35.4734,1.7093,15.1866,2.6227,7.3412,32.0888,13.9550,13.0858,6.6203,7.1051,5.3523,8.5426,3.6159,4.1569,3.0454,7.8522,-11.5100,7.5109,31.5899,9.5018,8.2736,10.1633,0.1225,12.5942,14.5697,2.4354,0.8194,16.5346,12.4205,-0.1780,5.7582,7.0513,1.9568,-8.9921,9.7797,18.1577,-1.9721,16.1622,3.6937,6.6803,-0.3243,12.2806,8.6086,11.0738,8.9231,11.7700,4.2578,-4.4223,20.6294,14.8743,9.4317,16.7242,-0.5687,0.1898,12.2419,-9.6953,22.3949,10.6261,29.4846,5.8683,3.8208,15.8348,-5.0121,15.1345,3.2003,9.3192,3.8821,5.7999,5.5378,5.0988,22.0330,5.5134,30.2645,10.4968,-7.2352,16.5721,-7.3477,11.0752,-5.5937,9.4878,-14.9100,9.4245,22.5441,-4.8622,7.6543,-15.9319,13.3175,-0.3566,7.6421,7.7214,2.5837,10.9516,15.4305,2.0339,8.1267,8.7889,18.3560,1.9518
3,train_3,0,11.0604,-2.1518,8.9522,7.1957,12.5846,-1.8361,5.8428,14.9250,-5.8609,8.2450,2.3061,2.8102,13.8463,11.9704,6.4569,14.8372,10.7430,-0.4299,15.9426,13.7257,20.3010,12.5579,6.8202,2.7229,12.1354,13.7367,0.8135,-0.9059,5.9070,2.8407,-15.2398,10.4407,-2.5731,6.1796,10.6093,-5.9158,8.1723,2.8521,9.1738,0.6665,-3.8294,-1.0370,11.7770,11.2834,8.0485,-24.6840,12.7404,-35.1659,0.7613,8.3838,12.6832,9.5503,1.7895,5.2091,8.0913,12.3972,14.4698,6.5850,3.3164,9.4638,15.7820,-25.0222,3.4418,-4.3923,8.6464,6.3072,5.6221,23.6143,5.0220,-3.9989,4.0462,0.2500,1.2516,24.4187,4.5290,15.4235,11.6875,23.6273,4.0806,15.2733,0.7839,10.5404,1.6212,-5.2896,1.6027,17.9762,-2.3174,15.6298,4.5474,7.5509,-7.5866,7.0364,14.4027,10.7795,7.2887,-1.0930,11.3596,18.1486,2.8344,1.9480,-19.8592,22.5316,18.6129,1.3512,9.3291,4.2835,10.3907,7.0874,14.3256,14.4135,4.2827,6.9750,1.6480,11.6896,2.5762,-2.5459,5.3446,38.1015,3.5732,5.0988,30.5644,11.3025,3.9618,-8.2464,2.7038,12.3441,12.5431,-1.3683,3.5974,13.9761,14.3003,1.0486,8.9500,7.1954,-1.1984,1.9586,27.5609,24.6065,-2.8233,8.9821,3.8873,15.9638,10.0142,7.8388,9.9718,2.9253,10.4994,4.1622,3.7613,2.3701,18.0984,17.1765,7.6508,18.2452,17.0336,-10.9370,12.0500,-1.2155,19.9750,12.3892,31.8833,5.9684,7.2084,3.8899,-11.0882,17.2502,2.5881,-2.7018,0.5641,5.3430,-7.1541,-6.1920,18.2366,11.7134,14.7483,8.1013,11.8771,13.9552,-10.4701,5.6961,-3.7546,8.4117,1.8986,7.2601,-0.4639,-0.0498,7.9336,-12.8279,12.4124,1.8489,4.4666,4.7433,0.7178,1.4214,23.0347,-1.2706,-2.9275,10.2922,17.9697,-8.9996
6,train_6,0,11.8091,-0.0832,9.3494,4.2916,11.1355,-8.0198,6.1961,12.0771,-4.3781,7.9232,-5.1288,-7.5271,14.1629,13.3058,7.8412,14.3363,7.5951,11.0922,21.1976,6.2946,15.8877,24.2595,8.1159,3.9769,7.6851,13.3600,-0.5156,0.0690,5.6452,4.6140,-12.3890,12.0880,-1.5290,9.2376,11.1510,6.6352,4.8462,7.0202,19.9479,-6.2271,4.4616,9.0383,10.6889,10.9480,15.0431,1.0776,14.2049,-2.0894,26.6997,18.4393,12.9840,23.2511,-3.4444,5.2735,3.7100,12.4798,18.4812,6.6104,2.7292,7.9378,15.3738,-12.6687,0.6272,2.7414,6.7854,0.4546,5.8610,13.5793,5.0178,-5.7921,21.1640,0.6386,3.7051,19.3404,29.6714,13.7166,12.5176,16.9189,5.2232,16.9441,-2.7142,12.3111,-2.5448,10.7269,-1.3374,21.5486,12.6511,6.3112,5.9346,10.4663,-9.2271,7.0140,11.1954,10.4502,11.1829,0.6137,23.7290,32.8788,2.4715,0.0483,-22.2159,17.0682,20.5977,1.8554,12.1542,4.3055,8.3469,23.3089,14.4272,13.5718,11.6967,6.6323,2.7194,4.6110,2.3090,0.3681,2.9144,19.4180,1.3141,-0.9810,23.3644,10.5135,5.7981,-5.0096,0.5866,11.8274,12.1880,1.7719,2.7085,21.7473,11.9410,0.5405,8.9913,7.4506,-13.2871,-5.7890,23.8387,7.8704,5.2588,7.4428,2.7612,10.3221,-1.7192,16.4603,8.9949,11.7397,11.4148,6.8520,3.8912,-1.3446,11.6169,10.4602,4.3166,13.1753,9.9217,-2.5725,13.7241,-10.0345,21.9814,11.1648,39.9599,5.5552,3.3459,9.2661,6.1213,23.7558,3.0298,5.9109,8.1035,6.1887,0.2619,-1.1405,25.1675,2.6965,17.0152,12.7942,-3.0403,8.1735,4.5637,3.8973,-8.1416,10.0570,15.7862,3.3593,11.9140,-4.2870,7.5015,-29.9763,17.2867,1.8539,8.7830,6.4521,3.5325,0.1777,18.3314,0.5845,9.1104,9.1143,10.8869,-3.2097
14,train_14,0,13.8080,5.0514,17.2611,8.5120,12.8517,-9.1622,5.7327,21.0517,-4.5117,6.8116,8.2028,-7.8221,13.9241,10.3896,6.8838,14.1297,14.5268,-4.8877,26.1382,4.8558,23.4624,20.8922,8.0659,4.0123,7.6241,13.3559,-13.1966,-3.8157,6.1292,5.2018,-1.6535,11.3707,2.7550,19.5342,10.6458,10.0454,0.9564,3.8333,13.0079,1.6806,7.8845,1.4269,11.0149,12.1214,7.5736,-6.5626,7.8535,-18.9700,15.6651,11.5988,12.3821,-0.1118,-4.3987,5.5246,2.6776,9.1675,13.7200,6.2372,7.6941,10.3796,3.9899,-28.1259,2.7835,0.3181,4.5298,-3.8985,5.6207,9.5731,5.0203,-5.9425,36.6398,0.9844,3.7291,8.7684,17.8056,10.5278,-0.1000,16.1371,4.2005,14.2070,4.6762,14.9304,6.2291,9.1703,-6.2429,16.0202,2.8262,16.1663,7.2524,6.9682,-0.5809,6.9044,17.2195,10.3820,9.4765,-0.3397,10.0153,29.6691,3.9740,0.7079,-7.4887,10.5649,24.8074,1.6084,8.8058,4.2805,11.3171,12.5439,14.4251,16.4421,6.2570,7.2652,5.6589,12.5741,1.7613,5.9302,2.2599,45.0588,-6.2653,3.6460,48.8792,13.4504,6.7362,1.8856,2.6005,12.3617,14.2914,0.7164,-3.0522,12.9256,13.3206,0.7150,5.5274,6.5929,-15.8721,-3.1137,34.1622,11.9432,-5.2207,15.2230,5.9190,7.9199,24.2084,10.2945,9.1527,10.0958,13.8135,-9.4187,4.1184,4.2370,17.3946,9.3863,10.4441,16.4421,15.8412,-3.0656,11.9998,-6.8458,11.0506,9.1354,21.0429,5.7070,7.2320,13.8567,8.0575,9.5069,2.8172,-10.9940,4.0863,5.1196,-2.4897,3.1915,15.8254,3.8015,26.2032,8.5550,5.5689,10.5646,-1.1600,3.2684,-4.2496,10.5889,-15.4406,1.9688,30.8736,-6.4666,4.4271,16.0274,18.4116,1.0279,1.0740,8.3220,3.2619,1.6738,17.4797,-0.0257,-3.5323,9.3439,24.4479,-5.1110
15,train_15,0,3.9416,2.6562,13.3633,6.8895,12.2806,-16.1620,5.6979,14.4573,-4.3144,7.1290,-7.0984,1.7324,14.1446,5.2403,5.0599,14.6456,7.2626,-15.3607,24.7424,17.6439,11.5724,11.4583,0.5490,2.3395,8.3642,14.2083,-7.7781,-2.0180,6.4194,3.6106,-12.7855,11.1948,1.3230,13.9853,10.7737,7.0264,1.6508,6.0989,7.5711,-4.6098,-17.7249,2.4271,10.1051,12.0460,3.2954,-34.0436,12.9922,-25.8124,14.6790,28.1191,13.3836,16.8278,7.1393,6.3741,0.3618,13.4837,13.9663,5.8798,4.3868,9.2069,9.9538,-22.7829,1.9602,-1.2211,8.7602,1.3576,4.4527,14.2933,5.0085,-5.7862,11.3269,0.9925,-0.2101,8.2387,-11.7765,19.4705,8.3826,21.8628,5.4904,14.7332,1.0263,15.3367,1.2207,-2.0234,-0.4324,23.9033,6.2974,18.8752,6.5515,4.7269,-12.4368,6.7834,10.1365,9.0939,17.3690,-0.1736,1.8590,-4.5463,2.9103,1.3449,-8.3983,17.3156,15.4252,1.6919,9.9785,4.6852,3.7102,10.1972,14.2003,23.5847,0.6544,5.0437,2.1130,0.7387,4.6307,-0.9106,2.0502,24.6517,-11.7223,5.0092,40.2209,11.8996,-3.5936,10.9542,2.1982,12.6665,12.9207,-0.4392,-7.3826,14.2616,12.4578,0.4224,6.6427,7.0095,-5.9515,4.0630,7.9128,15.3688,0.8297,4.1289,3.6205,-2.9695,21.2670,10.8972,8.6246,10.7959,8.9432,-9.8588,3.6942,8.2039,17.8921,10.4100,6.3760,13.7376,3.1786,-5.3311,12.5147,-6.9223,1.0356,3.1323,17.7009,5.4925,4.6479,10.2630,1.9567,11.3951,2.0466,5.6207,6.3211,6.0198,7.9611,0.7532,8.4928,-9.9688,20.8953,8.3705,5.5334,8.5702,10.0225,1.4850,1.5508,9.1124,-4.4539,7.3382,4.2341,2.1422,9.4803,2.4272,17.7642,-0.1420,-3.4195,0.8829,-1.9859,3.9905,22.4647,0.5129,6.5273,8.2899,12.9116,-4.9182
33,train_33,0,18.2931,0.6422,14.6849,4.1357,11.7496,-5.8369,4.6817,14.9886,-2.2271,7.8669,-0.9099,1.4323,13.8428,4.7871,9.0179,14.7911,9.3052,-2.4756,16.1478,12.8026,7.8427,4.3109,2.4078,2.6223,12.0140,13.6283,-10.7461,0.8427,5.0484,10.1792,-8.0848,9.0747,1.1911,20.6323,11.7644,4.7999,2.8661,6.1637,4.3713,2.8132,-13.9735,16.8915,10.9426,11.7858,2.5212,-48.8162,13.0342,-16.1388,8.4046,13.6391,11.6111,13.5590,5.3757,6.7519,-6.4825,17.5144,9.2793,6.5310,3.9301,8.8205,7.2708,1.7760,2.2714,4.7515,5.5032,-3.6677,5.4279,5.8031,5.0255,1.0966,7.2695,0.4799,1.1035,14.4368,31.3957,17.2018,-9.5653,16.4613,6.4285,15.8264,11.0748,13.9003,-0.7753,0.7836,-8.0995,24.3649,2.3957,4.5419,9.2813,6.0492,-15.2964,6.8267,10.6227,10.5128,10.3416,0.2063,21.3312,23.9391,1.0991,0.0556,-11.8066,1.9407,19.4214,1.4695,9.0711,3.7169,5.3836,13.0953,13.9857,11.3338,7.8508,7.0467,3.8430,2.9486,2.3705,4.3034,2.9056,26.5205,-3.1911,-1.5623,9.4743,9.3446,3.6163,4.5993,1.8528,11.9774,12.5663,-2.5901,4.3425,14.5993,11.1883,0.6989,5.4510,7.1175,-10.1045,-3.9080,21.5159,21.0003,-0.3723,-1.7066,0.8231,0.7869,7.2945,13.9577,7.2210,4.5536,10.3540,-2.2203,3.6778,1.7497,18.4672,4.8441,1.2606,13.5918,12.3124,1.6983,11.4738,-6.2848,14.3043,12.2671,19.8858,5.5611,3.4062,4.9981,-2.3543,29.6815,2.7919,-22.7562,5.2867,5.7211,1.2720,3.3131,22.6439,8.7568,9.5737,10.2499,-2.0961,15.8405,-6.0983,6.7953,3.1582,7.9270,0.5481,6.4250,24.3702,-6.9638,13.9041,5.0838,19.8157,-0.5549,2.6778,11.5770,3.7000,4.8089,18.2611,-1.7182,5.3902,9.3638,15.4329,3.4912
51,train_51,0,8.6005,6.2344,9.1086,7.7384,13.3435,0.7011,6.5702,13.8584,4.6112,7.2793,1.6776,-5.3336,13.6572,7.6000,6.9183,14.7233,9.0845,-5.7533,6.1745,14.3910,6.2788,11.0075,4.5518,2.7893,16.7314,14.0071,-6.4098,-1.8986,6.3247,8.6076,-6.2107,14.2222,0.8112,13.6237,11.4995,8.2775,5.3331,4.2348,16.0097,3.2205,-10.6715,2.6880,11.4829,11.7177,-3.5569,-1.7088,13.5046,-4.4696,5.5715,6.8567,13.2752,13.4091,7.9803,6.7645,-8.7159,11.9709,17.2929,6.0178,2.9079,7.9269,12.6546,1.4550,4.6374,0.1766,6.6993,3.5319,6.4265,3.8838,5.0296,-6.8204,37.6474,0.6969,-2.2705,9.4442,33.7339,5.0064,6.6507,25.3074,7.6679,14.5532,6.6703,15.8457,-10.0903,12.8232,-2.6722,21.8735,4.9770,-1.3653,6.4302,2.8108,-8.6799,6.8607,15.5370,9.8294,13.3620,-0.6100,11.9091,30.6094,0.4354,-0.4094,-9.6315,22.1001,6.2863,1.2469,13.7915,4.3007,9.3726,22.9712,14.2606,17.7058,7.7097,6.8456,3.4168,9.9315,4.3338,-4.4626,0.8383,22.1440,-5.2779,4.4469,20.0781,11.5652,-0.9409,15.4603,4.2883,12.6290,12.7079,1.8414,1.0616,15.3061,12.6618,0.4308,5.1395,6.9466,-11.9402,3.8283,20.8223,27.6006,1.8114,19.9157,0.8687,0.5199,5.9875,14.1901,9.0331,0.8727,9.8908,7.0243,4.2250,0.0370,14.5233,4.6920,6.7431,12.6494,9.4745,-2.9273,13.3117,1.3722,10.6380,11.1567,32.5719,5.2533,7.3757,7.6482,-1.4874,18.2404,3.4514,-5.3433,4.5564,5.2980,1.0507,-8.5987,27.4729,1.4200,22.0801,17.2346,-5.6782,13.8259,4.9250,3.6704,1.0784,8.7094,-6.0761,6.3864,14.8150,-1.9847,10.6788,-33.8779,19.2283,0.0300,6.4999,4.7458,1.0441,0.7604,22.3790,-1.6256,-4.4040,9.8296,16.9640,-13.0576
60,train_60,0,5.7733,0.6536,9.3862,6.2292,9.5133,-4.4736,5.6957,12.3160,1.4934,8.7252,0.7421,-9.9809,13.9746,5.9190,5.9355,14.0966,10.7436,-0.3776,21.9756,1.0213,6.5450,1.7762,4.0164,3.6899,5.3183,13.7419,-2.8141,-1.9635,5.1010,4.1465,-3.5565,8.4890,-2.9999,14.3298,11.3090,2.1294,-3.9135,7.4304,10.1081,0.1460,-0.0714,14.4361,11.5908,11.2855,7.7724,-44.2549,10.4269,-27.3664,4.6169,8.9421,12.5910,7.8186,-0.6988,7.0926,4.2975,20.1593,13.2107,4.9294,2.2150,10.0977,8.3844,-15.0127,1.2344,1.7780,6.8949,4.8229,8.2100,8.4748,5.0069,-6.2297,36.3464,0.7883,6.6433,26.2446,33.3568,17.7386,-3.1620,25.7014,8.6823,12.9873,12.5250,14.7448,-6.5424,14.8588,3.1686,18.0119,-7.8772,5.2012,7.6369,11.0792,-7.7179,7.0232,9.5631,10.2646,8.8481,-0.1193,21.3892,-7.3096,1.9740,-2.4347,-2.0539,11.1377,39.1237,1.5101,6.4756,5.2579,8.6925,14.4854,14.1975,20.2932,8.0731,5.1982,1.9863,9.8423,3.5241,-1.0736,1.2426,1.8234,-5.8401,3.2719,32.2223,11.3581,3.8351,11.9781,4.0976,12.7421,13.0753,-1.2144,2.4879,13.2786,11.7474,0.6606,9.4636,6.6304,-3.7147,-2.4275,34.3674,27.9313,5.1163,17.0799,3.1826,3.7402,7.1471,11.9897,6.8866,11.2846,9.4031,-4.9707,3.8915,-0.5436,18.6699,4.8440,10.6318,15.9076,6.1710,-1.0593,14.5991,-8.5041,16.1748,9.3684,10.9819,6.1418,2.6404,2.2551,-10.7614,14.7003,3.0740,-15.1533,2.8453,4.8193,0.0247,2.3501,4.9576,-6.8278,23.6304,13.9653,-8.5849,17.5712,-10.7819,3.1796,-6.8184,9.8688,-7.7013,5.7116,3.6056,-11.5501,7.6512,-15.7661,17.9262,1.5439,13.5739,4.4436,2.1518,4.5054,15.7899,-0.8621,2.3509,7.8059,16.6860,-17.4470
74,train_74,0,5.4386,2.4679,10.0766,4.4916,10.8987,-0.5161,5.9401,14.1194,6.5380,7.5313,-7.4100,-1.7023,13.9940,4.2867,7.3343,14.1209,8.1369,-2.7923,14.9253,17.4836,12.6850,18.8759,6.6304,2.0745,4.0440,14.0295,-14.0942,0.6382,5.4602,8.0811,-14.9237,8.6015,4.3052,11.5106,11.4964,6.2359,8.9139,7.4854,6.9879,1.5806,-3.0486,3.3815,11.0877,11.1101,9.3676,23.5916,11.1382,-12.7508,16.6437,17.0354,11.9425,23.3782,-10.3070,6.1258,-3.1190,13.5716,21.4224,6.2132,0.3101,8.7424,11.6551,-32.3111,2.8578,-0.5938,6.6095,2.7637,3.7439,-1.6034,5.0181,-6.6414,29.2497,0.2391,-1.1637,25.5743,5.4843,17.9062,12.4031,11.8527,4.6881,15.1468,-14.5499,15.6593,11.2148,16.1599,-4.6199,19.2066,-8.9221,8.3397,10.6098,2.7597,-23.8309,7.2395,5.7569,11.4184,13.3517,1.0772,24.6739,28.0239,2.8749,0.5442,-0.3211,17.6909,21.2034,1.5222,14.6182,4.6984,6.6016,10.8334,14.4265,23.6642,4.1696,7.6872,1.5431,17.0017,2.0904,0.1163,2.9148,13.8959,-4.9169,5.3179,18.9303,10.3661,4.9934,-1.5633,5.1519,12.4921,13.7062,2.3628,1.7117,14.6450,12.1863,1.0123,6.3532,6.4974,-0.1334,-9.2916,35.6552,31.2581,0.1915,2.1941,2.6894,-6.9773,22.6513,7.2277,8.2976,8.8074,12.4328,-9.2392,4.1106,29.6118,19.8055,14.1816,5.0201,15.5153,11.1027,6.2440,11.5636,-0.0198,30.9222,14.9281,24.4377,5.9153,6.5085,14.2035,-1.3408,14.9387,2.7590,-3.8993,1.6685,6.0122,0.0035,0.5196,19.3283,-2.2626,-0.6860,14.1973,-13.0116,11.5022,-3.8396,2.7280,2.5371,9.4293,-1.7131,10.2493,6.9334,-2.5092,9.7556,-26.0881,16.1721,2.6441,-3.8850,4.7890,-0.5674,6.3509,15.3483,-0.0128,4.6483,9.4707,19.8155,10.3066
75,train_75,0,16.7218,-3.1693,12.8215,7.9725,12.0685,-1.1816,4.7701,25.5929,4.7303,6.6577,4.1122,-0.6190,13.9584,4.8873,6.7688,14.2944,7.5067,0.8846,12.9386,23.6522,8.0394,7.2035,0.4951,2.8720,17.4438,13.5903,0.8940,-3.1813,4.8235,6.6232,-15.4055,8.9570,-0.8949,11.4128,12.0326,-0.9890,4.9696,8.2546,15.9351,-0.1115,-12.0151,2.9268,10.7497,11.4407,2.9879,-11.8665,7.9829,-5.7772,6.1922,31.2255,13.2103,17.5374,0.7364,5.6506,-14.1866,14.1972,17.0600,6.6850,6.2047,9.3281,8.1230,-10.4829,1.8170,1.2143,7.8803,3.1487,5.5495,3.9200,5.0195,5.4346,24.4176,0.1599,-3.5457,22.6393,41.1120,17.4250,-1.0364,24.7390,6.8407,15.8023,7.8556,17.5490,1.9431,6.3890,-5.5446,20.1244,12.9840,9.4338,11.2824,9.7770,-2.9243,7.0755,7.9828,10.2919,14.3765,0.4675,15.2877,19.0529,1.0956,-2.6122,-7.1220,10.4246,28.1120,1.5469,10.5357,3.0939,6.3677,19.6700,14.0832,14.9176,10.0008,7.2043,1.1477,5.7301,2.8813,-1.7110,5.5944,28.0714,-1.4925,0.9872,25.2607,11.7274,-4.7293,15.4268,1.8057,12.1407,13.8697,-4.0990,-3.4506,14.1470,12.9748,-0.3089,6.5669,6.8459,-11.9524,-6.2397,11.7661,10.3180,-8.4090,-8.5344,5.0961,1.5254,2.9453,9.2489,7.2371,4.8391,7.8121,3.4474,4.1595,12.9447,16.2501,10.7256,11.7887,20.1281,11.7909,4.6737,12.8246,-13.7780,12.0008,15.1963,16.7201,5.8183,5.6502,9.4262,6.8393,27.3746,2.6078,-7.7031,7.1138,5.0283,5.8726,2.0734,22.9716,-3.6497,12.1786,13.4813,-5.1763,12.6614,1.8675,-3.5886,-2.4120,10.0562,8.5966,6.7856,13.7409,-4.9721,7.9650,-22.9322,15.3831,0.9360,-5.4970,5.6385,5.3761,3.5912,15.1097,-1.0483,13.7565,9.0890,16.7629,-4.7062


In [23]:
col = 'var_0'

# Count-of-counts for the target == 1 rows: how many distinct values of `col`
# occur once, twice, three times, ...
per_value_freq = train_1[col].value_counts()
per_value_freq.value_counts()

1    16942
2     1443
3       82
4        6
Name: var_0, dtype: int64

In [28]:
col = 'var_0'

# Count-of-counts for the first 70000 target == 0 rows: how many distinct
# values of `col` occur once, twice, three times, ...
train_0_head = train_0.iloc[:70000]
train_0_head[col].value_counts().value_counts()

1    36985
2    11008
3     2688
4      598
5       99
6        8
Name: var_0, dtype: int64

In [22]:
# Count-of-counts over all target == 0 rows.
# The second value_counts() is restored: the saved output of this cell is
# indexed by occurrence count (1..11), not by feature value, so it can only
# have come from the count-of-counts form used by the two cells above.
train_0[col].value_counts().value_counts()

1     41175
2     24584
3     13490
4      6542
5      2742
6       958
7       335
8       100
9        24
10        8
11        2
Name: var_0, dtype: int64