Разработать модель, предсказывающую к какому из трех сегментов относится каждый клиент.

In [137]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.feature_selection import mutual_info_classif

In [138]:
pd.set_option('display.max_columns', 10) # Снимаем ограничение по клонкам
df = pd.read_csv('contest_train.csv', sep=',') # Загрузка файла
prediction_data = pd.read_csv('contest_test.csv', sep=',')
# df.dropna(axis=1, how='any', inplace=True) # Удалеем колонны с пропусками

# Заменяем пропуски на median
df = df.fillna(df.median()) 
prediction_data = prediction_data.fillna(prediction_data.median())

In [139]:
pd.set_option('display.max_columns', 300) # Снимаем ограничение по клонкам
df = pd.read_csv('contest_train.csv', sep=',') # Загрузка файла

In [140]:
df['FEATURE_261'] = df["FEATURE_150"].fillna('нет').apply(lambda x: 1 if x == 'нет' else 0)
df['FEATURE_260'] = df["FEATURE_123"].fillna('нет').apply(lambda x: 1 if x == 'нет' else 0)
colnames = df.columns.tolist()
colnames = colnames[:-3] + colnames[:-4:-1]
df = df[colnames]


In [141]:
def feature_selector(df, columns_list=False, max_one_elemtnt_k = 0.9, max_nan_elemtnt_k = 0.7, max_corr = 0.9):
    '''
    Удаляем из df фичи которые имеют один уникальный элемент,
    много одинаковых элементов выше попрогового значения,
    пропущенные элементы выше порогового значения,
    Коррелирующие между собой фичи выше порогового значения. 
    '''
    drop_list = []
    max_one_elemtnt = df.count()[0] * max_one_elemtnt_k # Считаем границу количество уникального элемента
    max_nan_elemtnt = df.count()[0] * max_nan_elemtnt_k # Считаем границу пропусков в столбце

    if not columns_list: # если не добавил колонки для проверки, берем все, 
        columns = list(df.columns)[1:-1]    # либо настраиваем тут. 

    for col in columns: # первые 3-и проверки
        if (len(df[col].unique()) == 1) or \
           (df[col].value_counts().tolist()[0] > max_one_elemtnt) or \
           (df[col].isnull().sum() > max_nan_elemtnt):
            drop_list.append(col)

    df_corr  = df.corr().unstack().reset_index() # корреляции
    hig_corr = df_corr[(np.abs(df_corr[0]) > max_corr) & (df_corr['level_0'] != df_corr['level_1'])]
    couple   = hig_corr[['level_0', 'level_1']].values.tolist()
    
    while couple:
        _, two = couple[0]
        drop_list.append(two)
        for el in couple:
            if two in el:
                couple.pop(couple.index(el))

    return df.drop(columns=set(drop_list), axis=1)



In [143]:
def normal_colum(n):
    if n < Q1:
        return Q1 - 1
    elif n > Q3:
        return Q3 + 1
    return n

def normal_df(df):
    columns = list(df.columns)[1:-1]
    for col in columns:
        global Q1, Q3
        Q1 = df[col].quantile(0.25)
        Q3 = df[col].quantile(0.75)

        if len(df[col].unique()) <= 2:
            df[col] = df[col].fillna(df[col].median())
        else:
            df[col] = df[col].apply(normal_colum)
            df[col] = df[col].fillna(df[col].min() - 1)
    return df


def normal_colum2(n):
    if n < Q1:
        return 0
    elif Q1 <= n < Q2:
        return 1
    elif Q2 <= n < Q3:
        return 2 
    return 3
def normal_df2(df):
    columns = list(df.columns)[1:-1]
    for col in columns:
        global Q1, Q2, Q3
        Q1 = df[col].quantile(0.25)
        Q2 = df[col].median()
        Q3 = df[col].quantile(0.75)

        if len(df[col].unique()) <= 10:
            df[col] = df[col].fillna(df[col].median())
        else:
            df[col] = df[col].apply(normal_colum2)
            df[col] = df[col].fillna(-1)
    return df

In [144]:
df = feature_selector(df)
df = normal_df2(df)

In [145]:
train_df = pd.concat([
    df[df['TARGET'] == 0].head(1200),
    df[df['TARGET'] == 1].head(1200),
    df[df['TARGET'] == 2].head(1200)
])

In [None]:
columns = list(train_df.columns)[1:-1]
for col in columns:
    # train_df.boxplot(column=col)
    plt.figure(figsize=(10,7))
    bins = int(train_df[col].max()/10)
    plt.hist(train_df[col])
    plt.xlabel(col)
    plt.ylabel('count')
    plt.show()



In [124]:
df['targetBin'] = df["TARGET"].apply(lambda x: 0 if x in [0, 1] else 2).values # Для предсказания 2
myTrain = df.iloc[:15000, :] # На чем будем учиться
myTest  = df.iloc[15000:, :] # Для предсказания, для сравнения подходов
features = df.columns.tolist()[1:-2]

In [147]:
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2
X = train_df[features]
y = train_df['targetBin']
bestfeatures = SelectKBest(score_func=chi2, k=10)
fit = bestfeatures.fit(X,y)
dfscores = pd.DataFrame(fit.scores_)
dfcolumns = pd.DataFrame(X.columns)
featureScores = pd.concat([dfcolumns,dfscores],axis=1)
featureScores.columns = ['Specs','Score']  #naming the dataframe columns
features1 = featureScores.nlargest(100,'Score')['Specs'].tolist()

In [148]:
from sklearn.ensemble import ExtraTreesClassifier
import matplotlib.pyplot as plt
model = ExtraTreesClassifier()
model.fit(X,y)
feat_importances = pd.Series(model.feature_importances_, index=X.columns)
features2 = list(feat_importances.nlargest(100).to_dict().keys())

In [149]:
features = set(features1) & set(features2)
len(features)

80

In [65]:
# Фичи которые будем использовать для Подходов
df_corr = df.corr().unstack().reset_index()
hig_corr_features = df_corr[(np.abs(df_corr[0]) > 0.15) & (df_corr['level_0'] != df_corr['level_1'])]
features = hig_corr_features[hig_corr_features['level_0'] == 'TARGET']['level_1'].values[1:-1]

In [146]:
features = train_df.columns.tolist()[1:-1]
df['targetBin'] = df["TARGET"].apply(lambda x: 0 if x in [0, 1] else 2).values
train_df['targetBin'] = train_df["TARGET"].apply(lambda x: 0 if x in [0, 1] else 2).values
myTrain  = train_df
myTest   = df

In [150]:
# Модель Random Forest сразу на классификацию
X = myTrain[features].values
y = myTrain['TARGET'].values
model_all = RandomForestClassifier()
model_all.fit(X, y)

# Тест + запись 
Xtest    = myTest[features].values
predict  = model_all.predict(Xtest)
myTest['tRFall'] = predict


  X = myTrain[features].values
  Xtest    = myTest[features].values


In [151]:
# Модель для предсказание сначала 2-к потом 1-ц

two  = myTrain[myTrain['targetBin'] == 2]
zero = myTrain[myTrain['targetBin'] == 0].head(1000)
myTrainBin = pd.concat([two, zero])

In [152]:
X = myTrainBin[features].values
y = myTrainBin['targetBin'].values

model_search_two = RandomForestClassifier()
model_search_two.fit(X, y)

# Результат предсказания 2
Xtest = myTest[features].values
predict = model_search_two.predict(Xtest)
myTest['tRF01'] = predict

  X = myTrainBin[features].values
  Xtest = myTest[features].values


In [153]:
# Находим 1
one  = myTrain[myTrain['TARGET'] == 1]
zero = myTrain[myTrain['TARGET'] == 0].head(3500)
myTrainbin01 = pd.concat([one, zero])
myTrainbin01['TARGET'].value_counts()

1    1200
0    1200
Name: TARGET, dtype: int64

In [154]:
X = myTrainbin01[features].values
y = myTrainbin01['TARGET'].values

model_search_one = RandomForestClassifier(n_estimators=100)
model_search_one.fit(X, y)

Xtest = myTest[features].values
predict = model_search_one.predict(Xtest)
myTest['TresultBin'] = predict

  X = myTrainbin01[features].values
  Xtest = myTest[features].values


In [155]:
# объединяем результат работы поиска 1 и 2
result = []
list_ = list(myTest[['tRF01', 'TresultBin']].values)
for i, j in list_:
    result.append(2 if i == 2 else j)
myTest['TresultBin'] = result

In [156]:
# Сравнение 2-х вариантов 
print("Результат работы раздельно:")
print(classification_report(myTest['TARGET'], myTest['TresultBin']))

print("Результат поиска сразу всего:")
print(classification_report(myTest['TARGET'], myTest['tRFall']))

Результат работы раздельно:
              precision    recall  f1-score   support

           0       0.86      0.48      0.61     13029
           1       0.34      0.47      0.39      4237
           2       0.21      1.00      0.35      1124

    accuracy                           0.51     18390
   macro avg       0.47      0.65      0.45     18390
weighted avg       0.70      0.51      0.55     18390

Результат поиска сразу всего:
              precision    recall  f1-score   support

           0       0.86      0.52      0.65     13029
           1       0.38      0.59      0.46      4237
           2       0.29      1.00      0.45      1124

    accuracy                           0.56     18390
   macro avg       0.51      0.70      0.52     18390
weighted avg       0.71      0.56      0.59     18390



In [33]:
# Результат предсказания 2
X1 = prediction_data[features].values
predict = model_search_two.predict(X1)
prediction_data['TARGETpred'] = predict

X2 = prediction_data[features].values
predict = model_search_one.predict(X2)
prediction_data['TARGET'] = predict

result = []
list_ = list(prediction_data[['TARGETpred', 'TARGET']].values)
for i, j in list_:
    result.append(2 if i == 2 else j)
prediction_data['TARGET'] = result


In [34]:
prediction_data

Unnamed: 0,ID,FEATURE_0,FEATURE_1,FEATURE_2,FEATURE_3,FEATURE_4,FEATURE_5,FEATURE_6,FEATURE_7,FEATURE_8,FEATURE_9,FEATURE_10,FEATURE_11,FEATURE_12,FEATURE_13,FEATURE_14,FEATURE_15,FEATURE_16,FEATURE_17,FEATURE_18,FEATURE_19,FEATURE_20,FEATURE_21,FEATURE_22,FEATURE_23,FEATURE_24,FEATURE_25,FEATURE_26,FEATURE_27,FEATURE_28,FEATURE_29,FEATURE_30,FEATURE_31,FEATURE_32,FEATURE_33,FEATURE_34,FEATURE_35,FEATURE_36,FEATURE_37,FEATURE_38,FEATURE_39,FEATURE_40,FEATURE_41,FEATURE_42,FEATURE_43,FEATURE_44,FEATURE_45,FEATURE_46,FEATURE_47,FEATURE_48,FEATURE_49,FEATURE_50,FEATURE_51,FEATURE_52,FEATURE_53,FEATURE_54,FEATURE_55,FEATURE_56,FEATURE_57,FEATURE_58,FEATURE_59,FEATURE_60,FEATURE_61,FEATURE_62,FEATURE_63,FEATURE_64,FEATURE_65,FEATURE_66,FEATURE_67,FEATURE_68,FEATURE_69,FEATURE_70,FEATURE_71,FEATURE_72,FEATURE_73,FEATURE_74,FEATURE_75,FEATURE_76,FEATURE_77,FEATURE_78,FEATURE_79,FEATURE_80,FEATURE_81,FEATURE_82,FEATURE_83,FEATURE_84,FEATURE_85,FEATURE_86,FEATURE_87,FEATURE_88,FEATURE_89,FEATURE_90,FEATURE_91,FEATURE_92,FEATURE_93,FEATURE_94,FEATURE_95,FEATURE_96,FEATURE_97,FEATURE_98,FEATURE_99,FEATURE_100,FEATURE_101,FEATURE_102,FEATURE_103,FEATURE_104,FEATURE_105,FEATURE_106,FEATURE_107,FEATURE_108,FEATURE_109,FEATURE_110,FEATURE_111,FEATURE_112,FEATURE_113,FEATURE_114,FEATURE_115,FEATURE_116,FEATURE_117,FEATURE_118,FEATURE_119,FEATURE_120,FEATURE_121,FEATURE_122,FEATURE_123,FEATURE_124,FEATURE_125,FEATURE_126,FEATURE_127,FEATURE_128,FEATURE_129,FEATURE_130,FEATURE_131,FEATURE_132,FEATURE_133,FEATURE_134,FEATURE_135,FEATURE_136,FEATURE_137,FEATURE_138,FEATURE_139,FEATURE_140,FEATURE_141,FEATURE_142,FEATURE_143,FEATURE_144,FEATURE_145,FEATURE_146,FEATURE_147,FEATURE_148,FEATURE_149,FEATURE_150,FEATURE_151,FEATURE_152,FEATURE_153,FEATURE_154,FEATURE_155,FEATURE_156,FEATURE_157,FEATURE_158,FEATURE_159,FEATURE_160,FEATURE_161,FEATURE_162,FEATURE_163,FEATURE_164,FEATURE_165,FEATURE_166,FEATURE_167,FEATURE_168,FEATURE_169,FEATURE_170,FEATURE_171,FEATURE_172,FEATURE_173,FEATURE_174,FEATURE_175,FEATURE_176,FEATURE_177,FEATURE_178,FEATURE_179,FEATURE_180,FEATURE_181,FEATURE_182,FEATURE_183,FEATURE_184,FEATURE_185,FEATURE_186,FEATURE_187,FEATURE_188,FEATURE_189,FEATURE_190,FEATURE_191,FEATURE_192,FEATURE_193,FEATURE_194,FEATURE_195,FEATURE_196,FEATURE_197,FEATURE_198,FEATURE_199,FEATURE_200,FEATURE_201,FEATURE_202,FEATURE_203,FEATURE_204,FEATURE_205,FEATURE_206,FEATURE_207,FEATURE_208,FEATURE_209,FEATURE_210,FEATURE_211,FEATURE_212,FEATURE_213,FEATURE_214,FEATURE_215,FEATURE_216,FEATURE_217,FEATURE_218,FEATURE_219,FEATURE_220,FEATURE_221,FEATURE_222,FEATURE_223,FEATURE_224,FEATURE_225,FEATURE_226,FEATURE_227,FEATURE_228,FEATURE_229,FEATURE_230,FEATURE_231,FEATURE_232,FEATURE_233,FEATURE_234,FEATURE_235,FEATURE_236,FEATURE_237,FEATURE_238,FEATURE_239,FEATURE_240,FEATURE_241,FEATURE_242,FEATURE_243,FEATURE_244,FEATURE_245,FEATURE_246,FEATURE_247,FEATURE_248,FEATURE_249,FEATURE_250,FEATURE_251,FEATURE_252,FEATURE_253,FEATURE_254,FEATURE_255,FEATURE_256,FEATURE_257,FEATURE_258,FEATURE_259,TARGET,TARGETpred,TARGET2
0,84728433,0.0,0.0,0.0,0.0,0.0,0.0,0.0,35.265322,110.813724,0.0,0.0,1.0,154.726900,1.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,-13.857818,0.0,61.287620,30.607010,0.0,35.720469,0.0,0.0,0.0,0.0,0.0,0.0,-13.363083,27.042315,121.164445,410.777662,418.184730,288.779844,0.0,0.0,0.0,0.0,0.0,0.0,9.814280,127.316255,140.253660,224.746131,165.411893,475.424812,26.583890,146.018837,265.737867,604.491503,659.827819,800.933376,49.935832,129.457907,151.852115,101.333077,58.323627,279.256776,67.445173,235.620404,363.524021,469.295854,313.095897,523.738616,28.0,29.0,11.115539,-4.006499,-13.473595,1.0,0.0,0.096774,79.023306,143.448293,562.376815,37.242363,157.809322,83.678082,89.993600,351.394700,1.210967,-0.335548,5.621130,41.154574,2.256960,15.278167,22.153717,63.346740,238.329810,15.212931,0.273242,2.720437,20.491297,21.948040,4.242742,0.716410,7.035782,82.186024,78.854780,10.822625,0.128304,1.059200,12.566417,26.107138,10.843584,0.000000,1.333333,22.819023,31.817500,17.333333,9.0,24.0,0.0,0.0,0.0,0.0,16.0,63.0,1.0,45.0,31.0,14.0,31.492206,154.803534,98.112236,49.605037,1.0,15.0,5.0,10.0,87.048613,83.920097,13.972679,70.071872,0.0,0.0,0.0,3.0,11.563522,0.0,0.0,0.0,104.859955,0.33,0.0,2.0,0.0,0.0,2.0,0.0,1.0,0.0,0.0,1.0,0.0,577.055808,528.333427,2672.093493,524.078761,1015.947939,1.0,6.0,2.0,64.973581,3.666667,30.666667,33.0,0.0,71.0,3.0,49.0,0.0,989.530150,0.0,0.0,31.0,251.743448,4523.730126,203.852733,1.0,0.0,4540.695088,544.881460,29.533360,214.0,177.859683,188.519064,557.161853,201.416260,0.0,27.075023,4.0,1.0,1.0,26.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,2.0,2.0,2.0,89.0,136.0,346.605551,1.0,1.0,1.0,124.0,135.0,112.181055,1.0,296.639806,19.0,214.0,222.735520,0.0,424.661172,0.0,279.008933,722.177313,0.0,115.394387,269.617846,473.058974,40.0,142.0,293.151579,0.0,0.0,0.0,417.527713,0.0,0.0,0.0,23.0,0.0,0.0,0.0,436.883761,427.497873,0.0,1.0,0.0,1.0,1.0,1.0,0,0,0
1,335016156,0.0,0.0,0.0,0.0,0.0,0.0,0.0,51.316538,14.427879,0.0,0.0,0.0,-62.499009,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.680881,0.0,-2.500994,-0.107472,0.0,4.270219,0.0,0.0,0.0,0.0,0.0,0.0,-47.851014,-98.661813,78.362264,54.625172,-79.215239,95.849191,0.0,0.0,0.0,0.0,0.0,0.0,-3.729714,-5.390320,59.596394,-8.144886,-29.370469,-15.474226,13.463733,-2.534146,7.687234,-10.348282,180.504503,47.176406,262.147640,176.572125,94.720758,76.367749,143.439009,114.212611,367.214030,468.003983,251.627321,142.895852,254.434765,198.949016,29.0,0.0,95.248258,43.534728,-11.402512,1.0,0.0,0.064516,32.115236,18.794719,-56.662569,30.353493,41.206796,66.303888,86.586234,143.071468,0.310601,-0.024114,-1.409140,12.162496,3.760033,4.165384,-2.156905,29.711738,68.111690,-7.611607,0.132147,-0.060258,17.958978,30.583049,1.540248,-0.049512,-0.903518,19.672039,40.475526,2.121147,0.651182,-0.108471,9.777451,35.645461,4.533710,0.333333,0.000000,3.793839,16.547220,2.333333,8.0,13.0,0.0,0.0,0.0,0.0,17.0,27.0,1.0,9.0,1.0,8.0,91.364242,60.674745,-1.344777,37.938661,1.0,28.0,16.0,12.0,53.798946,77.184822,37.842007,42.475568,0.0,0.0,0.0,0.0,-0.725992,0.0,0.0,0.0,-0.947521,0.09,1.0,2.0,0.0,0.0,3.0,0.0,2.0,0.0,0.0,2.0,0.0,137.423552,-6.397857,901.178075,135.460172,326.264743,2.0,14.0,4.0,31.294883,2.000000,27.666667,0.0,0.0,28.0,2.0,57.0,0.0,-0.089924,0.0,0.0,29.0,1545.820891,3585.106553,162.196058,1.0,0.0,3620.728661,1362.837466,166.237214,214.0,204.204721,230.135871,220.672583,207.089619,0.0,54.695225,2.0,1.0,0.0,8.0,0.0,0.0,0.0,2.0,1.0,1.0,2.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,72.0,82.0,18.464351,2.0,1.0,1.0,28.0,68.0,-42.026531,0.0,101.255562,20.0,170.0,199.235015,0.0,28.621848,0.0,1.191704,-109.873571,0.0,117.895003,134.120558,250.132950,8.0,35.0,101.391468,0.0,0.0,0.0,-22.853844,0.0,0.0,0.0,24.0,0.0,0.0,0.0,53.400612,58.863404,1.0,1.0,0.0,1.0,1.0,1.0,2,2,0
2,1163752045,16.0,4.0,0.0,0.0,0.0,0.0,1.0,457.087009,1172.556882,2.0,1.0,1.0,-17.145833,2.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,-11.121329,0.0,31.933272,27.815267,0.0,5.226699,0.0,0.0,0.0,0.0,0.0,0.0,-5.404270,-6.110141,5.470973,-0.292772,-39.593356,-14.416002,0.0,0.0,0.0,0.0,0.0,0.0,272.184785,106.602940,33.961967,277.662646,399.912122,484.590597,324.704869,108.528499,-61.390997,321.288774,337.036135,558.290315,196.053520,46.100052,171.799937,166.245644,111.544584,238.882497,448.598988,394.099260,496.514801,495.207145,338.143227,500.077092,30.0,31.0,-21.422133,11.828326,17.603807,1.0,0.0,0.032258,62.085319,151.400162,209.457189,76.819613,221.861262,72.282813,138.238261,334.622030,7.273960,8.518962,14.908471,17.191215,2.307109,66.870537,25.605786,48.643883,82.119564,-15.755201,0.225200,1.135365,25.187876,22.629747,1.436549,0.044223,0.925596,92.064488,72.286481,4.623721,-0.040603,0.611225,16.175853,29.030551,4.204346,0.000000,2.000000,41.745388,65.982376,10.166667,11.0,20.0,35.0,58.0,48.0,105.0,40.0,90.0,0.0,14.0,6.0,8.0,44.563811,40.177705,36.604279,19.730721,1.0,26.0,8.0,18.0,56.935865,116.486248,76.679382,60.181222,0.0,0.0,0.0,0.0,0.260596,0.0,0.0,0.0,274.277040,0.59,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,1065.965937,1043.085767,5356.638898,1136.259974,2300.066663,2.0,14.0,4.0,54.546860,5.000000,30.666667,75.0,24.0,72.0,4.0,22.0,0.0,18.500167,0.0,5.0,31.0,860.712436,806.015598,530.846055,1.0,0.0,754.090823,190.119208,4.008125,214.0,204.204721,230.135871,220.672583,207.089619,0.0,-6.833609,1.0,0.0,0.0,3.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,2.0,63.0,62.0,176.618065,1.0,2.0,1.0,60.0,85.0,108.249053,1.0,779.800830,15.0,192.0,202.892636,0.0,-4.097048,0.0,377.752387,406.987244,0.0,217.211243,288.221858,484.982885,90.0,150.0,642.775704,0.0,0.0,148.0,-22.495845,0.0,0.0,3.0,22.0,0.0,131.0,151.0,217.419547,490.478263,0.0,1.0,0.0,1.0,1.0,1.0,1,0,0
3,84929758,2.0,1.0,0.0,0.0,0.0,0.0,0.0,29.672314,58.511494,0.0,0.0,0.0,622.518469,4.0,3.0,0.0,1.0,0.0,0.0,0.0,0.0,36.869571,0.0,-69.482265,-34.027897,0.0,19.560967,0.0,0.0,0.0,0.0,0.0,0.0,33.413963,65.485910,0.717419,57.305712,107.853108,21.628844,0.0,0.0,0.0,0.0,0.0,0.0,140.148089,462.894188,902.892019,391.590823,230.366192,78.783889,-53.466628,424.856104,832.246795,418.775251,182.491586,204.328222,299.323227,326.080509,173.905197,119.226387,89.828696,158.276295,536.031118,764.864626,625.546906,373.501384,379.633614,372.151992,31.0,25.0,-22.053791,-3.780297,-3.517892,0.0,0.0,0.000000,53.462271,187.906927,173.742758,78.365581,277.468307,79.478661,74.983638,223.793096,6.119733,0.605527,6.460056,24.674952,12.058002,24.532497,11.470307,36.805413,34.598617,-0.749851,-0.053978,0.341612,6.250069,40.999242,2.636278,-0.428281,6.743686,18.136268,110.917181,7.164464,-0.031869,0.203947,5.152259,41.317509,4.063060,0.000000,0.166667,18.428394,116.742614,11.666667,10.0,20.0,0.0,0.0,0.0,0.0,22.0,62.0,1.0,44.0,21.0,23.0,49.016353,54.738705,13.884631,29.405753,1.0,87.0,35.0,52.0,53.878911,87.775057,19.603175,37.106950,0.0,0.0,0.0,5.0,5.352690,0.0,0.0,0.0,124.469038,0.33,0.0,5.0,0.0,0.0,5.0,0.0,5.0,0.0,0.0,5.0,0.0,4.508058,998.252839,3950.304968,984.024698,1880.128327,0.0,4.0,2.0,72.685501,3.666667,30.666667,0.0,0.0,64.0,3.0,56.0,65.0,2426.134827,0.0,0.0,31.0,320.813555,4475.833949,95.313642,1.0,84.0,4497.486003,190.119208,9.704608,214.0,204.204721,230.135871,220.672583,207.089619,0.0,-25.502700,304.0,0.0,0.0,37.0,0.0,0.0,0.0,2.0,0.0,1.0,2.0,0.0,1.0,1.0,1.0,2.0,1.0,1.0,1.0,80.0,170.0,346.535615,3.0,1.0,1.0,30.0,65.0,107.464263,2.0,699.030798,0.0,53.0,-17.804364,0.0,79.934708,0.0,246.160625,385.577306,0.0,173.509277,184.734672,288.500644,42.0,92.0,676.458077,0.0,0.0,0.0,707.880192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,657.453708,692.191717,1.0,1.0,0.0,2.0,1.0,1.0,0,0,0
4,143090121,2.0,1.0,0.0,0.0,1.0,0.0,0.0,522.148408,2724.481080,1.0,1.0,0.0,1414.701598,18.0,12.0,0.0,0.0,0.0,0.0,0.0,0.0,0.880909,0.0,-13.217971,-7.161856,0.0,-9.039134,0.0,0.0,0.0,0.0,0.0,0.0,81.712738,-0.636251,44.802230,-23.565437,-7.226179,-1.940260,0.0,0.0,2.0,0.0,0.0,0.0,6362.809539,7222.282300,3134.795175,2608.983012,2409.180206,2011.581650,6427.645276,7175.293259,3144.596090,2387.063199,2306.268344,2015.803467,376.699258,385.942506,413.022847,538.161707,602.260548,431.215160,692.210752,808.527159,957.392458,894.749362,830.269836,639.374360,31.0,31.0,7.842119,-12.496924,12.673403,0.0,0.0,0.000000,71.457228,601.390146,1758.848923,40.078339,228.205732,74.110533,225.576487,568.322811,0.624839,5.940137,11.982950,22.367449,7.708757,-13.231982,196.866226,197.046282,401.259204,94.930525,-0.136109,1.229716,8.232849,36.863758,1.324160,2.317591,6.585478,43.247536,215.726167,10.049714,0.072394,0.910945,12.612685,36.001463,2.095903,0.333333,2.166667,29.326498,86.697234,4.666667,9.0,21.0,0.0,0.0,0.0,0.0,25.0,102.0,0.0,9.0,3.0,6.0,40.736817,132.035953,79.834645,47.680863,0.0,28.0,2.0,26.0,96.117272,104.994637,26.748551,101.311986,0.0,0.0,0.0,0.0,0.524242,0.0,0.0,0.0,1277.628426,0.84,0.0,2.0,0.0,0.0,2.0,0.0,1.0,0.0,0.0,1.0,0.0,2112.698723,2009.668960,2361.250053,2154.538368,6505.556723,2.0,15.0,8.0,153.975739,21.000000,30.666667,0.0,0.0,170.0,23.0,43.0,0.0,4063.311706,0.0,0.0,31.0,89.376404,3726.265845,735.626828,1.0,562.0,3717.047758,101.952365,-68.580536,214.0,122.605731,199.981677,169.604781,186.498528,0.0,2.074494,2.0,0.0,0.0,10.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,4.0,4.0,3.0,3.0,3.0,3.0,85.0,116.0,2218.456566,1.0,1.0,2.0,83.0,101.0,309.469092,2.0,2229.095009,63.0,383.0,452.270935,0.0,-34.062830,0.0,2371.187143,2338.368691,0.0,542.545439,295.343217,785.043493,432.0,198.0,1595.408553,0.0,0.0,22.0,554.123915,0.0,0.0,167.0,173.0,0.0,50.0,188.0,1253.064074,1670.514312,0.0,1.0,0.0,1.0,1.0,2.0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6126,87822820,2.0,2.0,1.0,0.0,0.0,0.0,0.0,910.007459,18.527934,1.0,1.0,1.0,904.478054,21.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,6.389485,0.0,-26.430280,-31.202674,0.0,-27.496064,0.0,0.0,0.0,0.0,0.0,0.0,60.417864,25.455629,49.752596,-28.489181,-44.424639,66.527319,0.0,0.0,0.0,0.0,0.0,0.0,25312.382243,26597.458139,12123.586137,16044.127236,8345.796577,7380.943704,25307.148119,26580.741872,12084.537189,16126.239299,8292.023701,7423.064784,1058.548561,1060.735289,1431.750786,1601.626078,961.309388,543.962491,2054.266697,2153.368946,2455.164887,2714.130961,1593.323973,1244.880226,31.0,31.0,11.941033,6.816641,-6.779952,0.0,0.0,0.000000,73.972312,2746.090431,7904.370694,151.813875,553.030910,74.191523,440.702492,1411.262370,1.616214,6.965926,13.708016,23.672972,5.569049,133.106414,689.440489,1033.038145,1753.389350,496.206085,0.084278,2.936752,25.869752,20.997459,1.012159,-1.386815,37.782125,356.420579,303.504644,14.260329,0.057658,1.820767,15.528316,30.195257,2.465661,0.333333,10.166667,87.763861,163.847568,14.000000,31.0,64.0,0.0,0.0,0.0,0.0,36.0,78.0,1.0,107.0,27.0,80.0,89.156076,155.928636,25.727423,109.861071,1.0,69.0,26.0,43.0,75.871513,100.472830,17.602313,122.134606,0.0,0.0,0.0,33.0,141.238111,0.0,0.0,0.0,154.783274,0.05,6.0,2.0,0.0,0.0,9.0,0.0,1.0,0.0,0.0,1.0,0.0,816.849283,691.591947,4342.248709,780.844206,2221.910754,3.0,35.0,13.0,49.107592,2.000000,30.666667,0.0,0.0,65.0,4.0,44.0,0.0,1332.194100,0.0,0.0,31.0,678.752600,3491.294316,534.694783,1.0,119.0,3576.155296,190.119208,-0.754596,214.0,204.204721,230.135871,220.672583,207.089619,0.0,-2.145352,0.0,0.0,0.0,4.0,0.0,0.0,0.0,2.0,0.0,1.0,2.0,0.0,1.0,6.0,6.0,5.0,5.0,4.0,4.0,357.0,543.0,8322.544045,1.0,1.0,1.0,514.0,761.0,4796.591627,0.0,758.796116,85.0,461.0,542.174723,0.0,-17.252687,0.0,10622.082164,10663.451665,0.0,1011.922173,719.412424,1786.692185,53.0,97.0,510.401903,0.0,0.0,320.0,311.966284,0.0,0.0,0.0,0.0,0.0,5.0,320.0,373.468995,652.534787,1.0,1.0,0.0,1.0,1.0,1.0,0,0,0
6127,648084027,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.461696,-25.037712,0.0,0.0,0.0,69.340378,2.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,5.100396,0.0,-4.143027,-5.297602,0.0,19.522508,0.0,0.0,0.0,0.0,0.0,0.0,522.094875,603.430330,2666.679896,67.087204,-54.191881,33.704608,0.0,0.0,0.0,0.0,0.0,0.0,4360.407998,33.117447,252.984709,-12.106592,-13.705342,2.901319,4864.878198,612.380113,2832.439897,9.985136,-4.005775,20.355772,368.269474,580.907341,505.069497,386.518170,392.875192,377.927878,648.683207,974.137291,854.579306,823.529312,783.345411,1019.199812,31.0,0.0,130.308857,68.695634,79.217836,0.0,0.0,0.000000,1.802551,-8.393769,-12.759748,46.019479,171.658881,70.498874,262.734450,670.029126,0.128642,-0.534977,0.257724,-0.077959,0.308475,-41.859630,-29.126763,88.882322,-5.984864,-59.331787,0.000574,0.327878,13.298113,35.182144,2.011763,1.266981,1.508482,79.796706,200.559485,10.758858,0.277254,-0.650184,8.103343,31.403629,11.025610,0.333333,0.166667,12.247053,54.882439,16.500000,6.0,17.0,0.0,0.0,0.0,0.0,10.0,42.0,1.0,4.0,2.0,2.0,48.223270,60.807168,45.354852,19.266444,1.0,48.0,31.0,17.0,42.583654,536.272890,306.249562,227.275348,0.0,0.0,0.0,0.0,0.035202,0.0,0.0,0.0,245.251501,0.25,0.0,2.0,0.0,0.0,2.0,0.0,2.0,0.0,0.0,2.0,0.0,571.321486,431.508739,2363.119125,399.088826,1271.105575,3.0,15.0,8.0,50.348248,4.666667,30.000000,0.0,0.0,36.0,5.0,29.0,0.0,1115.859340,0.0,0.0,31.0,998.145612,945.938318,205.575632,1.0,38.0,902.075517,190.119208,25.195468,214.0,204.204721,230.135871,220.672583,207.089619,117.0,99.356452,5.0,0.0,0.0,9.0,0.0,0.0,0.0,2.0,1.0,1.0,2.0,1.0,1.0,3.0,2.0,3.0,0.0,0.0,0.0,94.0,145.0,1994.367851,1.0,2.0,1.0,35.0,148.0,54.768166,1.0,353.919081,42.0,250.0,291.210439,0.0,39.118080,0.0,-46.264096,-47.716517,0.0,334.452197,484.894106,875.451403,76.0,218.0,502.555489,0.0,0.0,0.0,208.077672,0.0,0.0,0.0,6.0,0.0,85.0,0.0,220.856363,347.157907,1.0,1.0,0.0,1.0,1.0,1.0,1,0,0
6128,1402792850,14.0,6.0,1.0,0.0,0.0,0.0,0.0,868.853981,4.501930,3.0,1.0,1.0,41.709484,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,203.339456,-100.0,2.506877,-7.133139,2.6,-30.553400,0.0,0.0,0.0,0.0,0.0,0.0,6.625226,29.402575,-39.601529,74.430794,-91.266388,3030.350822,0.0,0.0,0.0,0.0,2043.0,0.0,24246.219563,23146.481575,28794.462452,35006.075538,14936.517757,26904.490331,24362.412040,23166.735789,28826.013425,34945.672393,14940.208779,29962.803500,151.341436,124.979547,39.083354,47.818571,14.532499,-46.720640,246.128675,89.798499,57.661637,186.309487,110.531735,-12.720493,11.0,31.0,2.451367,-25.614898,16.096153,18.0,0.0,5.580645,58.010497,9122.436568,17501.890761,-2.581570,-2.331823,76.284392,22.775002,117.347792,3.116257,10.052012,16.955473,16.739790,4.547834,454.569152,1894.010409,2929.650260,2978.301581,642.083368,0.065773,1.187792,17.965704,31.094970,0.744945,0.824140,0.946961,3.174839,20.699437,-0.549458,-0.135089,0.819727,12.694951,32.907503,1.895472,0.000000,0.333333,4.840994,9.397419,0.833333,3.0,2.0,0.0,0.0,0.0,0.0,5.0,5.0,0.0,16.0,8.0,8.0,38.537268,30.192217,14.046755,12.781852,0.0,18.0,13.0,5.0,26.618419,25.991474,17.554929,11.151483,0.0,0.0,0.0,0.0,1.395206,0.0,0.0,0.0,100.189460,0.62,0.0,2.0,1.0,1.0,4.0,0.0,2.0,1.0,1.0,4.0,0.0,925.479336,862.918966,5335.309591,903.631109,2807.562801,6.0,15.0,10.0,45.189924,5.666667,30.666667,1.0,1.0,90.0,6.0,34.0,0.0,10.873090,300.0,1.0,31.0,459.685367,389.746759,12.385392,1.0,0.0,488.890602,52.798620,-3.488006,214.0,18.230235,32.096278,95.159679,-0.468083,1.0,22.244598,1.0,0.0,0.0,1.0,0.0,0.0,0.0,2.0,0.0,1.0,2.0,1.0,1.0,6.0,6.0,6.0,6.0,5.0,6.0,54.0,58.0,6838.120604,2.0,3.0,1.0,31.0,35.0,10455.138655,5.0,965.525841,22.0,102.0,123.503082,0.0,981.871875,681.0,25602.701961,26631.054684,34.0,38.386816,77.138600,145.675786,19.0,21.0,534.322561,182.0,0.0,0.0,496.198616,182.0,0.0,0.0,37.0,0.0,28.0,0.0,558.439963,891.069074,1.0,1.0,0.0,2.0,2.0,1.0,2,2,0
6129,1032223762,1.0,1.0,0.0,0.0,0.0,0.0,0.0,-13.849651,-30.745913,0.0,0.0,0.0,-30.929532,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071182,0.0,7.475321,1.729710,0.0,20.192864,0.0,0.0,0.0,0.0,0.0,0.0,-35.113922,-26.468310,20.624682,-33.640450,-55.017116,7.039664,0.0,0.0,0.0,0.0,0.0,0.0,42.879012,5.955247,-15.735413,-24.568588,17.029679,-7.751248,6.966353,42.971533,16.784087,-1.750429,-56.664730,-6.205868,61.880914,86.227769,64.651399,98.762225,195.410012,20.757646,65.965121,132.370207,39.610096,95.681714,136.131431,186.615240,25.0,0.0,367.957624,339.317118,346.767334,4.0,0.0,0.419355,-0.234065,36.637282,10.805235,-1.380854,48.355071,91.203668,2.934822,76.605510,0.197777,0.201392,1.132425,-0.486622,-0.574123,8.796199,-73.768154,-31.324379,-72.008921,12.362885,0.198739,-0.103880,2.914036,45.926514,2.572479,-0.472935,1.070927,8.166498,41.771970,0.749824,0.082333,0.117280,4.243534,41.616765,4.316744,0.000000,0.000000,0.926558,11.859166,1.875000,8.0,12.0,0.0,0.0,0.0,0.0,14.0,36.0,1.0,5.0,1.0,4.0,83.705663,20.063404,-10.568104,22.320054,1.0,24.0,7.0,17.0,52.841483,74.810719,18.188891,20.604147,0.0,0.0,0.0,3.0,-6.193380,0.0,0.0,0.0,122.492447,0.53,1.0,1.0,0.0,0.0,2.0,2.0,0.0,0.0,0.0,2.0,0.0,67.423361,92.627011,537.567158,-37.280223,141.505521,2.0,7.0,4.0,28.279318,2.333333,25.000000,0.0,0.0,23.0,3.0,45.0,0.0,-28.954565,0.0,0.0,31.0,3610.577708,3623.299902,1582.697771,1.0,0.0,3598.646352,190.119208,-6.316747,214.0,204.204721,230.135871,220.672583,207.089619,0.0,381.830485,535.0,1.0,0.0,2.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,17.0,29.0,-37.732010,2.0,2.0,2.0,13.0,25.0,-36.309658,1.0,108.780349,0.0,44.0,40.632235,0.0,-15.142559,0.0,20.469993,119.402192,0.0,63.201818,63.717064,233.537834,32.0,26.0,105.544543,0.0,0.0,0.0,25.463984,0.0,0.0,0.0,0.0,0.0,1.0,0.0,69.275919,53.229676,0.0,0.0,0.0,1.0,2.0,2.0,2,2,0


In [35]:
prediction_data[['ID', 'TARGET']].to_csv('Sergunin.csv', sep=',', index=False)

Для решения задачи использовал RandomForest поскольку она дала лучшие результаты на первичном прогоне среди других моделек классификации. 
Использовал 2-а варианта классификации.
1) стандартна сегментировал по 3-м категориям.
2) Нашел с начала 2-ю категорию поскольку она сильнее всего отличается от остальных,
    После чего уже между остальными искал 0 и 1. 
    Вариант оказался не лучше рандома. Его я оставил хорошенько обрезов, в качестве сравнения! 

LogisticRegression выдал хороший результат однако перенести его не получилось

