## Тестовое задание
Разработать модель, предсказывающую к какому из трех сегментов относится каждый клиент.

In [442]:
import pandas as pd
import numpy as np
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
import xgboost
from sklearn.metrics import f1_score

In [443]:
db_train = pd.read_csv('contest_train.csv')

## Очистка данных
При визуальном осмотре были обнаружены пропуски в признаках. Это создаст проблему при машинном обучении. Существует множество способов борьбы с этой проблемой, вот основные:
+ замена пропуска медианой
+ замена на нулевое значением
+ замена среднем значение
+ удаление строк с пропусками.

Решение "в лоб" db_train.dropna(inplace = True) - из более 18000 строк осталось около 300.
Возникла гипотеза, что есть столбцы состоящие из пропусков.
Столбцов и сток только из пропусков также не оказалось и метод:
db_train.dropna(how='all', inplace = True) не принес результата.


In [444]:
# найдем признаки, где отсутствует порядка 10% данных от общего числа
s = []
for i in range(len(db_train.columns)):
  nan_rows = db_train[db_train.iloc[:,i].isnull()]
  if len(nan_rows)>1800:
    s.append(i) # формируем список признаков
# всего 16 признаков из 262. Предпологаю, что  их удаление не сильно снизит качество предсказания   
db_train = db_train.drop(db_train.columns[s], axis=1)  # удаляем признаки с большим количество пропусков
db_train.dropna(inplace = True) # а теперь вновь удаляем строки с пропусками - и в результате более 15000 строк - уже можно с этим работать

In [445]:
db_train.describe() # при анализе даннх - видны явные выбросы. Снова встает дилема: удалять или заменять.

Unnamed: 0,ID,FEATURE_0,FEATURE_1,FEATURE_2,FEATURE_3,FEATURE_4,FEATURE_5,FEATURE_6,FEATURE_7,FEATURE_8,FEATURE_9,FEATURE_10,FEATURE_11,FEATURE_12,FEATURE_13,FEATURE_14,FEATURE_15,FEATURE_16,FEATURE_17,FEATURE_18,FEATURE_19,FEATURE_20,FEATURE_21,FEATURE_22,FEATURE_23,FEATURE_24,FEATURE_25,FEATURE_26,FEATURE_27,FEATURE_28,FEATURE_29,FEATURE_30,FEATURE_31,FEATURE_32,FEATURE_33,FEATURE_34,FEATURE_35,FEATURE_36,FEATURE_37,FEATURE_38,...,FEATURE_221,FEATURE_222,FEATURE_223,FEATURE_224,FEATURE_225,FEATURE_226,FEATURE_227,FEATURE_228,FEATURE_229,FEATURE_230,FEATURE_231,FEATURE_232,FEATURE_233,FEATURE_234,FEATURE_235,FEATURE_236,FEATURE_237,FEATURE_238,FEATURE_239,FEATURE_240,FEATURE_241,FEATURE_242,FEATURE_243,FEATURE_244,FEATURE_245,FEATURE_246,FEATURE_247,FEATURE_248,FEATURE_249,FEATURE_250,FEATURE_251,FEATURE_252,FEATURE_253,FEATURE_254,FEATURE_255,FEATURE_256,FEATURE_257,FEATURE_258,FEATURE_259,TARGET
count,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,...,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0
mean,1361680000.0,5.4422,1.352525,0.035574,0.0,0.123963,0.000129,0.012673,51.742691,280.094999,0.284014,0.19138,0.175426,245.162178,4.67964,2.125185,0.125893,0.121711,0.127115,0.117208,0.125378,-0.085314,5.982867,-0.406349,2.984813,5.54772,1.607044,3.775393,0.460663,0.801608,0.381409,0.363654,0.112062,0.075523,408.301953,490.417266,540.061726,650.398835,745.393028,875.717819,...,78.840849,132.620778,761.721491,1.421936,480.151488,21.884014,167.216919,189.14432,0.183725,756.830358,12.722612,1222.337158,1979.103672,5.934062,300.837867,304.680753,605.314956,96.650113,105.90119,363.809971,8.599614,4.460341,13.671856,211.013886,8.599614,2.623931,6.767256,12.202316,0.0,37.378128,27.523705,298.566467,384.444958,0.627855,0.650177,0.0,1.140431,1.180058,1.199421,0.349437
std,1970435000.0,14.800453,1.80823,0.185232,0.0,0.32955,0.011342,0.111862,182.960535,1206.178252,0.714307,0.459338,0.380343,462.079326,15.951126,2.176561,0.331739,0.326962,0.333112,0.321678,0.331158,5.964216,42.8555,28.596834,52.030435,49.668273,28.096941,28.977695,18.851743,43.06621,14.097958,13.799594,6.134708,5.41517,2077.421609,2428.779248,2479.591192,2951.307924,3205.516656,3735.53294,...,95.134557,147.237821,2116.339029,11.538289,355.078227,52.486509,139.60758,176.281407,6.179097,3033.838935,151.959798,3686.112613,5247.454518,52.086388,330.703076,305.193183,567.39628,138.198305,127.311569,253.918741,47.471568,29.508146,51.443309,211.477604,47.471568,20.571451,36.408134,33.548324,0.0,86.978063,78.317243,215.630888,278.334347,0.483392,0.476929,0.0,0.375734,0.401283,0.41397,0.583806
min,14283350.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-171.709797,-297.065679,0.0,0.0,0.0,-286.437088,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-100.0,-117.669869,-100.0,-123.305025,-122.61734,0.0,-102.269744,0.0,0.0,0.0,0.0,0.0,0.0,-373.307474,-319.650659,-310.390755,-411.320897,-302.335958,-313.106083,...,0.0,1.0,-338.858445,-15.0,-177.446176,0.0,0.0,-88.376898,0.0,-346.109731,0.0,-334.960321,-383.840006,0.0,-172.378673,-141.991962,-287.261172,0.0,0.0,-159.862141,0.0,-254.0,-201.0,-206.148109,0.0,0.0,0.0,0.0,0.0,-501.0,-351.0,-1031.287798,-328.361649,0.0,0.0,0.0,1.0,0.0,1.0,0.0
25%,444213300.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-10.200535,-20.751417,0.0,0.0,0.0,0.887432,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,-2.810392,0.0,-9.478894,-6.843207,0.0,-12.759237,0.0,0.0,0.0,0.0,0.0,0.0,-18.702311,-18.105804,-18.528612,-17.939766,-16.589247,-16.206009,...,24.0,43.0,-0.910508,0.0,232.427262,1.0,71.0,74.400357,0.0,-15.673393,0.0,-1.476681,-0.879196,0.0,89.275648,107.534136,216.916873,15.0,26.0,185.328133,0.0,0.0,0.0,12.909128,0.0,0.0,0.0,0.0,0.0,0.0,0.0,143.273495,192.42476,0.0,0.0,0.0,1.0,1.0,1.0,0.0
50%,736634000.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,2.912285,4.353363,0.0,0.0,0.0,48.876553,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.124504,0.0,-0.048105,0.283593,0.0,2.428991,0.0,0.0,0.0,0.0,0.0,0.0,6.960158,7.619277,7.642783,8.190045,9.549496,10.802713,...,50.0,88.0,61.426337,1.0,398.956079,5.0,133.0,143.84727,0.0,11.505955,0.0,62.185968,76.824515,0.0,203.882863,219.712675,451.594905,48.0,65.0,314.520483,0.0,0.0,0.0,201.664162,0.0,0.0,0.0,2.0,0.0,6.0,0.0,268.670175,326.449115,1.0,1.0,0.0,1.0,1.0,1.0,0.0
75%,1151357000.0,5.0,2.0,0.0,0.0,0.0,0.0,0.0,23.312298,37.923357,0.0,0.0,0.0,256.124515,4.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,3.592853,0.0,8.580529,10.206057,0.0,18.505446,0.0,0.0,0.0,0.0,0.0,0.0,49.966804,53.294021,54.187706,57.916222,63.750869,70.947739,...,100.0,166.0,602.892262,1.0,642.234872,21.0,223.0,246.848417,0.0,77.089663,0.0,964.176427,1765.449883,0.0,397.593048,405.449013,808.532446,124.0,139.0,479.192941,0.0,0.0,0.0,324.862586,0.0,0.0,0.0,11.0,0.0,39.0,8.0,404.224255,513.063813,1.0,1.0,0.0,1.0,1.0,1.0,1.0
max,10438920000.0,385.0,31.0,1.0,0.0,1.0,1.0,1.0,2867.462158,9963.242114,8.0,5.0,1.0,6141.670997,662.0,24.0,1.0,1.0,1.0,1.0,1.0,225.0,308.548398,300.0,324.177493,302.869382,1907.1,195.475088,1360.0,3549.0,782.0,882.0,496.0,621.0,51330.792972,52173.418138,49779.214848,64514.215545,66879.655275,78246.936112,...,2738.0,2695.0,31953.258044,1025.0,2862.254343,1733.0,1781.0,2257.063365,420.0,63776.240595,5151.0,61529.804721,76958.254875,1819.0,5989.690593,5432.067733,6285.268214,2523.0,2651.0,2230.906533,1601.0,820.0,1633.0,1576.4832,1601.0,792.0,1186.0,1483.0,0.0,1469.0,1633.0,1831.485429,2552.595218,1.0,1.0,0.0,3.0,3.0,3.0,2.0


In [446]:
from scipy import stats
for i in range(len(db_train.columns)-1):
  # db_train = db_train[np.abs(db_train.iloc[:,i]-db_train.iloc[:,i].mean()) <= (3*db_train.iloc[:,i].std())]
  db_train.iloc[:,i] = db_train.iloc[:,i].mask(db_train.iloc[:,i] > 3*db_train.iloc[:,i].std(), db_train.iloc[:,i].median())
 # при удалении строк с выбросами более 3-х сигм - у нас опять теряются все данныеЖ в остатке около 3000 строк. 
 # заменяю выбросы на медианное значение. 

In [447]:
db_train.describe() # так значительно лучше

Unnamed: 0,ID,FEATURE_0,FEATURE_1,FEATURE_2,FEATURE_3,FEATURE_4,FEATURE_5,FEATURE_6,FEATURE_7,FEATURE_8,FEATURE_9,FEATURE_10,FEATURE_11,FEATURE_12,FEATURE_13,FEATURE_14,FEATURE_15,FEATURE_16,FEATURE_17,FEATURE_18,FEATURE_19,FEATURE_20,FEATURE_21,FEATURE_22,FEATURE_23,FEATURE_24,FEATURE_25,FEATURE_26,FEATURE_27,FEATURE_28,FEATURE_29,FEATURE_30,FEATURE_31,FEATURE_32,FEATURE_33,FEATURE_34,FEATURE_35,FEATURE_36,FEATURE_37,FEATURE_38,...,FEATURE_221,FEATURE_222,FEATURE_223,FEATURE_224,FEATURE_225,FEATURE_226,FEATURE_227,FEATURE_228,FEATURE_229,FEATURE_230,FEATURE_231,FEATURE_232,FEATURE_233,FEATURE_234,FEATURE_235,FEATURE_236,FEATURE_237,FEATURE_238,FEATURE_239,FEATURE_240,FEATURE_241,FEATURE_242,FEATURE_243,FEATURE_244,FEATURE_245,FEATURE_246,FEATURE_247,FEATURE_248,FEATURE_249,FEATURE_250,FEATURE_251,FEATURE_252,FEATURE_253,FEATURE_254,FEATURE_255,FEATURE_256,FEATURE_257,FEATURE_258,FEATURE_259,TARGET
count,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,...,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0,15545.0
mean,955394700.0,3.857446,1.121325,0.0,0.0,0.0,0.0,0.0,23.234335,83.326659,0.198392,0.14294,0.175426,171.320344,3.496687,1.817112,0.0,0.0,0.0,0.0,0.0,-0.177305,-0.355249,-3.006446,-2.316206,0.013128,0.375259,2.383838,0.016468,0.041235,0.002638,0.005082,0.001608,0.002252,192.718247,238.054719,269.459693,323.745116,368.248752,449.758768,...,65.563139,110.016726,493.836606,1.153619,411.412876,15.579093,143.966806,161.361766,0.004503,397.251622,3.230106,801.726944,1276.600824,1.798456,250.441931,258.041252,511.013258,77.746285,87.077131,310.980958,3.948344,1.22496,6.830878,184.408971,3.948344,0.590994,2.895143,8.755741,0.0,24.814152,16.820907,258.78226,329.774663,0.627855,0.650177,0.0,1.0,0.999936,1.0,0.349437
std,1025662000.0,7.046114,1.305083,0.0,0.0,0.0,0.0,0.0,88.971302,408.824802,0.527923,0.350023,0.380343,307.717864,5.898797,1.582177,0.0,0.0,0.0,0.0,0.0,4.181289,25.689304,17.527452,40.828784,37.720335,3.391933,26.106011,0.799663,1.8992,0.173311,0.332704,0.147234,0.1722,727.337685,869.823305,950.461947,1111.158777,1194.857344,1431.430244,...,57.427803,89.885503,995.630408,1.755719,245.486983,25.643638,93.37609,113.522209,0.182142,1219.761859,25.643218,1632.467981,2495.181304,10.74417,214.537312,199.534572,378.704284,86.637176,81.346746,171.004106,16.333975,8.286829,23.423772,166.455998,16.333975,4.537483,11.289006,16.372988,0.0,43.958257,43.084972,151.872532,190.861787,0.483392,0.476929,0.0,0.0,0.008021,0.0,0.583806
min,14283350.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-171.709797,-297.065679,0.0,0.0,0.0,-286.437088,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-100.0,-117.669869,-100.0,-123.305025,-122.61734,0.0,-102.269744,0.0,0.0,0.0,0.0,0.0,0.0,-373.307474,-319.650659,-310.390755,-411.320897,-302.335958,-313.106083,...,0.0,1.0,-338.858445,-15.0,-177.446176,0.0,0.0,-88.376898,0.0,-346.109731,0.0,-334.960321,-383.840006,0.0,-172.378673,-141.991962,-287.261172,0.0,0.0,-159.862141,0.0,-254.0,-201.0,-206.148109,0.0,0.0,0.0,0.0,0.0,-501.0,-351.0,-1031.287798,-328.361649,0.0,0.0,0.0,1.0,0.0,1.0,0.0
25%,444213300.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-10.200535,-20.751417,0.0,0.0,0.0,0.887432,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,-2.810392,0.0,-9.478894,-6.843207,0.0,-12.759237,0.0,0.0,0.0,0.0,0.0,0.0,-18.702311,-18.105804,-18.528612,-17.939766,-16.589247,-16.206009,...,24.0,43.0,-0.910508,0.0,232.427262,1.0,71.0,74.400357,0.0,-15.673393,0.0,-1.476681,-0.879196,0.0,89.275648,107.534136,216.916873,15.0,26.0,185.328133,0.0,0.0,0.0,12.909128,0.0,0.0,0.0,0.0,0.0,0.0,0.0,143.273495,192.42476,0.0,0.0,0.0,1.0,1.0,1.0,0.0
50%,736634000.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,2.912285,4.353363,0.0,0.0,0.0,48.876553,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.124504,0.0,-0.048105,0.283593,0.0,2.428991,0.0,0.0,0.0,0.0,0.0,0.0,6.960158,7.619277,7.642783,8.190045,9.549496,10.802713,...,50.0,88.0,61.426337,1.0,398.956079,5.0,133.0,143.84727,0.0,11.505955,0.0,62.185968,76.824515,0.0,203.882863,219.712675,451.594905,48.0,65.0,314.520483,0.0,0.0,0.0,201.664162,0.0,0.0,0.0,2.0,0.0,6.0,0.0,268.670175,326.449115,1.0,1.0,0.0,1.0,1.0,1.0,0.0
75%,1144014000.0,4.0,2.0,0.0,0.0,0.0,0.0,0.0,18.935922,31.184635,0.0,0.0,0.0,174.208832,4.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,2.793472,0.0,6.385348,7.394142,0.0,17.423178,0.0,0.0,0.0,0.0,0.0,0.0,44.909377,48.429443,47.564634,50.92828,55.750778,61.930853,...,90.0,150.0,508.75382,1.0,558.081816,18.0,199.0,221.410441,0.0,64.697246,0.0,809.086719,1470.887796,0.0,355.596649,362.129493,717.913172,109.0,122.0,414.036594,0.0,0.0,0.0,298.519561,0.0,0.0,0.0,9.0,0.0,32.0,4.0,354.654796,445.791241,1.0,1.0,0.0,1.0,1.0,1.0,1.0
max,5443476000.0,44.0,5.0,0.0,0.0,0.0,0.0,0.0,548.469074,3614.259228,2.0,1.0,1.0,1386.182461,47.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,128.439264,77.3,155.960609,148.693248,79.8,86.826389,54.0,128.0,16.0,37.0,16.0,16.0,6162.677558,7276.886828,7413.934401,8838.585411,9540.039348,11154.472627,...,285.0,441.0,6345.066456,34.0,1064.582003,157.0,418.0,528.305187,12.0,9090.329384,451.0,10993.905346,15630.647647,154.0,990.616801,913.889207,1700.223002,414.0,381.0,761.706499,142.0,88.0,154.0,634.400072,142.0,61.0,109.0,100.0,0.0,260.0,234.0,646.646135,834.700347,1.0,1.0,0.0,1.0,1.0,1.0,2.0


In [448]:
# Делаю разбивку на признаки и значение
X = db_train.iloc[:, 1:-1]
y = db_train.iloc[:, -1]

In [449]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)
# на тестовый датасет - 30% и 70% на обучение

## Обучение

Для обучения мною были выбраны 3 модели и они показали следующие результаты на тестовом датасете:

+ LogisticRegression:     0.6858276163614027
+ XGBClassifier:          0.7077615780445969
+ RandomForestClassifier: 0.6983080260303688

XGBClassifier показал лучший результат и он будет использоваться в предсказании.


In [450]:
# model_LR = LogisticRegression(random_state=0, max_iter=1000)
# model_LR.fit(X_train, y_train)
# model_RF = RandomForestClassifier(n_estimators=400,max_depth=10 ,  random_state=0)
# model_RF.fit(X_train, y_train)
model_XGB = xgboost.XGBClassifier(max_depth=5, n_estimators=250, random_state=0)
model_XGB.fit(X_train, y_train)


XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, gamma=0,
              learning_rate=0.1, max_delta_step=0, max_depth=5,
              min_child_weight=1, missing=None, n_estimators=250, n_jobs=1,
              nthread=None, objective='multi:softprob', random_state=0,
              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
              silent=None, subsample=1, verbosity=1)

In [451]:
# print ('model_LR accuracy: TRAINING', model_LR.score(X_train,y_train))
# print ('model_LR accuracy: TESTING', model_LR.score(X_test,y_test))

print ('model_XGB accuracy: TRAINING', model_XGB.score(X_train,y_train)) # точность на обучаемом наборе
print ('model_XGB accuracy: TESTING', model_XGB.score(X_test,y_test)) # точность на тестовом наборе

# print ('model_RF accuracy: TRAINING', model_RF.score(X_train,y_train))
# print ('model_RF accuracy: TESTING', model_RF.score(X_test,y_test))

model_XGB accuracy: TRAINING 0.9178384339674662
model_XGB accuracy: TESTING 0.7077615780445969


In [452]:
# a = model_LR  .predict(X_test) # предсказание
b = model_XGB .predict(X_test) # предсказание
# c = model_RF  .predict(X_test) # предсказание

In [453]:
f1 = f1_score(y_test, b, average='macro')
f1 # показатель f1_macro достигает 0.4309747515624632

0.4309747515624632

## Использование модели

In [463]:
db_test = pd.read_csv('contest_test.csv')
db_test

Unnamed: 0,ID,FEATURE_0,FEATURE_1,FEATURE_2,FEATURE_3,FEATURE_4,FEATURE_5,FEATURE_6,FEATURE_7,FEATURE_8,FEATURE_9,FEATURE_10,FEATURE_11,FEATURE_12,FEATURE_13,FEATURE_14,FEATURE_15,FEATURE_16,FEATURE_17,FEATURE_18,FEATURE_19,FEATURE_20,FEATURE_21,FEATURE_22,FEATURE_23,FEATURE_24,FEATURE_25,FEATURE_26,FEATURE_27,FEATURE_28,FEATURE_29,FEATURE_30,FEATURE_31,FEATURE_32,FEATURE_33,FEATURE_34,FEATURE_35,FEATURE_36,FEATURE_37,FEATURE_38,...,FEATURE_220,FEATURE_221,FEATURE_222,FEATURE_223,FEATURE_224,FEATURE_225,FEATURE_226,FEATURE_227,FEATURE_228,FEATURE_229,FEATURE_230,FEATURE_231,FEATURE_232,FEATURE_233,FEATURE_234,FEATURE_235,FEATURE_236,FEATURE_237,FEATURE_238,FEATURE_239,FEATURE_240,FEATURE_241,FEATURE_242,FEATURE_243,FEATURE_244,FEATURE_245,FEATURE_246,FEATURE_247,FEATURE_248,FEATURE_249,FEATURE_250,FEATURE_251,FEATURE_252,FEATURE_253,FEATURE_254,FEATURE_255,FEATURE_256,FEATURE_257,FEATURE_258,FEATURE_259
0,84728433,0.0,0.0,0.0,0.0,0.0,0.0,0.0,35.265322,110.813724,0.0,0.0,1.0,154.726900,1.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,-13.857818,0.0,61.287620,30.607010,0.0,35.720469,0.0,0.0,0.0,0.0,0.0,0.0,-13.363083,27.042315,121.164445,410.777662,418.184730,288.779844,...,1.0,124.0,135.0,112.181055,1.0,296.639806,19.0,214.0,222.735520,0.0,424.661172,0.0,279.008933,722.177313,0.0,115.394387,269.617846,473.058974,40.0,142.0,293.151579,0.0,0.0,0.0,417.527713,0.0,0.0,0.0,23.0,0.0,0.0,0.0,436.883761,427.497873,0.0,1.0,0.0,1.0,1.0,1.0
1,335016156,0.0,0.0,0.0,0.0,0.0,0.0,0.0,51.316538,14.427879,0.0,0.0,0.0,-62.499009,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.680881,0.0,-2.500994,-0.107472,0.0,4.270219,0.0,0.0,0.0,0.0,0.0,0.0,-47.851014,-98.661813,78.362264,54.625172,-79.215239,95.849191,...,1.0,28.0,68.0,-42.026531,0.0,101.255562,20.0,170.0,199.235015,0.0,28.621848,0.0,1.191704,-109.873571,0.0,117.895003,134.120558,250.132950,8.0,35.0,101.391468,0.0,0.0,0.0,-22.853844,0.0,0.0,0.0,24.0,0.0,0.0,0.0,53.400612,58.863404,1.0,1.0,0.0,1.0,1.0,1.0
2,1163752045,16.0,4.0,0.0,0.0,0.0,0.0,1.0,457.087009,1172.556882,2.0,1.0,1.0,-17.145833,2.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,-11.121329,0.0,31.933272,27.815267,0.0,5.226699,0.0,0.0,0.0,0.0,0.0,0.0,-5.404270,-6.110141,5.470973,-0.292772,-39.593356,-14.416002,...,1.0,60.0,85.0,108.249053,1.0,779.800830,15.0,192.0,202.892636,0.0,-4.097048,0.0,377.752387,406.987244,0.0,217.211243,288.221858,484.982885,90.0,150.0,642.775704,0.0,0.0,148.0,-22.495845,0.0,0.0,3.0,22.0,0.0,131.0,151.0,217.419547,490.478263,0.0,1.0,0.0,1.0,1.0,1.0
3,84929758,2.0,1.0,0.0,0.0,0.0,0.0,0.0,29.672314,58.511494,0.0,0.0,0.0,622.518469,4.0,3.0,0.0,1.0,0.0,0.0,0.0,0.0,36.869571,0.0,-69.482265,-34.027897,0.0,19.560967,0.0,0.0,0.0,0.0,0.0,0.0,33.413963,65.485910,0.717419,57.305712,107.853108,21.628844,...,1.0,30.0,65.0,107.464263,2.0,699.030798,0.0,53.0,-17.804364,0.0,79.934708,0.0,246.160625,385.577306,0.0,173.509277,184.734672,288.500644,42.0,92.0,676.458077,0.0,0.0,0.0,707.880192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,657.453708,692.191717,1.0,1.0,0.0,2.0,1.0,1.0
4,143090121,2.0,1.0,0.0,0.0,1.0,0.0,0.0,522.148408,2724.481080,1.0,1.0,0.0,1414.701598,18.0,12.0,0.0,0.0,0.0,0.0,0.0,0.0,0.880909,0.0,-13.217971,-7.161856,0.0,-9.039134,0.0,0.0,0.0,0.0,0.0,0.0,81.712738,-0.636251,44.802230,-23.565437,-7.226179,-1.940260,...,2.0,83.0,101.0,309.469092,2.0,2229.095009,63.0,383.0,452.270935,0.0,-34.062830,0.0,2371.187143,2338.368691,0.0,542.545439,295.343217,785.043493,432.0,198.0,1595.408553,0.0,0.0,22.0,554.123915,0.0,0.0,167.0,173.0,0.0,50.0,188.0,1253.064074,1670.514312,0.0,1.0,0.0,1.0,1.0,2.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6126,87822820,2.0,2.0,1.0,0.0,0.0,0.0,0.0,910.007459,18.527934,1.0,1.0,1.0,904.478054,21.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,6.389485,0.0,-26.430280,-31.202674,0.0,-27.496064,0.0,0.0,0.0,0.0,0.0,0.0,60.417864,25.455629,49.752596,-28.489181,-44.424639,66.527319,...,1.0,514.0,761.0,4796.591627,0.0,758.796116,85.0,461.0,542.174723,0.0,-17.252687,0.0,10622.082164,10663.451665,0.0,1011.922173,719.412424,1786.692185,53.0,97.0,510.401903,0.0,0.0,320.0,311.966284,0.0,0.0,0.0,0.0,0.0,5.0,320.0,373.468995,652.534787,1.0,1.0,0.0,1.0,1.0,1.0
6127,648084027,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.461696,-25.037712,0.0,0.0,0.0,69.340378,2.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,5.100396,0.0,-4.143027,-5.297602,0.0,19.522508,0.0,0.0,0.0,0.0,0.0,0.0,522.094875,603.430330,2666.679896,67.087204,-54.191881,33.704608,...,1.0,35.0,148.0,54.768166,1.0,353.919081,42.0,250.0,291.210439,0.0,39.118080,0.0,-46.264096,-47.716517,0.0,334.452197,484.894106,875.451403,76.0,218.0,502.555489,0.0,0.0,0.0,208.077672,0.0,0.0,0.0,6.0,0.0,85.0,0.0,220.856363,347.157907,1.0,1.0,0.0,1.0,1.0,1.0
6128,1402792850,14.0,6.0,1.0,0.0,0.0,0.0,0.0,868.853981,,3.0,1.0,1.0,41.709484,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,203.339456,-100.0,2.506877,-7.133139,2.6,-30.553400,0.0,0.0,0.0,0.0,0.0,0.0,6.625226,29.402575,-39.601529,74.430794,-91.266388,3030.350822,...,1.0,31.0,35.0,10455.138655,5.0,965.525841,22.0,102.0,123.503082,0.0,981.871875,681.0,25602.701961,26631.054684,34.0,38.386816,77.138600,145.675786,19.0,21.0,534.322561,182.0,0.0,0.0,496.198616,182.0,0.0,0.0,37.0,0.0,28.0,0.0,558.439963,891.069074,1.0,1.0,0.0,2.0,2.0,1.0
6129,1032223762,1.0,1.0,0.0,0.0,0.0,0.0,0.0,-13.849651,-30.745913,0.0,0.0,0.0,-30.929532,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071182,0.0,7.475321,1.729710,0.0,20.192864,0.0,0.0,0.0,0.0,0.0,0.0,-35.113922,-26.468310,20.624682,-33.640450,-55.017116,7.039664,...,2.0,13.0,25.0,-36.309658,1.0,108.780349,0.0,44.0,40.632235,0.0,-15.142559,0.0,20.469993,119.402192,0.0,63.201818,63.717064,233.537834,32.0,26.0,105.544543,0.0,0.0,0.0,25.463984,0.0,0.0,0.0,0.0,0.0,1.0,0.0,69.275919,53.229676,0.0,0.0,0.0,1.0,2.0,2.0


In [464]:
db_test = db_test.drop(db_test.columns[s], axis=1)  # удаляем признаки, которые не участвовали в обучении

In [465]:
x_test = db_test.iloc[:, 1:] # убираем ID из выборки
b = model_XGB .predict(x_test) # получаем серию предсказаний
db_test_new = db_test.assign(TARGET = b) # добавляем к начальному датасету столбец с предсказаниями
db_save = db_test_new[['ID','TARGET']] # оставляем только необходимые столбцы
db_save.to_csv('contest_answer.csv', index=False, header=True) # сохраняем файл без индексов и в требуемом порядке