# GET DATA

In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm
import copy

In [2]:
train = pd.read_csv('Data/Train.csv')
test = pd.read_csv('Data/Test.csv')
submission = pd.read_csv('Data/SampleSubmission.csv')

# NEW FORMAT

In [3]:
X_train = []
X_train_columns = train.columns
c = 0
for v in train.values:
  info = v[:8]
  binary = v[8:]
  index = [k for k, i in enumerate(binary) if i == 1]
  for i in index:
    c+=1
    for k in range(len(binary)):
      if k == i:
        binary_transformed = list(copy.copy(binary))
        binary_transformed[i] = 0
        X_train.append(list(info) + binary_transformed + [X_train_columns[8+k]] + [c])

X_train = pd.DataFrame(X_train)
X_train.columns = ['ID', 'join_date', 'sex', 'marital_status', 'birth_year', 'branch_code',
       'occupation_code', 'occupation_category_code', 'P5DA', 'RIBP', '8NN1',
       '7POT', '66FJ', 'GYSR', 'SOP4', 'RVSZ', 'PYUQ', 'LJR9', 'N2MW', 'AHXO',
       'BSTQ', 'FM3X', 'K6QO', 'QBOL', 'JWFN', 'JZ9D', 'J9JW', 'GHYX', 'ECY3', 'product_pred', 'ID2']

In [4]:
X_test = []
true_values = []
c = 0
for v in test.values:
  c += 1
  info = v[:8]
  binary = v[8:]
  index = [k for k, i in enumerate(binary) if i == 1]
  X_test.append(list(info) + list(binary) + [c])
  for k in test.columns[8:][index]:
    true_values.append(v[0] + ' X ' + k)

X_test = pd.DataFrame(X_test)
X_test.columns = ['ID', 'join_date', 'sex', 'marital_status', 'birth_year', 'branch_code',
       'occupation_code', 'occupation_category_code', 'P5DA', 'RIBP', '8NN1',
       '7POT', '66FJ', 'GYSR', 'SOP4', 'RVSZ', 'PYUQ', 'LJR9', 'N2MW', 'AHXO',
       'BSTQ', 'FM3X', 'K6QO', 'QBOL', 'JWFN', 'JZ9D', 'J9JW', 'GHYX', 'ECY3', 'ID2']

# TRANSFORM DATA

In [5]:
features_train = []
features_test = []
columns = []

append_features = ['P5DA', 'RIBP', '8NN1', '7POT', '66FJ', 'GYSR', 'SOP4', 'RVSZ', 'PYUQ', 'LJR9', 
'N2MW', 'AHXO','BSTQ', 'FM3X', 'K6QO', 'QBOL', 'JWFN', 'JZ9D', 'J9JW', 'GHYX', 
'ECY3', 'ID', 'ID2', 'join_date', 'sex', 'marital_status', 'branch_code', 'occupation_code', 'occupation_category_code',
'birth_year']
for v in append_features:
  features_train.append(X_train[v].values.reshape(-1, 1))
  features_test.append(X_test[v].values.reshape(-1, 1))
  columns.append(np.array([v]))

y_train = X_train[['product_pred']]

In [6]:
features_train = np.concatenate(features_train, axis=1)
features_test = np.concatenate(features_test, axis=1)
columns = np.concatenate(np.array(columns))

X_train = pd.DataFrame(features_train)
X_train.columns = columns
X_test = pd.DataFrame(features_test)
X_test.columns = columns

# NEW FEATURES

In [7]:
X_train['date1'] = X_train['join_date'].apply(lambda x: int(x.split('/')[0]) if (x == x) else np.nan)
X_train['date2'] = X_train['join_date'].apply(lambda x: int(x.split('/')[1]) if (x == x) else np.nan)
X_train['date3'] = X_train['join_date'].apply(lambda x: int(x.split('/')[2]) if (x == x) else np.nan)
X_train.drop('join_date', axis=1, inplace=True)

X_test['date1'] = X_test['join_date'].apply(lambda x: int(x.split('/')[0]) if (x == x) else np.nan)
X_test['date2'] = X_test['join_date'].apply(lambda x: int(x.split('/')[1]) if (x == x) else np.nan)
X_test['date3'] = X_test['join_date'].apply(lambda x: int(x.split('/')[2]) if (x == x) else np.nan)
X_test.drop('join_date', axis=1, inplace=True)

X_train['date_diff'] = X_train['date3'] - X_train['birth_year']
X_test['date_diff'] = X_test['date3'] - X_test['birth_year']

# CHANGE TYPES

In [8]:
X_train = X_train.fillna(0)
X_test = X_test.fillna(0)
y_train = y_train.fillna(0)

In [9]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
data = X_train.append(X_test)
for v in ['sex', 'marital_status', 'branch_code', 'occupation_code', 'occupation_category_code',]:
  data.loc[:,v] = le.fit_transform(data.loc[:,v])
X_train = data[:X_train.shape[0]]
X_test = data[-X_test.shape[0]:]

In [10]:
le.fit(y_train.iloc[:,0])
y_train = pd.DataFrame(le.transform(y_train.iloc[:,0]))
y_train.columns = ['target']

# MODEL

In [11]:
from catboost import CatBoostClassifier
model = CatBoostClassifier(iterations=2000)
model.fit(X_train.drop(columns=['ID', 'ID2']), y_train, cat_features=['sex','marital_status','branch_code','occupation_code','occupation_category_code'])

Learning rate set to 0.054559
0:	learn: 2.3045876	total: 4.28s	remaining: 2h 22m 33s
1:	learn: 2.0324586	total: 8.02s	remaining: 2h 13m 32s
2:	learn: 1.8435748	total: 12.8s	remaining: 2h 22m 20s
3:	learn: 1.7121058	total: 15.5s	remaining: 2h 9m 13s
4:	learn: 1.5907272	total: 20.1s	remaining: 2h 13m 47s
5:	learn: 1.4920917	total: 24.4s	remaining: 2h 14m 59s
6:	learn: 1.4109468	total: 28.2s	remaining: 2h 13m 35s
7:	learn: 1.3401344	total: 32.1s	remaining: 2h 13m 18s
8:	learn: 1.2774178	total: 35.8s	remaining: 2h 11m 59s
9:	learn: 1.2227315	total: 39.6s	remaining: 2h 11m 29s
10:	learn: 1.1730231	total: 44s	remaining: 2h 12m 40s
11:	learn: 1.1268018	total: 47.8s	remaining: 2h 11m 57s
12:	learn: 1.0843030	total: 51s	remaining: 2h 9m 53s
13:	learn: 1.0449043	total: 54.3s	remaining: 2h 8m 28s
14:	learn: 1.0106200	total: 57.7s	remaining: 2h 7m 21s
15:	learn: 0.9804906	total: 1m 1s	remaining: 2h 7m 23s
16:	learn: 0.9508918	total: 1m 5s	remaining: 2h 7m 51s
17:	learn: 0.9260242	total: 1m 10s	rem

145:	learn: 0.4404635	total: 11m 9s	remaining: 2h 21m 43s
146:	learn: 0.4400884	total: 11m 14s	remaining: 2h 21m 44s
147:	learn: 0.4399542	total: 11m 20s	remaining: 2h 21m 50s
148:	learn: 0.4392280	total: 11m 24s	remaining: 2h 21m 46s
149:	learn: 0.4384105	total: 11m 28s	remaining: 2h 21m 28s
150:	learn: 0.4375344	total: 11m 32s	remaining: 2h 21m 17s
151:	learn: 0.4366661	total: 11m 36s	remaining: 2h 21m 5s
152:	learn: 0.4360923	total: 11m 40s	remaining: 2h 21m
153:	learn: 0.4356064	total: 11m 46s	remaining: 2h 21m 4s
154:	learn: 0.4352912	total: 11m 51s	remaining: 2h 21m 3s
155:	learn: 0.4349360	total: 11m 56s	remaining: 2h 21m 5s
156:	learn: 0.4346473	total: 12m 1s	remaining: 2h 21m 5s
157:	learn: 0.4341978	total: 12m 6s	remaining: 2h 21m 11s
158:	learn: 0.4338346	total: 12m 11s	remaining: 2h 21m 14s
159:	learn: 0.4334461	total: 12m 15s	remaining: 2h 21m
160:	learn: 0.4327345	total: 12m 20s	remaining: 2h 20m 54s
161:	learn: 0.4323253	total: 12m 24s	remaining: 2h 20m 48s
162:	learn: 0

286:	learn: 0.3889529	total: 21m 7s	remaining: 2h 6m 5s
287:	learn: 0.3887791	total: 21m 11s	remaining: 2h 5m 58s
288:	learn: 0.3886159	total: 21m 15s	remaining: 2h 5m 50s
289:	learn: 0.3884089	total: 21m 19s	remaining: 2h 5m 47s
290:	learn: 0.3883278	total: 21m 24s	remaining: 2h 5m 43s
291:	learn: 0.3880570	total: 21m 28s	remaining: 2h 5m 37s
292:	learn: 0.3877378	total: 21m 32s	remaining: 2h 5m 31s
293:	learn: 0.3875902	total: 21m 37s	remaining: 2h 5m 26s
294:	learn: 0.3874267	total: 21m 42s	remaining: 2h 5m 25s
295:	learn: 0.3872015	total: 21m 45s	remaining: 2h 5m 16s
296:	learn: 0.3867419	total: 21m 49s	remaining: 2h 5m 7s
297:	learn: 0.3866968	total: 21m 54s	remaining: 2h 5m 4s
298:	learn: 0.3866249	total: 21m 58s	remaining: 2h 5m 2s
299:	learn: 0.3862559	total: 22m 2s	remaining: 2h 4m 51s
300:	learn: 0.3859854	total: 22m 5s	remaining: 2h 4m 44s
301:	learn: 0.3857498	total: 22m 10s	remaining: 2h 4m 39s
302:	learn: 0.3856595	total: 22m 14s	remaining: 2h 4m 32s
303:	learn: 0.3855068

429:	learn: 0.3687904	total: 32m 9s	remaining: 1h 57m 24s
430:	learn: 0.3686600	total: 32m 13s	remaining: 1h 57m 20s
431:	learn: 0.3686507	total: 32m 19s	remaining: 1h 57m 18s
432:	learn: 0.3686390	total: 32m 22s	remaining: 1h 57m 10s
433:	learn: 0.3685736	total: 32m 27s	remaining: 1h 57m 6s
434:	learn: 0.3682873	total: 32m 31s	remaining: 1h 57m
435:	learn: 0.3682267	total: 32m 36s	remaining: 1h 56m 57s
436:	learn: 0.3681487	total: 32m 42s	remaining: 1h 56m 57s
437:	learn: 0.3680601	total: 32m 47s	remaining: 1h 56m 56s
438:	learn: 0.3679839	total: 32m 54s	remaining: 1h 56m 59s
439:	learn: 0.3678247	total: 32m 58s	remaining: 1h 56m 53s
440:	learn: 0.3678104	total: 33m 2s	remaining: 1h 56m 49s
441:	learn: 0.3677532	total: 33m 6s	remaining: 1h 56m 41s
442:	learn: 0.3674336	total: 33m 9s	remaining: 1h 56m 32s
443:	learn: 0.3673164	total: 33m 14s	remaining: 1h 56m 29s
444:	learn: 0.3672103	total: 33m 19s	remaining: 1h 56m 26s
445:	learn: 0.3671500	total: 33m 23s	remaining: 1h 56m 21s
446:	l

569:	learn: 0.3557043	total: 43m 15s	remaining: 1h 48m 31s
570:	learn: 0.3556445	total: 43m 20s	remaining: 1h 48m 26s
571:	learn: 0.3555577	total: 43m 24s	remaining: 1h 48m 22s
572:	learn: 0.3554466	total: 43m 28s	remaining: 1h 48m 16s
573:	learn: 0.3553284	total: 43m 32s	remaining: 1h 48m 10s
574:	learn: 0.3552723	total: 43m 36s	remaining: 1h 48m 4s
575:	learn: 0.3552597	total: 43m 41s	remaining: 1h 48m
576:	learn: 0.3552237	total: 43m 46s	remaining: 1h 47m 56s
577:	learn: 0.3551856	total: 43m 51s	remaining: 1h 47m 54s
578:	learn: 0.3551510	total: 43m 57s	remaining: 1h 47m 53s
579:	learn: 0.3550418	total: 44m 3s	remaining: 1h 47m 51s
580:	learn: 0.3549763	total: 44m 8s	remaining: 1h 47m 48s
581:	learn: 0.3548941	total: 44m 14s	remaining: 1h 47m 46s
582:	learn: 0.3546759	total: 44m 19s	remaining: 1h 47m 42s
583:	learn: 0.3546372	total: 44m 23s	remaining: 1h 47m 37s
584:	learn: 0.3544853	total: 44m 27s	remaining: 1h 47m 31s
585:	learn: 0.3544261	total: 44m 31s	remaining: 1h 47m 25s
586:

709:	learn: 0.3464417	total: 53m 36s	remaining: 1h 37m 24s
710:	learn: 0.3463620	total: 53m 40s	remaining: 1h 37m 18s
711:	learn: 0.3463354	total: 53m 44s	remaining: 1h 37m 13s
712:	learn: 0.3462865	total: 53m 49s	remaining: 1h 37m 9s
713:	learn: 0.3462376	total: 53m 54s	remaining: 1h 37m 5s
714:	learn: 0.3462206	total: 53m 59s	remaining: 1h 37m 1s
715:	learn: 0.3461833	total: 54m 5s	remaining: 1h 37m
716:	learn: 0.3461716	total: 54m 9s	remaining: 1h 36m 55s
717:	learn: 0.3461334	total: 54m 14s	remaining: 1h 36m 51s
718:	learn: 0.3460399	total: 54m 18s	remaining: 1h 36m 45s
719:	learn: 0.3459969	total: 54m 23s	remaining: 1h 36m 40s
720:	learn: 0.3459814	total: 54m 27s	remaining: 1h 36m 35s
721:	learn: 0.3458985	total: 54m 31s	remaining: 1h 36m 30s
722:	learn: 0.3458465	total: 54m 37s	remaining: 1h 36m 28s
723:	learn: 0.3458457	total: 54m 41s	remaining: 1h 36m 22s
724:	learn: 0.3458415	total: 54m 45s	remaining: 1h 36m 17s
725:	learn: 0.3457289	total: 54m 49s	remaining: 1h 36m 12s
726:	l

848:	learn: 0.3385730	total: 1h 3m 52s	remaining: 1h 26m 35s
849:	learn: 0.3385371	total: 1h 3m 57s	remaining: 1h 26m 31s
850:	learn: 0.3384527	total: 1h 4m 1s	remaining: 1h 26m 27s
851:	learn: 0.3384219	total: 1h 4m 6s	remaining: 1h 26m 22s
852:	learn: 0.3383827	total: 1h 4m 11s	remaining: 1h 26m 19s
853:	learn: 0.3382915	total: 1h 4m 17s	remaining: 1h 26m 16s
854:	learn: 0.3382591	total: 1h 4m 21s	remaining: 1h 26m 11s
855:	learn: 0.3382152	total: 1h 4m 26s	remaining: 1h 26m 7s
856:	learn: 0.3381608	total: 1h 4m 30s	remaining: 1h 26m 2s
857:	learn: 0.3381276	total: 1h 4m 35s	remaining: 1h 25m 57s
858:	learn: 0.3379995	total: 1h 4m 39s	remaining: 1h 25m 52s
859:	learn: 0.3379283	total: 1h 4m 43s	remaining: 1h 25m 48s
860:	learn: 0.3378728	total: 1h 4m 47s	remaining: 1h 25m 42s
861:	learn: 0.3378229	total: 1h 4m 51s	remaining: 1h 25m 37s
862:	learn: 0.3377694	total: 1h 4m 56s	remaining: 1h 25m 33s
863:	learn: 0.3377417	total: 1h 4m 59s	remaining: 1h 25m 27s
864:	learn: 0.3376673	total:

983:	learn: 0.3314420	total: 1h 14m 13s	remaining: 1h 16m 37s
984:	learn: 0.3313985	total: 1h 14m 16s	remaining: 1h 16m 32s
985:	learn: 0.3313540	total: 1h 14m 20s	remaining: 1h 16m 26s
986:	learn: 0.3313149	total: 1h 14m 24s	remaining: 1h 16m 22s
987:	learn: 0.3313017	total: 1h 14m 29s	remaining: 1h 16m 17s
988:	learn: 0.3312418	total: 1h 14m 33s	remaining: 1h 16m 13s
989:	learn: 0.3312171	total: 1h 14m 38s	remaining: 1h 16m 8s
990:	learn: 0.3311960	total: 1h 14m 43s	remaining: 1h 16m 4s
991:	learn: 0.3311798	total: 1h 14m 48s	remaining: 1h 16m
992:	learn: 0.3311092	total: 1h 14m 52s	remaining: 1h 15m 55s
993:	learn: 0.3310724	total: 1h 14m 57s	remaining: 1h 15m 51s
994:	learn: 0.3310282	total: 1h 15m 1s	remaining: 1h 15m 46s
995:	learn: 0.3309180	total: 1h 15m 5s	remaining: 1h 15m 41s
996:	learn: 0.3308040	total: 1h 15m 9s	remaining: 1h 15m 37s
997:	learn: 0.3307555	total: 1h 15m 13s	remaining: 1h 15m 31s
998:	learn: 0.3307058	total: 1h 15m 17s	remaining: 1h 15m 26s
999:	learn: 0.330

1116:	learn: 0.3241568	total: 1h 24m 31s	remaining: 1h 6m 48s
1117:	learn: 0.3241359	total: 1h 24m 37s	remaining: 1h 6m 45s
1118:	learn: 0.3240932	total: 1h 24m 42s	remaining: 1h 6m 41s
1119:	learn: 0.3240766	total: 1h 24m 47s	remaining: 1h 6m 37s
1120:	learn: 0.3240052	total: 1h 24m 52s	remaining: 1h 6m 32s
1121:	learn: 0.3239573	total: 1h 24m 55s	remaining: 1h 6m 27s
1122:	learn: 0.3239452	total: 1h 25m	remaining: 1h 6m 23s
1123:	learn: 0.3239128	total: 1h 25m 4s	remaining: 1h 6m 18s
1124:	learn: 0.3238578	total: 1h 25m 8s	remaining: 1h 6m 13s
1125:	learn: 0.3238312	total: 1h 25m 13s	remaining: 1h 6m 9s
1126:	learn: 0.3238204	total: 1h 25m 18s	remaining: 1h 6m 4s
1127:	learn: 0.3237466	total: 1h 25m 23s	remaining: 1h 6m
1128:	learn: 0.3236760	total: 1h 25m 27s	remaining: 1h 5m 55s
1129:	learn: 0.3236233	total: 1h 25m 32s	remaining: 1h 5m 51s
1130:	learn: 0.3235870	total: 1h 25m 37s	remaining: 1h 5m 47s
1131:	learn: 0.3235638	total: 1h 25m 42s	remaining: 1h 5m 42s
1132:	learn: 0.32352

1252:	learn: 0.3177233	total: 1h 35m 46s	remaining: 57m 6s
1253:	learn: 0.3176872	total: 1h 35m 52s	remaining: 57m 1s
1254:	learn: 0.3176491	total: 1h 35m 58s	remaining: 56m 58s
1255:	learn: 0.3174372	total: 1h 36m 4s	remaining: 56m 54s
1256:	learn: 0.3174266	total: 1h 36m 11s	remaining: 56m 51s
1257:	learn: 0.3174115	total: 1h 36m 16s	remaining: 56m 46s
1258:	learn: 0.3173929	total: 1h 36m 21s	remaining: 56m 42s
1259:	learn: 0.3173664	total: 1h 36m 26s	remaining: 56m 38s
1260:	learn: 0.3173587	total: 1h 36m 34s	remaining: 56m 35s
1261:	learn: 0.3173264	total: 1h 36m 40s	remaining: 56m 31s
1262:	learn: 0.3172795	total: 1h 36m 44s	remaining: 56m 26s
1263:	learn: 0.3172444	total: 1h 36m 48s	remaining: 56m 22s
1264:	learn: 0.3172058	total: 1h 36m 53s	remaining: 56m 18s
1265:	learn: 0.3171818	total: 1h 36m 58s	remaining: 56m 13s
1266:	learn: 0.3171410	total: 1h 37m 5s	remaining: 56m 10s
1267:	learn: 0.3171240	total: 1h 37m 11s	remaining: 56m 6s
1268:	learn: 0.3170951	total: 1h 37m 17s	rema

1390:	learn: 0.3110809	total: 1h 48m 41s	remaining: 47m 35s
1391:	learn: 0.3110731	total: 1h 48m 45s	remaining: 47m 30s
1392:	learn: 0.3110303	total: 1h 48m 51s	remaining: 47m 25s
1393:	learn: 0.3109809	total: 1h 48m 56s	remaining: 47m 21s
1394:	learn: 0.3109475	total: 1h 49m 1s	remaining: 47m 16s
1395:	learn: 0.3109011	total: 1h 49m 6s	remaining: 47m 12s
1396:	learn: 0.3108588	total: 1h 49m 10s	remaining: 47m 7s
1397:	learn: 0.3107919	total: 1h 49m 14s	remaining: 47m 2s
1398:	learn: 0.3107610	total: 1h 49m 19s	remaining: 46m 58s
1399:	learn: 0.3107532	total: 1h 49m 27s	remaining: 46m 54s
1400:	learn: 0.3107316	total: 1h 49m 33s	remaining: 46m 50s
1401:	learn: 0.3106735	total: 1h 49m 37s	remaining: 46m 45s
1402:	learn: 0.3106295	total: 1h 49m 42s	remaining: 46m 41s
1403:	learn: 0.3106234	total: 1h 49m 47s	remaining: 46m 36s
1404:	learn: 0.3106113	total: 1h 49m 52s	remaining: 46m 32s
1405:	learn: 0.3105606	total: 1h 49m 57s	remaining: 46m 27s
1406:	learn: 0.3104920	total: 1h 50m	remaini

1529:	learn: 0.3052603	total: 2h 29s	remaining: 37m
1530:	learn: 0.3052082	total: 2h 33s	remaining: 36m 55s
1531:	learn: 0.3051554	total: 2h 38s	remaining: 36m 51s
1532:	learn: 0.3051098	total: 2h 42s	remaining: 36m 46s
1533:	learn: 0.3050897	total: 2h 46s	remaining: 36m 41s
1534:	learn: 0.3050338	total: 2h 51s	remaining: 36m 36s
1535:	learn: 0.3050236	total: 2h 56s	remaining: 36m 32s
1536:	learn: 0.3050094	total: 2h 1m 1s	remaining: 36m 27s
1537:	learn: 0.3049793	total: 2h 1m 7s	remaining: 36m 23s
1538:	learn: 0.3049544	total: 2h 1m 13s	remaining: 36m 18s
1539:	learn: 0.3049026	total: 2h 1m 19s	remaining: 36m 14s
1540:	learn: 0.3048387	total: 2h 1m 24s	remaining: 36m 9s
1541:	learn: 0.3047784	total: 2h 1m 28s	remaining: 36m 4s
1542:	learn: 0.3047450	total: 2h 1m 35s	remaining: 36m
1543:	learn: 0.3045777	total: 2h 1m 39s	remaining: 35m 55s
1544:	learn: 0.3045314	total: 2h 1m 44s	remaining: 35m 51s
1545:	learn: 0.3045212	total: 2h 1m 48s	remaining: 35m 46s
1546:	learn: 0.3044305	total: 

1669:	learn: 0.2991494	total: 2h 12m 25s	remaining: 26m 9s
1670:	learn: 0.2991208	total: 2h 12m 29s	remaining: 26m 5s
1671:	learn: 0.2990953	total: 2h 12m 34s	remaining: 26m
1672:	learn: 0.2990472	total: 2h 12m 38s	remaining: 25m 55s
1673:	learn: 0.2990188	total: 2h 12m 43s	remaining: 25m 50s
1674:	learn: 0.2989847	total: 2h 12m 50s	remaining: 25m 46s
1675:	learn: 0.2988869	total: 2h 12m 57s	remaining: 25m 42s
1676:	learn: 0.2988364	total: 2h 13m 2s	remaining: 25m 37s
1677:	learn: 0.2987981	total: 2h 13m 8s	remaining: 25m 32s
1678:	learn: 0.2987569	total: 2h 13m 12s	remaining: 25m 28s
1679:	learn: 0.2986941	total: 2h 13m 17s	remaining: 25m 23s
1680:	learn: 0.2985984	total: 2h 13m 20s	remaining: 25m 18s
1681:	learn: 0.2985363	total: 2h 13m 26s	remaining: 25m 13s
1682:	learn: 0.2985206	total: 2h 13m 33s	remaining: 25m 9s
1683:	learn: 0.2984268	total: 2h 13m 39s	remaining: 25m 4s
1684:	learn: 0.2983511	total: 2h 13m 45s	remaining: 25m
1685:	learn: 0.2983066	total: 2h 13m 50s	remaining: 24

1807:	learn: 0.2926554	total: 2h 24m 37s	remaining: 15m 21s
1808:	learn: 0.2926167	total: 2h 24m 41s	remaining: 15m 16s
1809:	learn: 0.2925756	total: 2h 24m 48s	remaining: 15m 12s
1810:	learn: 0.2925331	total: 2h 24m 54s	remaining: 15m 7s
1811:	learn: 0.2925161	total: 2h 25m	remaining: 15m 2s
1812:	learn: 0.2924868	total: 2h 25m 6s	remaining: 14m 57s
1813:	learn: 0.2924786	total: 2h 25m 10s	remaining: 14m 53s
1814:	learn: 0.2924149	total: 2h 25m 17s	remaining: 14m 48s
1815:	learn: 0.2923393	total: 2h 25m 22s	remaining: 14m 43s
1816:	learn: 0.2922640	total: 2h 25m 27s	remaining: 14m 38s
1817:	learn: 0.2920789	total: 2h 25m 31s	remaining: 14m 34s
1818:	learn: 0.2920437	total: 2h 25m 36s	remaining: 14m 29s
1819:	learn: 0.2920140	total: 2h 25m 41s	remaining: 14m 24s
1820:	learn: 0.2919392	total: 2h 25m 47s	remaining: 14m 19s
1821:	learn: 0.2918850	total: 2h 25m 52s	remaining: 14m 15s
1822:	learn: 0.2918653	total: 2h 25m 58s	remaining: 14m 10s
1823:	learn: 0.2918310	total: 2h 26m 5s	remaini

1946:	learn: 0.2865434	total: 2h 36m 48s	remaining: 4m 16s
1947:	learn: 0.2864934	total: 2h 36m 52s	remaining: 4m 11s
1948:	learn: 0.2864029	total: 2h 36m 58s	remaining: 4m 6s
1949:	learn: 0.2863590	total: 2h 37m 4s	remaining: 4m 1s
1950:	learn: 0.2862632	total: 2h 37m 10s	remaining: 3m 56s
1951:	learn: 0.2862333	total: 2h 37m 16s	remaining: 3m 52s
1952:	learn: 0.2862042	total: 2h 37m 22s	remaining: 3m 47s
1953:	learn: 0.2861585	total: 2h 37m 27s	remaining: 3m 42s
1954:	learn: 0.2861367	total: 2h 37m 34s	remaining: 3m 37s
1955:	learn: 0.2860984	total: 2h 37m 39s	remaining: 3m 32s
1956:	learn: 0.2860644	total: 2h 37m 44s	remaining: 3m 27s
1957:	learn: 0.2860164	total: 2h 37m 48s	remaining: 3m 23s
1958:	learn: 0.2859168	total: 2h 37m 53s	remaining: 3m 18s
1959:	learn: 0.2858911	total: 2h 37m 57s	remaining: 3m 13s
1960:	learn: 0.2858257	total: 2h 38m 2s	remaining: 3m 8s
1961:	learn: 0.2857972	total: 2h 38m 7s	remaining: 3m 3s
1962:	learn: 0.2857919	total: 2h 38m 14s	remaining: 2m 58s
1963

<catboost.core.CatBoostClassifier at 0x20d0147bd30>

In [12]:
proba = model.predict_proba(X_test.drop(columns=['ID','ID2'], axis=1))
y_test = pd.DataFrame(proba)
print(y_test.columns)
y_test.columns = le.inverse_transform(y_test.columns)

RangeIndex(start=0, stop=21, step=1)


# SUBMIT

In [13]:
print(y_test.columns)
answer_mass = []
for i in range(X_test.shape[0]):
  id = X_test['ID'].iloc[i]
  for c in y_test.columns:
    answer_mass.append([id + ' X ' + str(c), y_test[c].iloc[i]])
    

df_answer = pd.DataFrame(answer_mass)
df_answer.columns = ['ID X PCODE', 'Label']
for i in range(df_answer.shape[0]):
  if df_answer['ID X PCODE'].iloc[i] in true_values:
    df_answer['Label'].iloc[i] = 1.0

Index(['66FJ', '7POT', '8NN1', 'AHXO', 'BSTQ', 'ECY3', 'FM3X', 'GHYX', 'GYSR',
       'J9JW', 'JWFN', 'JZ9D', 'K6QO', 'LJR9', 'N2MW', 'P5DA', 'PYUQ', 'QBOL',
       'RIBP', 'RVSZ', 'SOP4'],
      dtype='object')


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_with_indexer(indexer, value)


In [14]:
df_answer.reset_index(drop=True, inplace=True)
df_answer.to_csv('submission.csv', index=False)