# GET DATA

In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm
import copy

In [2]:
train = pd.read_csv('Data/Train.csv')
test = pd.read_csv('Data/Test.csv')
submission = pd.read_csv('Data/SampleSubmission.csv')

# NEW FORMAT

In [3]:
X_train = []
X_train_columns = train.columns
c = 0
for v in train.values:
  info = v[:8]
  binary = v[8:]
  index = [k for k, i in enumerate(binary) if i == 1]
  for i in index:
    c+=1
    for k in range(len(binary)):
      if k == i:
        binary_transformed = list(copy.copy(binary))
        binary_transformed[i] = 0
        X_train.append(list(info) + binary_transformed + [X_train_columns[8+k]] + [c])

X_train = pd.DataFrame(X_train)
X_train.columns = ['ID', 'join_date', 'sex', 'marital_status', 'birth_year', 'branch_code',
       'occupation_code', 'occupation_category_code', 'P5DA', 'RIBP', '8NN1',
       '7POT', '66FJ', 'GYSR', 'SOP4', 'RVSZ', 'PYUQ', 'LJR9', 'N2MW', 'AHXO',
       'BSTQ', 'FM3X', 'K6QO', 'QBOL', 'JWFN', 'JZ9D', 'J9JW', 'GHYX', 'ECY3', 'product_pred', 'ID2']

In [4]:
X_test = []
true_values = []
c = 0
for v in test.values:
  c += 1
  info = v[:8]
  binary = v[8:]
  index = [k for k, i in enumerate(binary) if i == 1]
  X_test.append(list(info) + list(binary) + [c])
  for k in test.columns[8:][index]:
    true_values.append(v[0] + ' X ' + k)

X_test = pd.DataFrame(X_test)
X_test.columns = ['ID', 'join_date', 'sex', 'marital_status', 'birth_year', 'branch_code',
       'occupation_code', 'occupation_category_code', 'P5DA', 'RIBP', '8NN1',
       '7POT', '66FJ', 'GYSR', 'SOP4', 'RVSZ', 'PYUQ', 'LJR9', 'N2MW', 'AHXO',
       'BSTQ', 'FM3X', 'K6QO', 'QBOL', 'JWFN', 'JZ9D', 'J9JW', 'GHYX', 'ECY3', 'ID2']

# TRANSFORM DATA

In [5]:
features_train = []
features_test = []
columns = []

append_features = ['P5DA', 'RIBP', '8NN1', '7POT', '66FJ', 'GYSR', 'SOP4', 'RVSZ', 'PYUQ', 'LJR9', 
'N2MW', 'AHXO','BSTQ', 'FM3X', 'K6QO', 'QBOL', 'JWFN', 'JZ9D', 'J9JW', 'GHYX', 
'ECY3', 'ID', 'ID2', 'join_date', 'sex', 'marital_status', 'branch_code', 'occupation_code', 'occupation_category_code',
'birth_year']
for v in append_features:
  features_train.append(X_train[v].values.reshape(-1, 1))
  features_test.append(X_test[v].values.reshape(-1, 1))
  columns.append(np.array([v]))

y_train = X_train[['product_pred']]

In [6]:
features_train = np.concatenate(features_train, axis=1)
features_test = np.concatenate(features_test, axis=1)
columns = np.concatenate(np.array(columns))

X_train = pd.DataFrame(features_train)
X_train.columns = columns
X_test = pd.DataFrame(features_test)
X_test.columns = columns

# NEW FEATURES

In [7]:
X_train['date1'] = X_train['join_date'].apply(lambda x: int(x.split('/')[0]) if (x == x) else np.nan)
X_train['date2'] = X_train['join_date'].apply(lambda x: int(x.split('/')[1]) if (x == x) else np.nan)
X_train['date3'] = X_train['join_date'].apply(lambda x: int(x.split('/')[2]) if (x == x) else np.nan)
X_train.drop('join_date', axis=1, inplace=True)

X_test['date1'] = X_test['join_date'].apply(lambda x: int(x.split('/')[0]) if (x == x) else np.nan)
X_test['date2'] = X_test['join_date'].apply(lambda x: int(x.split('/')[1]) if (x == x) else np.nan)
X_test['date3'] = X_test['join_date'].apply(lambda x: int(x.split('/')[2]) if (x == x) else np.nan)
X_test.drop('join_date', axis=1, inplace=True)

X_train['date_diff'] = X_train['date3'] - X_train['birth_year']
X_test['date_diff'] = X_test['date3'] - X_test['birth_year']

# CHANGE TYPES

In [8]:
X_train = X_train.fillna(0)
X_test = X_test.fillna(0)
y_train = y_train.fillna(0)

In [9]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
data = X_train.append(X_test)
for v in ['sex', 'marital_status', 'branch_code', 'occupation_code', 'occupation_category_code',]:
  data.loc[:,v] = le.fit_transform(data.loc[:,v])
X_train = data[:X_train.shape[0]]
X_test = data[-X_test.shape[0]:]

In [10]:
le.fit(y_train.iloc[:,0])
y_train = pd.DataFrame(le.transform(y_train.iloc[:,0]))
y_train.columns = ['target']

# MODEL

In [11]:
from catboost import CatBoostClassifier
model = CatBoostClassifier(iterations=1750)
model.fit(X_train.drop(columns=['ID', 'ID2']), y_train, cat_features=['sex','marital_status','branch_code','occupation_code','occupation_category_code'])

Learning rate set to 0.061076
0:	learn: 2.2214642	total: 4.2s	remaining: 2h 2m 19s
1:	learn: 1.9514087	total: 8.2s	remaining: 1h 59m 23s
2:	learn: 1.7630698	total: 11.9s	remaining: 1h 55m 23s
3:	learn: 1.6335146	total: 14.4s	remaining: 1h 45m 6s
4:	learn: 1.5138922	total: 19.4s	remaining: 1h 52m 51s
5:	learn: 1.4166809	total: 22.9s	remaining: 1h 51m 8s
6:	learn: 1.3373491	total: 27s	remaining: 1h 52m 2s
7:	learn: 1.2682933	total: 30.7s	remaining: 1h 51m 17s
8:	learn: 1.2074731	total: 34.5s	remaining: 1h 51m 16s
9:	learn: 1.1547693	total: 38.4s	remaining: 1h 51m 22s
10:	learn: 1.1067741	total: 42.5s	remaining: 1h 51m 55s
11:	learn: 1.0621013	total: 46.2s	remaining: 1h 51m 28s
12:	learn: 1.0250812	total: 49.9s	remaining: 1h 51m 11s
13:	learn: 0.9882269	total: 54s	remaining: 1h 51m 42s
14:	learn: 0.9565166	total: 58.1s	remaining: 1h 51m 56s
15:	learn: 0.9266555	total: 1m 1s	remaining: 1h 51m 57s
16:	learn: 0.8992684	total: 1m 6s	remaining: 1h 52m 18s
17:	learn: 0.8743201	total: 1m 9s	rema

144:	learn: 0.4314104	total: 9m 47s	remaining: 1h 48m 24s
145:	learn: 0.4308255	total: 9m 52s	remaining: 1h 48m 34s
146:	learn: 0.4305778	total: 9m 57s	remaining: 1h 48m 35s
147:	learn: 0.4299585	total: 10m 1s	remaining: 1h 48m 29s
148:	learn: 0.4293002	total: 10m 5s	remaining: 1h 48m 22s
149:	learn: 0.4286320	total: 10m 8s	remaining: 1h 48m 11s
150:	learn: 0.4284971	total: 10m 13s	remaining: 1h 48m 15s
151:	learn: 0.4279774	total: 10m 17s	remaining: 1h 48m 8s
152:	learn: 0.4277046	total: 10m 21s	remaining: 1h 48m 10s
153:	learn: 0.4272313	total: 10m 26s	remaining: 1h 48m 14s
154:	learn: 0.4270484	total: 10m 32s	remaining: 1h 48m 28s
155:	learn: 0.4265376	total: 10m 37s	remaining: 1h 48m 35s
156:	learn: 0.4260372	total: 10m 42s	remaining: 1h 48m 36s
157:	learn: 0.4257743	total: 10m 47s	remaining: 1h 48m 45s
158:	learn: 0.4255568	total: 10m 53s	remaining: 1h 49m 2s
159:	learn: 0.4249080	total: 10m 57s	remaining: 1h 48m 52s
160:	learn: 0.4246298	total: 11m 2s	remaining: 1h 48m 56s
161:	l

284:	learn: 0.3827652	total: 20m 27s	remaining: 1h 45m 10s
285:	learn: 0.3826670	total: 20m 33s	remaining: 1h 45m 16s
286:	learn: 0.3824592	total: 20m 39s	remaining: 1h 45m 17s
287:	learn: 0.3823768	total: 20m 44s	remaining: 1h 45m 15s
288:	learn: 0.3822659	total: 20m 49s	remaining: 1h 45m 16s
289:	learn: 0.3821250	total: 20m 54s	remaining: 1h 45m 16s
290:	learn: 0.3819844	total: 21m	remaining: 1h 45m 17s
291:	learn: 0.3817273	total: 21m 4s	remaining: 1h 45m 11s
292:	learn: 0.3816939	total: 21m 8s	remaining: 1h 45m 7s
293:	learn: 0.3814594	total: 21m 12s	remaining: 1h 45m 2s
294:	learn: 0.3813067	total: 21m 16s	remaining: 1h 44m 56s
295:	learn: 0.3811960	total: 21m 21s	remaining: 1h 44m 53s
296:	learn: 0.3810191	total: 21m 26s	remaining: 1h 44m 54s
297:	learn: 0.3808546	total: 21m 32s	remaining: 1h 44m 57s
298:	learn: 0.3808005	total: 21m 37s	remaining: 1h 44m 56s
299:	learn: 0.3807200	total: 21m 42s	remaining: 1h 44m 55s
300:	learn: 0.3805871	total: 21m 48s	remaining: 1h 44m 56s
301:	

424:	learn: 0.3639328	total: 32m 21s	remaining: 1h 40m 54s
425:	learn: 0.3637504	total: 32m 27s	remaining: 1h 40m 52s
426:	learn: 0.3636488	total: 32m 32s	remaining: 1h 40m 49s
427:	learn: 0.3635905	total: 32m 37s	remaining: 1h 40m 45s
428:	learn: 0.3635696	total: 32m 42s	remaining: 1h 40m 42s
429:	learn: 0.3635063	total: 32m 47s	remaining: 1h 40m 39s
430:	learn: 0.3634372	total: 32m 51s	remaining: 1h 40m 34s
431:	learn: 0.3634046	total: 32m 58s	remaining: 1h 40m 35s
432:	learn: 0.3632651	total: 33m 4s	remaining: 1h 40m 34s
433:	learn: 0.3632100	total: 33m 10s	remaining: 1h 40m 35s
434:	learn: 0.3630104	total: 33m 15s	remaining: 1h 40m 33s
435:	learn: 0.3629597	total: 33m 21s	remaining: 1h 40m 32s
436:	learn: 0.3628796	total: 33m 26s	remaining: 1h 40m 29s
437:	learn: 0.3628413	total: 33m 33s	remaining: 1h 40m 30s
438:	learn: 0.3627826	total: 33m 38s	remaining: 1h 40m 27s
439:	learn: 0.3626867	total: 33m 42s	remaining: 1h 40m 22s
440:	learn: 0.3624052	total: 33m 46s	remaining: 1h 40m 14

564:	learn: 0.3523093	total: 44m 14s	remaining: 1h 32m 48s
565:	learn: 0.3523063	total: 44m 18s	remaining: 1h 32m 41s
566:	learn: 0.3522454	total: 44m 22s	remaining: 1h 32m 35s
567:	learn: 0.3522170	total: 44m 28s	remaining: 1h 32m 33s
568:	learn: 0.3521417	total: 44m 34s	remaining: 1h 32m 30s
569:	learn: 0.3521055	total: 44m 40s	remaining: 1h 32m 29s
570:	learn: 0.3519779	total: 44m 45s	remaining: 1h 32m 25s
571:	learn: 0.3518752	total: 44m 50s	remaining: 1h 32m 21s
572:	learn: 0.3517216	total: 44m 56s	remaining: 1h 32m 17s
573:	learn: 0.3516166	total: 45m 1s	remaining: 1h 32m 14s
574:	learn: 0.3514628	total: 45m 7s	remaining: 1h 32m 12s
575:	learn: 0.3514074	total: 45m 12s	remaining: 1h 32m 7s
576:	learn: 0.3512777	total: 45m 16s	remaining: 1h 32m 1s
577:	learn: 0.3512487	total: 45m 21s	remaining: 1h 31m 57s
578:	learn: 0.3511470	total: 45m 27s	remaining: 1h 31m 56s
579:	learn: 0.3511156	total: 45m 33s	remaining: 1h 31m 53s
580:	learn: 0.3510712	total: 45m 39s	remaining: 1h 31m 52s
5

704:	learn: 0.3417689	total: 56m 19s	remaining: 1h 23m 29s
705:	learn: 0.3417287	total: 56m 26s	remaining: 1h 23m 28s
706:	learn: 0.3417093	total: 56m 32s	remaining: 1h 23m 24s
707:	learn: 0.3416216	total: 56m 37s	remaining: 1h 23m 20s
708:	learn: 0.3415745	total: 56m 41s	remaining: 1h 23m 14s
709:	learn: 0.3414968	total: 56m 45s	remaining: 1h 23m 8s
710:	learn: 0.3414699	total: 56m 51s	remaining: 1h 23m 5s
711:	learn: 0.3414071	total: 56m 57s	remaining: 1h 23m 2s
712:	learn: 0.3413702	total: 57m 3s	remaining: 1h 22m 59s
713:	learn: 0.3413328	total: 57m 9s	remaining: 1h 22m 55s
714:	learn: 0.3412969	total: 57m 14s	remaining: 1h 22m 51s
715:	learn: 0.3412348	total: 57m 20s	remaining: 1h 22m 48s
716:	learn: 0.3411841	total: 57m 25s	remaining: 1h 22m 43s
717:	learn: 0.3410845	total: 57m 30s	remaining: 1h 22m 40s
718:	learn: 0.3410424	total: 57m 35s	remaining: 1h 22m 35s
719:	learn: 0.3409401	total: 57m 40s	remaining: 1h 22m 30s
720:	learn: 0.3408943	total: 57m 44s	remaining: 1h 22m 24s
72

842:	learn: 0.3334550	total: 1h 8m 27s	remaining: 1h 13m 39s
843:	learn: 0.3334409	total: 1h 8m 33s	remaining: 1h 13m 35s
844:	learn: 0.3334233	total: 1h 8m 38s	remaining: 1h 13m 31s
845:	learn: 0.3333923	total: 1h 8m 42s	remaining: 1h 13m 25s
846:	learn: 0.3333548	total: 1h 8m 47s	remaining: 1h 13m 19s
847:	learn: 0.3332697	total: 1h 8m 51s	remaining: 1h 13m 14s
848:	learn: 0.3332313	total: 1h 8m 55s	remaining: 1h 13m 9s
849:	learn: 0.3332076	total: 1h 9m 1s	remaining: 1h 13m 5s
850:	learn: 0.3331896	total: 1h 9m 7s	remaining: 1h 13m 1s
851:	learn: 0.3331490	total: 1h 9m 12s	remaining: 1h 12m 56s
852:	learn: 0.3331253	total: 1h 9m 16s	remaining: 1h 12m 51s
853:	learn: 0.3330961	total: 1h 9m 21s	remaining: 1h 12m 45s
854:	learn: 0.3330311	total: 1h 9m 27s	remaining: 1h 12m 42s
855:	learn: 0.3329864	total: 1h 9m 33s	remaining: 1h 12m 38s
856:	learn: 0.3329354	total: 1h 9m 39s	remaining: 1h 12m 35s
857:	learn: 0.3329165	total: 1h 9m 45s	remaining: 1h 12m 31s
858:	learn: 0.3328242	total: 

977:	learn: 0.3269374	total: 1h 20m 12s	remaining: 1h 3m 18s
978:	learn: 0.3269291	total: 1h 20m 16s	remaining: 1h 3m 13s
979:	learn: 0.3269107	total: 1h 20m 22s	remaining: 1h 3m 8s
980:	learn: 0.3268261	total: 1h 20m 27s	remaining: 1h 3m 4s
981:	learn: 0.3267806	total: 1h 20m 33s	remaining: 1h 3m
982:	learn: 0.3266629	total: 1h 20m 38s	remaining: 1h 2m 55s
983:	learn: 0.3266300	total: 1h 20m 44s	remaining: 1h 2m 51s
984:	learn: 0.3265353	total: 1h 20m 48s	remaining: 1h 2m 45s
985:	learn: 0.3264664	total: 1h 20m 54s	remaining: 1h 2m 41s
986:	learn: 0.3263999	total: 1h 20m 58s	remaining: 1h 2m 36s
987:	learn: 0.3263846	total: 1h 21m 3s	remaining: 1h 2m 31s
988:	learn: 0.3263698	total: 1h 21m 7s	remaining: 1h 2m 25s
989:	learn: 0.3263450	total: 1h 21m 13s	remaining: 1h 2m 21s
990:	learn: 0.3263357	total: 1h 21m 19s	remaining: 1h 2m 16s
991:	learn: 0.3261842	total: 1h 21m 24s	remaining: 1h 2m 12s
992:	learn: 0.3261504	total: 1h 21m 29s	remaining: 1h 2m 7s
993:	learn: 0.3261369	total: 1h 2

1115:	learn: 0.3185821	total: 1h 32m 8s	remaining: 52m 20s
1116:	learn: 0.3185569	total: 1h 32m 14s	remaining: 52m 16s
1117:	learn: 0.3185114	total: 1h 32m 20s	remaining: 52m 12s
1118:	learn: 0.3184467	total: 1h 32m 27s	remaining: 52m 8s
1119:	learn: 0.3184096	total: 1h 32m 32s	remaining: 52m 3s
1120:	learn: 0.3183758	total: 1h 32m 38s	remaining: 51m 58s
1121:	learn: 0.3183409	total: 1h 32m 44s	remaining: 51m 54s
1122:	learn: 0.3183005	total: 1h 32m 50s	remaining: 51m 49s
1123:	learn: 0.3182394	total: 1h 32m 54s	remaining: 51m 44s
1124:	learn: 0.3182245	total: 1h 32m 59s	remaining: 51m 39s
1125:	learn: 0.3181043	total: 1h 33m 5s	remaining: 51m 35s
1126:	learn: 0.3180775	total: 1h 33m 11s	remaining: 51m 30s
1127:	learn: 0.3180317	total: 1h 33m 15s	remaining: 51m 25s
1128:	learn: 0.3180105	total: 1h 33m 18s	remaining: 51m 19s
1129:	learn: 0.3179221	total: 1h 33m 23s	remaining: 51m 14s
1130:	learn: 0.3179051	total: 1h 33m 26s	remaining: 51m 8s
1131:	learn: 0.3178900	total: 1h 33m 32s	rema

1253:	learn: 0.3111294	total: 1h 44m 23s	remaining: 41m 17s
1254:	learn: 0.3110605	total: 1h 44m 29s	remaining: 41m 13s
1255:	learn: 0.3110444	total: 1h 44m 34s	remaining: 41m 7s
1256:	learn: 0.3109746	total: 1h 44m 40s	remaining: 41m 3s
1257:	learn: 0.3109375	total: 1h 44m 45s	remaining: 40m 58s
1258:	learn: 0.3108913	total: 1h 44m 51s	remaining: 40m 53s
1259:	learn: 0.3107857	total: 1h 44m 56s	remaining: 40m 48s
1260:	learn: 0.3107663	total: 1h 45m 2s	remaining: 40m 44s
1261:	learn: 0.3107444	total: 1h 45m 8s	remaining: 40m 39s
1262:	learn: 0.3106414	total: 1h 45m 12s	remaining: 40m 33s
1263:	learn: 0.3105270	total: 1h 45m 16s	remaining: 40m 28s
1264:	learn: 0.3105071	total: 1h 45m 19s	remaining: 40m 23s
1265:	learn: 0.3104706	total: 1h 45m 25s	remaining: 40m 18s
1266:	learn: 0.3104362	total: 1h 45m 30s	remaining: 40m 13s
1267:	learn: 0.3102974	total: 1h 45m 36s	remaining: 40m 8s
1268:	learn: 0.3102757	total: 1h 45m 41s	remaining: 40m 3s
1269:	learn: 0.3102303	total: 1h 45m 47s	remai

1391:	learn: 0.3039970	total: 1h 56m 41s	remaining: 30m
1392:	learn: 0.3039002	total: 1h 56m 46s	remaining: 29m 55s
1393:	learn: 0.3038847	total: 1h 56m 51s	remaining: 29m 50s
1394:	learn: 0.3038387	total: 1h 56m 57s	remaining: 29m 45s
1395:	learn: 0.3037740	total: 1h 57m 3s	remaining: 29m 41s
1396:	learn: 0.3037364	total: 1h 57m 9s	remaining: 29m 36s
1397:	learn: 0.3037266	total: 1h 57m 14s	remaining: 29m 31s
1398:	learn: 0.3036648	total: 1h 57m 19s	remaining: 29m 26s
1399:	learn: 0.3036345	total: 1h 57m 25s	remaining: 29m 21s
1400:	learn: 0.3035825	total: 1h 57m 30s	remaining: 29m 16s
1401:	learn: 0.3035072	total: 1h 57m 35s	remaining: 29m 11s
1402:	learn: 0.3034591	total: 1h 57m 40s	remaining: 29m 6s
1403:	learn: 0.3033746	total: 1h 57m 44s	remaining: 29m
1404:	learn: 0.3033494	total: 1h 57m 49s	remaining: 28m 55s
1405:	learn: 0.3032759	total: 1h 57m 55s	remaining: 28m 51s
1406:	learn: 0.3032247	total: 1h 58m 1s	remaining: 28m 46s
1407:	learn: 0.3032156	total: 1h 58m 6s	remaining: 2

1531:	learn: 0.2966585	total: 2h 9m 20s	remaining: 18m 24s
1532:	learn: 0.2966178	total: 2h 9m 25s	remaining: 18m 19s
1533:	learn: 0.2965495	total: 2h 9m 29s	remaining: 18m 14s
1534:	learn: 0.2964937	total: 2h 9m 33s	remaining: 18m 8s
1535:	learn: 0.2964669	total: 2h 9m 38s	remaining: 18m 3s
1536:	learn: 0.2963848	total: 2h 9m 43s	remaining: 17m 58s
1537:	learn: 0.2962995	total: 2h 9m 48s	remaining: 17m 53s
1538:	learn: 0.2962666	total: 2h 9m 53s	remaining: 17m 48s
1539:	learn: 0.2962329	total: 2h 9m 58s	remaining: 17m 43s
1540:	learn: 0.2962116	total: 2h 10m 2s	remaining: 17m 38s
1541:	learn: 0.2961756	total: 2h 10m 9s	remaining: 17m 33s
1542:	learn: 0.2961300	total: 2h 10m 15s	remaining: 17m 28s
1543:	learn: 0.2960904	total: 2h 10m 20s	remaining: 17m 23s
1544:	learn: 0.2960480	total: 2h 10m 27s	remaining: 17m 18s
1545:	learn: 0.2959683	total: 2h 10m 30s	remaining: 17m 13s
1546:	learn: 0.2958992	total: 2h 10m 35s	remaining: 17m 8s
1547:	learn: 0.2958636	total: 2h 10m 40s	remaining: 17

1670:	learn: 0.2904340	total: 2h 21m 32s	remaining: 6m 41s
1671:	learn: 0.2903977	total: 2h 21m 38s	remaining: 6m 36s
1672:	learn: 0.2903580	total: 2h 21m 44s	remaining: 6m 31s
1673:	learn: 0.2903122	total: 2h 21m 48s	remaining: 6m 26s
1674:	learn: 0.2903071	total: 2h 21m 55s	remaining: 6m 21s
1675:	learn: 0.2902668	total: 2h 22m 1s	remaining: 6m 16s
1676:	learn: 0.2901937	total: 2h 22m 5s	remaining: 6m 11s
1677:	learn: 0.2901572	total: 2h 22m 10s	remaining: 6m 6s
1678:	learn: 0.2901086	total: 2h 22m 13s	remaining: 6m
1679:	learn: 0.2900808	total: 2h 22m 19s	remaining: 5m 55s
1680:	learn: 0.2900642	total: 2h 22m 25s	remaining: 5m 50s
1681:	learn: 0.2900486	total: 2h 22m 31s	remaining: 5m 45s
1682:	learn: 0.2900372	total: 2h 22m 37s	remaining: 5m 40s
1683:	learn: 0.2899850	total: 2h 22m 43s	remaining: 5m 35s
1684:	learn: 0.2899403	total: 2h 22m 48s	remaining: 5m 30s
1685:	learn: 0.2899298	total: 2h 22m 53s	remaining: 5m 25s
1686:	learn: 0.2898685	total: 2h 22m 59s	remaining: 5m 20s
1687

<catboost.core.CatBoostClassifier at 0x15e2ec6cd00>

In [12]:
proba = model.predict_proba(X_test.drop(columns=['ID','ID2'], axis=1))
y_test = pd.DataFrame(proba)
print(y_test.columns)
y_test.columns = le.inverse_transform(y_test.columns)

RangeIndex(start=0, stop=21, step=1)


# SUBMIT

In [13]:
print(y_test.columns)
answer_mass = []
for i in range(X_test.shape[0]):
  id = X_test['ID'].iloc[i]
  for c in y_test.columns:
    answer_mass.append([id + ' X ' + str(c), y_test[c].iloc[i]])
    

df_answer = pd.DataFrame(answer_mass)
df_answer.columns = ['ID X PCODE', 'Label']
for i in range(df_answer.shape[0]):
  if df_answer['ID X PCODE'].iloc[i] in true_values:
    df_answer['Label'].iloc[i] = 1.0

Index(['66FJ', '7POT', '8NN1', 'AHXO', 'BSTQ', 'ECY3', 'FM3X', 'GHYX', 'GYSR',
       'J9JW', 'JWFN', 'JZ9D', 'K6QO', 'LJR9', 'N2MW', 'P5DA', 'PYUQ', 'QBOL',
       'RIBP', 'RVSZ', 'SOP4'],
      dtype='object')


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_with_indexer(indexer, value)


In [14]:
df_answer.reset_index(drop=True, inplace=True)
df_answer.to_csv('submission.csv', index=False)