# GET DATA

In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm
import copy

In [2]:
train = pd.read_csv('Data/Train.csv')
test = pd.read_csv('Data/Test.csv')
submission = pd.read_csv('Data/SampleSubmission.csv')

# NEW FORMAT

In [3]:
X_train = []
X_train_columns = train.columns
c = 0
for v in train.values:
  info = v[:8]
  binary = v[8:]
  index = [k for k, i in enumerate(binary) if i == 1]
  for i in index:
    c+=1
    for k in range(len(binary)):
      if k == i:
        binary_transformed = list(copy.copy(binary))
        binary_transformed[i] = 0
        X_train.append(list(info) + binary_transformed + [X_train_columns[8+k]] + [c])

X_train = pd.DataFrame(X_train)
X_train.columns = ['ID', 'join_date', 'sex', 'marital_status', 'birth_year', 'branch_code',
       'occupation_code', 'occupation_category_code', 'P5DA', 'RIBP', '8NN1',
       '7POT', '66FJ', 'GYSR', 'SOP4', 'RVSZ', 'PYUQ', 'LJR9', 'N2MW', 'AHXO',
       'BSTQ', 'FM3X', 'K6QO', 'QBOL', 'JWFN', 'JZ9D', 'J9JW', 'GHYX', 'ECY3', 'product_pred', 'ID2']

In [4]:
X_test = []
true_values = []
c = 0
for v in test.values:
  c += 1
  info = v[:8]
  binary = v[8:]
  index = [k for k, i in enumerate(binary) if i == 1]
  X_test.append(list(info) + list(binary) + [c])
  for k in test.columns[8:][index]:
    true_values.append(v[0] + ' X ' + k)

X_test = pd.DataFrame(X_test)
X_test.columns = ['ID', 'join_date', 'sex', 'marital_status', 'birth_year', 'branch_code',
       'occupation_code', 'occupation_category_code', 'P5DA', 'RIBP', '8NN1',
       '7POT', '66FJ', 'GYSR', 'SOP4', 'RVSZ', 'PYUQ', 'LJR9', 'N2MW', 'AHXO',
       'BSTQ', 'FM3X', 'K6QO', 'QBOL', 'JWFN', 'JZ9D', 'J9JW', 'GHYX', 'ECY3', 'ID2']

# TRANSFORM DATA

In [5]:
features_train = []
features_test = []
columns = []

append_features = ['P5DA', 'RIBP', '8NN1', '7POT', '66FJ', 'GYSR', 'SOP4', 'RVSZ', 'PYUQ', 'LJR9', 
'N2MW', 'AHXO','BSTQ', 'FM3X', 'K6QO', 'QBOL', 'JWFN', 'JZ9D', 'J9JW', 'GHYX', 
'ECY3', 'ID', 'ID2', 'join_date', 'sex', 'marital_status', 'branch_code', 'occupation_code', 'occupation_category_code',
'birth_year']
for v in append_features:
  features_train.append(X_train[v].values.reshape(-1, 1))
  features_test.append(X_test[v].values.reshape(-1, 1))
  columns.append(np.array([v]))

y_train = X_train[['product_pred']]

In [6]:
features_train = np.concatenate(features_train, axis=1)
features_test = np.concatenate(features_test, axis=1)
columns = np.concatenate(np.array(columns))

X_train = pd.DataFrame(features_train)
X_train.columns = columns
X_test = pd.DataFrame(features_test)
X_test.columns = columns

# NEW FEATURES

In [7]:
X_train['date1'] = X_train['join_date'].apply(lambda x: int(x.split('/')[0]) if (x == x) else np.nan)
X_train['date2'] = X_train['join_date'].apply(lambda x: int(x.split('/')[1]) if (x == x) else np.nan)
X_train['date3'] = X_train['join_date'].apply(lambda x: int(x.split('/')[2]) if (x == x) else np.nan)
X_train.drop('join_date', axis=1, inplace=True)

X_test['date1'] = X_test['join_date'].apply(lambda x: int(x.split('/')[0]) if (x == x) else np.nan)
X_test['date2'] = X_test['join_date'].apply(lambda x: int(x.split('/')[1]) if (x == x) else np.nan)
X_test['date3'] = X_test['join_date'].apply(lambda x: int(x.split('/')[2]) if (x == x) else np.nan)
X_test.drop('join_date', axis=1, inplace=True)

X_train['date_diff'] = X_train['date3'] - X_train['birth_year']
X_test['date_diff'] = X_test['date3'] - X_test['birth_year']

# CHANGE TYPES

In [8]:
X_train = X_train.fillna(0)
X_test = X_test.fillna(0)
y_train = y_train.fillna(0)

In [9]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
data = X_train.append(X_test)
for v in ['sex', 'marital_status', 'branch_code', 'occupation_code', 'occupation_category_code',]:
  data.loc[:,v] = le.fit_transform(data.loc[:,v])
X_train = data[:X_train.shape[0]]
X_test = data[-X_test.shape[0]:]

In [10]:
le.fit(y_train.iloc[:,0])
y_train = pd.DataFrame(le.transform(y_train.iloc[:,0]))
y_train.columns = ['target']

# MODEL

In [11]:
from catboost import CatBoostClassifier

model = CatBoostClassifier(iterations=2000, learning_rate=0.062559, random_seed=42, task_type="GPU")
model.fit(X_train.drop(columns=['ID', 'ID2']), y_train, cat_features=['sex','marital_status','branch_code','occupation_code','occupation_category_code'])

0:	learn: 2.4170156	total: 59.8ms	remaining: 1m 59s
1:	learn: 2.1659161	total: 118ms	remaining: 1m 57s
2:	learn: 2.0031164	total: 173ms	remaining: 1m 55s
3:	learn: 1.7761165	total: 231ms	remaining: 1m 55s
4:	learn: 1.6178056	total: 286ms	remaining: 1m 54s
5:	learn: 1.4961529	total: 341ms	remaining: 1m 53s
6:	learn: 1.3983548	total: 393ms	remaining: 1m 52s
7:	learn: 1.3161122	total: 450ms	remaining: 1m 52s
8:	learn: 1.2454486	total: 502ms	remaining: 1m 51s
9:	learn: 1.1836466	total: 554ms	remaining: 1m 50s
10:	learn: 1.1300935	total: 609ms	remaining: 1m 50s
11:	learn: 1.0818357	total: 663ms	remaining: 1m 49s
12:	learn: 1.0385617	total: 721ms	remaining: 1m 50s
13:	learn: 0.9995664	total: 778ms	remaining: 1m 50s
14:	learn: 0.9634558	total: 842ms	remaining: 1m 51s
15:	learn: 0.9315449	total: 894ms	remaining: 1m 50s
16:	learn: 0.9020883	total: 947ms	remaining: 1m 50s
17:	learn: 0.8756440	total: 1s	remaining: 1m 50s
18:	learn: 0.8510862	total: 1.05s	remaining: 1m 49s
19:	learn: 0.8286072	tot

159:	learn: 0.4170803	total: 8.69s	remaining: 1m 39s
160:	learn: 0.4164965	total: 8.74s	remaining: 1m 39s
161:	learn: 0.4160770	total: 8.8s	remaining: 1m 39s
162:	learn: 0.4157594	total: 8.85s	remaining: 1m 39s
163:	learn: 0.4153430	total: 8.9s	remaining: 1m 39s
164:	learn: 0.4147006	total: 8.96s	remaining: 1m 39s
165:	learn: 0.4140014	total: 9.02s	remaining: 1m 39s
166:	learn: 0.4135697	total: 9.07s	remaining: 1m 39s
167:	learn: 0.4130886	total: 9.12s	remaining: 1m 39s
168:	learn: 0.4122325	total: 9.18s	remaining: 1m 39s
169:	learn: 0.4115755	total: 9.23s	remaining: 1m 39s
170:	learn: 0.4111266	total: 9.28s	remaining: 1m 39s
171:	learn: 0.4104951	total: 9.34s	remaining: 1m 39s
172:	learn: 0.4101463	total: 9.39s	remaining: 1m 39s
173:	learn: 0.4097994	total: 9.44s	remaining: 1m 39s
174:	learn: 0.4091571	total: 9.49s	remaining: 1m 38s
175:	learn: 0.4085877	total: 9.55s	remaining: 1m 38s
176:	learn: 0.4083097	total: 9.6s	remaining: 1m 38s
177:	learn: 0.4077927	total: 9.65s	remaining: 1m 

315:	learn: 0.3676120	total: 17s	remaining: 1m 30s
316:	learn: 0.3675242	total: 17s	remaining: 1m 30s
317:	learn: 0.3673794	total: 17.1s	remaining: 1m 30s
318:	learn: 0.3672566	total: 17.1s	remaining: 1m 30s
319:	learn: 0.3670222	total: 17.2s	remaining: 1m 30s
320:	learn: 0.3668756	total: 17.2s	remaining: 1m 30s
321:	learn: 0.3666916	total: 17.3s	remaining: 1m 30s
322:	learn: 0.3665429	total: 17.3s	remaining: 1m 30s
323:	learn: 0.3661732	total: 17.4s	remaining: 1m 29s
324:	learn: 0.3660820	total: 17.4s	remaining: 1m 29s
325:	learn: 0.3658219	total: 17.5s	remaining: 1m 29s
326:	learn: 0.3657012	total: 17.5s	remaining: 1m 29s
327:	learn: 0.3655732	total: 17.6s	remaining: 1m 29s
328:	learn: 0.3654154	total: 17.6s	remaining: 1m 29s
329:	learn: 0.3652650	total: 17.7s	remaining: 1m 29s
330:	learn: 0.3650570	total: 17.8s	remaining: 1m 29s
331:	learn: 0.3647941	total: 17.8s	remaining: 1m 29s
332:	learn: 0.3646271	total: 17.9s	remaining: 1m 29s
333:	learn: 0.3644879	total: 17.9s	remaining: 1m 2

472:	learn: 0.3467188	total: 25s	remaining: 1m 20s
473:	learn: 0.3466216	total: 25s	remaining: 1m 20s
474:	learn: 0.3465359	total: 25.1s	remaining: 1m 20s
475:	learn: 0.3463963	total: 25.1s	remaining: 1m 20s
476:	learn: 0.3462696	total: 25.2s	remaining: 1m 20s
477:	learn: 0.3460223	total: 25.2s	remaining: 1m 20s
478:	learn: 0.3459988	total: 25.3s	remaining: 1m 20s
479:	learn: 0.3458954	total: 25.3s	remaining: 1m 20s
480:	learn: 0.3458066	total: 25.4s	remaining: 1m 20s
481:	learn: 0.3457398	total: 25.4s	remaining: 1m 20s
482:	learn: 0.3456334	total: 25.5s	remaining: 1m 20s
483:	learn: 0.3455727	total: 25.5s	remaining: 1m 19s
484:	learn: 0.3454735	total: 25.6s	remaining: 1m 19s
485:	learn: 0.3454225	total: 25.6s	remaining: 1m 19s
486:	learn: 0.3453709	total: 25.7s	remaining: 1m 19s
487:	learn: 0.3452992	total: 25.7s	remaining: 1m 19s
488:	learn: 0.3452203	total: 25.8s	remaining: 1m 19s
489:	learn: 0.3450742	total: 25.8s	remaining: 1m 19s
490:	learn: 0.3449598	total: 25.9s	remaining: 1m 1

630:	learn: 0.3329310	total: 32.9s	remaining: 1m 11s
631:	learn: 0.3328088	total: 32.9s	remaining: 1m 11s
632:	learn: 0.3327173	total: 33s	remaining: 1m 11s
633:	learn: 0.3326480	total: 33s	remaining: 1m 11s
634:	learn: 0.3325007	total: 33.1s	remaining: 1m 11s
635:	learn: 0.3324269	total: 33.1s	remaining: 1m 11s
636:	learn: 0.3323315	total: 33.2s	remaining: 1m 10s
637:	learn: 0.3322590	total: 33.2s	remaining: 1m 10s
638:	learn: 0.3321329	total: 33.3s	remaining: 1m 10s
639:	learn: 0.3321019	total: 33.3s	remaining: 1m 10s
640:	learn: 0.3320513	total: 33.3s	remaining: 1m 10s
641:	learn: 0.3318169	total: 33.4s	remaining: 1m 10s
642:	learn: 0.3316997	total: 33.5s	remaining: 1m 10s
643:	learn: 0.3316612	total: 33.5s	remaining: 1m 10s
644:	learn: 0.3315719	total: 33.6s	remaining: 1m 10s
645:	learn: 0.3314822	total: 33.6s	remaining: 1m 10s
646:	learn: 0.3314074	total: 33.7s	remaining: 1m 10s
647:	learn: 0.3313620	total: 33.7s	remaining: 1m 10s
648:	learn: 0.3312454	total: 33.8s	remaining: 1m 1

789:	learn: 0.3218638	total: 40.8s	remaining: 1m 2s
790:	learn: 0.3218147	total: 40.8s	remaining: 1m 2s
791:	learn: 0.3217202	total: 40.9s	remaining: 1m 2s
792:	learn: 0.3216576	total: 41s	remaining: 1m 2s
793:	learn: 0.3215607	total: 41s	remaining: 1m 2s
794:	learn: 0.3214663	total: 41.1s	remaining: 1m 2s
795:	learn: 0.3213637	total: 41.1s	remaining: 1m 2s
796:	learn: 0.3213217	total: 41.2s	remaining: 1m 2s
797:	learn: 0.3212422	total: 41.2s	remaining: 1m 2s
798:	learn: 0.3211961	total: 41.3s	remaining: 1m 2s
799:	learn: 0.3211704	total: 41.3s	remaining: 1m 1s
800:	learn: 0.3211063	total: 41.4s	remaining: 1m 1s
801:	learn: 0.3210267	total: 41.4s	remaining: 1m 1s
802:	learn: 0.3209958	total: 41.4s	remaining: 1m 1s
803:	learn: 0.3209272	total: 41.5s	remaining: 1m 1s
804:	learn: 0.3208663	total: 41.5s	remaining: 1m 1s
805:	learn: 0.3208038	total: 41.6s	remaining: 1m 1s
806:	learn: 0.3207256	total: 41.6s	remaining: 1m 1s
807:	learn: 0.3206921	total: 41.7s	remaining: 1m 1s
808:	learn: 0.32

951:	learn: 0.3114729	total: 48.9s	remaining: 53.9s
952:	learn: 0.3114155	total: 49s	remaining: 53.8s
953:	learn: 0.3114009	total: 49s	remaining: 53.8s
954:	learn: 0.3113325	total: 49.1s	remaining: 53.7s
955:	learn: 0.3112964	total: 49.1s	remaining: 53.7s
956:	learn: 0.3112408	total: 49.2s	remaining: 53.6s
957:	learn: 0.3111587	total: 49.2s	remaining: 53.5s
958:	learn: 0.3110162	total: 49.3s	remaining: 53.5s
959:	learn: 0.3109864	total: 49.3s	remaining: 53.4s
960:	learn: 0.3109493	total: 49.4s	remaining: 53.4s
961:	learn: 0.3109241	total: 49.4s	remaining: 53.3s
962:	learn: 0.3108912	total: 49.5s	remaining: 53.3s
963:	learn: 0.3108564	total: 49.5s	remaining: 53.2s
964:	learn: 0.3108149	total: 49.6s	remaining: 53.2s
965:	learn: 0.3107725	total: 49.6s	remaining: 53.1s
966:	learn: 0.3107341	total: 49.7s	remaining: 53.1s
967:	learn: 0.3107135	total: 49.7s	remaining: 53s
968:	learn: 0.3106045	total: 49.8s	remaining: 53s
969:	learn: 0.3105624	total: 49.8s	remaining: 52.9s
970:	learn: 0.310470

1109:	learn: 0.3027090	total: 56.9s	remaining: 45.6s
1110:	learn: 0.3026658	total: 56.9s	remaining: 45.6s
1111:	learn: 0.3026452	total: 57s	remaining: 45.5s
1112:	learn: 0.3025564	total: 57s	remaining: 45.5s
1113:	learn: 0.3024709	total: 57.1s	remaining: 45.4s
1114:	learn: 0.3024319	total: 57.1s	remaining: 45.4s
1115:	learn: 0.3023863	total: 57.2s	remaining: 45.3s
1116:	learn: 0.3023536	total: 57.2s	remaining: 45.2s
1117:	learn: 0.3022991	total: 57.3s	remaining: 45.2s
1118:	learn: 0.3022242	total: 57.3s	remaining: 45.1s
1119:	learn: 0.3021521	total: 57.4s	remaining: 45.1s
1120:	learn: 0.3020830	total: 57.5s	remaining: 45s
1121:	learn: 0.3020309	total: 57.5s	remaining: 45s
1122:	learn: 0.3019957	total: 57.5s	remaining: 44.9s
1123:	learn: 0.3019539	total: 57.6s	remaining: 44.9s
1124:	learn: 0.3019058	total: 57.6s	remaining: 44.8s
1125:	learn: 0.3018651	total: 57.7s	remaining: 44.8s
1126:	learn: 0.3017836	total: 57.8s	remaining: 44.7s
1127:	learn: 0.3017646	total: 57.8s	remaining: 44.7s
1

1268:	learn: 0.2948763	total: 1m 5s	remaining: 37.4s
1269:	learn: 0.2948453	total: 1m 5s	remaining: 37.4s
1270:	learn: 0.2948243	total: 1m 5s	remaining: 37.3s
1271:	learn: 0.2948069	total: 1m 5s	remaining: 37.3s
1272:	learn: 0.2947216	total: 1m 5s	remaining: 37.2s
1273:	learn: 0.2946954	total: 1m 5s	remaining: 37.2s
1274:	learn: 0.2946567	total: 1m 5s	remaining: 37.1s
1275:	learn: 0.2946106	total: 1m 5s	remaining: 37.1s
1276:	learn: 0.2945857	total: 1m 5s	remaining: 37s
1277:	learn: 0.2945315	total: 1m 5s	remaining: 37s
1278:	learn: 0.2944695	total: 1m 5s	remaining: 36.9s
1279:	learn: 0.2944446	total: 1m 5s	remaining: 36.9s
1280:	learn: 0.2943967	total: 1m 5s	remaining: 36.8s
1281:	learn: 0.2943585	total: 1m 5s	remaining: 36.8s
1282:	learn: 0.2943387	total: 1m 5s	remaining: 36.7s
1283:	learn: 0.2942860	total: 1m 5s	remaining: 36.7s
1284:	learn: 0.2942203	total: 1m 5s	remaining: 36.6s
1285:	learn: 0.2941350	total: 1m 5s	remaining: 36.6s
1286:	learn: 0.2941145	total: 1m 5s	remaining: 36.

1426:	learn: 0.2868830	total: 1m 13s	remaining: 29.3s
1427:	learn: 0.2868483	total: 1m 13s	remaining: 29.3s
1428:	learn: 0.2868214	total: 1m 13s	remaining: 29.2s
1429:	learn: 0.2868000	total: 1m 13s	remaining: 29.2s
1430:	learn: 0.2867578	total: 1m 13s	remaining: 29.1s
1431:	learn: 0.2867192	total: 1m 13s	remaining: 29.1s
1432:	learn: 0.2866778	total: 1m 13s	remaining: 29s
1433:	learn: 0.2865816	total: 1m 13s	remaining: 29s
1434:	learn: 0.2865459	total: 1m 13s	remaining: 28.9s
1435:	learn: 0.2865108	total: 1m 13s	remaining: 28.9s
1436:	learn: 0.2864712	total: 1m 13s	remaining: 28.8s
1437:	learn: 0.2863944	total: 1m 13s	remaining: 28.8s
1438:	learn: 0.2863621	total: 1m 13s	remaining: 28.7s
1439:	learn: 0.2863240	total: 1m 13s	remaining: 28.7s
1440:	learn: 0.2862717	total: 1m 13s	remaining: 28.6s
1441:	learn: 0.2862157	total: 1m 13s	remaining: 28.6s
1442:	learn: 0.2861947	total: 1m 13s	remaining: 28.5s
1443:	learn: 0.2861268	total: 1m 13s	remaining: 28.5s
1444:	learn: 0.2860355	total: 1m

1581:	learn: 0.2797799	total: 1m 21s	remaining: 21.4s
1582:	learn: 0.2797128	total: 1m 21s	remaining: 21.4s
1583:	learn: 0.2796746	total: 1m 21s	remaining: 21.3s
1584:	learn: 0.2796061	total: 1m 21s	remaining: 21.3s
1585:	learn: 0.2795550	total: 1m 21s	remaining: 21.2s
1586:	learn: 0.2794605	total: 1m 21s	remaining: 21.2s
1587:	learn: 0.2794360	total: 1m 21s	remaining: 21.1s
1588:	learn: 0.2794116	total: 1m 21s	remaining: 21.1s
1589:	learn: 0.2793784	total: 1m 21s	remaining: 21s
1590:	learn: 0.2793330	total: 1m 21s	remaining: 21s
1591:	learn: 0.2792790	total: 1m 21s	remaining: 20.9s
1592:	learn: 0.2792652	total: 1m 21s	remaining: 20.9s
1593:	learn: 0.2792121	total: 1m 21s	remaining: 20.8s
1594:	learn: 0.2791972	total: 1m 21s	remaining: 20.7s
1595:	learn: 0.2791796	total: 1m 21s	remaining: 20.7s
1596:	learn: 0.2791522	total: 1m 21s	remaining: 20.6s
1597:	learn: 0.2791178	total: 1m 21s	remaining: 20.6s
1598:	learn: 0.2790531	total: 1m 21s	remaining: 20.5s
1599:	learn: 0.2790233	total: 1m

1737:	learn: 0.2729123	total: 1m 29s	remaining: 13.4s
1738:	learn: 0.2728713	total: 1m 29s	remaining: 13.4s
1739:	learn: 0.2728461	total: 1m 29s	remaining: 13.3s
1740:	learn: 0.2727812	total: 1m 29s	remaining: 13.3s
1741:	learn: 0.2727252	total: 1m 29s	remaining: 13.2s
1742:	learn: 0.2726337	total: 1m 29s	remaining: 13.2s
1743:	learn: 0.2725843	total: 1m 29s	remaining: 13.1s
1744:	learn: 0.2725364	total: 1m 29s	remaining: 13.1s
1745:	learn: 0.2724616	total: 1m 29s	remaining: 13s
1746:	learn: 0.2723923	total: 1m 29s	remaining: 13s
1747:	learn: 0.2723554	total: 1m 29s	remaining: 12.9s
1748:	learn: 0.2723415	total: 1m 29s	remaining: 12.9s
1749:	learn: 0.2723081	total: 1m 29s	remaining: 12.8s
1750:	learn: 0.2722090	total: 1m 29s	remaining: 12.8s
1751:	learn: 0.2721812	total: 1m 29s	remaining: 12.7s
1752:	learn: 0.2721328	total: 1m 29s	remaining: 12.7s
1753:	learn: 0.2721164	total: 1m 29s	remaining: 12.6s
1754:	learn: 0.2720923	total: 1m 29s	remaining: 12.6s
1755:	learn: 0.2719783	total: 1m

1891:	learn: 0.2665583	total: 1m 36s	remaining: 5.53s
1892:	learn: 0.2664862	total: 1m 36s	remaining: 5.48s
1893:	learn: 0.2664222	total: 1m 37s	remaining: 5.43s
1894:	learn: 0.2663942	total: 1m 37s	remaining: 5.38s
1895:	learn: 0.2663793	total: 1m 37s	remaining: 5.33s
1896:	learn: 0.2663617	total: 1m 37s	remaining: 5.28s
1897:	learn: 0.2663502	total: 1m 37s	remaining: 5.22s
1898:	learn: 0.2663311	total: 1m 37s	remaining: 5.17s
1899:	learn: 0.2663236	total: 1m 37s	remaining: 5.12s
1900:	learn: 0.2662920	total: 1m 37s	remaining: 5.07s
1901:	learn: 0.2662616	total: 1m 37s	remaining: 5.02s
1902:	learn: 0.2662300	total: 1m 37s	remaining: 4.97s
1903:	learn: 0.2661782	total: 1m 37s	remaining: 4.92s
1904:	learn: 0.2661413	total: 1m 37s	remaining: 4.87s
1905:	learn: 0.2661135	total: 1m 37s	remaining: 4.82s
1906:	learn: 0.2660992	total: 1m 37s	remaining: 4.76s
1907:	learn: 0.2660064	total: 1m 37s	remaining: 4.71s
1908:	learn: 0.2659960	total: 1m 37s	remaining: 4.66s
1909:	learn: 0.2659789	total

<catboost.core.CatBoostClassifier at 0x1857a44cee0>

In [12]:
proba = model.predict_proba(X_test.drop(columns=['ID','ID2'], axis=1))
y_test = pd.DataFrame(proba)
print(y_test.columns)
y_test.columns = le.inverse_transform(y_test.columns)

RangeIndex(start=0, stop=21, step=1)


# SUBMIT

In [13]:
print(y_test.columns)
answer_mass = []
for i in range(X_test.shape[0]):
  id = X_test['ID'].iloc[i]
  for c in y_test.columns:
    answer_mass.append([id + ' X ' + str(c), y_test[c].iloc[i]])
    

df_answer = pd.DataFrame(answer_mass)
df_answer.columns = ['ID X PCODE', 'Label']
for i in range(df_answer.shape[0]):
  if df_answer['ID X PCODE'].iloc[i] in true_values:
    df_answer['Label'].iloc[i] = 1.0

Index(['66FJ', '7POT', '8NN1', 'AHXO', 'BSTQ', 'ECY3', 'FM3X', 'GHYX', 'GYSR',
       'J9JW', 'JWFN', 'JZ9D', 'K6QO', 'LJR9', 'N2MW', 'P5DA', 'PYUQ', 'QBOL',
       'RIBP', 'RVSZ', 'SOP4'],
      dtype='object')


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_with_indexer(indexer, value)


In [14]:
df_answer.reset_index(drop=True, inplace=True)
df_answer.to_csv('submission.csv', index=False)