In [1]:
import pandas as pd
import numpy as np
import catboost as cb
from sklearn.model_selection import KFold
from itertools import product,chain
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split

In [2]:
# read in the train and test data from csv files

train_set = pd.read_csv("train.csv", na_values='?')
test_set = pd.read_csv("test.csv", na_values='?')

In [3]:
train_set.head()

Unnamed: 0,Applicant_ID,form_field1,form_field2,form_field3,form_field4,form_field5,form_field6,form_field7,form_field8,form_field9,...,form_field42,form_field43,form_field44,form_field45,form_field46,form_field47,form_field48,form_field49,form_field50,default_status
0,Apcnt_1000000,3436.0,0.28505,1.656,0.0,0.0,0.0,10689720.0,252072.0,4272776.0,...,0.392854,2.02,0.711632,0.0,0.0,charge,,1.129518,0.044335,no
1,Apcnt_1000004,3456.0,0.674,0.2342,0.0,0.0,0.0,898979.0,497531.0,9073814.0,...,0.314281,8.08,0.183584,,0.0,charge,349.80573,1.620483,0.322436,no
2,Apcnt_1000008,3276.0,0.53845,3.151,0.0,6.282,,956940.0,,192944.0,...,0.162965,18.18,0.791136,0.0,0.0,charge,,1.51337,0.01164,yes
3,Apcnt_1000012,3372.0,0.17005,0.505,0.0,0.0,192166.0,3044703.0,385499.0,3986472.0,...,0.488884,2.02,0.685168,,0.0,charge,89.9401,0.664452,0.082729,no
4,Apcnt_1000016,3370.0,0.7727,1.101,0.0,0.0,1556.0,214728.0,214728.0,1284089.0,...,0.275,12.12,0.438168,0.0,0.0,charge,97.887502,1.427891,0.04563,no


In [4]:
test_set.head()

Unnamed: 0,Applicant_ID,form_field1,form_field2,form_field3,form_field4,form_field5,form_field6,form_field7,form_field8,form_field9,...,form_field41,form_field42,form_field43,form_field44,form_field45,form_field46,form_field47,form_field48,form_field49,form_field50
0,Apcnt_1000032,3236.0,0.34875,10.2006,0.0,0.0,418564.0,418564.0,418564.0,540710.0,...,,0.825,1.01,0.8,,0.0,charge,,0.0,0.011221
1,Apcnt_1000048,3284.0,1.2736,2.9606,9.0198,0.0,0.0,9858816.0,49014.0,1510098.0,...,18.8415,0.507694,4.04,0.623248,1.0,0.0,lending,,0.504974,0.043525
2,Apcnt_1000052,,0.27505,0.06,0.0,0.0,,,,,...,,,0.0,,,,charge,,0.0,
3,Apcnt_1000076,3232.0,0.28505,2.8032,0.0,0.0,0.0,473802.0,473802.0,1724437.0,...,,0.916663,2.02,0.464224,,,charge,90.163742,0.788809,0.104029
4,Apcnt_1000080,3466.0,2.09545,0.8318,2.5182,0.0,19839.0,1150662.0,1150662.0,7860523.0,...,,0.234047,23.23,0.726688,0.0,0.0,lending,1303.587148,1.637733,0.163124


In [5]:
# convert categorical columns to integers
category_cols = ['form_field47', 'default_status']
for column in category_cols:
    train_set[column] = train_set[column].astype('category').cat.codes
    test_set['form_field47'] = test_set['form_field47'].astype('category').cat.codes

In [6]:
# split labels out of data sets    
train_label = train_set['default_status']
train_set = train_set.drop(['Applicant_ID','default_status'], axis=1) # remove labels
test_set = test_set.drop('Applicant_ID', axis=1) # dropping the first column because it has unique elements for all entries

In [7]:
x_train, x_test, y_train, y_test = train_test_split(train_set, train_label, test_size = 0.25, random_state = 1)

In [8]:
# train default classifier    
clf = cb.CatBoostClassifier(border_count = 100, l2_leaf_reg = 3)
cat_dims = [train_set.columns.get_loc(i) for i in category_cols[:-1]] 
clf.fit(x_train, np.ravel(y_train), cat_features=cat_dims)

0:	learn: 0.6735323	total: 109ms	remaining: 1m 49s
1:	learn: 0.6558289	total: 154ms	remaining: 1m 16s
2:	learn: 0.6387877	total: 203ms	remaining: 1m 7s
3:	learn: 0.6234207	total: 244ms	remaining: 1m
4:	learn: 0.6096106	total: 300ms	remaining: 59.7s
5:	learn: 0.5969843	total: 370ms	remaining: 1m 1s
6:	learn: 0.5860791	total: 448ms	remaining: 1m 3s
7:	learn: 0.5753869	total: 581ms	remaining: 1m 12s
8:	learn: 0.5654606	total: 665ms	remaining: 1m 13s
9:	learn: 0.5562108	total: 778ms	remaining: 1m 17s
10:	learn: 0.5479693	total: 913ms	remaining: 1m 22s
11:	learn: 0.5408161	total: 968ms	remaining: 1m 19s
12:	learn: 0.5326684	total: 1.03s	remaining: 1m 18s
13:	learn: 0.5255191	total: 1.07s	remaining: 1m 15s
14:	learn: 0.5186427	total: 1.11s	remaining: 1m 12s
15:	learn: 0.5124331	total: 1.14s	remaining: 1m 10s
16:	learn: 0.5072220	total: 1.17s	remaining: 1m 7s
17:	learn: 0.5018181	total: 1.21s	remaining: 1m 5s
18:	learn: 0.4971167	total: 1.24s	remaining: 1m 4s
19:	learn: 0.4925357	total: 1.28s

162:	learn: 0.4030463	total: 7.46s	remaining: 38.3s
163:	learn: 0.4029425	total: 7.53s	remaining: 38.4s
164:	learn: 0.4028242	total: 7.59s	remaining: 38.4s
165:	learn: 0.4027457	total: 7.64s	remaining: 38.4s
166:	learn: 0.4026697	total: 7.69s	remaining: 38.4s
167:	learn: 0.4025946	total: 7.74s	remaining: 38.3s
168:	learn: 0.4024947	total: 7.77s	remaining: 38.2s
169:	learn: 0.4024121	total: 7.8s	remaining: 38.1s
170:	learn: 0.4023210	total: 7.84s	remaining: 38s
171:	learn: 0.4022144	total: 7.89s	remaining: 38s
172:	learn: 0.4021404	total: 7.93s	remaining: 37.9s
173:	learn: 0.4020478	total: 7.97s	remaining: 37.8s
174:	learn: 0.4019260	total: 8s	remaining: 37.7s
175:	learn: 0.4018369	total: 8.03s	remaining: 37.6s
176:	learn: 0.4017676	total: 8.07s	remaining: 37.5s
177:	learn: 0.4016977	total: 8.11s	remaining: 37.4s
178:	learn: 0.4016386	total: 8.14s	remaining: 37.3s
179:	learn: 0.4015770	total: 8.17s	remaining: 37.2s
180:	learn: 0.4014869	total: 8.2s	remaining: 37.1s
181:	learn: 0.4013991

321:	learn: 0.3916528	total: 13.9s	remaining: 29.3s
322:	learn: 0.3916008	total: 13.9s	remaining: 29.2s
323:	learn: 0.3915483	total: 14s	remaining: 29.1s
324:	learn: 0.3915044	total: 14s	remaining: 29.1s
325:	learn: 0.3914313	total: 14s	remaining: 29s
326:	learn: 0.3913785	total: 14.1s	remaining: 29s
327:	learn: 0.3913280	total: 14.1s	remaining: 28.9s
328:	learn: 0.3912774	total: 14.1s	remaining: 28.9s
329:	learn: 0.3912067	total: 14.2s	remaining: 28.8s
330:	learn: 0.3911382	total: 14.2s	remaining: 28.7s
331:	learn: 0.3910757	total: 14.2s	remaining: 28.6s
332:	learn: 0.3910047	total: 14.3s	remaining: 28.6s
333:	learn: 0.3909587	total: 14.3s	remaining: 28.5s
334:	learn: 0.3908970	total: 14.3s	remaining: 28.5s
335:	learn: 0.3908585	total: 14.4s	remaining: 28.4s
336:	learn: 0.3907997	total: 14.4s	remaining: 28.3s
337:	learn: 0.3907166	total: 14.4s	remaining: 28.2s
338:	learn: 0.3906356	total: 14.5s	remaining: 28.2s
339:	learn: 0.3906054	total: 14.5s	remaining: 28.1s
340:	learn: 0.3905369	

484:	learn: 0.3806640	total: 20.1s	remaining: 21.4s
485:	learn: 0.3805867	total: 20.2s	remaining: 21.3s
486:	learn: 0.3805216	total: 20.2s	remaining: 21.3s
487:	learn: 0.3804368	total: 20.3s	remaining: 21.2s
488:	learn: 0.3803597	total: 20.3s	remaining: 21.2s
489:	learn: 0.3802944	total: 20.3s	remaining: 21.1s
490:	learn: 0.3802243	total: 20.3s	remaining: 21.1s
491:	learn: 0.3801872	total: 20.4s	remaining: 21s
492:	learn: 0.3801109	total: 20.4s	remaining: 21s
493:	learn: 0.3800470	total: 20.4s	remaining: 20.9s
494:	learn: 0.3799544	total: 20.5s	remaining: 20.9s
495:	learn: 0.3799091	total: 20.5s	remaining: 20.8s
496:	learn: 0.3798650	total: 20.6s	remaining: 20.8s
497:	learn: 0.3798034	total: 20.6s	remaining: 20.8s
498:	learn: 0.3797287	total: 20.6s	remaining: 20.7s
499:	learn: 0.3796584	total: 20.7s	remaining: 20.7s
500:	learn: 0.3796025	total: 20.7s	remaining: 20.6s
501:	learn: 0.3795202	total: 20.7s	remaining: 20.6s
502:	learn: 0.3794624	total: 20.8s	remaining: 20.5s
503:	learn: 0.37

645:	learn: 0.3704674	total: 26.1s	remaining: 14.3s
646:	learn: 0.3704100	total: 26.1s	remaining: 14.2s
647:	learn: 0.3703662	total: 26.1s	remaining: 14.2s
648:	learn: 0.3702855	total: 26.2s	remaining: 14.2s
649:	learn: 0.3702123	total: 26.2s	remaining: 14.1s
650:	learn: 0.3701494	total: 26.3s	remaining: 14.1s
651:	learn: 0.3700829	total: 26.3s	remaining: 14s
652:	learn: 0.3700320	total: 26.3s	remaining: 14s
653:	learn: 0.3699853	total: 26.4s	remaining: 13.9s
654:	learn: 0.3699310	total: 26.4s	remaining: 13.9s
655:	learn: 0.3699263	total: 26.4s	remaining: 13.9s
656:	learn: 0.3698553	total: 26.5s	remaining: 13.8s
657:	learn: 0.3697871	total: 26.5s	remaining: 13.8s
658:	learn: 0.3697274	total: 26.5s	remaining: 13.7s
659:	learn: 0.3696426	total: 26.6s	remaining: 13.7s
660:	learn: 0.3695800	total: 26.6s	remaining: 13.6s
661:	learn: 0.3695040	total: 26.7s	remaining: 13.6s
662:	learn: 0.3694336	total: 26.7s	remaining: 13.6s
663:	learn: 0.3693765	total: 26.7s	remaining: 13.5s
664:	learn: 0.36

804:	learn: 0.3618648	total: 32.3s	remaining: 7.83s
805:	learn: 0.3618256	total: 32.4s	remaining: 7.79s
806:	learn: 0.3617598	total: 32.4s	remaining: 7.75s
807:	learn: 0.3616904	total: 32.5s	remaining: 7.71s
808:	learn: 0.3616158	total: 32.5s	remaining: 7.67s
809:	learn: 0.3615454	total: 32.5s	remaining: 7.63s
810:	learn: 0.3614929	total: 32.6s	remaining: 7.6s
811:	learn: 0.3614452	total: 32.6s	remaining: 7.55s
812:	learn: 0.3613998	total: 32.7s	remaining: 7.51s
813:	learn: 0.3613659	total: 32.7s	remaining: 7.47s
814:	learn: 0.3613001	total: 32.7s	remaining: 7.43s
815:	learn: 0.3612256	total: 32.8s	remaining: 7.39s
816:	learn: 0.3611743	total: 32.8s	remaining: 7.35s
817:	learn: 0.3611018	total: 32.9s	remaining: 7.31s
818:	learn: 0.3610486	total: 32.9s	remaining: 7.27s
819:	learn: 0.3609885	total: 33s	remaining: 7.23s
820:	learn: 0.3609360	total: 33s	remaining: 7.2s
821:	learn: 0.3608805	total: 33.1s	remaining: 7.16s
822:	learn: 0.3608346	total: 33.1s	remaining: 7.12s
823:	learn: 0.3607

964:	learn: 0.3533428	total: 38.9s	remaining: 1.41s
965:	learn: 0.3533225	total: 39s	remaining: 1.37s
966:	learn: 0.3532736	total: 39s	remaining: 1.33s
967:	learn: 0.3532375	total: 39s	remaining: 1.29s
968:	learn: 0.3531911	total: 39.1s	remaining: 1.25s
969:	learn: 0.3531371	total: 39.2s	remaining: 1.21s
970:	learn: 0.3530895	total: 39.2s	remaining: 1.17s
971:	learn: 0.3530364	total: 39.2s	remaining: 1.13s
972:	learn: 0.3529896	total: 39.3s	remaining: 1.09s
973:	learn: 0.3529383	total: 39.3s	remaining: 1.05s
974:	learn: 0.3528904	total: 39.3s	remaining: 1.01s
975:	learn: 0.3528387	total: 39.4s	remaining: 968ms
976:	learn: 0.3527780	total: 39.4s	remaining: 928ms
977:	learn: 0.3527257	total: 39.5s	remaining: 888ms
978:	learn: 0.3526810	total: 39.5s	remaining: 847ms
979:	learn: 0.3526180	total: 39.5s	remaining: 807ms
980:	learn: 0.3525828	total: 39.6s	remaining: 767ms
981:	learn: 0.3525701	total: 39.6s	remaining: 726ms
982:	learn: 0.3525076	total: 39.7s	remaining: 686ms
983:	learn: 0.3524

<catboost.core.CatBoostClassifier at 0x1bf54c5f088>

In [9]:
y_pred = clf.predict(x_test)

In [10]:
from sklearn.metrics import roc_auc_score
score = roc_auc_score(y_test, y_pred)
score

0.6846769416891729

In [11]:
clf.fit(train_set, np.ravel(train_label), cat_features=cat_dims)

0:	learn: 0.6739879	total: 43.5ms	remaining: 43.5s
1:	learn: 0.6559009	total: 95.3ms	remaining: 47.5s
2:	learn: 0.6395615	total: 146ms	remaining: 48.5s
3:	learn: 0.6243980	total: 202ms	remaining: 50.3s
4:	learn: 0.6102525	total: 252ms	remaining: 50.2s
5:	learn: 0.5981481	total: 338ms	remaining: 56s
6:	learn: 0.5865613	total: 403ms	remaining: 57.2s
7:	learn: 0.5750874	total: 462ms	remaining: 57.3s
8:	learn: 0.5645579	total: 528ms	remaining: 58.2s
9:	learn: 0.5550924	total: 589ms	remaining: 58.4s
10:	learn: 0.5463772	total: 636ms	remaining: 57.2s
11:	learn: 0.5385079	total: 684ms	remaining: 56.3s
12:	learn: 0.5312143	total: 754ms	remaining: 57.3s
13:	learn: 0.5240729	total: 814ms	remaining: 57.3s
14:	learn: 0.5176862	total: 857ms	remaining: 56.3s
15:	learn: 0.5118748	total: 905ms	remaining: 55.6s
16:	learn: 0.5068379	total: 970ms	remaining: 56.1s
17:	learn: 0.5015487	total: 1.03s	remaining: 56.1s
18:	learn: 0.4964213	total: 1.07s	remaining: 55.2s
19:	learn: 0.4919357	total: 1.11s	remaini

165:	learn: 0.4052794	total: 9.1s	remaining: 45.7s
166:	learn: 0.4052042	total: 9.15s	remaining: 45.6s
167:	learn: 0.4051097	total: 9.2s	remaining: 45.6s
168:	learn: 0.4050366	total: 9.25s	remaining: 45.5s
169:	learn: 0.4049526	total: 9.31s	remaining: 45.4s
170:	learn: 0.4048602	total: 9.36s	remaining: 45.4s
171:	learn: 0.4047776	total: 9.4s	remaining: 45.3s
172:	learn: 0.4046790	total: 9.45s	remaining: 45.2s
173:	learn: 0.4045880	total: 9.49s	remaining: 45.1s
174:	learn: 0.4045300	total: 9.54s	remaining: 45s
175:	learn: 0.4044679	total: 9.59s	remaining: 44.9s
176:	learn: 0.4043719	total: 9.63s	remaining: 44.8s
177:	learn: 0.4043286	total: 9.68s	remaining: 44.7s
178:	learn: 0.4042444	total: 9.76s	remaining: 44.8s
179:	learn: 0.4041740	total: 9.81s	remaining: 44.7s
180:	learn: 0.4040773	total: 9.85s	remaining: 44.6s
181:	learn: 0.4040132	total: 9.89s	remaining: 44.4s
182:	learn: 0.4039213	total: 9.94s	remaining: 44.4s
183:	learn: 0.4038385	total: 9.98s	remaining: 44.3s
184:	learn: 0.403

325:	learn: 0.3954596	total: 16.8s	remaining: 34.7s
326:	learn: 0.3954341	total: 16.9s	remaining: 34.7s
327:	learn: 0.3954035	total: 16.9s	remaining: 34.6s
328:	learn: 0.3953914	total: 16.9s	remaining: 34.5s
329:	learn: 0.3953332	total: 17s	remaining: 34.5s
330:	learn: 0.3952959	total: 17s	remaining: 34.4s
331:	learn: 0.3952511	total: 17.1s	remaining: 34.3s
332:	learn: 0.3951793	total: 17.1s	remaining: 34.2s
333:	learn: 0.3951291	total: 17.1s	remaining: 34.2s
334:	learn: 0.3950843	total: 17.2s	remaining: 34.1s
335:	learn: 0.3950339	total: 17.2s	remaining: 34s
336:	learn: 0.3949753	total: 17.3s	remaining: 34s
337:	learn: 0.3949260	total: 17.3s	remaining: 33.9s
338:	learn: 0.3948757	total: 17.4s	remaining: 33.8s
339:	learn: 0.3948125	total: 17.4s	remaining: 33.8s
340:	learn: 0.3947723	total: 17.5s	remaining: 33.8s
341:	learn: 0.3947350	total: 17.5s	remaining: 33.7s
342:	learn: 0.3946763	total: 17.6s	remaining: 33.7s
343:	learn: 0.3946223	total: 17.7s	remaining: 33.7s
344:	learn: 0.394548

486:	learn: 0.3865523	total: 24.8s	remaining: 26.1s
487:	learn: 0.3864897	total: 24.8s	remaining: 26.1s
488:	learn: 0.3864371	total: 24.9s	remaining: 26s
489:	learn: 0.3863707	total: 24.9s	remaining: 25.9s
490:	learn: 0.3863336	total: 25s	remaining: 25.9s
491:	learn: 0.3862874	total: 25s	remaining: 25.8s
492:	learn: 0.3862268	total: 25s	remaining: 25.7s
493:	learn: 0.3861774	total: 25.1s	remaining: 25.7s
494:	learn: 0.3861201	total: 25.1s	remaining: 25.6s
495:	learn: 0.3860710	total: 25.1s	remaining: 25.6s
496:	learn: 0.3860180	total: 25.2s	remaining: 25.5s
497:	learn: 0.3859535	total: 25.2s	remaining: 25.4s
498:	learn: 0.3859003	total: 25.3s	remaining: 25.4s
499:	learn: 0.3858461	total: 25.3s	remaining: 25.3s
500:	learn: 0.3857913	total: 25.3s	remaining: 25.2s
501:	learn: 0.3857379	total: 25.4s	remaining: 25.2s
502:	learn: 0.3856728	total: 25.4s	remaining: 25.1s
503:	learn: 0.3856334	total: 25.5s	remaining: 25.1s
504:	learn: 0.3855943	total: 25.5s	remaining: 25s
505:	learn: 0.3855316	

647:	learn: 0.3781345	total: 31.5s	remaining: 17.1s
648:	learn: 0.3780848	total: 31.5s	remaining: 17s
649:	learn: 0.3780323	total: 31.5s	remaining: 17s
650:	learn: 0.3779842	total: 31.6s	remaining: 16.9s
651:	learn: 0.3779364	total: 31.6s	remaining: 16.9s
652:	learn: 0.3778743	total: 31.7s	remaining: 16.8s
653:	learn: 0.3778114	total: 31.7s	remaining: 16.8s
654:	learn: 0.3777499	total: 31.7s	remaining: 16.7s
655:	learn: 0.3777023	total: 31.8s	remaining: 16.7s
656:	learn: 0.3776549	total: 31.8s	remaining: 16.6s
657:	learn: 0.3776124	total: 31.9s	remaining: 16.6s
658:	learn: 0.3775755	total: 31.9s	remaining: 16.5s
659:	learn: 0.3775293	total: 31.9s	remaining: 16.5s
660:	learn: 0.3774732	total: 32s	remaining: 16.4s
661:	learn: 0.3774173	total: 32.1s	remaining: 16.4s
662:	learn: 0.3773900	total: 32.1s	remaining: 16.3s
663:	learn: 0.3773416	total: 32.2s	remaining: 16.3s
664:	learn: 0.3772836	total: 32.2s	remaining: 16.2s
665:	learn: 0.3772452	total: 32.2s	remaining: 16.2s
666:	learn: 0.3771

807:	learn: 0.3711950	total: 38.8s	remaining: 9.21s
808:	learn: 0.3711623	total: 38.8s	remaining: 9.16s
809:	learn: 0.3711241	total: 38.9s	remaining: 9.12s
810:	learn: 0.3710643	total: 38.9s	remaining: 9.07s
811:	learn: 0.3710627	total: 38.9s	remaining: 9.02s
812:	learn: 0.3710190	total: 39s	remaining: 8.97s
813:	learn: 0.3709879	total: 39s	remaining: 8.92s
814:	learn: 0.3709473	total: 39.1s	remaining: 8.87s
815:	learn: 0.3709180	total: 39.1s	remaining: 8.83s
816:	learn: 0.3708649	total: 39.2s	remaining: 8.78s
817:	learn: 0.3708051	total: 39.3s	remaining: 8.73s
818:	learn: 0.3707593	total: 39.3s	remaining: 8.68s
819:	learn: 0.3707053	total: 39.3s	remaining: 8.63s
820:	learn: 0.3706769	total: 39.4s	remaining: 8.59s
821:	learn: 0.3706280	total: 39.5s	remaining: 8.55s
822:	learn: 0.3705925	total: 39.5s	remaining: 8.49s
823:	learn: 0.3705353	total: 39.5s	remaining: 8.45s
824:	learn: 0.3704939	total: 39.6s	remaining: 8.39s
825:	learn: 0.3704325	total: 39.6s	remaining: 8.35s
826:	learn: 0.37

966:	learn: 0.3645552	total: 46.1s	remaining: 1.57s
967:	learn: 0.3645124	total: 46.2s	remaining: 1.53s
968:	learn: 0.3644528	total: 46.3s	remaining: 1.48s
969:	learn: 0.3644049	total: 46.3s	remaining: 1.43s
970:	learn: 0.3643670	total: 46.4s	remaining: 1.39s
971:	learn: 0.3643501	total: 46.5s	remaining: 1.34s
972:	learn: 0.3643060	total: 46.6s	remaining: 1.29s
973:	learn: 0.3642662	total: 46.6s	remaining: 1.24s
974:	learn: 0.3642275	total: 46.6s	remaining: 1.2s
975:	learn: 0.3641831	total: 46.7s	remaining: 1.15s
976:	learn: 0.3641802	total: 46.7s	remaining: 1.1s
977:	learn: 0.3641368	total: 46.8s	remaining: 1.05s
978:	learn: 0.3640840	total: 46.8s	remaining: 1s
979:	learn: 0.3640411	total: 46.9s	remaining: 957ms
980:	learn: 0.3640040	total: 46.9s	remaining: 909ms
981:	learn: 0.3639695	total: 47s	remaining: 861ms
982:	learn: 0.3639229	total: 47s	remaining: 813ms
983:	learn: 0.3638761	total: 47.1s	remaining: 765ms
984:	learn: 0.3638283	total: 47.1s	remaining: 717ms
985:	learn: 0.3637918

<catboost.core.CatBoostClassifier at 0x1bf54c5f088>

In [12]:
pred = clf.predict_proba(test_set)[:,1]

In [13]:
test = pd.read_csv("test.csv")
prediction = pd.DataFrame({'Applicant_ID': test['Applicant_ID'], 'default_status': pred})
prediction.head()

Unnamed: 0,Applicant_ID,default_status
0,Apcnt_1000032,0.314407
1,Apcnt_1000048,0.34174
2,Apcnt_1000052,0.41996
3,Apcnt_1000076,0.759467
4,Apcnt_1000080,0.162663


In [14]:
prediction.to_csv('Submit49.csv', index = False)