In [1]:
#import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

## Import Datasets
 ### Train and Test Data

In [2]:
train=pd.read_csv('../input/amexpert/train.csv')
train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 78369 entries, 0 to 78368
Data columns (total 5 columns):
id                   78369 non-null int64
campaign_id          78369 non-null int64
coupon_id            78369 non-null int64
customer_id          78369 non-null int64
redemption_status    78369 non-null int64
dtypes: int64(5)
memory usage: 3.0 MB


In [3]:
test=pd.read_csv('../input/amexperttest/test_QyjYwdj.csv')
test.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50226 entries, 0 to 50225
Data columns (total 4 columns):
id             50226 non-null int64
campaign_id    50226 non-null int64
coupon_id      50226 non-null int64
customer_id    50226 non-null int64
dtypes: int64(4)
memory usage: 1.5 MB


In [4]:
train.head()

Unnamed: 0,id,campaign_id,coupon_id,customer_id,redemption_status
0,1,13,27,1053,0
1,2,13,116,48,0
2,6,9,635,205,0
3,7,13,644,1050,0
4,9,8,1017,1489,0


In [5]:
test.head()

Unnamed: 0,id,campaign_id,coupon_id,customer_id
0,3,22,869,967
1,4,20,389,1566
2,5,22,981,510
3,8,25,1069,361
4,10,17,498,811


* Train and Test have IDs to other tables' data (like foreign keys); so we need to merge with ohter tables
* Target variable is redemption_status.

### Campaign Data

In [6]:
campaign_data=pd.read_csv('../input/amexpert/campaign_data.csv')
campaign_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 28 entries, 0 to 27
Data columns (total 4 columns):
campaign_id      28 non-null int64
campaign_type    28 non-null object
start_date       28 non-null object
end_date         28 non-null object
dtypes: int64(1), object(3)
memory usage: 1.0+ KB


In [7]:
campaign_data.head()

Unnamed: 0,campaign_id,campaign_type,start_date,end_date
0,24,Y,21/10/13,20/12/13
1,25,Y,21/10/13,22/11/13
2,20,Y,07/09/13,16/11/13
3,23,Y,08/10/13,15/11/13
4,21,Y,16/09/13,18/10/13


In [8]:
#Converting start_date,end_date to datetime type
campaign_data['start_date']=pd.to_datetime(campaign_data['start_date'])
campaign_data['end_date']=pd.to_datetime(campaign_data['end_date'])

In [9]:
#replacing two values of campaign_type column with 0 and 1.
campaign_data['campaign_type'].replace(to_replace=['X','Y'],value=[0,1],inplace=True)

In [10]:
campaign_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 28 entries, 0 to 27
Data columns (total 4 columns):
campaign_id      28 non-null int64
campaign_type    28 non-null int64
start_date       28 non-null datetime64[ns]
end_date         28 non-null datetime64[ns]
dtypes: datetime64[ns](2), int64(2)
memory usage: 1.0 KB


### Coupon Item Mapping

In [11]:
coupon_item_mapping=pd.read_csv('../input/amexpert/coupon_item_mapping.csv')
coupon_item_mapping.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 92663 entries, 0 to 92662
Data columns (total 2 columns):
coupon_id    92663 non-null int64
item_id      92663 non-null int64
dtypes: int64(2)
memory usage: 1.4 MB


In [12]:
coupon_item_mapping.head()

Unnamed: 0,coupon_id,item_id
0,105,37
1,107,75
2,494,76
3,522,77
4,518,77


### Customer Demographics Data

In [13]:
customer_demographics=pd.read_csv('../input/amexpert/customer_demographics.csv')
customer_demographics.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 760 entries, 0 to 759
Data columns (total 7 columns):
customer_id       760 non-null int64
age_range         760 non-null object
marital_status    431 non-null object
rented            760 non-null int64
family_size       760 non-null object
no_of_children    222 non-null object
income_bracket    760 non-null int64
dtypes: int64(3), object(4)
memory usage: 41.7+ KB


In [14]:
customer_demographics.head()

Unnamed: 0,customer_id,age_range,marital_status,rented,family_size,no_of_children,income_bracket
0,1,70+,Married,0,2,,4
1,6,46-55,Married,0,2,,5
2,7,26-35,,0,3,1.0,3
3,8,26-35,,0,4,2.0,6
4,10,46-55,Single,0,1,,5


In [15]:
customer_demographics.no_of_children.value_counts()

1     107
3+     60
2      55
Name: no_of_children, dtype: int64

All the existing values in this columns are 1,2,3+.

In [16]:
#the missing values are probably those with no_of_children=0
customer_demographics.no_of_children.fillna(0,inplace=True)
#replacing value 3+ with 3 to convert no_of_children column with int type. 
customer_demographics['no_of_children'].replace(to_replace='3+',value=3,inplace=True)
customer_demographics['no_of_children']=customer_demographics.no_of_children.astype(int)

In [17]:
#replacing value 5+ with 5 to convert family_size column with int type. 
customer_demographics['family_size'].replace(to_replace='5+',value=5,inplace=True)
customer_demographics['family_size']=customer_demographics.family_size.astype(int)

In [18]:
customer_demographics.marital_status.value_counts(dropna=False)

NaN        329
Married    317
Single     114
Name: marital_status, dtype: int64

* There are 329 NaN values, we can't ignore it so putting it in a whole new category i.e 0.
* Replacing Married and Single with 1 and 2 respectively.

In [19]:
customer_demographics.marital_status.fillna(0,inplace=True)
customer_demographics.replace(to_replace=['Married','Single'],value=[1,2],inplace=True)
#customer_demographics.groupby('marital_status')['age_range'].value_counts(normalize=True)

In [20]:
#Label Encoding on column age_range of customer_demographics
from sklearn.preprocessing import LabelEncoder
le=LabelEncoder()
customer_demographics['age_range']=le.fit_transform(customer_demographics['age_range'])
customer_demographics.age_range.value_counts(dropna=False)

3    271
2    187
1    130
5     68
4     59
0     45
Name: age_range, dtype: int64

In [21]:
customer_demographics.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 760 entries, 0 to 759
Data columns (total 7 columns):
customer_id       760 non-null int64
age_range         760 non-null int64
marital_status    760 non-null int64
rented            760 non-null int64
family_size       760 non-null int64
no_of_children    760 non-null int64
income_bracket    760 non-null int64
dtypes: int64(7)
memory usage: 41.7 KB


### Customer Transaction Data

In [23]:
customer_transaction_data=pd.read_csv('../input/amexpert/customer_transaction_data.csv')
customer_transaction_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1324566 entries, 0 to 1324565
Data columns (total 7 columns):
date               1324566 non-null object
customer_id        1324566 non-null int64
item_id            1324566 non-null int64
quantity           1324566 non-null int64
selling_price      1324566 non-null float64
other_discount     1324566 non-null float64
coupon_discount    1324566 non-null float64
dtypes: float64(3), int64(3), object(1)
memory usage: 70.7+ MB


In [25]:
customer_transaction_data.head()

Unnamed: 0,date,customer_id,item_id,quantity,selling_price,other_discount,coupon_discount
0,2012-01-02,1501,26830,1,35.26,-10.69,0.0
1,2012-01-02,1501,54253,1,53.43,-13.89,0.0
2,2012-01-02,1501,31962,1,106.5,-14.25,0.0
3,2012-01-02,1501,33647,1,67.32,0.0,0.0
4,2012-01-02,1501,48199,1,71.24,-28.14,0.0


### Item Data

In [26]:
item_data=pd.read_csv('../input/amexpert/item_data.csv')
item_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 74066 entries, 0 to 74065
Data columns (total 4 columns):
item_id       74066 non-null int64
brand         74066 non-null int64
brand_type    74066 non-null object
category      74066 non-null object
dtypes: int64(2), object(2)
memory usage: 2.3+ MB


In [27]:
#Replacing 'Established' and 'Local' from column brand_type of item_data with 0,1 respectively. 
item_data['brand_type'].replace(to_replace=['Established','Local'],value=[0,1],inplace=True)

In [28]:
#Label Encoding on column category of item_data
from sklearn.preprocessing import LabelEncoder
le=LabelEncoder()
item_data['category']=le.fit_transform(item_data['category'])

### Concatinating train and test dataset

In [29]:
data=pd.concat([train,test])

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  """Entry point for launching an IPython kernel.


In [30]:
data.shape

(128595, 5)

## Merging Datasets
Primary keys for each table:

* Train: {id}
* Campaign: {campaign_id}
* Customer Demographics: {customer_id}
* Item: {item_id}
* Coupon Item: {coupon_id, item_id}
* Customer Transaction: {customer_id, item_id}

In [31]:
merge=pd.merge(customer_transaction_data,item_data,on='item_id',how='left')

In [32]:
merge.customer_id.value_counts()

1555    4522
711     4173
464     4134
1475    3945
1011    3862
        ... 
1150      87
406       83
1180      83
907       80
1326      79
Name: customer_id, Length: 1582, dtype: int64

* Occurence of each customer_id in merge is more than 1.
* Before using customer_id to merge two datasets, we have to apply aggregate function on it.

In [33]:
merge=merge.groupby('customer_id').mean().reset_index()

In [34]:
merge=pd.merge(data,merge,on='customer_id',how='left')

In [35]:
merge=pd.merge(merge,campaign_data,on='campaign_id',how='left')

In [36]:
merge=pd.merge(merge,customer_demographics,on='customer_id',how='left')

In [37]:
merge.columns

Index(['campaign_id', 'coupon_id', 'customer_id', 'id', 'redemption_status',
       'item_id', 'quantity', 'selling_price', 'other_discount',
       'coupon_discount', 'brand', 'brand_type', 'category', 'campaign_type',
       'start_date', 'end_date', 'age_range', 'marital_status', 'rented',
       'family_size', 'no_of_children', 'income_bracket'],
      dtype='object')

In [39]:
#filtering merge with required columns
merge=merge[['id','quantity', 'selling_price', 'other_discount',
       'coupon_discount', 'brand', 'brand_type', 'category', 'campaign_type',
       'start_date', 'end_date', 'age_range', 'marital_status', 'rented',
       'family_size', 'no_of_children', 'income_bracket','redemption_status']]

In [40]:
merge.head()

Unnamed: 0,id,quantity,selling_price,other_discount,coupon_discount,brand,brand_type,category,campaign_type,start_date,end_date,age_range,marital_status,rented,family_size,no_of_children,income_bracket,redemption_status
0,1,340.487097,184.260484,-33.168935,-0.287258,674.590323,0.464516,6.880645,0,2013-05-19,2013-05-07,3.0,0.0,0.0,1.0,0.0,5.0,0.0
1,2,31.54026,234.247013,-27.699169,-3.215039,758.511688,0.244156,7.32987,0,2013-05-19,2013-05-07,2.0,1.0,0.0,2.0,0.0,3.0,0.0
2,6,1.392784,121.094495,-17.79566,-2.212082,1059.807216,0.185567,6.84433,1,2013-11-03,2013-12-04,3.0,1.0,0.0,2.0,0.0,7.0,0.0
3,7,1.291139,98.276034,-16.65557,-0.751477,762.063291,0.468354,6.616034,0,2013-05-19,2013-05-07,,,,,,,0.0
4,9,247.44306,120.636103,-20.524733,-0.471548,987.588968,0.286477,6.86121,0,2013-02-16,2013-05-04,3.0,1.0,0.0,2.0,0.0,3.0,0.0


In [41]:
merge.isnull().sum()

id                       0
quantity                 0
selling_price            0
other_discount           0
coupon_discount          0
brand                    0
brand_type               0
category                 0
campaign_type            0
start_date               0
end_date                 0
age_range            53995
marital_status       53995
rented               53995
family_size          53995
no_of_children       53995
income_bracket       53995
redemption_status    50226
dtype: int64

In [42]:
merge.fillna(merge.mean(),inplace=True)

In [43]:
#spliting data back to train and test datasets.
train_data=merge[merge.id.isin(train.id)]
test_data=merge[merge.id.isin(test.id)]

In [44]:
train_data.redemption_status.value_counts()

0.0    77640
1.0      729
Name: redemption_status, dtype: int64

In [45]:
test_data=test_data.drop('redemption_status',axis=1)

In [46]:
print(train_data.shape)
print(test_data.shape)

(78369, 18)
(50226, 17)


In [47]:
train_data.isnull().sum().sum()

0

In [48]:
test_data.isnull().sum().sum()

0

In [49]:
train_data.describe()

Unnamed: 0,id,quantity,selling_price,other_discount,coupon_discount,brand,brand_type,category,campaign_type,age_range,marital_status,rented,family_size,no_of_children,income_bracket,redemption_status
count,78369.0,78369.0,78369.0,78369.0,78369.0,78369.0,78369.0,78369.0,78369.0,78369.0,78369.0,78369.0,78369.0,78369.0,78369.0,78369.0
mean,64347.975449,128.055506,116.003809,-18.273669,-0.624218,891.483584,0.266464,6.915662,0.263944,2.425186,0.731565,0.05697,2.23725,0.586452,4.820663,0.009302
std,37126.440855,216.197239,31.576526,8.666128,1.104607,191.156271,0.111135,0.39264,0.440772,0.928254,0.526495,0.173582,0.902628,0.727775,1.749698,0.095999
min,1.0,1.042453,47.73919,-191.188099,-13.629081,396.273735,0.013774,5.006315,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0
25%,32260.0,1.403756,95.877243,-20.866231,-0.705638,764.284038,0.185567,6.670429,0.0,2.0,0.730523,0.0,2.0,0.0,4.0,0.0
50%,64318.0,39.75539,111.057371,-16.724106,-0.242455,874.473885,0.250522,6.883721,0.0,2.414182,0.730523,0.0,2.241877,0.588298,4.843539,0.0
75%,96577.0,190.594817,130.539012,-13.897148,-0.055656,996.903365,0.336907,7.129895,1.0,3.0,1.0,0.056461,2.241877,0.588298,5.0,0.0
max,128595.0,3490.058667,387.71792,-4.674406,0.0,1963.583893,0.631579,10.060241,1.0,5.0,2.0,1.0,5.0,3.0,12.0,1.0


In [50]:
sample1=train_data.drop('id',axis=1)

In [51]:
#dropping duplicate rows.
sample1=sample1.drop_duplicates(keep='last')

In [52]:
sample1.redemption_status.sum()

373.0

In [53]:
sample1.head()

Unnamed: 0,quantity,selling_price,other_discount,coupon_discount,brand,brand_type,category,campaign_type,start_date,end_date,age_range,marital_status,rented,family_size,no_of_children,income_bracket,redemption_status
237,573.036737,122.278676,-13.190832,-0.163571,852.303079,0.21772,7.25986,0,2013-05-19,2013-05-07,1.0,1.0,0.0,2.0,0.0,4.0,1.0
440,1.373974,88.725909,-14.071817,-0.435967,802.465416,0.416178,6.900352,1,2013-01-28,2013-01-03,2.414182,0.730523,0.056461,2.241877,0.588298,4.843539,0.0
678,1.30629,92.436044,-15.907637,-0.202069,912.786008,0.250441,6.964727,1,2013-01-28,2013-01-03,5.0,0.0,0.0,1.0,0.0,2.0,1.0
1084,20.530142,99.165098,-17.488324,-2.359796,1072.555851,0.095745,6.416667,1,2013-11-03,2013-12-04,2.414182,0.730523,0.056461,2.241877,0.588298,4.843539,1.0
1715,16.95336,90.507984,-16.323731,-0.350008,926.557312,0.162055,6.232411,0,2013-05-19,2013-05-07,2.0,1.0,0.0,2.0,0.0,4.0,1.0


In [54]:
sample1['date']=sample1.start_date.dt.day
sample1['month']=sample1.start_date.dt.month
test_data['date']=test_data.start_date.dt.day
test_data['month']=test_data.start_date.dt.month

In [55]:
sample1.corr()

Unnamed: 0,quantity,selling_price,other_discount,coupon_discount,brand,brand_type,category,campaign_type,age_range,marital_status,rented,family_size,no_of_children,income_bracket,redemption_status,date,month
quantity,1.0,0.631983,-0.082174,-0.118615,0.010732,0.086177,-0.060732,0.019857,-0.038364,-0.023233,0.00111,0.030952,0.023504,0.056435,0.062066,-0.033367,0.004163
selling_price,0.631983,1.0,-0.267935,-0.148102,0.306114,-0.232604,0.194995,0.031579,-0.001696,0.012781,-0.041041,0.011005,-0.019236,0.165748,0.009846,-0.059946,0.015227
other_discount,-0.082174,-0.267935,1.0,0.444761,0.105214,-0.067451,0.030925,0.037711,-0.069092,-0.006074,0.05325,-0.064297,-0.039821,0.075395,-0.048536,8.8e-05,-0.040797
coupon_discount,-0.118615,-0.148102,0.444761,1.0,0.011978,0.048385,0.025319,-0.005578,-0.063973,-0.002586,-0.004623,-0.079731,-0.05975,-0.031049,-0.224143,0.03028,-0.035665
brand,0.010732,0.306114,0.105214,0.011978,1.0,-0.653611,0.095526,0.040611,0.034964,0.090386,-0.054464,-0.059375,-0.091642,0.233614,0.018114,-0.059815,-0.001289
brand_type,0.086177,-0.232604,-0.067451,0.048385,-0.653611,1.0,-0.1835,-0.057751,0.018604,-0.063639,0.059899,-0.020326,0.012942,-0.174907,-0.029572,0.052472,-0.020115
category,-0.060732,0.194995,0.030925,0.025319,0.095526,-0.1835,1.0,0.03855,-0.041097,0.002285,0.007034,0.018826,0.013137,0.066259,-0.037373,-0.051482,0.02905
campaign_type,0.019857,0.031579,0.037711,-0.005578,0.040611,-0.057751,0.03855,1.0,-0.05691,0.019389,0.027059,0.055031,0.053775,0.053223,-0.09504,-0.427851,0.175355
age_range,-0.038364,-0.001696,-0.069092,-0.063973,0.034964,0.018604,-0.041097,-0.05691,1.0,0.009643,-0.147535,-0.15745,-0.19999,-0.009647,0.012486,0.03124,-0.017167
marital_status,-0.023233,0.012781,-0.006074,-0.002586,0.090386,-0.063639,0.002285,0.019389,0.009643,1.0,0.202914,0.127303,0.109327,0.074515,0.031878,-0.0282,0.02242


In [56]:
features=sample1.drop(['redemption_status','start_date','end_date','selling_price','brand_type'],axis=1)
y=sample1['redemption_status'].values

In [57]:
test_features=test_data.drop(['id','start_date','end_date','selling_price','brand_type'],axis=1)

In [58]:
#preprocessing
from sklearn.preprocessing import StandardScaler
x=StandardScaler().fit_transform(features)
test_x=StandardScaler().fit_transform(test_features)

In [59]:
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.3,stratify=y,random_state=445)

### CatBoostClassifier  

In [60]:
from catboost import CatBoostClassifier
cat=CatBoostClassifier()
cat.fit(x,y)

Learning rate set to 0.025853
0:	learn: 0.6625898	total: 79.7ms	remaining: 1m 19s
1:	learn: 0.6316490	total: 107ms	remaining: 53.3s
2:	learn: 0.6030647	total: 137ms	remaining: 45.5s
3:	learn: 0.5777018	total: 166ms	remaining: 41.3s
4:	learn: 0.5560984	total: 194ms	remaining: 38.7s
5:	learn: 0.5364966	total: 224ms	remaining: 37.1s
6:	learn: 0.5187060	total: 253ms	remaining: 35.9s
7:	learn: 0.4956886	total: 275ms	remaining: 34.1s
8:	learn: 0.4804346	total: 305ms	remaining: 33.6s
9:	learn: 0.4637032	total: 334ms	remaining: 33.1s
10:	learn: 0.4492672	total: 364ms	remaining: 32.7s
11:	learn: 0.4377256	total: 391ms	remaining: 32.2s
12:	learn: 0.4264202	total: 414ms	remaining: 31.4s
13:	learn: 0.4122544	total: 446ms	remaining: 31.4s
14:	learn: 0.4008190	total: 476ms	remaining: 31.2s
15:	learn: 0.3903604	total: 503ms	remaining: 30.9s
16:	learn: 0.3823715	total: 536ms	remaining: 31s
17:	learn: 0.3733237	total: 565ms	remaining: 30.8s
18:	learn: 0.3660748	total: 595ms	remaining: 30.7s
19:	learn: 

161:	learn: 0.2131285	total: 4.76s	remaining: 24.6s
162:	learn: 0.2130244	total: 4.79s	remaining: 24.6s
163:	learn: 0.2129799	total: 4.82s	remaining: 24.5s
164:	learn: 0.2129284	total: 4.84s	remaining: 24.5s
165:	learn: 0.2127961	total: 4.88s	remaining: 24.5s
166:	learn: 0.2126826	total: 4.91s	remaining: 24.5s
167:	learn: 0.2125930	total: 4.93s	remaining: 24.4s
168:	learn: 0.2124374	total: 4.96s	remaining: 24.4s
169:	learn: 0.2123167	total: 4.99s	remaining: 24.4s
170:	learn: 0.2121331	total: 5.02s	remaining: 24.4s
171:	learn: 0.2121201	total: 5.04s	remaining: 24.3s
172:	learn: 0.2120299	total: 5.07s	remaining: 24.2s
173:	learn: 0.2119623	total: 5.1s	remaining: 24.2s
174:	learn: 0.2118875	total: 5.13s	remaining: 24.2s
175:	learn: 0.2118032	total: 5.15s	remaining: 24.1s
176:	learn: 0.2117014	total: 5.18s	remaining: 24.1s
177:	learn: 0.2116594	total: 5.21s	remaining: 24s
178:	learn: 0.2115673	total: 5.24s	remaining: 24s
179:	learn: 0.2115304	total: 5.26s	remaining: 24s
180:	learn: 0.21145

326:	learn: 0.2031676	total: 10.2s	remaining: 20.9s
327:	learn: 0.2031230	total: 10.2s	remaining: 20.9s
328:	learn: 0.2030502	total: 10.2s	remaining: 20.8s
329:	learn: 0.2029451	total: 10.2s	remaining: 20.8s
330:	learn: 0.2029319	total: 10.3s	remaining: 20.8s
331:	learn: 0.2028693	total: 10.3s	remaining: 20.7s
332:	learn: 0.2028367	total: 10.3s	remaining: 20.7s
333:	learn: 0.2028318	total: 10.4s	remaining: 20.6s
334:	learn: 0.2028002	total: 10.4s	remaining: 20.6s
335:	learn: 0.2027711	total: 10.4s	remaining: 20.6s
336:	learn: 0.2026713	total: 10.4s	remaining: 20.5s
337:	learn: 0.2026534	total: 10.5s	remaining: 20.5s
338:	learn: 0.2026318	total: 10.5s	remaining: 20.4s
339:	learn: 0.2025486	total: 10.5s	remaining: 20.4s
340:	learn: 0.2024634	total: 10.5s	remaining: 20.4s
341:	learn: 0.2024206	total: 10.6s	remaining: 20.3s
342:	learn: 0.2023739	total: 10.6s	remaining: 20.3s
343:	learn: 0.2022766	total: 10.6s	remaining: 20.3s
344:	learn: 0.2022572	total: 10.7s	remaining: 20.2s
345:	learn: 

489:	learn: 0.1973345	total: 14.9s	remaining: 15.5s
490:	learn: 0.1973189	total: 14.9s	remaining: 15.5s
491:	learn: 0.1972963	total: 15s	remaining: 15.5s
492:	learn: 0.1972700	total: 15s	remaining: 15.4s
493:	learn: 0.1972135	total: 15s	remaining: 15.4s
494:	learn: 0.1971874	total: 15.1s	remaining: 15.4s
495:	learn: 0.1971203	total: 15.1s	remaining: 15.3s
496:	learn: 0.1971085	total: 15.1s	remaining: 15.3s
497:	learn: 0.1970840	total: 15.1s	remaining: 15.3s
498:	learn: 0.1970231	total: 15.2s	remaining: 15.2s
499:	learn: 0.1970019	total: 15.2s	remaining: 15.2s
500:	learn: 0.1970004	total: 15.2s	remaining: 15.2s
501:	learn: 0.1969312	total: 15.3s	remaining: 15.1s
502:	learn: 0.1969124	total: 15.3s	remaining: 15.1s
503:	learn: 0.1968684	total: 15.3s	remaining: 15.1s
504:	learn: 0.1968639	total: 15.3s	remaining: 15s
505:	learn: 0.1968497	total: 15.4s	remaining: 15s
506:	learn: 0.1968344	total: 15.4s	remaining: 15s
507:	learn: 0.1967982	total: 15.4s	remaining: 14.9s
508:	learn: 0.1967774	to

652:	learn: 0.1934183	total: 19.7s	remaining: 10.5s
653:	learn: 0.1934110	total: 19.7s	remaining: 10.4s
654:	learn: 0.1934054	total: 19.8s	remaining: 10.4s
655:	learn: 0.1933952	total: 19.8s	remaining: 10.4s
656:	learn: 0.1933766	total: 19.8s	remaining: 10.3s
657:	learn: 0.1933708	total: 19.8s	remaining: 10.3s
658:	learn: 0.1932987	total: 19.9s	remaining: 10.3s
659:	learn: 0.1932774	total: 19.9s	remaining: 10.3s
660:	learn: 0.1932688	total: 20s	remaining: 10.2s
661:	learn: 0.1932630	total: 20s	remaining: 10.2s
662:	learn: 0.1932441	total: 20s	remaining: 10.2s
663:	learn: 0.1932440	total: 20s	remaining: 10.1s
664:	learn: 0.1932286	total: 20.1s	remaining: 10.1s
665:	learn: 0.1932190	total: 20.1s	remaining: 10.1s
666:	learn: 0.1931753	total: 20.1s	remaining: 10s
667:	learn: 0.1931694	total: 20.1s	remaining: 10s
668:	learn: 0.1931462	total: 20.2s	remaining: 9.98s
669:	learn: 0.1931235	total: 20.2s	remaining: 9.95s
670:	learn: 0.1931168	total: 20.2s	remaining: 9.92s
671:	learn: 0.1930973	to

817:	learn: 0.1907342	total: 24.6s	remaining: 5.46s
818:	learn: 0.1907125	total: 24.6s	remaining: 5.43s
819:	learn: 0.1906999	total: 24.6s	remaining: 5.4s
820:	learn: 0.1906968	total: 24.6s	remaining: 5.37s
821:	learn: 0.1906880	total: 24.7s	remaining: 5.34s
822:	learn: 0.1906633	total: 24.7s	remaining: 5.31s
823:	learn: 0.1906293	total: 24.7s	remaining: 5.28s
824:	learn: 0.1906284	total: 24.8s	remaining: 5.25s
825:	learn: 0.1906210	total: 24.8s	remaining: 5.22s
826:	learn: 0.1906027	total: 24.8s	remaining: 5.19s
827:	learn: 0.1905961	total: 24.9s	remaining: 5.16s
828:	learn: 0.1905857	total: 24.9s	remaining: 5.13s
829:	learn: 0.1905810	total: 24.9s	remaining: 5.1s
830:	learn: 0.1905802	total: 24.9s	remaining: 5.07s
831:	learn: 0.1905766	total: 25s	remaining: 5.04s
832:	learn: 0.1905715	total: 25s	remaining: 5.01s
833:	learn: 0.1905661	total: 25s	remaining: 4.98s
834:	learn: 0.1905614	total: 25.1s	remaining: 4.95s
835:	learn: 0.1905434	total: 25.1s	remaining: 4.92s
836:	learn: 0.190540

978:	learn: 0.1887264	total: 29.3s	remaining: 628ms
979:	learn: 0.1887230	total: 29.3s	remaining: 598ms
980:	learn: 0.1887223	total: 29.3s	remaining: 568ms
981:	learn: 0.1887066	total: 29.4s	remaining: 538ms
982:	learn: 0.1886971	total: 29.4s	remaining: 508ms
983:	learn: 0.1886869	total: 29.4s	remaining: 478ms
984:	learn: 0.1886616	total: 29.4s	remaining: 448ms
985:	learn: 0.1886571	total: 29.5s	remaining: 419ms
986:	learn: 0.1886565	total: 29.5s	remaining: 389ms
987:	learn: 0.1886434	total: 29.5s	remaining: 359ms
988:	learn: 0.1886425	total: 29.6s	remaining: 329ms
989:	learn: 0.1886137	total: 29.6s	remaining: 299ms
990:	learn: 0.1886114	total: 29.6s	remaining: 269ms
991:	learn: 0.1886107	total: 29.6s	remaining: 239ms
992:	learn: 0.1886026	total: 29.7s	remaining: 209ms
993:	learn: 0.1886015	total: 29.7s	remaining: 179ms
994:	learn: 0.1885847	total: 29.7s	remaining: 149ms
995:	learn: 0.1885836	total: 29.8s	remaining: 120ms
996:	learn: 0.1885652	total: 29.8s	remaining: 89.7ms
997:	learn:

<catboost.core.CatBoostClassifier at 0x7f845969e9e8>

In [61]:
y_pred=cat.predict_proba(x_train)[:,1]
y_hat=cat.predict_proba(x_test)[:,1]
test_y=cat.predict_proba(test_x)[:,1]

In [62]:
from sklearn.metrics import roc_auc_score
print('train set:',roc_auc_score(y_train,y_pred))
print('test set:',roc_auc_score(y_test,y_hat))

train set: 0.9068091234025045
test set: 0.9097361949474625


In [63]:
test_data['redemption_status']=test_y

### Final Submission

In [64]:
submission=test_data[['id','redemption_status']]

In [65]:
submission.to_csv('final.csv',index=False)