In [1]:
import numpy as np
import scipy as sp
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf
import statsmodels.stats.api as sms
import sklearn as sk

import matplotlib as mpl
mpl.use('Agg')
import matplotlib.pylab as plt
from mpl_toolkits.mplot3d import Axes3D
%matplotlib inline

import seaborn as sns
sns.set()
sns.set_style("whitegrid")
sns.set_color_codes()
''
import warnings

warnings.filterwarnings('ignore')

  from pandas.core import datetools


In [2]:
# 테이블 현황 파악을 위한 함수 
def summary_table(table):
    df = pd.DataFrame()
    for i in table.columns:
        name = i
        dtype = table[i].dtype.name
        null = table[i].isnull().sum()
        act = table.shape[0] - null
        unique = len(table[i].unique())
        data = {'name': name, 'dtype': dtype, 'null': null, 'act': act, 'unique': unique}
        df = df.append(data, ignore_index=True)
    return df

### 데이터 불러오기: 일본어는 영어로 번역, 지역에 Prefecture가 붙은 지명은 Prefecture 제외

In [3]:
# train 
detail_train = pd.read_csv('coupon_data_project2/coupon_detail_train_translated_en.csv', parse_dates=['I_DATE'])
visit_train = pd.read_csv('coupon_data_project2/coupon_visit_train.csv', parse_dates=['I_DATE'])

area_train = pd.read_csv('coupon_data_project2/coupon_area_train_translated_en.csv')
coupon_list_train = pd.read_csv('coupon_data_project2/coupon_list_train_translated_en.csv', parse_dates=['DISPFROM', 'DISPEND', 'VALIDFROM', 'VALIDEND'])

# base data
prefecture_location = pd.read_csv('coupon_data_project2/prefecture_locations_translated_en.csv')
user_list = pd.read_csv('coupon_data_project2/user_list_translated_en.csv', parse_dates=['WITHDRAW_DATE', 'REG_DATE'])

# test data
area_test = pd.read_csv('coupon_data_project2/coupon_area_test_translated_en.csv')
coupon_list_test = pd.read_csv('coupon_data_project2/coupon_list_test_translated_en.csv', parse_dates=['DISPFROM', 'DISPEND', 'VALIDFROM', 'VALIDEND'])

# submisiion
submission = pd.read_csv('coupon_data_project2/sample_submission.csv')

### Merge( vist & train)

In [4]:
detail_train[:2]

Unnamed: 0,ITEM_COUNT,I_DATE,SMALL_AREA_NAME,PURCHASEID_hash,USER_ID_hash,COUPON_ID_hash
0,1,2012-03-28 15:06:00,Hyogo,c820a8882374a4e472f0984a8825893f,d9dca3cb44bab12ba313eaa681f663eb,34c48f84026e08355dc3bd19b427f09a
1,1,2011-07-04 23:52:00,"Ginza Shinbashi, Tokyo, Ueno",1b4eb2435421ede98c8931c42e8220ec,560574a339f1b25e57b0221e486907ed,767673b7a777854a92b73b0934ddfae7


In [5]:
visit_train[:2]

Unnamed: 0,PURCHASE_FLG,I_DATE,PAGE_SERIAL,REFERRER_hash,VIEW_COUPON_ID_hash,USER_ID_hash,SESSION_ID_hash,PURCHASEID_hash
0,0,2012-03-28 14:15:00,7,7d3892e54acb559ae36c459978489330,34c48f84026e08355dc3bd19b427f09a,d9dca3cb44bab12ba313eaa681f663eb,673af822615593249e7c6a9a1a6bbb1a,
1,0,2012-03-28 14:17:28,9,7d3892e54acb559ae36c459978489330,34c48f84026e08355dc3bd19b427f09a,d9dca3cb44bab12ba313eaa681f663eb,673af822615593249e7c6a9a1a6bbb1a,


### detail & visit merge 후 구분자 추가 

In [6]:
detail_train['PURCHASE_FLG'] = 1
detail_train.rename(columns = {'I_DATE': 'purchase_date', 'SMALL_AREA_NAME': 'resid_small' }, inplace=True) # 거주지 구분을 위해 컬럼명 변경
visit_train.rename(columns = {'I_DATE': 'view_date', 'VIEW_COUPON_ID_hash':'COUPON_ID_hash'}, inplace=True)

In [7]:
# date의 시간을 제거
detail_train['purchase_date'] = pd.to_datetime(detail_train['purchase_date'].dt.date)
visit_train['view_date'] = pd.to_datetime(visit_train['view_date'].dt.date)

In [8]:
visit_train.drop(labels=['REFERRER_hash', 'SESSION_ID_hash', 'PURCHASEID_hash'], axis=1, inplace=True)
detail_train.drop(labels=['PURCHASEID_hash'], axis=1, inplace=True)

In [9]:
train = pd.merge(detail_train, visit_train, how='outer')

In [10]:
print(detail_train.shape)
print(visit_train.shape)
print(train.shape)

(168996, 6)
(2833180, 5)
(2913043, 8)


### 전처리 일관성을 유지를 위한 coupon_list merge(311~ train임)

In [11]:
coupon_list = pd.merge(coupon_list_test, coupon_list_train, how='outer')

In [12]:
coupon_list_test.shape, coupon_list_train.shape, coupon_list.shape

((310, 24), (19413, 24), (19723, 24))

In [13]:
# PRICE_RATE: 10%단위로 그룹화
coupon_list['PRICE_RATE_GROUP'] = coupon_list['PRICE_RATE'].apply(lambda x: "~10%" if x < 10 else "10%" if x < 20 else "20%" if x< 30 else "30%" if x < 40 else "40%" if x < 50 else "50%" if x < 60 else "60%" if x < 70 else "70%" if x < 80 else "80%" if x < 90 else "90%~")

In [14]:
# 캡슐과 장르 통합 및 명칭 변경
coupon_list['Case'] = coupon_list['CAPSULE_TEXT'] + coupon_list['GENRE_NAME']
coupon_list['Case'] = coupon_list['Case'].apply(lambda x: "HOTEL" if x == 'Guest houseHotel and Japanese hotel' 
                          or x == 'HotelHotel and Japanese hotel'
                          or x == 'Japanese hotelHotel and Japanese hotel'
                          or x == 'Japanse guest houseHotel and Japanese hotel'
                          or x == 'LodgeHotel and Japanese hotel'
                          or x == 'Public hotelHotel and Japanese hotel'
                          or x == 'Resort innHotel and Japanese hotel'
                          or x == 'Vacation rentalHotel and Japanese hotel'
                          else "NAIL" if x == 'Nail and eye salonNail and eye salon'
                          else "HAIR" if x == 'Hair salonHair salon'
                          else "FOOD" if x == 'FoodFood'
                          else "SPA" if x == 'SpaSpa'
                          else "BEAUTY" if x == 'BeautyBeauty'
                          else "CLASS" if x == 'ClassLesson'
                          else "CORRESPONDENCE" if x == 'Correspondence courseLessonClassLesson'
                          else "DELIVERY" if x == 'Delivery serviceDelivery service'
                          else "EVENT" if x == 'EventOther coupon'
                          else "GIFT" if x == 'Gift cardGift card'
                          else "HEALTH" if x == 'Health and medicalHealth and medical'
                          else "LEISURE" if x == 'LeisureLeisure'
                          else "LESSON" if x == 'LessonLesson'
                          else "OTHER" if x == 'OtherOther coupon'
                          else "RELAXATION" if x == 'RelaxationRelaxation'
                          else "WEB" if x == 'Web serviceOther coupon'
                          else 'OTHER'
                          )

In [15]:
# valid_period: 그룹화
coupon_list['valid_group'] = coupon_list['VALIDPERIOD'].apply(lambda x: "~30" if x < 30 else "30~150" if x < 150 else "150~")

In [16]:
# date type 의 시간 제외(판단보류)
coupon_list['DISPFROM'] = pd.to_datetime(coupon_list['DISPFROM'].dt.date)
coupon_list['DISPEND'] = pd.to_datetime(coupon_list['DISPEND'].dt.date)
coupon_list['VALIDFROM'] = pd.to_datetime(coupon_list['VALIDFROM'].dt.date)
coupon_list['VALIDEND'] = pd.to_datetime(coupon_list['VALIDEND'].dt.date)
coupon_list['REG_DATE'] = pd.to_datetime(coupon_list['VALIDEND'].dt.date)

In [17]:
# usable 전처리
coupon_list['USABLE_DATE_MON'].replace([0,2,1,np.nan],[0,0,1,1], inplace=True)
coupon_list['USABLE_DATE_TUE'].replace([0,2,1,np.nan],[0,0,1,1], inplace=True)
coupon_list['USABLE_DATE_WED'].replace([0,2,1,np.nan],[0,0,1,1], inplace=True)
coupon_list['USABLE_DATE_THU'].replace([0,2,1,np.nan],[0,0,1,1], inplace=True)
coupon_list['USABLE_DATE_FRI'].replace([0,2,1,np.nan],[0,0,1,1], inplace=True)
coupon_list['USABLE_DATE_SAT'].replace([0,2,1,np.nan],[0,0,1,1], inplace=True)
coupon_list['USABLE_DATE_SUN'].replace([0,2,1,np.nan],[0,0,1,1], inplace=True)
coupon_list['USABLE_DATE_HOLIDAY'].replace([0,2,1,np.nan],[0,0,1,1], inplace=True)
coupon_list['USABLE_DATE_BEFORE_HOLIDAY'].replace([0,2,1,np.nan],[0,0,1,1], inplace=True)

In [18]:
# 판매가 계산 및 그룹화
coupon_list['Price'] = coupon_list['CATALOG_PRICE'] + coupon_list['DISCOUNT_PRICE']

# PRICE_group
coupon_list['price_group'] = coupon_list['Price'].apply(lambda x: '~1500' if x < 1500
                                                      else '1500~3000' if x < 3001
                                                      else '3000~4500' if x < 4501
                                                      else '4500~6000' if x < 6001
                                                      else '6000~7500' if x < 7501
                                                      else '7500~9000' if x < 9001
                                                      else '9000~10500' if x < 10501
                                                      else '10500~12000' if x < 12001
                                                      else '12000~13500' if x < 13501
                                                      else '13500~15000' if x < 15001
                                                      else '15000~16500' if x < 16501
                                                      else '16500~18000' if x < 18001
                                                      else '18000~19500' if x < 19501
                                                      else '19500~21000' if x < 21001
                                                      else '21000~22500' if x < 22501
                                                      else '22500~24000' if x < 24001
                                                      else '24000~25500' if x < 25501
                                                      else '25500~27000' if x < 27001
                                                      else '27000~28500' if x < 28501
                                                      else '28500~30000' if x < 30001
                                                      else '30000~31500' if x < 31501
                                                      else '31500~33000' if x < 33001
                                                      else '33000~34500' if x < 34501
                                                      else '34500~36000' if x < 36001
                                                      else '36000~' 
                                                      )

In [19]:
# 지역명 변경
coupon_list.rename(columns = {"LARGE_AREA_NAME": "coupon_large_area", "ken_name": "coupon_pref_area", 
                              "SMALL_AREA_NAME": "coupon_small_area"}, inplace=True)

In [20]:
# 기존에 있던 Price, PRICE_RATE, VALIDPERIOD, CAPSULE_TEXT, GENRE_NAME
coupon_list.drop(labels = ['Price', 'PRICE_RATE'], axis=1, inplace=True )
coupon_list.drop(labels = ['VALIDPERIOD'], axis=1, inplace=True )
coupon_list.drop(labels = ['CAPSULE_TEXT', 'GENRE_NAME'], axis=1, inplace=True)
coupon_list.drop(labels=['CATALOG_PRICE','DISCOUNT_PRICE'], axis=1, inplace = True)
coupon_list.drop(labels = ['DISPFROM', 'DISPEND', 'DISPPERIOD', 'VALIDFROM', 'VALIDEND', 'REG_DATE'], axis=1, inplace=True)

In [21]:
coupon_list.columns

Index(['USABLE_DATE_MON', 'USABLE_DATE_TUE', 'USABLE_DATE_WED',
       'USABLE_DATE_THU', 'USABLE_DATE_FRI', 'USABLE_DATE_SAT',
       'USABLE_DATE_SUN', 'USABLE_DATE_HOLIDAY', 'USABLE_DATE_BEFORE_HOLIDAY',
       'coupon_large_area', 'coupon_pref_area', 'coupon_small_area',
       'COUPON_ID_hash', 'PRICE_RATE_GROUP', 'Case', 'valid_group',
       'price_group'],
      dtype='object')

In [22]:
# 더미처리
coupon_list = pd.get_dummies(coupon_list, columns = ["Case"], prefix="Case")
coupon_list = pd.get_dummies(coupon_list, columns = ["coupon_large_area"], prefix="l_area")
coupon_list = pd.get_dummies(coupon_list, columns = ["coupon_pref_area"], prefix="p_area")
coupon_list = pd.get_dummies(coupon_list, columns = ["coupon_small_area"], prefix="s_area")
coupon_list = pd.get_dummies(coupon_list, columns = ["PRICE_RATE_GROUP"], prefix="rate")
coupon_list = pd.get_dummies(coupon_list, columns = ["valid_group"], prefix="valid")
coupon_list = pd.get_dummies(coupon_list, columns = ["price_group"], prefix="price_group")

In [23]:
coupon_list.shape

(19723, 203)

In [24]:
# train set과 test set을 다시 분리
coupon_list_train = coupon_list[311:]
coupon_list_test = coupon_list[:310]

### User_list 전처리

In [25]:
user_list.columns

Index(['REG_DATE', 'SEX_ID', 'AGE', 'WITHDRAW_DATE', 'PREF_NAME',
       'USER_ID_hash'],
      dtype='object')

In [26]:
# SEX_ID: f는 0으로 M은 1로
user_list['SEX_ID'] = user_list['SEX_ID'].apply(lambda x: 0 if x == 'f' else 1) 

In [27]:
# age_group 그룹화
user_list['age_group'] = user_list['AGE'].apply(lambda x: "~15" if x < 15 
                                               else "15~24" if x < 25
                                               else "25~34" if x < 35
                                               else "35~44" if x < 45
                                               else "45~54" if x < 55
                                               else "55~64" if x < 65
                                               else "65~74" if x < 75
                                               else "75~84" if x < 85
                                               else "85~") 

In [28]:
# PREF_NAME 이름 변경
user_list.rename({'PREF_NAME': 'resid_pref'}, axis=1, inplace=True)

In [29]:
# REG_DATE , WITHDRAW_DATE 삭제
user_list.drop(labels = ['REG_DATE', 'WITHDRAW_DATE'], axis=1, inplace=True)

In [30]:
# AGE 삭제
user_list.drop(labels = ['AGE'], axis=1, inplace=True)

In [31]:
# 더미처리 
user_list = pd.get_dummies(user_list, columns = ['age_group'], prefix='age')
user_list = pd.get_dummies(user_list, columns = ['resid_pref'], prefix='resid_pref')

In [32]:
user_list.shape

(22873, 56)

## train_set 구성

In [33]:
# train & coupon merge
train = pd.merge(train, coupon_list, how='left', on='COUPON_ID_hash')

In [34]:
# train & user_list
train = pd.merge(train, user_list, how='left', on = 'USER_ID_hash')

### train set NaN 값 처리

In [35]:
# item_count: 모두 0으로 처리
train['ITEM_COUNT'] = train['ITEM_COUNT'].fillna(0)

# purchase_date: 구매여부가 있으므로 삭제
train.drop(labels = ['purchase_date'], axis=1, inplace=True)


# view_date: 봤는지 여부만 파악 후 신규셀 생성(봤으면1, 안봤으면 0)
train['View'] = train['view_date'].apply(lambda x: 0 if x == None else 1)
train.drop(labels= ['view_date'], axis=1, inplace=True)

# USABLE_DATE_MON 등이 널값: view만 되고 제품정보 및 판매지에는 없음 -> null인 row는 모두 제외
train = train[train['USABLE_DATE_MON'].isin([0, 1])]

# PAGE_SERIAL: 제외
train.drop(labels = ['PAGE_SERIAL'], axis=1, inplace=True)

In [36]:
summary_table(train).pivot_table(index=['dtype', 'name'])

Unnamed: 0_level_0,Unnamed: 1_level_0,act,null,unique
dtype,name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
float64,Case_BEAUTY,2597810.0,0.0,2.0
float64,Case_CLASS,2597810.0,0.0,2.0
float64,Case_DELIVERY,2597810.0,0.0,2.0
float64,Case_EVENT,2597810.0,0.0,2.0
float64,Case_FOOD,2597810.0,0.0,2.0
float64,Case_GIFT,2597810.0,0.0,2.0
float64,Case_HAIR,2597810.0,0.0,2.0
float64,Case_HEALTH,2597810.0,0.0,2.0
float64,Case_HOTEL,2597810.0,0.0,2.0
float64,Case_LEISURE,2597810.0,0.0,2.0


### 모델 생성을 위한 데이터 생성

In [44]:
train = pd.get_dummies(train, columns = ['resid_small'], prefix='resid_s')

In [45]:
to_be_removed_train = {'USER_ID_hash', 'COUPON_ID_hash', 'PURCHASE_FLG'}
ls_train = [i for i in list(train.columns) if i not in to_be_removed_train]
X_train = train.filter(ls_train)
y_train = train.PURCHASE_FLG

In [47]:
X_train[:2]

Unnamed: 0,ITEM_COUNT,USABLE_DATE_MON,USABLE_DATE_TUE,USABLE_DATE_WED,USABLE_DATE_THU,USABLE_DATE_FRI,USABLE_DATE_SAT,USABLE_DATE_SUN,USABLE_DATE_HOLIDAY,USABLE_DATE_BEFORE_HOLIDAY,...,resid_s_Tochigi,resid_s_Tokushima,resid_s_Tottori,resid_s_Toyama,resid_s_Triple,resid_s_Wakayama,resid_s_Yamagata,resid_s_Yamaguchi,resid_s_Yamanashi,resid_s_Yokohama
0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,0,0,0,0,0,0,0,0,0,0
1,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,0,0,0,0,0,0,0,0,0,0


### 랜덤포레스트를 통해 1차 트레인 시도

In [56]:
from sklearn import clone
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.tree import DecisionTreeClassifier
from tqdm import tqdm

In [57]:
from sklearn.metrics import accuracy_score

In [58]:
for i in tqdm(range(1,5)):
    globals()['tree%s' % i] = RandomForestClassifier(max_depth=i, n_estimators=10).fit(X_train, y_train)
    print('{}_accuracy_scroe:{}'.format('tree%s' % i, accuracy_score(y_train, globals()['tree%s' % i].predict(X_train))))

  0%|          | 0/4 [00:00<?, ?it/s]

tree1_accuracy_scroe:0.922145191526709


 25%|██▌       | 1/4 [00:50<02:31, 50.42s/it]

tree2_accuracy_scroe:0.922145191526709


 50%|█████     | 2/4 [01:30<01:30, 45.34s/it]

tree3_accuracy_scroe:0.922145191526709


 75%|███████▌  | 3/4 [02:12<00:44, 44.14s/it]

tree4_accuracy_scroe:0.9351657742483092


100%|██████████| 4/4 [02:55<00:00, 43.96s/it]


### 크로스 체크

In [59]:
from sklearn.model_selection import train_test_split

X_train1, X_test1, y_train1, y_test1 = train_test_split(X_train, y_train)

In [60]:
for i in range(1,2):
    globals()['tree%s' % i] = RandomForestClassifier(max_depth=i, n_estimators=10).fit(X_train1, y_train1)
    print('{}_accuracy_scroe:{}'.format('tree%s' % i, accuracy_score(y_test1, globals()['tree%s' % i].predict(X_test1))))

tree1_accuracy_scroe:0.9220574852991671


### test 셋 구성을 위한 merge

In [61]:
coupon_list_test['A'] = 1
user_list['A'] = 1
test = pd.merge(coupon_list_test, user_list, how='outer')

In [62]:
user_list.shape, coupon_list_test.shape

((22873, 57), (310, 204))

In [63]:
summary_table(test)

Unnamed: 0,act,dtype,name,null,unique
0,7090630.0,float64,USABLE_DATE_MON,0.0,2.0
1,7090630.0,float64,USABLE_DATE_TUE,0.0,2.0
2,7090630.0,float64,USABLE_DATE_WED,0.0,2.0
3,7090630.0,float64,USABLE_DATE_THU,0.0,2.0
4,7090630.0,float64,USABLE_DATE_FRI,0.0,2.0
5,7090630.0,float64,USABLE_DATE_SAT,0.0,2.0
6,7090630.0,float64,USABLE_DATE_SUN,0.0,2.0
7,7090630.0,float64,USABLE_DATE_HOLIDAY,0.0,2.0
8,7090630.0,float64,USABLE_DATE_BEFORE_HOLIDAY,0.0,2.0
9,7090630.0,object,COUPON_ID_hash,0.0,310.0


In [67]:
to_be_removed_test = {'USER_ID_hash', 'COUPON_ID_hash', 'PURCHASE_FLG'}
ls_test = [i for i in list(test.columns) if i not in to_be_removed_test]
X_test = train.filter(ls_test)


### column이 일치하지 않으므로 컬럼을 일치시켜서 다시 모델링

In [65]:
mod_ls = [i for i in X_train.columns if i in test.columns]
X_train_re = train.filter(mod_ls)
y_train_re = train.PURCHASE_FLG
tree50 = RandomForestClassifier(max_depth=50, n_estimators=10).fit(X_train_re, y_train_re)

In [68]:
y_pred_test = tree50.predict(X_test.filter(mod_ls))

In [69]:
y_pred_test.sum()

31287

In [70]:
y_pred_test = pd.DataFrame({'A': y_pred_test})

In [71]:
test_sub = test.filter(['USER_ID_hash', 'COUPON_ID_hash'])
test_sub = test_sub.join(y_pred_test, how='outer')

In [72]:
subsub = test_sub[test_sub['A'] == 1]
subsub[:2]

Unnamed: 0,USER_ID_hash,COUPON_ID_hash,A
2,e66ae91b978b3229f8fd858c80615b73,c76ea297ebd3a5a4d3bf9f75269f66fa,1.0
3,43fc18f32eafb05713ec02935e2c2825,c76ea297ebd3a5a4d3bf9f75269f66fa,1.0


In [73]:
lookup_table = subsub.groupby('USER_ID_hash').apply(lambda x: list(x.COUPON_ID_hash))

In [74]:
lookup_table = pd.DataFrame(lookup_table, columns = ['COUPON_ID_hash']).reset_index()
lookup_table

Unnamed: 0,USER_ID_hash,COUPON_ID_hash
0,0000b53e182165208887ba65c079fc21,"[dd74dc95ca294afa02db40a543ae1763, 86c64391318..."
1,0005b1068d5f2b8f2a7c978fcfe1ca06,[f93dc6e223935d817e1237f8f73b56a2]
2,000cc06982785a19e2a2fdb40b1c9d59,"[f93dc6e223935d817e1237f8f73b56a2, 0acc89ba759..."
3,0013518e41c416cd6a181d277dd8ca0b,[86c64391318f1d751647bf8e1882cf1d]
4,001fd7876e3aa29393537c6baf308e43,"[78aec0e250021cf5247986f6ca55a47e, 3810431a776..."
5,002383753c1e5d6305c8aff6f89e26d6,"[dd74dc95ca294afa02db40a543ae1763, f93dc6e2239..."
6,0025cae7997d25ea5cf8851bb099c798,"[f93dc6e223935d817e1237f8f73b56a2, 3810431a776..."
7,002822059a01d895fad84f2f2ff5c1f1,"[c76ea297ebd3a5a4d3bf9f75269f66fa, dd74dc95ca2..."
8,002ae30377cd30f65652e52618e8b2d6,[18503db7753295eed16ebd37b1809cfb]
9,002bbdd51b2a042c051c66c43b55439a,[16fac142e913b0fab4e25a06200e841b]


In [75]:
submission[:2]

Unnamed: 0,USER_ID_hash,PURCHASED_COUPONS
0,0000b53e182165208887ba65c079fc21,
1,00035b86e6884589ec8d28fbf2fe7757,


In [76]:
submission.insert(2, 'COUPON', submission['USER_ID_hash'].map(lookup_table.set_index('USER_ID_hash')['COUPON_ID_hash']))

In [77]:
submission.drop('PURCHASED_COUPONS', axis=1, inplace=True)
submission.rename(columns={'COUPON':'PURCHASED_COUPONS'}, inplace=True)

In [78]:
submission.to_csv('test_submission.csv')