### Load Dataset

In [1]:
import pandas as pd

In [2]:
X_df = pd.read_csv('purchase_history.csv')

In [3]:
X_df.rename(columns={'created_at': 'date', 'user_id': 'customer', 'product_id': 'product'}, 
          inplace=True)

In [4]:
X_df.head()

Unnamed: 0,customer,product,date
0,647746,2011564,2019-02-09
1,906136,4105782,2019-11-23
2,12862997,2144063,2019-07-16
3,12097079,1207738,2019-04-05
4,3184486,4278839,2019-03-08


### Split Dataset (8 - 4 Months) 

In [5]:
X_df['date'] = pd.to_datetime(X_df['date'])

In [6]:
date_min = X_df['date'].min()
date_max = X_df['date'].max()

In [7]:
print(date_min, date_max)

2019-01-02 00:00:00 2019-12-31 00:00:00


In [8]:
X_df_8 = X_df[X_df.date < pd.Timestamp(2019, 9,1)].reset_index(drop=True)

In [9]:
X_df_4 = X_df[X_df.date >= pd.Timestamp(2019, 9, 1)].reset_index(drop=True)

### Finding Last & Next Purchases and Differences

In [10]:
last_purchase = X_df_8.groupby(['customer', 'product']).date.max().reset_index()
last_purchase.columns = ['customer', 'product', 'last_purchase']

In [11]:
next_purchase = X_df_4.groupby(['customer', 'product']).date.min().reset_index()
next_purchase.columns = ['customer', 'product', 'next_purchase']

In [12]:
purchase_dates = pd.merge(last_purchase, next_purchase, on=['customer', 'product'], how='left')

In [13]:
purchase_dates['next_purchase_day'] = (purchase_dates['next_purchase'] - purchase_dates['last_purchase']).dt.days

In [14]:
purchase_dates.head()

Unnamed: 0,customer,product,last_purchase,next_purchase,next_purchase_day
0,577243,724112,2019-06-29,NaT,
1,577243,875550,2019-06-16,NaT,
2,577243,895941,2019-08-23,2019-12-13,112.0
3,577243,895955,2019-05-12,2019-10-17,158.0
4,577243,896036,2019-05-16,2019-09-15,122.0


In [15]:
purchase_dates.next_purchase_day.describe()

count    169409.000000
mean         67.047925
std          52.371211
min           1.000000
25%          28.000000
50%          53.000000
75%          92.000000
max         349.000000
Name: next_purchase_day, dtype: float64

In [16]:
ctm_dt = purchase_dates[['customer', 'product', 'next_purchase_day']]

In [17]:
ctm_dt.head()

Unnamed: 0,customer,product,next_purchase_day
0,577243,724112,
1,577243,875550,
2,577243,895941,112.0
3,577243,895955,158.0
4,577243,896036,122.0


### Extracting Features: Frequency

In [18]:
frequency = X_df_8.groupby(['customer', 'product']).date.count().reset_index()
frequency.columns = ['customer', 'product', 'frequency']

ctm_dt = pd.merge(ctm_dt, frequency, on=['customer', 'product'])

In [19]:
ctm_dt.head()

Unnamed: 0,customer,product,next_purchase_day,frequency
0,577243,724112,,5
1,577243,875550,,4
2,577243,895941,112.0,10
3,577243,895955,158.0,2
4,577243,896036,122.0,10


### Extracting Features: Days Between The Last 3 Purchases

In [20]:
day_order = X_df_8[['customer', 'product', 'date']]
day_order['date'] = day_order['date'].dt.date

In [21]:
day_order = day_order.sort_values(['customer', 'product', 'date'])
day_order = day_order.drop_duplicates(subset=['customer', 'product', 'date'], keep='first')

In [22]:
day_order.head()

Unnamed: 0,customer,product,date
49727,577243,724112,2019-03-13
948315,577243,724112,2019-03-25
304174,577243,724112,2019-04-22
1351903,577243,724112,2019-06-16
718283,577243,724112,2019-06-29


In [23]:
day_order['prev_date'] = day_order.groupby(['customer', 'product'])['date'].shift(1)
day_order['t2_date'] = day_order.groupby(['customer', 'product'])['date'].shift(2)
day_order['t3_date'] = day_order.groupby(['customer', 'product'])['date'].shift(3)

In [24]:
day_order.head()

Unnamed: 0,customer,product,date,prev_date,t2_date,t3_date
49727,577243,724112,2019-03-13,,,
948315,577243,724112,2019-03-25,2019-03-13,,
304174,577243,724112,2019-04-22,2019-03-25,2019-03-13,
1351903,577243,724112,2019-06-16,2019-04-22,2019-03-25,2019-03-13
718283,577243,724112,2019-06-29,2019-06-16,2019-04-22,2019-03-25


In [25]:
day_order['day_diff'] = (day_order['date'] - day_order['prev_date']).dt.days
day_order['day_diff_2'] = (day_order['prev_date'] - day_order['t2_date']).dt.days
day_order['day_diff_3'] = (day_order['t2_date'] - day_order['t3_date']).dt.days

In [26]:
day_order.head()

Unnamed: 0,customer,product,date,prev_date,t2_date,t3_date,day_diff,day_diff_2,day_diff_3
49727,577243,724112,2019-03-13,,,,,,
948315,577243,724112,2019-03-25,2019-03-13,,,12.0,,
304174,577243,724112,2019-04-22,2019-03-25,2019-03-13,,28.0,12.0,
1351903,577243,724112,2019-06-16,2019-04-22,2019-03-25,2019-03-13,55.0,28.0,12.0
718283,577243,724112,2019-06-29,2019-06-16,2019-04-22,2019-03-25,13.0,55.0,28.0


### Extracting Features: Mean & STD of The Difference Between Purchases

In [27]:
day_diff = day_order.groupby(['customer', 'product']).agg({'day_diff': ['mean','std']}).reset_index()
day_diff.columns = ['customer', 'product', 'day_diff_mean','day_diff_std']

In [28]:
day_diff.head()

Unnamed: 0,customer,product,day_diff_mean,day_diff_std
0,577243,724112,27.0,20.049938
1,577243,875550,49.333333,21.221059
2,577243,895941,24.0,27.060118
3,577243,895955,99.0,
4,577243,896036,14.777778,9.909311


### Removing Customer-Product Pairs with Less Than Count 3

In [29]:
day_order_last = day_order.drop_duplicates(subset=['customer', 'product'], keep='last')

In [30]:
day_order_last = day_order_last.dropna()

In [31]:
day_order_last.head()

Unnamed: 0,customer,product,date,prev_date,t2_date,t3_date,day_diff,day_diff_2,day_diff_3
718283,577243,724112,2019-06-29,2019-06-16,2019-04-22,2019-03-25,13.0,55.0,28.0
1106330,577243,875550,2019-06-16,2019-04-13,2019-02-13,2019-01-19,64.0,59.0,25.0
964647,577243,895941,2019-08-23,2019-06-29,2019-06-21,2019-06-16,55.0,8.0,5.0
1299273,577243,896036,2019-05-16,2019-05-12,2019-04-13,2019-03-13,4.0,29.0,31.0
1378901,577243,896067,2019-08-23,2019-05-12,2019-04-22,2019-02-22,103.0,20.0,59.0


### Merging All The Features

In [32]:
day_order_last = pd.merge(day_order_last, day_diff, on=['customer', 'product'])

In [33]:
day_order_last

Unnamed: 0,customer,product,date,prev_date,t2_date,t3_date,day_diff,day_diff_2,day_diff_3,day_diff_mean,day_diff_std
0,577243,724112,2019-06-29,2019-06-16,2019-04-22,2019-03-25,13.0,55.0,28.0,27.000000,20.049938
1,577243,875550,2019-06-16,2019-04-13,2019-02-13,2019-01-19,64.0,59.0,25.0,49.333333,21.221059
2,577243,895941,2019-08-23,2019-06-29,2019-06-21,2019-06-16,55.0,8.0,5.0,24.000000,27.060118
3,577243,896036,2019-05-16,2019-05-12,2019-04-13,2019-03-13,4.0,29.0,31.0,14.777778,9.909311
4,577243,896067,2019-08-23,2019-05-12,2019-04-22,2019-02-22,103.0,20.0,59.0,60.666667,41.525093
...,...,...,...,...,...,...,...,...,...,...,...
212352,45338907,778755,2019-08-03,2019-07-14,2019-05-25,2019-04-23,20.0,50.0,32.0,34.000000,15.099669
212353,45339741,903335,2019-07-20,2019-06-10,2019-04-27,2019-02-21,40.0,44.0,65.0,41.750000,19.259197
212354,45339741,975982,2019-08-30,2019-08-03,2019-07-16,2019-06-10,27.0,18.0,36.0,22.875000,12.123619
212355,45431507,4375282,2019-08-27,2019-07-11,2019-06-20,2019-04-08,47.0,21.0,73.0,47.000000,26.000000


In [34]:
features = pd.merge(ctm_dt, day_order_last, on=['customer', 'product'])

In [35]:
features = features.dropna()

In [36]:
features

Unnamed: 0,customer,product,next_purchase_day,frequency,date,prev_date,t2_date,t3_date,day_diff,day_diff_2,day_diff_3,day_diff_mean,day_diff_std
2,577243,895941,112.0,10,2019-08-23,2019-06-29,2019-06-21,2019-06-16,55.0,8.0,5.0,24.000000,27.060118
3,577243,896036,122.0,10,2019-05-16,2019-05-12,2019-04-13,2019-03-13,4.0,29.0,31.0,14.777778,9.909311
5,577243,967949,67.0,4,2019-08-23,2019-02-13,2019-02-02,2019-01-26,191.0,11.0,7.0,69.666667,105.096781
6,577243,968036,236.0,5,2019-02-10,2019-02-02,2019-01-31,2019-01-26,8.0,2.0,5.0,5.000000,3.000000
11,577243,968429,49.0,9,2019-08-22,2019-06-21,2019-05-24,2019-05-16,62.0,28.0,8.0,24.125000,17.787937
...,...,...,...,...,...,...,...,...,...,...,...,...,...
212349,45335223,816699,61.0,8,2019-07-08,2019-06-21,2019-06-02,2019-04-20,17.0,19.0,43.0,26.333333,14.468356
212350,45335223,3165743,26.0,13,2019-08-29,2019-07-28,2019-06-18,2019-06-08,32.0,40.0,10.0,23.000000,15.362291
212351,45335223,4960959,48.0,12,2019-08-06,2019-07-31,2019-07-08,2019-07-07,6.0,23.0,1.0,12.000000,10.230673
212352,45338907,778755,105.0,11,2019-08-03,2019-07-14,2019-05-25,2019-04-23,20.0,50.0,32.0,34.000000,15.099669


### Preparing The Labels

In [37]:
labels = features.next_purchase_day

In [38]:
features = features.drop('next_purchase_day', 1)

In [39]:
labels.describe()

count    80671.000000
mean        57.712524
std         45.161480
min          1.000000
25%         25.000000
50%         46.000000
75%         79.000000
max        349.000000
Name: next_purchase_day, dtype: float64

In [40]:
features.describe()

Unnamed: 0,customer,product,frequency,day_diff,day_diff_2,day_diff_3,day_diff_mean,day_diff_std
count,80671.0,80671.0,80671.0,80671.0,80671.0,80671.0,80671.0,80671.0
mean,9278442.0,1987991.0,7.117787,31.291381,30.327937,30.960296,31.090998,23.017821
std,8213218.0,1295263.0,5.40262,28.867234,28.282869,29.678013,16.259094,16.86482
min,577243.0,114044.0,4.0,1.0,1.0,1.0,1.0,0.0
25%,3364980.0,915832.0,4.0,11.0,10.0,10.0,18.333333,10.954451
50%,6915487.0,1625941.0,5.0,23.0,22.0,21.0,28.666667,18.681542
75%,12612060.0,2940875.0,8.0,42.0,41.0,42.0,41.666667,30.512293
max,45339740.0,6058017.0,328.0,227.0,229.0,233.0,80.333333,132.805622


### Categorical to One-Hot

In [41]:
from sklearn.preprocessing import OneHotEncoder

In [42]:
def train_onehot(dataframe):
    enc = OneHotEncoder(handle_unknown='ignore')
    enc.fit(dataframe)
    return enc

In [43]:
cat_columns = ['customer', 'product']

In [44]:
onehot_model = train_onehot(features[cat_columns])

In [45]:
train_cat = onehot_model.transform(features[cat_columns])

In [46]:
train_cat.shape

(80671, 47650)

### Numerical to Categorical

In [47]:
from scipy.sparse import hstack
import numpy as np

In [48]:
def train_num2cat(dataframe, num_bins):
    bins = pd.qcut(dataframe, duplicates='drop', q=num_bins, retbins=True)[1]
    bins = np.concatenate(([-np.inf], bins[1:-1], [np.inf]))
    res_df = pd.cut(dataframe, bins).to_frame()
    oh_model = train_onehot(res_df)
    return bins, oh_model

In [49]:
def transform_num2cat(dataframe, model, bins):
    res_df = pd.cut(dataframe, bins).to_frame()
    return model.transform(res_df)

In [50]:
numer_colums = ['frequency', 'day_diff', 'day_diff_2', 'day_diff_3', 'day_diff_mean', 'day_diff_std']

In [51]:
numer_bins, numer_models = [], []

In [52]:
for col in numer_colums:
    numer_bin, numer_model = train_num2cat(features[col], 10)
    numer_bins.append(numer_bin)
    numer_models.append(numer_model)    

In [53]:
numers = [transform_num2cat(features[numer_colums[i]], numer_model, numer_bin)
          for i, (numer_bin, numer_model) in enumerate(zip(numer_bins, numer_models))]

In [54]:
train_numer = hstack(numers)

In [55]:
train_numer.shape

(80671, 56)

### Input-Output of The Model

In [56]:
train_X = hstack((train_numer, train_cat))

In [57]:
train_y = labels.to_list()

### Train-Validation Split

In [58]:
from sklearn.model_selection import train_test_split

In [59]:
X_train, X_val, y_train, y_val = train_test_split(train_X, train_y, test_size=0.1, random_state=0)

### Building & Training The Model

In [89]:
from sklearn.linear_model import Ridge

In [90]:
reg = Ridge(alpha=4)

In [91]:
reg.fit(X_train, y_train)

Ridge(alpha=4)

### Evaluation

In [92]:
reg.score(X_train, y_train)

0.48681015919175163

In [93]:
reg.score(X_val, y_val)

0.23490478809413795

### Future Predictions

In [310]:
last_purchase = X_df.groupby(['customer', 'product']).date.max().reset_index()
last_purchase.columns = ['customer', 'product', 'last_purchase']

In [311]:
last_purchase

Unnamed: 0,customer,product,last_purchase
0,577243,724112,2019-06-29
1,577243,875550,2019-06-16
2,577243,895941,2019-12-13
3,577243,895955,2019-10-29
4,577243,896036,2019-10-10
...,...,...,...
318045,45431507,4375282,2019-08-27
318046,45443053,1660612,2019-12-24
318047,45471482,1945563,2019-08-10
318048,45471482,3309954,2019-11-15


In [312]:
max_purchase = X_df.groupby(['customer', 'product']).date.max().reset_index()
max_purchase.columns = ['customer', 'product', 'last_purchase']

max_purchase['recency'] = (pd.Timestamp(2019, 12,31) - max_purchase['last_purchase']).dt.days

In [313]:
max_purchase

Unnamed: 0,customer,product,last_purchase,recency
0,577243,724112,2019-06-29,185
1,577243,875550,2019-06-16,198
2,577243,895941,2019-12-13,18
3,577243,895955,2019-10-29,63
4,577243,896036,2019-10-10,82
...,...,...,...,...
318045,45431507,4375282,2019-08-27,126
318046,45443053,1660612,2019-12-24,7
318047,45471482,1945563,2019-08-10,143
318048,45471482,3309954,2019-11-15,46


In [314]:
frequency = X_df.groupby(['customer', 'product']).date.count().reset_index()
frequency.columns = ['customer', 'product', 'frequency']

In [315]:
frequency

Unnamed: 0,customer,product,frequency
0,577243,724112,5
1,577243,875550,4
2,577243,895941,11
3,577243,895955,4
4,577243,896036,12
...,...,...,...
318045,45431507,4375282,4
318046,45443053,1660612,45
318047,45471482,1945563,4
318048,45471482,3309954,4


In [316]:
day_order = X_df[['customer', 'product', 'date']]
day_order['date'] = day_order['date'].dt.date

day_order = day_order.sort_values(['customer', 'product', 'date'])
day_order = day_order.drop_duplicates(subset=['customer', 'product', 'date'], keep='first')

day_order.head()

day_order['prev_date'] = day_order.groupby(['customer', 'product'])['date'].shift(1)
day_order['t2_date'] = day_order.groupby(['customer', 'product'])['date'].shift(2)
day_order['t3_date'] = day_order.groupby(['customer', 'product'])['date'].shift(3)

day_order.head()

day_order['day_diff'] = (day_order['date'] - day_order['prev_date']).dt.days
day_order['day_diff_2'] = (day_order['prev_date'] - day_order['t2_date']).dt.days
day_order['day_diff_3'] = (day_order['t2_date'] - day_order['t3_date']).dt.days

In [317]:
day_order

Unnamed: 0,customer,product,date,prev_date,t2_date,t3_date,day_diff,day_diff_2,day_diff_3
68489,577243,724112,2019-03-13,,,,,,
1304270,577243,724112,2019-03-25,2019-03-13,,,12.0,,
418727,577243,724112,2019-04-22,2019-03-25,2019-03-13,,28.0,12.0,
1859117,577243,724112,2019-06-16,2019-04-22,2019-03-25,2019-03-13,55.0,28.0,12.0
988009,577243,724112,2019-06-29,2019-06-16,2019-04-22,2019-03-25,13.0,55.0,28.0
...,...,...,...,...,...,...,...,...,...
1377731,45471482,4563945,2019-10-17,2019-10-04,2019-10-01,,13.0,3.0,
1695845,45471482,4563945,2019-10-29,2019-10-17,2019-10-04,2019-10-01,12.0,13.0,3.0
1121576,45471482,4563945,2019-11-10,2019-10-29,2019-10-17,2019-10-04,12.0,12.0,13.0
1535111,45471482,4563945,2019-11-11,2019-11-10,2019-10-29,2019-10-17,1.0,12.0,12.0


In [318]:
day_diff = day_order.groupby(['customer', 'product']).agg({'day_diff': ['mean','std']}).reset_index()
day_diff.columns = ['customer', 'product', 'day_diff_mean', 'day_diff_std']

In [319]:
day_diff

Unnamed: 0,customer,product,day_diff_mean,day_diff_std
0,577243,724112,27.000000,20.049938
1,577243,875550,49.333333,21.221059
2,577243,895941,32.800000,37.752998
3,577243,895955,89.666667,73.446125
4,577243,896036,25.454545,33.365742
...,...,...,...,...
318045,45431507,4375282,47.000000,26.000000
318046,45443053,1660612,3.714286,4.344917
318047,45471482,1945563,58.000000,76.315136
318048,45471482,3309954,5.000000,2.645751


In [320]:
day_order.rename(columns={'date': 'last_purchase'}, inplace=True)
last_purchase['last_purchase'] = last_purchase['last_purchase'].dt.date
day_order = pd.merge(last_purchase, day_order, on=['customer', 'product', 'last_purchase'], how='left')

In [321]:
day_order

Unnamed: 0,customer,product,last_purchase,prev_date,t2_date,t3_date,day_diff,day_diff_2,day_diff_3
0,577243,724112,2019-06-29,2019-06-16,2019-04-22,2019-03-25,13.0,55.0,28.0
1,577243,875550,2019-06-16,2019-04-13,2019-02-13,2019-01-19,64.0,59.0,25.0
2,577243,895941,2019-12-13,2019-08-23,2019-06-29,2019-06-21,112.0,55.0,8.0
3,577243,895955,2019-10-29,2019-10-17,2019-05-12,2019-02-02,12.0,158.0,99.0
4,577243,896036,2019-10-10,2019-09-15,2019-05-16,2019-05-12,25.0,122.0,4.0
...,...,...,...,...,...,...,...,...,...
318045,45431507,4375282,2019-08-27,2019-07-11,2019-06-20,2019-04-08,47.0,21.0,73.0
318046,45443053,1660612,2019-12-24,2019-12-07,2019-12-06,2019-12-04,17.0,1.0,2.0
318047,45471482,1945563,2019-08-10,2019-03-17,2019-02-27,2019-02-17,146.0,18.0,10.0
318048,45471482,3309954,2019-11-15,2019-11-11,2019-11-08,2019-10-31,4.0,3.0,8.0


### Merging Features

In [322]:
features = pd.merge(last_purchase, max_purchase[['customer', 'product', 'recency']], on=['customer', 'product'])

In [323]:
features = pd.merge(features, frequency, on=['customer', 'product'])

In [324]:
features = pd.merge(features, day_order[['customer', 'product', 'day_diff', 'day_diff_2', 'day_diff_3']], on=['customer', 'product'])

In [325]:
features = pd.merge(features, day_diff, on=['customer', 'product'])

In [326]:
features = features[['customer', 'product', 'recency', 'frequency', 'day_diff', 'day_diff_2', 'day_diff_3',
                     'day_diff_mean', 'day_diff_std']]

In [327]:
features

Unnamed: 0,customer,product,recency,frequency,day_diff,day_diff_2,day_diff_3,day_diff_mean,day_diff_std
0,577243,724112,185,5,13.0,55.0,28.0,27.000000,20.049938
1,577243,875550,198,4,64.0,59.0,25.0,49.333333,21.221059
2,577243,895941,18,11,112.0,55.0,8.0,32.800000,37.752998
3,577243,895955,63,4,12.0,158.0,99.0,89.666667,73.446125
4,577243,896036,82,12,25.0,122.0,4.0,25.454545,33.365742
...,...,...,...,...,...,...,...,...,...
318045,45431507,4375282,126,4,47.0,21.0,73.0,47.000000,26.000000
318046,45443053,1660612,7,45,17.0,1.0,2.0,3.714286,4.344917
318047,45471482,1945563,143,4,146.0,18.0,10.0,58.000000,76.315136
318048,45471482,3309954,46,4,4.0,3.0,8.0,5.000000,2.645751


In [328]:
features.describe()

Unnamed: 0,customer,product,recency,frequency,day_diff,day_diff_2,day_diff_3,day_diff_mean,day_diff_std
count,318050.0,318050.0,318050.0,318050.0,318050.0,318050.0,318050.0,318050.0,318050.0
mean,10007850.0,2141462.0,117.411181,6.836809,36.443754,34.520623,34.573067,35.119698,27.264249
std,8817177.0,1401675.0,77.911267,18.03078,37.032948,35.635061,36.489303,20.968358,22.240279
min,577243.0,113646.0,0.0,4.0,1.0,1.0,1.0,1.0,0.0
25%,3527045.0,967804.0,54.0,4.0,11.0,10.0,10.0,18.8,11.523888
50%,7368366.0,1797684.0,101.0,5.0,25.0,23.0,22.0,31.333333,21.221059
75%,13415420.0,3154877.0,169.0,7.0,49.0,46.0,46.0,47.666667,36.473735
max,45471480.0,7381721.0,359.0,7618.0,349.0,334.0,338.0,119.333333,194.279009


### Encoding

In [329]:
test_cat = onehot_model.transform(features[cat_columns])

In [330]:
numers = [transform_num2cat(features[numer_colums[i]], numer_model, numer_bin)
          for i, (numer_bin, numer_model) in enumerate(zip(numer_bins, numer_models))]

In [331]:
test_numer = hstack(numers)

In [332]:
test_X = hstack((test_numer, test_cat))

In [333]:
test_X.shape

(318050, 47706)

### Predicting

In [334]:
preds = reg.predict(test_X)

In [335]:
preds = [int(p) for p in preds]

In [336]:
out_df = last_purchase[['customer', 'product', 'last_purchase']]

In [337]:
out_df

Unnamed: 0,customer,product,last_purchase
0,577243,724112,2019-06-29
1,577243,875550,2019-06-16
2,577243,895941,2019-12-13
3,577243,895955,2019-10-29
4,577243,896036,2019-10-10
...,...,...,...
318045,45431507,4375282,2019-08-27
318046,45443053,1660612,2019-12-24
318047,45471482,1945563,2019-08-10
318048,45471482,3309954,2019-11-15


### Add Days

In [338]:
from datetime import datetime
from datetime import timedelta

In [339]:
result_date = [start + timedelta(offset) for (start, offset) in zip(out_df['last_purchase'].to_list(), preds)]

### Writing to File

In [340]:
final_df = pd.DataFrame()

In [341]:
final_df['next_purchase'] = result_date

In [342]:
final_df

Unnamed: 0,next_purchase
0,2019-10-28
1,2019-09-06
2,2020-03-02
3,2020-01-07
4,2019-12-16
...,...
318045,2019-10-04
318046,2020-01-12
318047,2019-09-30
318048,2020-02-07


In [343]:
final_df.to_csv('answer.csv', index=False)

### References:
- [Using Machine Learning to Predict Customers’ Next Purchase Day](https://towardsdatascience.com/using-machine-learning-to-predict-customers-next-purchase-day-7895ad49b4db)
- [Predicting Next Purchase Day](https://towardsdatascience.com/predicting-next-purchase-day-15fae5548027)