In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import datetime as dt

In [2]:
# Load the CSV; the 'occurence' column (spelled as in the file) is parsed into datetimes.
data = pd.read_csv(
    '../data/mini.csv',
    parse_dates=['occurence'],
)
data.head()

Unnamed: 0,customer_id,occurence,cost,item_id
0,416705,2017-05-07 21:58:10,299.0,515274
1,13891,2018-02-10 17:35:11,1090.0,828115
2,9081,2017-12-21 17:13:44,499.0,695501
3,470904,2017-10-31 10:39:49,290.0,899821
4,58500,2018-03-09 20:57:29,150.0,518554


In [3]:
# Earliest and latest 'occurence' timestamps, printed on one line.
print(*data['occurence'].agg(['min', 'max']))

2017-01-01 12:33:40 2018-07-23 10:38:37


In [4]:
# Reference ("snapshot") date against which recency is measured.  Per the min/max
# printed above it lies after the latest 'occurence', so recency values are positive.
sd = dt.datetime(2018,8,1)
# Whole days elapsed between each event and the snapshot date.
# `.dt.days` replaces `.astype('timedelta64[D]')`, which pandas >= 2.0 rejects
# (only s/ms/us/ns resolutions are accepted).  The float cast keeps the column's
# float dtype, as shown in the preview below.
data['recency_in_days'] = (sd - data['occurence']).dt.days.astype('float64')
data.head()

Unnamed: 0,customer_id,occurence,cost,item_id,recency_in_days
0,416705,2017-05-07 21:58:10,299.0,515274,450.0
1,13891,2018-02-10 17:35:11,1090.0,828115,171.0
2,9081,2017-12-21 17:13:44,499.0,695501,222.0
3,470904,2017-10-31 10:39:49,290.0,899821,273.0
4,58500,2018-03-09 20:57:29,150.0,518554,144.0


In [5]:
# Keep only rows whose recency is under 730 days (2 x 365).
data = data.loc[data['recency_in_days'].lt(730)]

In [30]:
# One row per (item_id, customer_id) pair with its recency / frequency / monetary value.
# Built-in aggregations replace the per-group Python lambdas: the results are the same
# but pandas runs them on its optimized path instead of calling Python once per group.
# The rename is chained instead of using inplace=True.
rfm_table_users = (
    data.groupby(['item_id', 'customer_id'])
        .agg({'recency_in_days': 'min',   # Recency: smallest days-since value in the group
              'customer_id': 'size',      # Frequency: number of rows in the group
              'cost': 'sum'})             # Monetary value: total cost in the group
        .rename(columns={'recency_in_days': 'recency',
                         'customer_id': 'frequency',
                         'cost': 'monetary_value'})
)

In [31]:
# Display the RFM table, indexed by (item_id, customer_id).
rfm_table_users

Unnamed: 0_level_0,Unnamed: 1_level_0,recency,frequency,monetary_value
item_id,customer_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
000006,429842,436.0,1,1520.0
000030,502,479.0,1,1791.0
000059,53615,527.0,1,990.0
000059,102820,351.0,1,687.0
000061,35139,255.0,1,995.0
000061,46756,370.0,1,945.0
000061,53615,527.0,1,990.0
000061,207857,535.0,1,990.0
000061,272143,315.0,1,141.0
000061,282037,500.0,1,990.0


In [11]:
# Spot check: show every row belonging to a single customer.
data.loc[data['customer_id'].eq(627746)]

Unnamed: 0,customer_id,occurence,cost,item_id,recency_in_days
359171,627746,2018-07-23 10:05:58,20.0,1016155,8.0


In [32]:
# Cut points at the 20/40/60/80% quantiles of every column, stored as
# {column: {quantile: value}}.  (Despite the name these are quintile boundaries.)
quartiles_users = {
    column: cuts.to_dict()
    for column, cuts in rfm_table_users.quantile(q=[0.2, 0.4, 0.6, 0.8]).items()
}

In [33]:
def RClass(x,p,q):
    """Score ``x`` from 5 down to 1 against the cut points in ``q[p]``.

    Smaller values score higher: ``x`` at or below the 0.2 cut point scores 5,
    at or below 0.4 scores 4, at or below 0.6 scores 3, at or below 0.8
    scores 2.  Anything else (larger values, or values for which every
    comparison is false such as NaN) scores 1.

    Parameters
    ----------
    x : value to score.
    p : key selecting which set of cut points in ``q`` to use.
    q : mapping of the form ``{p: {0.2: cut, 0.4: cut, 0.6: cut, 0.8: cut}}``.

    Returns
    -------
    int between 1 and 5.
    """
    cuts = q[p]
    for level, score in ((0.2, 5), (0.4, 4), (0.6, 3), (0.8, 2)):
        if x <= cuts[level]:
            return score
    return 1
    
def FMClass(x,p,q):
    """Score ``x`` from 1 up to 5 against the cut points in ``q[p]``.

    Larger values score higher: ``x`` at or below the 0.2 cut point scores 1,
    at or below 0.4 scores 2, at or below 0.6 scores 3, at or below 0.8
    scores 4.  Anything else (larger values, or values for which every
    comparison is false such as NaN) scores 5.

    Parameters
    ----------
    x : value to score.
    p : key selecting which set of cut points in ``q`` to use.
    q : mapping of the form ``{p: {0.2: cut, 0.4: cut, 0.6: cut, 0.8: cut}}``.

    Returns
    -------
    int between 1 and 5.
    """
    cuts = q[p]
    for level, score in ((0.2, 1), (0.4, 2), (0.6, 3), (0.8, 4)):
        if x <= cuts[level]:
            return score
    return 5

In [34]:
# NOTE: this binds a second name to the same DataFrame (no copy is made), so the
# score columns added below also appear on rfm_table_users.
rfm_seg_users = rfm_table_users
# Recency is scored with RClass (lower is better); frequency and monetary value
# are scored with FMClass (higher is better).
for metric, score_col, score_fn in (
    ('recency', 'R_Quartile', RClass),
    ('frequency', 'F_Quartile', FMClass),
    ('monetary_value', 'M_Quartile', FMClass),
):
    rfm_seg_users[score_col] = rfm_seg_users[metric].apply(score_fn, args=(metric, quartiles_users))

In [35]:
# Concatenate the three scores as text, e.g. R=2, F=1, M=3 -> '213'.
rfm_seg_users['RFMClass'] = (
    rfm_seg_users['R_Quartile'].map(str)
    + rfm_seg_users['F_Quartile'].map(str)
    + rfm_seg_users['M_Quartile'].map(str)
)

In [36]:
# Preview the first rows, now including the three score columns and RFMClass.
rfm_seg_users.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,recency,frequency,monetary_value,R_Quartile,F_Quartile,M_Quartile,RFMClass
item_id,customer_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
6,429842,436.0,1,1520.0,2,1,3,213
30,502,479.0,1,1791.0,1,1,3,113
59,53615,527.0,1,990.0,1,1,3,113
59,102820,351.0,1,687.0,2,1,2,212
61,35139,255.0,1,995.0,3,1,3,313


In [37]:
# Raw combined score: the R, F and M scores added together.
rfm_seg_users['Rating'] = (
    rfm_seg_users['R_Quartile']
    .add(rfm_seg_users['F_Quartile'])
    .add(rfm_seg_users['M_Quartile'])
)

In [38]:
# Recompute the 20/40/60/80% cut points now that the table also holds the score
# columns and 'Rating'.  numeric_only=True skips the string column 'RFMClass';
# without it pandas >= 2.0 (where numeric_only defaults to False) raises on that
# column.  The frame is read through rfm_seg_users, the name the next cell uses.
quartiles_users = rfm_seg_users.quantile(q=[0.2,0.4,0.6,0.8], numeric_only=True).to_dict()

In [39]:
# Bin the summed R+F+M score into a 1-5 rating using the 'Rating' cut points.
# The raw sum is rebuilt from the component scores instead of being read back from
# the 'Rating' column this statement overwrites, so re-running the cell yields the
# same result rather than binning an already-binned value.
# Assumes quartiles_users['Rating'] was computed while 'Rating' still held the raw sum.
rfm_seg_users['Rating'] = (
    rfm_seg_users['R_Quartile'] + rfm_seg_users['F_Quartile'] + rfm_seg_users['M_Quartile']
).apply(FMClass, args=('Rating', quartiles_users))

In [41]:
# Display the segmented table, including the final 1-5 Rating column.
rfm_seg_users

Unnamed: 0_level_0,Unnamed: 1_level_0,recency,frequency,monetary_value,R_Quartile,F_Quartile,M_Quartile,RFMClass,Rating
item_id,customer_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
000006,429842,436.0,1,1520.0,2,1,3,213,2
000030,502,479.0,1,1791.0,1,1,3,113,1
000059,53615,527.0,1,990.0,1,1,3,113,1
000059,102820,351.0,1,687.0,2,1,2,212,1
000061,35139,255.0,1,995.0,3,1,3,313,2
000061,46756,370.0,1,945.0,2,1,3,213,2
000061,53615,527.0,1,990.0,1,1,3,113,1
000061,207857,535.0,1,990.0,1,1,3,113,1
000061,272143,315.0,1,141.0,2,1,1,211,1
000061,282037,500.0,1,990.0,1,1,3,113,1
