In [1]:
import pandas as pd
import numpy as np
import rfm_analysis as rfm

In [2]:
# Print the module's help text to review the rfm_analysis() docstring
# (arguments, supported methods and return value) before calling it.
help(rfm)

Help on module rfm_analysis:

NAME
    rfm_analysis

FUNCTIONS
    rfm_analysis(df, cid, r, f, m, method='simple', weight=[0.25, 0.25, 0.5])
        Objective:
        Consumer segmentation based on a simple RFM analysis
        
        Arguments:
        - df = dataframe to segment, type pandas.DataFrame
        - cid = column name for customer unique ID
        - r = column name for recency data, column type should be numeric
        - f = column name for frequency data, column type should be numeric
        - m = column name for monetary data, column type should be numeric
        - method = {'simple' or 'fr-grid'}, default 'simple'
        - weight = list containing weight of each rfm variables, used if method = 'simple'
            expected value = [r_weight, f_weight, m_weight], default [1/4,1/4,1/2]
        
        Return: 
        Pandas.DataFrame containing customer ID, rfm rank, score, and segmentation label

FILE
    c:\users\asif\downloads\almas bootcamp rakamin!\7. week 

In [3]:
# Load the RFM input table from df_rfm.csv and preview its first rows.
df = pd.read_csv('df_rfm.csv')
df.head()

Unnamed: 0,Customer ID,recency,frequency,monetary
0,AA-10315,184,11,16073.344
1,AA-10375,19,15,2734.83
2,AA-10480,259,12,5493.936
3,AA-10645,55,18,22927.855
4,AB-10015,415,6,1984.48


In [4]:
# 'simple' method with explicit weights in [r_weight, f_weight, m_weight] order:
# recency 1/4, frequency 1/4, monetary 2/4.
result1 = rfm.rfm_analysis(
    df=df,
    cid='Customer ID',
    r='recency',
    f='frequency',
    m='monetary',
    method='simple',
    weight=[1/4, 1/4, 2/4],
)
result1

Unnamed: 0,Customer ID,recency,r_rank,frequency,f_rank,monetary,m_rank,rfm_score,segment
0,AA-10315,184,2,11,3,16073.344,4,3.25,Medium-value Customer
1,AA-10375,19,5,15,4,2734.830,1,2.75,Low-value Customer
2,AA-10480,259,1,12,3,5493.936,2,2.00,Low-value Customer
3,AA-10645,55,3,18,4,22927.855,5,4.25,High-value Customer
4,AB-10015,415,1,6,1,1984.480,1,1.00,Lost Customer
...,...,...,...,...,...,...,...,...,...
788,XP-21865,43,4,28,5,8676.214,3,3.75,Medium-value Customer
789,YC-21895,4,5,8,2,27208.926,5,4.25,High-value Customer
790,YS-21880,9,5,12,3,46478.992,5,4.50,High-value Customer
791,ZC-21910,54,3,31,5,40212.889,5,4.50,High-value Customer


In [5]:
# Number of customers that fall into each segment of the 'simple' result.
result1.groupby('segment')['Customer ID'].count().reset_index()

Unnamed: 0,segment,Customer ID
0,Lost Customer,108
1,Low-value Customer,305
2,Medium-value Customer,241
3,High-value Customer,98
4,Top Customer,41


In [6]:
# Same customers, segmented with the 'fr-grid' method (no weights passed;
# per the docstring, weights are only used by the 'simple' method).
result2 = rfm.rfm_analysis(
    df=df,
    cid='Customer ID',
    r='recency',
    f='frequency',
    m='monetary',
    method='fr-grid',
)
result2

Unnamed: 0,Customer ID,recency,r_rank,frequency,f_rank,segment
0,AA-10315,184,2,11,3,At Risk
1,AA-10375,19,5,15,4,Champions
2,AA-10480,259,1,12,3,At Risk
3,AA-10645,55,3,18,4,Loyal Customers
4,AB-10015,415,1,6,1,Hibernating
...,...,...,...,...,...,...
788,XP-21865,43,4,28,5,Loyal Customers
789,YC-21895,4,5,8,2,Potential Loyalist
790,YS-21880,9,5,12,3,Potential Loyalist
791,ZC-21910,54,3,31,5,Loyal Customers


In [7]:
# Number of customers that fall into each segment of the 'fr-grid' result.
result2.groupby('segment')['Customer ID'].count().reset_index()

Unnamed: 0,segment,Customer ID
0,About To Sleep,67
1,At Risk,113
2,Can't Lose Them,35
3,Champions,85
4,Hibernating,168
5,Loyal Customers,137
6,Need Attention,29
7,New customers,22
8,Potential Loyalist,119
9,Promising,18


In [8]:
# Let's use a different data set, with different column names
df1 = pd.read_csv('contoh_db_rfm.csv')

In [9]:
# 'simple' method on the second data set; weight is left at its default.
result3 = rfm.rfm_analysis(
    df=df1,
    cid='customer_id',
    r='recency_day',
    f='frequency_number',
    m='monetary_total',
    method='simple',
)
result3

Unnamed: 0,customer_id,recency_day,r_rank,frequency_number,f_rank,monetary_total,m_rank,rfm_score,segment
0,1,4,5,6,4,540,4,4.25,High-value Customer
1,2,6,4,11,5,940,4,4.25,High-value Customer
2,3,46,1,1,1,35,1,1.0,Lost Customer
3,4,23,2,3,2,65,2,2.0,Low-value Customer
4,5,15,3,4,3,179,3,3.0,Low-value Customer
5,6,32,2,2,2,56,2,2.0,Low-value Customer
6,7,7,4,3,2,140,3,3.0,Low-value Customer
7,8,50,1,1,1,950,5,3.0,Low-value Customer
8,9,34,1,15,5,2630,5,4.0,Medium-value Customer
9,10,10,3,5,4,191,3,3.25,Medium-value Customer


In [10]:
# 'fr-grid' method on the second data set.
result4 = rfm.rfm_analysis(
    df=df1,
    cid='customer_id',
    r='recency_day',
    f='frequency_number',
    m='monetary_total',
    method='fr-grid',
)
result4

Unnamed: 0,customer_id,recency_day,r_rank,frequency_number,f_rank,segment
0,1,4,5,6,4,Champions
1,2,6,4,11,5,Loyal Customers
2,3,46,1,1,1,Hibernating
3,4,23,2,3,2,Hibernating
4,5,15,3,4,3,Need Attention
5,6,32,2,2,2,Hibernating
6,7,7,4,3,2,Potential Loyalist
7,8,50,1,1,1,Hibernating
8,9,34,1,15,5,Can't Lose Them
9,10,10,3,5,4,Loyal Customers


In [11]:
# Check that the error handling works: these weights sum to 0.9 rather than 1,
# so the call should print the module's error message instead of a result.
# The outcome is stored under its own name so that the valid `result3`
# computed earlier is not overwritten by this deliberately failing call.
result_bad_weight = rfm.rfm_analysis(
    df=df1,
    cid='customer_id',
    r='recency_day',
    f='frequency_number',
    m='monetary_total',
    method='simple',
    weight=[0.3, 0.3, 0.3],
)

Error. 
        Usually this is caused by datatype error. 
        Try checking your data, each data type for recency, frequency and monetary column should be numeric 

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15 entries, 0 to 14
Data columns (total 4 columns):
 #   Column            Non-Null Count  Dtype
---  ------            --------------  -----
 0   customer_id       15 non-null     int64
 1   recency_day       15 non-null     int64
 2   frequency_number  15 non-null     int64
 3   monetary_total    15 non-null     int64
dtypes: int64(4)
memory usage: 608.0 bytes
None

 If not, try checking the weight, the sum of the weight should be exactly 1
[0.3, 0.3, 0.3]
0.8999999999999999
