In [3]:
import sys
import datetime as dt
import pandas as pd
import numpy as np


In [4]:
# Location of the raw transaction export, resolved relative to sys.path[0].
# NOTE(review): this presumes sys.path[0] is the project/notebook directory — confirm for the kernel in use.
data_path = f"{sys.path[0]}/data/Customer_Transaction.csv"

In [5]:
## Report day: 1 September 2022 (written day/month/year as 1/9/2022)

In [6]:
# Load the raw transactions from the CSV at data_path, then preview five random rows.
customer_transaction_df = pd.read_csv(filepath_or_buffer=data_path)
customer_transaction_df.sample(n=5)

Unnamed: 0,Transaction_ID,CustomerID,Purchase_Date,GMV
907875,907875,1031813,7/1/2022,75000
688567,688567,4229091,8/1/2022,80000
596223,596223,1065133,6/1/2022,85000
491124,491124,1782757,8/1/2022,75000
521223,521223,535534,7/1/2022,75000


In [7]:
# Print column dtypes, non-null counts and memory usage of the loaded transactions.
customer_transaction_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1048575 entries, 0 to 1048574
Data columns (total 4 columns):
 #   Column          Non-Null Count    Dtype 
---  ------          --------------    ----- 
 0   Transaction_ID  1048575 non-null  int64 
 1   CustomerID      1048575 non-null  int64 
 2   Purchase_Date   1048575 non-null  object
 3   GMV             1048575 non-null  int64 
dtypes: int64(3), object(1)
memory usage: 32.0+ MB


In [8]:
# Convert Purchase_Date from text to datetime using an explicit month/day/year
# format, so that strings such as "7/1/2022" are never left to per-row guessing
# and any row that does not follow the format raises instead of being silently
# misread.
# NOTE(review): month-first matches the recorded results (Recency values of
# 31/62/92 days against the 2022-09-01 report date) — confirm against the data
# source that the file is not day/month/year.
customer_transaction_df["Purchase_Date"] = pd.to_datetime(
    customer_transaction_df["Purchase_Date"],
    format="%m/%d/%Y",
)

In [9]:
# Report date against which recency is measured.
milestone = dt.datetime(2022, 9, 1)

# Build one row per customer:
#   Recency   - whole days between the report date and the latest purchase
#   Frequency - number of transactions
#   Monetary  - total GMV
# Only built-in aggregations are used inside the groupby; the date subtraction
# is then done once on the whole column rather than in a Python lambda that
# would be invoked separately for every customer.
rfm_df = (
    customer_transaction_df
    .groupby("CustomerID", as_index=False)
    .agg(
        Recency=("Purchase_Date", "max"),  # latest purchase date, converted to days below
        Frequency=("Transaction_ID", "count"),
        Monetary=("GMV", "sum"),
    )
    .assign(Recency=lambda summary: (milestone - summary["Recency"]).dt.days)
)
rfm_df.sample(5)

Unnamed: 0,CustomerID,Recency,Frequency,Monetary
461448,2737482,92,1,95000
215855,1609279,31,1,70000
292261,1993277,62,1,75000
56972,659664,62,1,75000
379685,2389081,31,1,135000


In [10]:
# Quartile cut points (25%, 50%, 75%) of every numeric column, stored as
# {column: {quantile: value}} for lookup by the scoring functions.
quartile_levels = [0.25, 0.5, 0.75]
quartile_table = rfm_df.quantile(q=quartile_levels, numeric_only=True)
quantile_dict = quartile_table.to_dict()
quantile_dict

{'CustomerID': {0.25: 1711208.5, 0.5: 2777706.0, 0.75: 3600816.0},
 'Recency': {0.25: 31.0, 0.5: 62.0, 0.75: 92.0},
 'Frequency': {0.25: 1.0, 0.5: 1.0, 0.75: 1.0},
 'Monetary': {0.25: 70000.0, 0.5: 75000.0, 0.75: 95000.0}}

In [11]:
def recency_score(dict_var, key, value):
    """Return a 1-4 score for ``value``, where smaller values score higher.

    Args:
        dict_var: Mapping of metric name to a mapping holding the 0.25, 0.5
            and 0.75 quantile thresholds for that metric.
        key: Metric name to look up in ``dict_var``.
        value: Observed value to score.

    Returns:
        4 if ``value`` is strictly below the 0.25 threshold, 3 if strictly
        below the 0.5 threshold, 2 if strictly below the 0.75 threshold,
        otherwise 1.
    """
    thresholds = dict_var[key]
    # Walk the quartiles from lowest to highest; the first strict match wins.
    for level, score in ((0.25, 4), (0.5, 3), (0.75, 2)):
        if value < thresholds[level]:
            return score
    return 1
    

In [12]:
def frequency_and_monetary_score(dict_var, key, value):
    """Return a 1-4 score for ``value``, where larger values score higher.

    Args:
        dict_var: Mapping of metric name to a mapping holding the 0.25, 0.5
            and 0.75 quantile thresholds for that metric.
        key: Metric name to look up in ``dict_var``.
        value: Observed value to score.

    Returns:
        1 if ``value`` is at or below the 0.25 threshold, 2 if at or below
        the 0.5 threshold, 3 if at or below the 0.75 threshold, otherwise 4.
    """
    thresholds = dict_var[key]
    # Walk the quartiles from lowest to highest; the first inclusive match wins.
    for level, score in ((0.25, 1), (0.5, 2), (0.75, 3)):
        if value <= thresholds[level]:
            return score
    return 4

In [13]:
# Score each RFM metric against its own quartile thresholds in quantile_dict.
# Each entry is (output column, source metric, scoring function).
score_plan = (
    ("RecencyScore", "Recency", recency_score),
    ("FrequencyScore", "Frequency", frequency_and_monetary_score),
    ("MonetaryScore", "Monetary", frequency_and_monetary_score),
)
for score_column, metric, scorer in score_plan:
    rfm_df[score_column] = rfm_df[metric].apply(
        lambda observed: scorer(quantile_dict, metric, observed)
    )
rfm_df.sample(n=5)

Unnamed: 0,CustomerID,Recency,Frequency,Monetary,RecencyScore,FrequencyScore,MonetaryScore
899397,4189007,31,1,70000,3,1,1
249632,1784264,31,2,190000,3,4,4
864946,4084729,92,1,70000,1,1,1
834756,3995844,31,1,70000,3,1,1
249760,1784959,31,2,230000,3,4,4


In [14]:
# Concatenate the three scores, in R-F-M order, into one string code per customer.
recency_digit = rfm_df["RecencyScore"].astype(str)
frequency_digit = rfm_df["FrequencyScore"].astype(str)
monetary_digit = rfm_df["MonetaryScore"].astype(str)
rfm_df["RFMScore"] = recency_digit.str.cat([frequency_digit, monetary_digit])
rfm_df.sample(n=5)

Unnamed: 0,CustomerID,Recency,Frequency,Monetary,RecencyScore,FrequencyScore,MonetaryScore,RFMScore
270611,1888538,31,1,75000,3,1,2,312
940989,4382464,4,1,9033,4,1,1,411
567131,3134869,31,2,150000,3,4,4,344
245548,1763203,62,1,75000,2,1,2,212
569633,3143683,62,1,75000,2,1,2,212


In [1]:
# Segment labels keyed by regular-expression patterns over the RFMScore string
# (recency score, then frequency score, then monetary score). The patterns are
# applied with Series.replace(..., regex=True) when the segment column is built.
segment_map = {
    # Recency score 1
    "1[1-2][1-2]": "Lost",
    "1[3-4][1-2]": "Needs Attention",
    "1[1-4][3-4]": "Can't Lose Them",  # spelling corrected from "Loose"
    # Recency score 2
    "2[1-2][1-2]": "Hibernating",
    "2[3-4][1-2]": "About To Sleep",
    "2[1-4][3-4]": "At Risk",
    # Recency score 3 or 4
    "[3-4][1-2][1-2]": "New Customers",
    "[3-4][1-2][3-4]": "Promising",
    "[3-4][3-4][1-2]": "Potential Loyalist",
    "[3-4][3-4][3]": "Loyal Customers",
    "[3-4][3-4][4]": "Champions",
}

In [15]:
# Translate each RFM code into its segment label by applying the regex
# patterns in segment_map.
# NOTE(review): the column name "CustomerSegmentaion" looks like a misspelling
# of "CustomerSegmentation"; it is kept unchanged here because other cells may
# reference it.
rfm_codes = rfm_df["RFMScore"]
rfm_df["CustomerSegmentaion"] = rfm_codes.replace(to_replace=segment_map, regex=True)
rfm_df.sample(n=5)

Unnamed: 0,CustomerID,Recency,Frequency,Monetary,RecencyScore,FrequencyScore,MonetaryScore,RFMScore,CustomerSegmentaion
833234,3991221,31,1,80000,3,1,3,313,Promising
868580,4095263,92,1,70000,1,1,1,111,Lost
676038,3500487,92,1,70000,1,1,1,111,Lost
100917,969603,92,1,115000,1,1,4,114,Can't Loose Them
123114,1101544,62,1,75000,2,1,2,212,Hibernating
