## RFM Analysis 

In [1]:
# General Dependencies
import os
import numpy as np
import pandas as pd

# Warnings
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Read the data
#
# The file name is asked for interactively and resolved relative to the
# local "data" directory.

input_csv = input("Enter the input customers file to analyze: ")
file = os.path.join("data", input_csv)

try:
    df_data = pd.read_csv(file)
except FileNotFoundError:
    # Report the path that was actually tried, then re-raise so the notebook
    # stops here instead of failing later on an undefined df_data.
    print("File : " + input_csv + " cannot be found in : " + file)
    raise

print('Dataframe dimensions:', df_data.shape)
df_data.describe()

Enter the input customers file to analyze: cust17.csv
Dataframe dimensions: (493, 6)


Unnamed: 0,NumberOrders,TotalSpent,TotalItems,DaysAsCustomer,DaysNoPurchase
count,493.0,493.0,493.0,493.0,493.0
mean,2.894523,643.115619,2.910751,221.772819,173.306288
std,4.015852,997.711351,4.022314,84.998285,88.884468
min,1.0,29.0,1.0,1.0,1.0
25%,1.0,142.0,1.0,184.0,123.0
50%,2.0,326.0,2.0,215.0,184.0
75%,3.0,703.0,3.0,276.0,215.0
max,36.0,9800.0,36.0,335.0,335.0


In [3]:
# Drop rows in which every column is missing; rows with only some missing
# values are kept.
df_rfm = df_data.dropna(how='all')
# Report the shape of the cleaned frame (not the raw one) so that any dropped
# rows are actually visible in the output.
print('Dataframe dimensions:', df_rfm.shape)

Dataframe dimensions: (493, 6)


In [4]:
# Preview the first ten rows of the cleaned customer frame.
df_rfm.head(10)

Unnamed: 0,CustomerID,NumberOrders,TotalSpent,TotalItems,DaysAsCustomer,DaysNoPurchase
0,DL1005,7,1832.0,7.0,276.0,92.0
1,DL1006,5,200.0,5.0,184.0,184.0
2,DL1007,21,4272.0,21.0,335.0,154.0
3,DL1009,4,932.0,4.0,215.0,62.0
4,DL1012,1,184.0,1.0,123.0,123.0
5,DL1013,2,460.0,2.0,62.0,31.0
6,DL1016,1,343.0,1.0,184.0,184.0
7,DL1017,3,957.0,3.0,123.0,31.0
8,DL1018,2,108.0,2.0,184.0,184.0
9,DL1020,4,1161.0,4.0,276.0,184.0


## RFM Analysis

RFM (Recency, Frequency, Monetary) analysis is a customer segmentation technique that uses past purchase behavior to divide customers into groups. RFM helps divide customers into various categories or clusters to identify customers who are more likely to respond to promotions and also for future personalization services.

* RECENCY (R): Days since last purchase => df_rfm['DaysNoPurchase']
* FREQUENCY (F): Total number of purchases => df_rfm['NumberOrders']
* MONETARY VALUE (M): Total money this customer spent => df_rfm['TotalSpent']

In [5]:
# Quartile cut-offs (25%, 50%, 75%) for every numeric column; these are the
# thresholds the R/F/M scoring functions compare against.
# numeric_only=True skips the string CustomerID column, which would otherwise
# make quantile() raise on pandas versions where numeric_only defaults to False.
quantiles = df_rfm.quantile(q=[0.25, 0.5, 0.75], numeric_only=True)
quantiles

Unnamed: 0,NumberOrders,TotalSpent,TotalItems,DaysAsCustomer,DaysNoPurchase
0.25,1.0,142.0,1.0,184.0,123.0
0.5,2.0,326.0,2.0,215.0,184.0
0.75,3.0,703.0,3.0,276.0,215.0


In [6]:
# Show the same cut-offs as a nested dict: {column: {quantile: value}}.
quantiles.to_dict()

{'DaysAsCustomer': {0.25: 184.0, 0.5: 215.0, 0.75: 276.0},
 'DaysNoPurchase': {0.25: 123.0, 0.5: 184.0, 0.75: 215.0},
 'NumberOrders': {0.25: 1.0, 0.5: 2.0, 0.75: 3.0},
 'TotalItems': {0.25: 1.0, 0.5: 2.0, 0.75: 3.0},
 'TotalSpent': {0.25: 142.0, 0.5: 326.0, 0.75: 703.0}}

In [7]:
# Score customers on recency, frequency and spend so that sales and marketing
# campaigns can target big spenders, frequent buyers and dormant customers.

# RECENCY VALUE (R): days since last purchase => df_rfm['DaysNoPurchase']
# Fewer days without a purchase means a more recent customer, so the lowest
# quartile of DaysNoPurchase receives the highest score.

def loyal_customer(row):
    """Return the recency score (1-4) for one customer row.

    The row's 'DaysNoPurchase' value is compared against the quartile
    cut-offs held in the module-level `quantiles` table:
    at or below the 25% cut-off -> 4, at or below the 50% cut-off -> 3,
    at or below the 75% cut-off -> 2, anything else -> 1.
    """
    days_idle = row['DaysNoPurchase']
    cutoffs = quantiles['DaysNoPurchase']
    # Walk the cut-offs from smallest to largest; the first one that the
    # value does not exceed decides the score.
    for level, score in ((0.25, 4), (0.50, 3), (0.75, 2)):
        if days_idle <= cutoffs[level]:
            return score
    return 1

In [8]:
# MONETARY VALUE (M): total money the customer has spent => df_rfm['TotalSpent']
# Higher spend earns a higher score.

def big_spender(row):
    """Return the monetary score (1-4) for one customer row.

    The row's 'TotalSpent' value is compared against the quartile cut-offs
    held in the module-level `quantiles` table:
    at or below the 25% cut-off -> 1, at or below the 50% cut-off -> 2,
    at or below the 75% cut-off -> 3, anything else -> 4.
    """
    spent = row['TotalSpent']
    cutoffs = quantiles['TotalSpent']
    # Walk the cut-offs from smallest to largest; the first one that the
    # value does not exceed decides the score.
    for level, score in ((0.25, 1), (0.50, 2), (0.75, 3)):
        if spent <= cutoffs[level]:
            return score
    return 4

In [9]:
# FREQUENCY VALUE (F): total number of purchases => df_rfm['NumberOrders']
# More orders earn a higher score.

def many_orders(row):
    """Return the frequency score (1-4) for one customer row.

    The row's 'NumberOrders' value is compared against the quartile cut-offs
    held in the module-level `quantiles` table:
    at or below the 25% cut-off -> 1, at or below the 50% cut-off -> 2,
    at or below the 75% cut-off -> 3, anything else -> 4.
    """
    order_count = row['NumberOrders']
    cutoffs = quantiles['NumberOrders']
    # Walk the cut-offs from smallest to largest; the first one that the
    # value does not exceed decides the score.
    for level, score in ((0.25, 1), (0.50, 2), (0.75, 3)):
        if order_count <= cutoffs[level]:
            return score
    return 4

In [10]:
# Score every customer row on recency, frequency and monetary value, adding
# one column per score in that order.
score_columns = (
    ('LoyalCustomer', loyal_customer),
    ('ManyOrders', many_orders),
    ('BigSpender', big_spender),
)
for column_name, scorer in score_columns:
    df_rfm[column_name] = df_rfm.apply(scorer, axis=1)

# RFMScore is the three single-digit scores joined as text in R, F, M order
# (e.g. '444' means the top score on all three).
recency_txt, frequency_txt, monetary_txt = (
    df_rfm[column_name].map(str) for column_name, _ in score_columns
)
df_rfm['RFMScore'] = recency_txt + frequency_txt + monetary_txt
df_rfm.head()

Unnamed: 0,CustomerID,NumberOrders,TotalSpent,TotalItems,DaysAsCustomer,DaysNoPurchase,LoyalCustomer,ManyOrders,BigSpender,RFMScore
0,DL1005,7,1832.0,7.0,276.0,92.0,4,4,4,444
1,DL1006,5,200.0,5.0,184.0,184.0,3,4,2,342
2,DL1007,21,4272.0,21.0,335.0,154.0,3,4,4,344
3,DL1009,4,932.0,4.0,215.0,62.0,4,4,4,444
4,DL1012,1,184.0,1.0,123.0,123.0,4,1,2,412


## How many Customers do we have in each segment?


In [11]:
# Segment sizes, counted with boolean masks over the score columns.
# RFMScore digits are in recency, frequency, monetary order.
rfm_codes = df_rfm['RFMScore']

print("Best Customers: ", int((rfm_codes == '444').sum()))
print('Loyal Customers: ', int((df_rfm['LoyalCustomer'] == 4).sum()))
print("Big Spenders: ", int((df_rfm['BigSpender'] == 4).sum()))

# Recency score 2 combined with matching high (4/4) or good (3/3)
# frequency and monetary scores.
almost_lost = int(rfm_codes.isin(['244', '233']).sum())
print('Almost Lost: ', almost_lost)

# Same frequency/monetary profiles, but with the lowest recency score.
lost_customers = int(rfm_codes.isin(['144', '133']).sum())
print('Lost Customers: ', lost_customers)

print('Lost Cheap Customers: ', int((rfm_codes == '111').sum()))

Best Customers:  52
Loyal Customers:  159
Big Spenders:  123
Almost Lost:  11
Lost Customers:  7
Lost Cheap Customers:  46


In [12]:
# Save rfm datasets to csv file for Forecasting

# Input file name -> RFM output file written for it.
rfm_outputs = {
    'cust17.csv': 'data/rfm17.csv',
    'cust18.csv': 'data/rfm18.csv',
}

if input_csv in rfm_outputs:
    df_rfm.to_csv(rfm_outputs[input_csv], index=False)
else:
    # Any other input used to be skipped without a word; say so explicitly so
    # a missing output file is not discovered only in the forecasting step.
    print("No RFM output file is configured for : " + input_csv + " ; nothing was saved")
