## RFM Analysis 

In [1]:
# General Dependencies

import numpy as np
import pandas as pd

# Warnings
import warnings
warnings.filterwarnings('ignore')

In [3]:
# Read the data

try:
    df_data17 = pd.read_csv('data/cust17.csv')
    df_data18 = pd.read_csv('data/cust18.csv')
except FileNotFoundError:
    # Only a missing file is reported here; parse errors and other failures
    # propagate untouched instead of being mislabelled as "not found".
    print ("Datasets cust17.csv and cust18.csv cannot find " )
    # Re-raise so the notebook stops at this cell with a traceback rather
    # than calling exit(), which takes the whole kernel down.
    raise

In [5]:
# Keep every 2017 row that has at least one non-missing value.
df_rfm17 = df_data17.dropna(axis=0, how='all')
print('Dataframe dimensions:', df_rfm17.shape)
# Preview the first ten customers.
df_rfm17.head(n=10)

Dataframe dimensions: (493, 6)


Unnamed: 0,CustomerID,NumberOrders,TotalSpent,TotalItems,DaysAsCustomer,DaysNoPurchase
0,DL1005,7,1832.0,7.0,276.0,92.0
1,DL1006,5,200.0,5.0,184.0,184.0
2,DL1007,21,4272.0,21.0,335.0,154.0
3,DL1009,4,932.0,4.0,215.0,62.0
4,DL1012,1,184.0,1.0,123.0,123.0
5,DL1013,2,460.0,2.0,62.0,31.0
6,DL1016,1,343.0,1.0,184.0,184.0
7,DL1017,3,957.0,3.0,123.0,31.0
8,DL1018,2,108.0,2.0,184.0,184.0
9,DL1020,4,1161.0,4.0,276.0,184.0


In [6]:
# Keep every 2018 row that has at least one non-missing value.
df_rfm18 = df_data18.dropna(axis=0, how='all')
print('Dataframe dimensions:', df_rfm18.shape)
# Preview the first ten customers.
df_rfm18.head(n=10)

Dataframe dimensions: (181, 6)


Unnamed: 0,CustomerID,NumberOrders,TotalSpent,TotalItems,DaysAsCustomer,DaysNoPurchase
0,DL1004,4,903.0,4.0,29.0,1.0
1,DL1005,2,749.0,2.0,29.0,1.0
2,DL1007,7,1552.0,7.0,60.0,1.0
3,DL1011,1,398.0,1.0,29.0,29.0
4,DL1012,3,3414.0,14.0,29.0,1.0
5,DL1013,4,671.0,4.0,60.0,60.0
6,DL1014,3,1165.0,3.0,29.0,1.0
7,DL1015,3,963.0,3.0,60.0,29.0
8,DL5001,1,1703.0,1.0,1.0,1.0
9,FF1002,2,228.0,2.0,60.0,60.0


## RFM Analysis

RFM (Recency, Frequency, Monetary) analysis is a customer segmentation technique that uses past purchase behavior to divide customers into groups. RFM helps divide customers into various categories or clusters to identify customers who are more likely to respond to promotions and also for future personalization services.

* RECENCY (R): Days since last purchase => the `DaysNoPurchase` column
* FREQUENCY (F): Total number of purchases => the `NumberOrders` column
* MONETARY VALUE (M): Total money this customer spent => the `TotalSpent` column

In [None]:
# Identify and separate big spenders, lots of orders, long-time customers, 
# dormant customers for sales and marketing campaign use

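# RECENCY VALUE (R): Days since last purchase => the 'DaysNoPurchase' column
# Long-standing customers who bought recently are treated as loyal.

# The top loyalty score (4) requires DaysAsCustomer at or above its median (50% quantile)
# and DaysNoPurchase at or below its 25% quantile; weaker combinations score 3, 2 or 1.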

def loyal_customer(row):
    """Return a loyalty score from 1 (lowest) to 4 (highest) for one customer row.

    The score combines how long the customer has existed
    (``row['DaysAsCustomer']``) with how long they have gone without buying
    (``row['DaysNoPurchase']``).  Cut-offs are read from the module-level
    ``quantiles`` mapping, indexed as ``quantiles[column][0.25 | 0.50 | 0.75]``.

    Scoring table:
      * tenure >= median:        idle <= q25 -> 4, <= q50 -> 3, <= q75 -> 2, otherwise 1
      * tenure >= 25% quantile:  idle <= q25 -> 3, <= q50 -> 2
      * anything else -> 1
    """
    tenure = row['DaysAsCustomer']
    tenure_cuts = quantiles['DaysAsCustomer']

    if tenure >= tenure_cuts[0.50]:
        idle = row['DaysNoPurchase']
        idle_cuts = quantiles['DaysNoPurchase']
        if idle <= idle_cuts[0.25]:
            return 4
        if idle <= idle_cuts[0.50]:
            return 3
        if idle <= idle_cuts[0.75]:
            return 2
        if idle >= idle_cuts[0.75]:
            return 1
        # Not returning here means no comparison held (e.g. a NaN idle
        # value); continue with the shorter-tenure rules below.

    if tenure >= tenure_cuts[0.25]:
        idle = row['DaysNoPurchase']
        idle_cuts = quantiles['DaysNoPurchase']
        if idle <= idle_cuts[0.25]:
            return 3
        if idle <= idle_cuts[0.50]:
            return 2

    return 1

In [None]:
# MONETARY VALUE (M): Total money this customer spent => the 'TotalSpent' column

def big_spender(row):
    """Return the spending quartile (1 = lowest, 4 = highest) for one customer row.

    ``row['TotalSpent']`` is compared against the 25%, 50% and 75% cut-offs
    stored in the module-level ``quantiles['TotalSpent']``; the score is the
    position of the first cut-off the value does not exceed, or 4 when it
    exceeds all three.
    """
    for score, level in enumerate((0.25, 0.50, 0.75), start=1):
        if row['TotalSpent'] <= quantiles['TotalSpent'][level]:
            return score
    return 4

In [None]:
# FREQUENCY VALUE (F): Total number of purchases => the 'NumberOrders' column

def many_orders(row):
    """Return the order-count quartile (1 = fewest, 4 = most) for one customer row.

    ``row['NumberOrders']`` is checked against the 25%, 50% and 75% cut-offs
    held in the module-level ``quantiles['NumberOrders']``.
    """
    ladder = ((0.25, 1), (0.50, 2), (0.75, 3))
    # First rung whose cut-off the order count does not exceed wins; a count
    # above every cut-off falls through to the top score.
    return next(
        (score for level, score in ladder
         if row['NumberOrders'] <= quantiles['NumberOrders'][level]),
        4,
    )

In [None]:
def RFM_analysis(df):
    """Score every customer in ``df`` and print the size of each RFM segment.

    Steps:
      1. Compute the 25%, 50% and 75% quantiles of the numeric columns of
         ``df`` and publish them as the module-level ``quantiles`` mapping
         (``quantiles[column][level]``), which is where ``loyal_customer``,
         ``many_orders`` and ``big_spender`` look their cut-offs up.
      2. Add the columns ``LoyalCustomer``, ``ManyOrders``, ``BigSpender``
         and the concatenated three-character ``RFMScore`` to ``df``.
      3. Print how many customers fall into each named segment.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per customer.  The scoring helpers read the columns
        ``DaysAsCustomer``, ``DaysNoPurchase``, ``NumberOrders`` and
        ``TotalSpent``.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object; the score columns are added in place.
    """
    # The scoring helpers read ``quantiles`` from module scope, so a local
    # variable here would leave them with a NameError.
    global quantiles

    # numeric_only=True keeps identifier/text columns such as CustomerID out
    # of the computation (pandas >= 2.0 raises on them otherwise).
    quantiles = df.quantile(q=[0.25, 0.5, 0.75], numeric_only=True).to_dict()

    df['LoyalCustomer'] = df.apply(loyal_customer, axis=1)
    df['ManyOrders'] = df.apply(many_orders, axis=1)
    df['BigSpender'] = df.apply(big_spender, axis=1)

    df['RFMScore'] = (
        df['LoyalCustomer'].map(str)
        + df['ManyOrders'].map(str)
        + df['BigSpender'].map(str)
    )

    def count_scores(*codes):
        # Number of customers whose RFMScore is any of the given codes.
        return int(df['RFMScore'].isin(codes).sum())

    # How many customers do we have in each segment?
    print("Best Customers: ", count_scores('444'))
    print('Loyal Customers: ', int((df['LoyalCustomer'] == 4).sum()))
    print("Big Spenders: ", int((df['BigSpender'] == 4).sum()))
    print('Almost Lost: ', count_scores('244', '233'))
    print('Lost Customers: ', count_scores('144', '133'))
    print('Lost Cheap Customers: ', count_scores('111'))

    return df
    

## How many Customers do we have in each segment?


In [None]:
# Save the scored RFM dataset to a CSV file for the forecasting step.
# The output name follows the input file; unrecognised inputs go to rfm.csv.
# NOTE(review): `input_csv` and `df_rfm` are not assigned in any cell shown
# here - confirm they exist before this cell runs.

output_path = {
    'cust17.csv': 'data/rfm17.csv',
    'cust18.csv': 'data/rfm18.csv',
}.get(input_csv, 'data/rfm.csv')
df_rfm.to_csv(output_path, index=False)