# Directory, libraries and data

In [1]:
# Mount Google Drive at /content/drive so files stored there are reachable
# from this notebook. This cell is specific to Google Colab: it imports the
# google.colab package.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Switch the working directory to the project folder on the mounted Drive so
# that relative paths (e.g. "customer_data.csv") resolve against it.
# NOTE(review): this path reflects one particular Drive layout - adjust it
# when running from a different account or folder structure.
%cd /content/drive/MyDrive/Business Analyst course/Segmentation/RFM

In [3]:
# Import libraries
import pandas as pd

In [4]:
# Load data
# Read the customer table from the current working directory
df = pd.read_csv("customer_data.csv")
# Discard every row that contains at least one missing value
df = df.dropna()
# Preview the first five rows
df.head()

Unnamed: 0,customer_id,revenue,most_recent_visit,number_of_orders,recency_days
0,22086,777,5/14/2006,9,232
1,2290,1555,9/8/2006,16,115
2,26377,336,11/19/2006,5,43
3,24650,1189,10/29/2006,12,64
4,12883,1229,12/9/2006,12,23


# Preparing the DataFrame

In [5]:
# 'monetary' is the average order value: total revenue split across the orders placed
df['monetary'] = df['revenue'].div(df['number_of_orders'])
# Show the first row to confirm the new column
df.head(1)

Unnamed: 0,customer_id,revenue,most_recent_visit,number_of_orders,recency_days,monetary
0,22086,777,5/14/2006,9,232,86.333333


In [6]:
# Switch to the shorter RFM column names
df = df.rename(columns={'recency_days': 'recency',
                        'number_of_orders': 'frequency'})
# Header-only preview to check the new names
df.head(0)

Unnamed: 0,customer_id,revenue,most_recent_visit,frequency,recency,monetary


In [7]:
# Discard the columns that play no part in the scoring below
df = df.drop(['most_recent_visit', 'revenue'], axis='columns')
df.head(1)

Unnamed: 0,customer_id,frequency,recency,monetary
0,22086,9,232,86.333333


In [8]:
# Score each dimension by splitting it into 3 quantile-based groups.
#   F, M: labels ascend with the value (1 = lowest third, 3 = highest third)
#   R:    labels are reversed, so the smallest recency values (the most
#         recent customers) receive 3
df = df.assign(
    F=pd.qcut(df['frequency'], q=3, labels=range(1, 4)),
    M=pd.qcut(df['monetary'], q=3, labels=range(1, 4)),
    R=pd.qcut(df['recency'], q=3, labels=range(3, 0, -1)),
)

# Preview
df.head()

Unnamed: 0,customer_id,frequency,recency,monetary,F,M,R
0,22086,9,232,86.333333,2,1,1
1,2290,16,115,97.1875,3,2,2
2,26377,5,43,67.2,1,1,3
3,24650,12,64,99.083333,3,2,3
4,12883,12,23,102.416667,3,2,3


In [9]:
# Create RFM score by summing the recency, frequency, and monetary value groups.
# R, F and M come from pd.qcut(..., labels=...) and are therefore categorical;
# categorical data does not support reductions such as sum, so summing them
# directly depends on an implicit object-dtype fallback. Convert the labels to
# numbers first (float keeps any missing label as NaN, which sum skips), then
# store the total as an integer score.
df['RFM'] = df[['R', 'F', 'M']].astype(float).sum(axis = 1).astype(int)
df.head(1)

Unnamed: 0,customer_id,frequency,recency,monetary,F,M,R,RFM
0,22086,9,232,86.333333,2,1,1,4


In [10]:
# Create RFM segmentation function to classify customers based on their RFM score
def rfm_segment(df, superstar_min = 8, high_potential_min = 5):
    """Return the segment label for a single customer.

    Despite its name, ``df`` is one record rather than a whole DataFrame: the
    function only reads ``df['RFM']``, so a DataFrame row (as supplied by
    ``DataFrame.apply(..., axis=1)``) or any mapping with an ``'RFM'`` key works.

    Parameters
    ----------
    df : mapping or pandas.Series
        Record holding the customer's RFM score under the key ``'RFM'``.
    superstar_min : number, default 8
        Lowest score classified as ``'Superstar'``.
    high_potential_min : number, default 5
        Lowest score classified as ``'High Potential'``.

    Returns
    -------
    str
        ``'Superstar'``, ``'High Potential'`` or ``'Low Relevance'``.
    """
    score = df['RFM']
    # Checked from the highest tier down, so each test only needs a lower bound
    if score >= superstar_min:
        return 'Superstar'
    if score >= high_potential_min:
        return 'High Potential'
    # Everything else, including a NaN score (all comparisons above are False)
    return 'Low Relevance'


# RFM segmentation

In [11]:
# Label every customer by running the segmentation function once per row
df = df.assign(RFM_level=df.apply(rfm_segment, axis=1))
df.head()

Unnamed: 0,customer_id,frequency,recency,monetary,F,M,R,RFM,RFM_level
0,22086,9,232,86.333333,2,1,1,4,Low Relevance
1,2290,16,115,97.1875,3,2,2,7,High Potential
2,26377,5,43,67.2,1,1,3,5,High Potential
3,24650,12,64,99.083333,3,2,3,8,Superstar
4,12883,12,23,102.416667,3,2,3,8,Superstar


In [12]:
# Profile each RFM segment: which statistics to compute for which column
segment_stats = {
    'recency': 'mean',              # mean recency within the segment
    'frequency': 'mean',            # mean purchase frequency within the segment
    'monetary': ['mean', 'count'],  # mean monetary value, plus the number of customers
}
df.groupby('RFM_level').agg(segment_stats).round(1)

Unnamed: 0_level_0,recency,frequency,monetary,monetary
Unnamed: 0_level_1,mean,mean,mean,count
RFM_level,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
High Potential,171.8,9.8,97.0,26445
Low Relevance,306.6,7.1,78.5,7179
Superstar,80.1,12.8,108.3,6375
