<a href="https://colab.research.google.com/github/Ronimol23/Ronimol23/blob/main/customer_segmentation_using_rfm_for_online_customers.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Change the working directory to the project folder so that the relative
# "customer_data.csv" path read later in this notebook resolves.
# NOTE(review): this is a hard-coded absolute /content/drive path; it presumably
# requires Google Drive to be mounted first — no mount call is visible in this
# notebook, confirm before running on a fresh runtime.
%cd /content/drive/MyDrive/Segmentation-20220613T115039Z-001/Segmentation/RFM

/content/drive/MyDrive/Segmentation-20220613T115039Z-001/Segmentation/RFM


# Libraries and Data

In [2]:
# Libraries
import pandas as pd
import matplotlib.pyplot as plt
from datetime import timedelta

In [3]:
# Read the customer extract from the current working directory and preview
# the first five rows.
Dataset = pd.read_csv(filepath_or_buffer="customer_data.csv")
Dataset.head(n=5)


Unnamed: 0,customer_id,revenue,most_recent_visit,number_of_orders,recency_days
0,22086,777,5/14/2006,9,232
1,2290,1555,9/8/2006,16,115
2,26377,336,11/19/2006,5,43
3,24650,1189,10/29/2006,12,64
4,12883,1229,12/9/2006,12,23


In [33]:
# DataError is not referenced by any cell shown in this notebook; the import is
# kept so the name stays available, but the public `pandas.errors` namespace is
# tried first. `pandas.core.base` is a private module, and importing the class
# from it fails on pandas releases where it is only defined in `pandas.errors`.
try:
    from pandas.errors import DataError
except ImportError:  # older pandas that only defines it in the private module
    from pandas.core.base import DataError

# Monetary = revenue per order for each row, rounded to 2 decimals.
# NOTE(review): a row with number_of_orders == 0 would produce inf/NaN here —
# confirm the data contains no such rows.
Dataset['Monetary'] = (Dataset['revenue'] / Dataset['number_of_orders']).round(2)
Dataset.head()

Unnamed: 0,customer_id,revenue,most_recent_visit,number_of_orders,recency_days,Sales,Monetary
0,22086,777,5/14/2006,9,232,86.33,86.33
1,2290,1555,9/8/2006,16,115,97.19,97.19
2,26377,336,11/19/2006,5,43,67.2,67.2
3,24650,1189,10/29/2006,12,64,99.08,99.08
4,12883,1229,12/9/2006,12,23,102.42,102.42


In [34]:
# Collapse the data to one row per customer_id (customer_id becomes the index).
#  - recency_days: 'min' keeps the smallest day count, i.e. the most recent
#    visit. Adding recency values together has no meaning when a customer
#    appears on more than one row; with one row per customer 'min' and 'sum'
#    give the same value.
#  - number_of_orders: total orders across the customer's rows.
#  - Monetary: summed as before.
#    NOTE(review): Monetary is a per-row average order value, so summing it is
#    only meaningful when each customer has a single row — confirm.
df = Dataset.groupby(['customer_id']).agg({
    'recency_days': 'min',
    'number_of_orders': 'sum',
    'Monetary': 'sum',
})
df.head()


Unnamed: 0_level_0,recency_days,number_of_orders,Monetary
customer_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,229,10,131.3
2,467,11,111.82
3,143,13,91.85
4,222,9,88.22
5,659,3,92.67


In [35]:
# Relabel the aggregated columns with the conventional RFM names
# ('Monetary' already carries its final name and maps to itself).
df = df.rename(
    {
        'recency_days': 'Recency',
        'number_of_orders': 'Frequency',
        'Monetary': 'Monetary',
    },
    axis='columns',
)
df.head()

Unnamed: 0_level_0,Recency,Frequency,Monetary
customer_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,229,10,131.3
2,467,11,111.82
3,143,13,91.85
4,222,9,88.22
5,659,3,92.67


In [36]:
# Score each customer 1-4 on every RFM dimension by quartile.
# pd.qcut assigns labels in ascending bin order, so for Monetary and Frequency
# the top quartile gets 4, while the Recency labels are reversed (4 -> 1) so
# that the smallest Recency values receive the highest score.
df['M'] = pd.qcut(x=df['Monetary'], q=4, labels=range(1, 5))
df['F'] = pd.qcut(x=df['Frequency'], q=4, labels=range(1, 5))
df['R'] = pd.qcut(x=df['Recency'], q=4, labels=range(4, 0, -1))
df.head(n=10)

Unnamed: 0_level_0,Recency,Frequency,Monetary,M,F,R
customer_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,229,10,131.3,4,2,2
2,467,11,111.82,4,3,1
3,143,13,91.85,2,4,3
4,222,9,88.22,2,2,2
5,659,3,92.67,2,1,1
6,157,9,102.44,3,2,2
7,195,11,87.36,2,3,2
8,572,6,102.5,3,1,1
9,244,9,121.89,4,2,1
10,239,12,97.0,3,3,1


In [37]:
# Overall RFM score: add the three quartile labels within each row
# (axis='columns' sums across a row rather than down a column).
# NOTE(review): M, F and R are the categorical results of pd.qcut — confirm
# this row-wise sum is supported on the pandas version in use.
df['RFM'] = df.loc[:, ['M', 'F', 'R']].sum(axis='columns')
df.head()

Unnamed: 0_level_0,Recency,Frequency,Monetary,M,F,R,RFM
customer_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,229,10,131.3,4,2,2,8
2,467,11,111.82,4,3,1,8
3,143,13,91.85,2,4,3,9
4,222,9,88.22,2,2,2,6
5,659,3,92.67,2,1,1,4


In [40]:
#Creating the function for assessing customer valuation
def customer_valuation(df, high_threshold=9, mid_threshold=5):
    """Map one customer's RFM score to a value tier.

    Despite its name, ``df`` is a single record (for example the row passed by
    ``DataFrame.apply(..., axis=1)``); only its ``'RFM'`` entry is read.

    Parameters
    ----------
    df : mapping-like
        Object supporting ``df['RFM']`` and returning a numeric score.
    high_threshold : number, default 9
        Scores strictly above this value are 'High value customer'.
    mid_threshold : number, default 5
        Scores strictly above this value (and not above ``high_threshold``)
        are 'Mid value customer'.

    Returns
    -------
    str
        'High value customer', 'Mid value customer' or 'Low value customer'.
        A score that compares false against both thresholds (e.g. NaN)
        falls through to 'Low value customer'.
    """
    score = df['RFM']
    if score > high_threshold:
        return 'High value customer'
    # Reaching this point already means the score is not above high_threshold,
    # so only the lower bound needs checking.
    if score > mid_threshold:
        return 'Mid value customer'
    return 'Low value customer'

In [43]:
# Adding the results to df: label every customer with its value tier.
df['Customer Valuation'] = df.apply(customer_valuation, axis=1)
# The unfinished `df['customer_id']=` statement was removed: it is a syntax
# error that prevents the whole cell from running. customer_id is already the
# index of df (it was the groupby key); use df.reset_index() if it is needed
# as a regular column.
df.tail(10)

Unnamed: 0_level_0,Recency,Frequency,Monetary,M,F,R,RFM,Customer Valuation
customer_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
39990,277,9,99.78,3,2,1,6,Mid value customer
39991,239,6,108.67,4,1,1,6,Mid value customer
39992,78,12,131.5,4,3,4,11,High value customer
39993,426,9,92.33,2,2,1,5,Low value customer
39994,483,4,128.5,4,1,1,6,Mid value customer
39995,94,9,108.78,4,2,3,9,Mid value customer
39996,298,9,61.67,1,2,1,4,Low value customer
39997,123,12,114.0,4,3,3,10,High value customer
39998,109,7,101.57,3,1,3,7,Mid value customer
39999,96,11,105.0,3,3,3,9,Mid value customer


In [42]:
# Profile each valuation tier by its average Recency, Monetary and Frequency.
df.groupby('Customer Valuation')[['Recency', 'Monetary', 'Frequency']].mean()

Unnamed: 0_level_0,Recency,Monetary,Frequency
Customer Valuation,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
High value customer,78.803743,107.997239,13.212322
Low value customer,301.395456,79.508126,7.289263
Mid value customer,165.721483,97.819868,9.876782
