#RFM Challenge

###Objective:
* Prepare basket variables
* Rename variables
* Create an RFM model with 3 levels
* Define 3 segments
* Prepare final table overview

##Libraries, Directory, Data

In [17]:
%cd /content/drive/MyDrive/Business Analyst/Segmentation/RFM

/content/drive/MyDrive/Business Analyst/Segmentation/RFM


In [18]:
#Install needed Libraries
import pandas as pd
import datetime as dt
import matplotlib.pyplot as plt
import seaborn as sns

In [19]:
#Upload data
df = pd.read_csv('customer_data.csv').dropna()
df.head()

Unnamed: 0,customer_id,revenue,most_recent_visit,number_of_orders,recency_days
0,22086,777,5/14/2006,9,232
1,2290,1555,9/8/2006,16,115
2,26377,336,11/19/2006,5,43
3,24650,1189,10/29/2006,12,64
4,12883,1229,12/9/2006,12,23


##Preparing Dataframe

In [20]:
#Prepare basket / Monetary Variable
df['Monetary'] = df.revenue / df.number_of_orders
df.head(1)

Unnamed: 0,customer_id,revenue,most_recent_visit,number_of_orders,recency_days,Monetary
0,22086,777,5/14/2006,9,232,86.333333


In [31]:
#Changing variables' names
df.rename(columns = {'recency_days': 'Recency',
                     'number_of_orders': 'Frequency'}, inplace = True)
df.head(1)

Unnamed: 0,customer_id,Frequency,Recency,Monetary
0,22086,9,232,86.333333


In [33]:
#Create Groups point system
df['F'] = pd.qcut(x = df['Frequency'], q = 3, labels = range(1, 4, 1))
df['M'] = pd.qcut(x = df['Monetary'], q = 3, labels = range(1, 4, 1))
df['R'] = pd.qcut(x = df['Recency'], q = 3, labels = list(range(3, 0, -1)))
df.head(5)

Unnamed: 0,customer_id,Frequency,Recency,Monetary,F,M,R
0,22086,9,232,86.333333,2,1,1
1,2290,16,115,97.1875,3,2,2
2,26377,5,43,67.2,1,1,3
3,24650,12,64,99.083333,3,2,3
4,12883,12,23,102.416667,3,2,3


In [34]:
#RFM Score
df['RFM'] = df[["R", "F", "M"]].sum(axis = 1)
df.head(2)

Unnamed: 0,customer_id,Frequency,Recency,Monetary,F,M,R,RFM
0,22086,9,232,86.333333,2,1,1,4
1,2290,16,115,97.1875,3,2,2,7


In [35]:
#Create the RFM Function
def rfm_segment(df):
  if df['RFM'] > 8:
    return "Superstar"
  elif ((df['RFM'] >= 5) and(df['RFM'] < 8)):
    return 'High Potential'
  else:
    return 'Low Relevance'

##RFM

In [38]:
#Apply RFM function
df['RFM_Level'] = df.apply(rfm_segment, axis = 1)
df.head()

Unnamed: 0,customer_id,Frequency,Recency,Monetary,F,M,R,RFM,RFM_Level
0,22086,9,232,86.333333,2,1,1,4,Low Relevance
1,2290,16,115,97.1875,3,2,2,7,High Potential
2,26377,5,43,67.2,1,1,3,5,High Potential
3,24650,12,64,99.083333,3,2,3,8,Low Relevance
4,12883,12,23,102.416667,3,2,3,8,Low Relevance


In [39]:
df.tail()

Unnamed: 0,customer_id,Frequency,Recency,Monetary,F,M,R,RFM,RFM_Level
39994,3249,10,31,99.8,2,2,3,7,High Potential
39995,6686,8,187,96.375,1,2,2,5,High Potential
39996,16418,9,154,112.888889,2,3,2,7,High Potential
39997,9117,7,195,96.857143,1,2,2,5,High Potential
39998,19184,13,113,116.846154,3,3,2,8,Low Relevance


In [37]:
#looking into the segments
df.groupby('RFM_Level').agg({
    'Recency': 'mean',
    'Frequency': 'mean',
    'Monetary': ['mean', 'count']
})

Unnamed: 0_level_0,Recency,Frequency,Monetary,Monetary
Unnamed: 0_level_1,mean,mean,mean,count
RFM_Level,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
High Potential,171.844545,9.835508,96.980906,26445
Low Relevance,217.484118,9.324547,89.868017,12026
Superstar,63.092932,13.734293,113.348086,1528
