<a href="https://colab.research.google.com/github/MubashirMustafa/deep-learning/blob/main/rfm_score.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

What is RFM analysis?
RFM stands for recency, frequency, and monetary value. In business analytics, this concept is often used to divide customers into segments such as high-value, medium-value, and low-value customers, among many others.

Let’s assume we are a company named Geek and perform an RFM analysis on our customers.

Recency: How recently has the customer made a transaction with us?
Frequency: How frequently does the customer order or buy products from us?
Monetary: How much does the customer spend on purchasing products from us?

In [2]:
import pandas as pd
import datetime as dt

# Sample customer data: five customers with one transaction each
data = {
    'CustomerID': [1, 2, 3, 4, 5],
    'TransactionDate': ['2024-01-15', '2024-02-10', '2023-12-05', '2024-01-20', '2024-02-25'],
    'Amount': [100, 150, 50, 200, 300]
}

# Create DataFrame and parse the date strings into datetime values
df = pd.DataFrame(data)
df['TransactionDate'] = pd.to_datetime(df['TransactionDate'])

# Recency is measured against the latest transaction in the data, so the most
# recent customer ends up with a recency of 0 days.
# Series.max() is used rather than the builtin max(), which would iterate the
# Series element by element in Python.
current_date = df['TransactionDate'].max()

# Aggregate per customer and rename in one chained expression (no inplace
# mutation), producing the Recency / Frequency / Monetary columns.
rfm_df = (
    df.groupby('CustomerID')
    .agg({
        'TransactionDate': lambda dates: (current_date - dates.max()).days,  # recency
        'CustomerID': 'count',  # frequency
        'Amount': 'sum',  # monetary value
    })
    .rename(columns={
        'TransactionDate': 'Recency',
        'CustomerID': 'Frequency',
        'Amount': 'Monetary',
    })
)

# Print RFM DataFrame
print("RFM DataFrame:")
print(rfm_df)

# Quartile cut-offs (25%, 50%, 75%) of each RFM column, used for scoring
quantiles = rfm_df.quantile(q=[0.25, 0.5, 0.75])

# Function to assign the R (recency) score: lower recency earns a higher score
def r_score(x, p, d):
    """Map a value to a 4..1 score using quartile cut-offs (smaller is better).

    Args:
        x: The value to score.
        p: Key (column name) selecting the cut-offs inside ``d``.
        d: Mapping such that ``d[p][q]`` is the cut-off for ``q`` in
            0.25, 0.50 and 0.75.

    Returns:
        4 if ``x`` is at or below the 25% cut-off, 3 if at or below the 50%
        cut-off, 2 if at or below the 75% cut-off, otherwise 1.
    """
    cutoffs = d[p]
    # Walk the cut-offs from lowest to highest; the first one that x does not
    # exceed determines the score.
    for score, q in ((4, 0.25), (3, 0.50), (2, 0.75)):
        if x <= cutoffs[q]:
            return score
    return 1

# Score each customer's Recency against the Recency quartile cut-offs
rfm_df['R'] = rfm_df['Recency'].apply(lambda days: r_score(days, 'Recency', quantiles))

# Function to assign F and M scores
def f_m_score(x, p, d):
    """Map a value to a 1..4 score using quartile cut-offs (larger is better).

    Args:
        x: The value to score.
        p: Key (column name) selecting the cut-offs inside ``d``.
        d: Mapping such that ``d[p][q]`` is the cut-off for ``q`` in
            0.25, 0.50 and 0.75.

    Returns:
        1 if ``x`` is at or below the 25% cut-off, 2 if at or below the 50%
        cut-off, 3 if at or below the 75% cut-off, otherwise 4.
    """
    cutoffs = d[p]
    # The score is the 1-based position of the first cut-off x does not exceed.
    for score, q in enumerate((0.25, 0.50, 0.75), start=1):
        if x <= cutoffs[q]:
            return score
    return 4

# Score Frequency and Monetary, each against its own quartile cut-offs
for score_col, metric in (('F', 'Frequency'), ('M', 'Monetary')):
    rfm_df[score_col] = rfm_df[metric].apply(f_m_score, args=(metric, quantiles))

# Combine the three scores into one number: R is the hundreds digit,
# F the tens digit and M the units digit
rfm_df['RFM Score'] = 100 * rfm_df['R'] + 10 * rfm_df['F'] + rfm_df['M']

# Print RFM DataFrame with scores
print("\nRFM DataFrame with Scores:")
print(rfm_df)


RFM DataFrame:
            Recency  Frequency  Monetary
CustomerID                              
1                41          1       100
2                15          1       150
3                82          1        50
4                36          1       200
5                 0          1       300

RFM DataFrame with Scores:
            Recency  Frequency  Monetary  R  F  M  RFM Score
CustomerID                                                  
1                41          1       100  2  1  1        211
2                15          1       150  4  1  2        412
3                82          1        50  1  1  1        111
4                36          1       200  3  1  3        313
5                 0          1       300  4  1  4        414


In [3]:
import pandas as pd
import numpy as np
import datetime as dt

# Seed NumPy's global RNG so the synthetic data is the same on every run
np.random.seed(123)

# Number of synthetic customers and the date window their transactions fall in
n_customers = 1000
start_date = dt.datetime(2022, 1, 1)
end_date = dt.datetime(2024, 1, 1)

# One row per customer: sequential IDs, a randomly chosen day from the window,
# and an amount from np.random.randint(10, 500).
# The draw order (dates first, then amounts) is kept so the seeded values match.
customer_ids = np.arange(1, n_customers + 1)
candidate_days = pd.date_range(start_date, end_date)
transaction_dates = np.random.choice(candidate_days, n_customers)
amounts = np.random.randint(10, 500, size=n_customers)

data = dict(
    CustomerID=customer_ids,
    TransactionDate=transaction_dates,
    Amount=amounts,
)

# Create DataFrame
df = pd.DataFrame(data)

# Function to generate random product categories
def generate_product_category():
    """Return one product category picked at random with ``np.random.choice``.

    The draw uses NumPy's global random state, so results depend on any
    earlier ``np.random.seed`` call and on how many draws preceded this one.
    """
    return np.random.choice([
        'Electronics',
        'Clothing',
        'Books',
        'Home & Garden',
        'Sports',
        'Health & Beauty',
    ])

# Add product category column: one random draw per row
df['ProductCategory'] = df.apply(lambda _row: generate_product_category(), axis=1)

# Convert TransactionDate to datetime
df['TransactionDate'] = pd.to_datetime(df['TransactionDate'])

# Recency is measured against the latest transaction in the data.
# Series.max() is used rather than the builtin max(), which would iterate the
# Series element by element in Python.
current_date = df['TransactionDate'].max()

# Aggregate per customer and rename in one chained expression (no inplace
# mutation), producing the Recency / Frequency / Monetary columns.
rfm_df = (
    df.groupby('CustomerID')
    .agg({
        'TransactionDate': lambda dates: (current_date - dates.max()).days,  # recency
        'CustomerID': 'count',  # frequency
        'Amount': 'sum',  # monetary value
    })
    .rename(columns={
        'TransactionDate': 'Recency',
        'CustomerID': 'Frequency',
        'Amount': 'Monetary',
    })
)

# Print RFM DataFrame
print("RFM DataFrame:")
print(rfm_df.head())

# Quartile cut-offs (25%, 50%, 75%) of each RFM column, used for scoring
quantiles = rfm_df.quantile(q=[0.25, 0.5, 0.75])

# Function to assign the R (recency) score: lower recency earns a higher score
def rfm_score(x, p, d):
    """Map a value to a 4..1 score using quartile cut-offs (smaller is better).

    Args:
        x: The value to score.
        p: Key (column name) selecting the cut-offs inside ``d``.
        d: Mapping such that ``d[p][q]`` is the cut-off for ``q`` in
            0.25, 0.50 and 0.75.

    Returns:
        4 if ``x`` is at or below the 25% cut-off, 3 if at or below the 50%
        cut-off, 2 if at or below the 75% cut-off, otherwise 1.
    """
    cutoffs = d[p]
    # Guard clauses, checked from the lowest cut-off upwards.
    if x <= cutoffs[0.25]:
        return 4
    if x <= cutoffs[0.50]:
        return 3
    if x <= cutoffs[0.75]:
        return 2
    return 1

# Score each customer's Recency against the Recency quartile cut-offs
rfm_df['R'] = rfm_df['Recency'].apply(lambda days: rfm_score(days, 'Recency', quantiles))

# Function to assign F and M scores
def f_m_score(x, p, d):
    """Map a value to a 1..4 score using quartile cut-offs (larger is better).

    Args:
        x: The value to score.
        p: Key (column name) selecting the cut-offs inside ``d``.
        d: Mapping such that ``d[p][q]`` is the cut-off for ``q`` in
            0.25, 0.50 and 0.75.

    Returns:
        1 if ``x`` is at or below the 25% cut-off, 2 if at or below the 50%
        cut-off, 3 if at or below the 75% cut-off, otherwise 4.
    """
    cutoffs = d[p]
    # Guard clauses, checked from the lowest cut-off upwards.
    if x <= cutoffs[0.25]:
        return 1
    if x <= cutoffs[0.50]:
        return 2
    if x <= cutoffs[0.75]:
        return 3
    return 4

# Score Frequency and Monetary, each against its own quartile cut-offs
rfm_df['F'] = rfm_df['Frequency'].apply(lambda count: f_m_score(count, 'Frequency', quantiles))
rfm_df['M'] = rfm_df['Monetary'].apply(lambda spend: f_m_score(spend, 'Monetary', quantiles))

# Combine the three scores into one number: R is the hundreds digit,
# F the tens digit and M the units digit
rfm_df['RFM Score'] = 100 * rfm_df['R'] + 10 * rfm_df['F'] + rfm_df['M']

# Print the first rows of the scored RFM DataFrame
print("\nRFM DataFrame with Scores:")
print(rfm_df.head())


RFM DataFrame:
            Recency  Frequency  Monetary
CustomerID                              
1               220          1        68
2               365          1       205
3               348          1       382
4               408          1       297
5               632          1       253

RFM DataFrame with Scores:
            Recency  Frequency  Monetary  R  F  M  RFM Score
CustomerID                                                  
1               220          1        68  3  1  1        311
2               365          1       205  3  1  2        312
3               348          1       382  3  1  3        313
4               408          1       297  2  1  3        213
5               632          1       253  1  1  2        112


In [4]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans

# Seed NumPy's global RNG so the synthetic data is the same on every run
np.random.seed(123)

# Number of synthetic customers and the date window their transactions fall in
n_customers = 1000
start_date = pd.Timestamp('2022-01-01')
end_date = pd.Timestamp('2024-01-01')

# Sequential customer IDs starting at 1
customer_ids = np.arange(1, n_customers + 1)

# Random draws; the order (dates first, then amounts) is kept so the seeded
# values match
transaction_dates = np.random.choice(
    pd.date_range(start=start_date, end=end_date),
    size=n_customers,
)
amounts = np.random.randint(low=10, high=500, size=n_customers)

data = dict(
    CustomerID=customer_ids,
    TransactionDate=transaction_dates,
    Amount=amounts,
)

# Create DataFrame
df = pd.DataFrame(data)

# Function to generate random product categories
def generate_product_category():
    """Pick one product category at random using NumPy's global random state."""
    options = [
        'Electronics',
        'Clothing',
        'Books',
        'Home & Garden',
        'Sports',
        'Health & Beauty',
    ]
    chosen = np.random.choice(options)
    return chosen

# Add product category column: one random draw per row
df['ProductCategory'] = df.apply(lambda _row: generate_product_category(), axis=1)

# Convert TransactionDate to datetime
df['TransactionDate'] = pd.to_datetime(df['TransactionDate'])

# Recency is measured against the latest transaction in the data.
# Series.max() is used rather than the builtin max(), which would iterate the
# Series element by element in Python.
current_date = df['TransactionDate'].max()

# Aggregate per customer and rename in one chained expression (no inplace
# mutation), producing the Recency / Frequency / Monetary columns.
rfm_df = (
    df.groupby('CustomerID')
    .agg({
        'TransactionDate': lambda dates: (current_date - dates.max()).days,  # recency
        'CustomerID': 'count',  # frequency
        'Amount': 'sum',  # monetary value
    })
    .rename(columns={
        'TransactionDate': 'Recency',
        'CustomerID': 'Frequency',
        'Amount': 'Monetary',
    })
)

# Standardize the RFM columns before clustering
scaler = StandardScaler()
rfm_scaled = scaler.fit_transform(rfm_df)

# Perform KMeans clustering into 5 segments.
# n_init is pinned explicitly: scikit-learn 1.2-1.3 emit a FutureWarning when
# it is omitted, and from 1.4 the default is 'auto' instead of 10, so leaving
# it out makes the cluster assignment depend on the installed version.
kmeans = KMeans(n_clusters=5, n_init=10, random_state=42)
kmeans.fit(rfm_scaled)

# Add cluster labels to the DataFrame (labels_ is in the same row order as rfm_df)
rfm_df['Cluster'] = kmeans.labels_

# Print RFM DataFrame with cluster labels
print("RFM DataFrame with Cluster Labels:")
print(rfm_df.head())




RFM DataFrame with Cluster Labels:
            Recency  Frequency  Monetary  Cluster
CustomerID                                       
1               220          1        68        0
2               365          1       205        0
3               348          1       382        2
4               408          1       297        2
5               632          1       253        1
