# 🧺 RFM Segmentation Analysis

In [None]:

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Read the transaction log into a DataFrame, parsing `order_date` as
# datetimes so it can be used for date arithmetic later on.
df = pd.read_csv(
    '../data/transactions.csv',
    parse_dates=['order_date'],
)
# Show the first rows as the cell output.
df.head()
    

## Step 1: Calculate RFM Metrics

In [None]:

# Reference point for recency: one day after the latest `order_date` in the data.
snapshot_date = df['order_date'].max() + pd.Timedelta(days=1)


def _days_since_last_order(order_dates):
    """Return whole days between the latest date in *order_dates* and the snapshot."""
    return (snapshot_date - order_dates.max()).days


# One row per customer:
#   Recency   - days from the customer's most recent order to the snapshot date
#   Frequency - number of transaction rows for the customer
#   Monetary  - sum of `order_amount` for the customer
rfm = (
    df.groupby('customer_id')
    .agg({
        'order_date': _days_since_last_order,
        'customer_id': 'count',
        'order_amount': 'sum',
    })
    .rename(columns={
        'order_date': 'Recency',
        'customer_id': 'Frequency',
        'order_amount': 'Monetary',
    })
)
rfm.head()
    

## Step 2: Score & Segment Customers

In [None]:

def _quartile_score(values, labels):
    """Bin *values* into four quantile buckets tagged with *labels*.

    Parameters:
        values: numeric pandas Series to score.
        labels: four labels, applied from the lowest bucket to the highest.

    Returns:
        A categorical Series aligned with *values*.

    ``pd.qcut`` raises ``ValueError`` when tied values make two quantile
    edges coincide. When that happens the values are ranked with
    ``method="first"`` (the same tie-break already used for Frequency),
    which gives every row a distinct value, and the ranks are binned instead.
    Inputs that qcut can bin directly are scored exactly as before.
    """
    try:
        return pd.qcut(values, 4, labels=labels)
    except ValueError:
        # Ties collapsed the bin edges; break them by order of appearance.
        return pd.qcut(values.rank(method="first"), 4, labels=labels)


# Recency labels are reversed: the lowest Recency bucket gets score 4.
rfm['R_quartile'] = _quartile_score(rfm['Recency'], [4, 3, 2, 1])
# Frequency is always ranked first because small integer counts tie heavily.
rfm['F_quartile'] = pd.qcut(rfm['Frequency'].rank(method="first"), 4, labels=[1,2,3,4])
rfm['M_quartile'] = _quartile_score(rfm['Monetary'], [1, 2, 3, 4])

# Concatenate the three scores into a three-character segment code in R, F, M order.
rfm['RFM_Score'] = (
    rfm['R_quartile'].astype(str)
    + rfm['F_quartile'].astype(str)
    + rfm['M_quartile'].astype(str)
)
rfm.head()
    

## Step 3: Visualize Segments

In [None]:

# Count customers per RFM code and keep the ten most common codes.
top_segments = rfm['RFM_Score'].value_counts().head(10)

# Horizontal bar chart; label it through the Axes that pandas returns.
ax = top_segments.plot.barh(figsize=(10, 5))
ax.set_title("Top RFM Segments")
ax.set_xlabel("Number of Customers")
ax.set_ylabel("RFM Score")
plt.show()
    