<a href="https://colab.research.google.com/github/P3drio/Customer-Segmentation/blob/main/02_rfm_segmentation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# ===============================================
# 02_rfm_segmentation.ipynb
# Customer Value & Retention Analysis
# ===============================================

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import timedelta

# 1️⃣ Load cleaned data
# Read the cleaned transactions and convert InvoiceDate to datetime so that
# date arithmetic works in the steps below.
df = pd.read_csv('/content/clean_online_retail.csv').assign(
    InvoiceDate=lambda frame: pd.to_datetime(frame['InvoiceDate'])
)

# 2️⃣ Define snapshot date
# The reference point for recency is one day after the latest invoice, so the
# most recent purchase gets a recency of at least 1 day.
latest_invoice = df['InvoiceDate'].max()
snapshot_date = latest_invoice + timedelta(days=1)

# 3️⃣ Compute RFM metrics
# One row per customer:
#   Recency   - whole days between snapshot_date and the customer's latest invoice
#   Frequency - number of distinct invoices
#   Monetary  - sum of TotalPrice
rfm = df.groupby('Customer ID').agg({
    'InvoiceDate': lambda x: (snapshot_date - x.max()).days,
    'Invoice': 'nunique',
    'TotalPrice': 'sum'
}).reset_index()

# reset_index() puts the group key first, followed by the aggregated columns
# in dict order, so a positional rename is safe here.
rfm.columns = ['CustomerID', 'Recency', 'Frequency', 'Monetary']

# 4️⃣ Filter out zero or negative spenders
# .copy() makes the filtered frame independent of the unfiltered one, so the
# score columns assigned to it later do not trigger SettingWithCopyWarning.
rfm = rfm[rfm['Monetary'] > 0].copy()

# 5️⃣ Create quartile-based scores
def _quartile_score(values, labels):
    """Bin ``values`` into four quantile buckets and return integer labels.

    Parameters
    ----------
    values : pd.Series
        Metric to score.
    labels : list of int
        Four labels, ordered from the lowest-valued bucket to the highest.

    Returns
    -------
    pd.Series
        Integer score per row, aligned with the index of ``values``.

    Raises
    ------
    ValueError
        If the values cannot be split into four buckets even after ranking
        (for example, when there is only a single row).
    """
    try:
        buckets = pd.qcut(values, 4, labels=labels)
    except ValueError:
        # Heavily tied values can make two quantile edges coincide, which
        # pd.qcut rejects ("Bin edges must be unique"). Ranking with
        # method='first' gives every row a distinct position, the same
        # tie-breaking already used for F_Score below.
        buckets = pd.qcut(values.rank(method='first'), 4, labels=labels)
    return buckets.astype(int)


# Recency labels are reversed: the lowest-recency quartile receives score 4.
rfm['R_Score'] = _quartile_score(rfm['Recency'], [4, 3, 2, 1])
# Frequency has many ties, so it is always ranked before binning.
rfm['F_Score'] = pd.qcut(rfm['Frequency'].rank(method='first'), 4, labels=[1,2,3,4]).astype(int)
rfm['M_Score'] = _quartile_score(rfm['Monetary'], [1, 2, 3, 4])

# 6️⃣ Composite score
# Unweighted sum of the three quartile scores (range 3-12).
rfm['RFM_Score'] = rfm[['R_Score','F_Score','M_Score']].sum(axis=1)

# 7️⃣ Quick visualization
# Scatter of purchase frequency against total spend, coloured by the
# composite RFM score.
fig, ax = plt.subplots(figsize=(7, 5))
sns.scatterplot(
    data=rfm,
    x='Frequency',
    y='Monetary',
    hue='RFM_Score',
    palette='viridis',
    ax=ax,
)
ax.set_title('Customer Segments by RFM Score')
plt.show()

# 8️⃣ Save output
# Write the scored table without the index column, then confirm.
rfm_output_path = '/content/customers_rfm.csv'
rfm.to_csv(rfm_output_path, index=False)
print("✅ RFM table saved as customers_rfm.csv")