In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from datetime import timedelta
# Read the raw sales export. The file is decoded as latin1, and
# low_memory=False makes pandas infer each column's dtype from the whole
# file rather than chunk by chunk.
df = pd.read_csv(
    "sales_data.csv",
    encoding="latin1",
    low_memory=False,
)
df.head()

# Data Prep

In [None]:
# Names of the source columns used by the rest of the analysis.
DATE_COL, CUSTOMER_COL, SALES_COL = "Order Date", "Customer ID", "Sales"

# Coerce both columns to proper dtypes. Values that cannot be parsed do not
# raise: a bad date becomes NaT, and a bad (or missing) sales amount becomes
# NaN and is then replaced by 0.
parsed_dates = pd.to_datetime(df[DATE_COL], errors="coerce")
numeric_sales = pd.to_numeric(df[SALES_COL], errors="coerce")
df[DATE_COL] = parsed_dates
df[SALES_COL] = numeric_sales.fillna(0)

# RFM Metric Calculation

In [None]:
# Reference point for recency: one day after the most recent order in the
# data, so the most recent customer gets a recency of 1 day rather than 0.
snapshot_date = df[DATE_COL].max() + timedelta(days=1)

# One row per customer. Named aggregation yields the final column names
# directly (no in-place rename), and built-in reducers replace the
# per-group Python lambda.
rfm = df.groupby(CUSTOMER_COL).agg(
    Recency=(DATE_COL, "max"),      # last order date for now; converted to days below
    Frequency=(SALES_COL, "size"),  # number of rows recorded for the customer
    Monetary=(SALES_COL, "sum"),    # total sales value
)
# NOTE(review): Frequency counts rows, not distinct orders — confirm the
# data holds one row per order.

# Convert the last order date into whole days elapsed up to the snapshot
# date. A customer without any parseable order date gets NaN here.
rfm["Recency"] = (snapshot_date - rfm["Recency"]).dt.days
rfm.head()

# RFM Scoring

In [None]:
def quartile_score(values, labels):
    """Score each value by the quartile it falls into.

    Parameters
    ----------
    values : pandas.Series
        Metric to score, one entry per customer.
    labels : list
        Four labels, listed in order of ascending ``values`` quartiles.

    Returns
    -------
    pandas.Series
        Categorical series of labels aligned with ``values``.
    """
    try:
        return pd.qcut(values, 4, labels=labels)
    except ValueError:
        # Heavily tied data can make two quartile edges coincide, which
        # qcut rejects. Ranking with method="first" breaks the ties (by row
        # order), which separates the edges again.
        return pd.qcut(values.rank(method="first"), 4, labels=labels)


# Recency is scored in reverse: fewer days since the last order earns a
# higher score.
rfm["R_Score"] = quartile_score(rfm["Recency"], labels=[4, 3, 2, 1])
# Frequency was already ranked unconditionally; kept as is so its scores
# do not change.
rfm["F_Score"] = pd.qcut(rfm["Frequency"].rank(method="first"), 4, labels=[1, 2, 3, 4])
rfm["M_Score"] = quartile_score(rfm["Monetary"], labels=[1, 2, 3, 4])

# Combined score: the sum of the three quartile scores, so 3 (lowest) to
# 12 (highest).
rfm["RFM_Score"] = (
    rfm["R_Score"].astype(int) +
    rfm["F_Score"].astype(int) +
    rfm["M_Score"].astype(int)
)
rfm.head()

# Customer Segmentation

In [None]:
def segment_label(score, high_threshold=10, medium_threshold=7):
    """Map a combined RFM score to a customer value tier.

    Parameters
    ----------
    score : int or float
        Combined RFM score.
    high_threshold : int or float, default 10
        Lowest score labelled "High Value".
    medium_threshold : int or float, default 7
        Lowest score labelled "Medium Value".

    Returns
    -------
    str
        "High Value", "Medium Value", or "Low Value". Any score that does
        not reach ``medium_threshold`` (including NaN, which fails both
        comparisons) is labelled "Low Value".
    """
    if score >= high_threshold:
        return "High Value"
    if score >= medium_threshold:
        return "Medium Value"
    return "Low Value"

# Label every customer with the value tier that matches their combined score.
rfm["Customer Segment"] = rfm["RFM_Score"].map(segment_label)
rfm.head()

In [None]:
# Per-segment totals: how many customers fall in each tier and how much
# revenue they account for. The output names contain spaces, so the named
# aggregations are passed as an unpacked dict.
segment_summary = rfm.groupby("Customer Segment").agg(**{
    "Customer Count": ("Monetary", "size"),
    "Total Revenue": ("Monetary", "sum"),
})
segment_summary

In [None]:
# Bar chart of the number of customers in each segment, drawn on an
# explicitly created axes object.
fig, ax = plt.subplots()
segment_summary["Customer Count"].plot.bar(ax=ax, rot=0)
ax.set_title("Customer Distribution by RFM Segment")
ax.set_xlabel("Customer Segment")
ax.set_ylabel("Number of Customers")
fig.tight_layout()
plt.show()

# Export

In [None]:
# Write both result tables to CSV. The index (customer ID / segment name)
# is written as the first column of each file.
exports = {
    "rfm_customer_segments.csv": rfm,
    "rfm_segment_summary.csv": segment_summary,
}
for out_path, table in exports.items():
    table.to_csv(out_path)