In [None]:
# Customer Segmentation using RFM Analysis

# Objective
#To segment customers based on purchasing behavior and derive actionable
#business insights for targeted marketing and retention strategies.


In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
plt.style.use("default")


In [None]:
# Upload OnlineRetail.csv when the notebook runs on Google Colab.
# google.colab is only importable inside Colab, so the import is guarded: on
# any other runtime this cell is a no-op and the next cell reads
# OnlineRetail.csv from the working directory.
try:
    from google.colab import files
except ImportError:
    files = None

if files is not None:
    # Keep the return value in a variable so it is not echoed as cell output.
    uploaded = files.upload()

In [None]:
# Read the transaction export, decoding it as latin1.
df = pd.read_csv(filepath_or_buffer="OnlineRetail.csv", encoding="latin1")
# Preview the first five rows.
df.head(n=5)

In [None]:
# Row and column counts of the data as loaded.
df.shape

In [None]:
# Column dtypes and non-null counts.
df.info()

In [None]:
# Summary statistics of the loaded data.
df.describe()

In [None]:
# Missing values per column, before any filtering.
df.isnull().sum()

In [None]:
# Drop every row whose invoice number starts with "C"
# (presumably cancellations - confirm against the dataset description).
invoice_codes = df["InvoiceNo"].astype(str)
df = df[~invoice_codes.str.startswith("C")]


In [None]:
# Keep only rows with a strictly positive quantity.
df = df.loc[df["Quantity"].gt(0)]

In [None]:
# Keep only rows with a strictly positive unit price.
df = df.loc[df["UnitPrice"].gt(0)]

In [None]:
# Parse InvoiceDate into datetime values.
# df is the result of boolean filtering at this point, so assigning a column
# in place can raise SettingWithCopyWarning. assign() returns a new frame
# instead, which also keeps the cell safe to re-run.
df = df.assign(InvoiceDate=pd.to_datetime(df["InvoiceDate"]))


In [None]:
# Row and column counts after the filters above.
df.shape

In [None]:
# Preview the filtered data.
df.head()

In [None]:
# Missing values per column after filtering.
df.isnull().sum()

In [None]:
#RFM Analysis
#To calculate recency, frequency, and monetary values, a reference date is required. The reference date is set as one day after the last transaction date in the dataset.

In [None]:
# Snapshot date for recency: one day after the latest invoice in the data.
latest_invoice = df["InvoiceDate"].max()
reference_date = latest_invoice + pd.Timedelta(1, unit="D")
reference_date

In [None]:
# Build the per-customer RFM table (one row per CustomerID).
#   InvoiceDate -> days between reference_date and the customer's latest invoice
#   InvoiceNo   -> number of distinct invoices
#   LineTotal   -> total spend: the sum of Quantity * UnitPrice over all lines
# Spend must weight each line's UnitPrice by its Quantity; summing UnitPrice
# alone would ignore how many units were bought.
# NOTE(review): Monetary feeds the clustering, so cluster ids and any figures
# quoted from an earlier run should be re-checked after re-running.
rfm = (
    df.assign(LineTotal=df["Quantity"] * df["UnitPrice"])
    .groupby("CustomerID")
    .agg({
        "InvoiceDate": lambda dates: (reference_date - dates.max()).days,
        "InvoiceNo": "nunique",
        "LineTotal": "sum",
    })
)

In [None]:
# Give the three aggregated columns their RFM names, in aggregation order.
rfm_column_names = ["Recency", "Frequency", "Monetary"]
rfm.columns = rfm_column_names
rfm.head(n=5)

In [None]:
# Summary statistics of the RFM columns.
rfm.describe()

In [None]:
# Cap Monetary at its own 95th percentile; values above the cap are replaced
# by the cap, values below are left unchanged.
monetary_cap = rfm["Monetary"].quantile(0.95)
rfm["Monetary"] = rfm["Monetary"].clip(upper=monetary_cap)

In [None]:
# One 30-bin histogram per RFM metric, drawn side by side.
fig, axes = plt.subplots(1, 3, figsize=(15, 4))
for ax, metric in zip(axes, ["Recency", "Frequency", "Monetary"]):
    sns.histplot(rfm[metric], bins=30, ax=ax)
    ax.set_title(f"{metric} Distribution")
fig.tight_layout()
plt.show()

In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans


In [None]:
# Standardise the three RFM columns before clustering.
rfm_features = rfm[["Recency", "Frequency", "Monetary"]]
scaler = StandardScaler()
rfm_scaled = scaler.fit(rfm_features).transform(rfm_features)


In [None]:
# Dimensions of the scaled matrix: one row per rfm row, one column per RFM metric.
rfm_scaled.shape


In [None]:
# Elbow method: fit KMeans for k = 1..9 and record each model's inertia.
K = range(1, 10)
ssd = []
for n_clusters in K:
    kmeans = KMeans(n_clusters=n_clusters, random_state=42).fit(rfm_scaled)
    ssd.append(kmeans.inertia_)

# Plot inertia against k; the bend in the curve suggests a cluster count.
fig, ax = plt.subplots()
ax.plot(K, ssd, 'bx-')
ax.set_xlabel('k')
ax.set_ylabel('Inertia/SSD')
ax.set_title('Elbow Method For Optimal k')
plt.show()


In [None]:
# Fit the final 4-cluster model and store each customer's cluster id.
kmeans = KMeans(n_clusters=4, random_state=42)
rfm["Cluster"] = kmeans.fit_predict(rfm_scaled)


In [None]:
# Preview the RFM table with the new Cluster column.
rfm.head()


In [None]:
# Mean Recency, Frequency and Monetary per cluster, rounded to 2 decimals.
rfm_metrics = ["Recency", "Frequency", "Monetary"]
cluster_means = rfm.groupby("Cluster")[rfm_metrics].mean()
cluster_summary = cluster_means.round(2)
cluster_summary


In [None]:
# NOTE(review): this assignment is overwritten by the next cell, which maps
# the same cluster ids to a different set of names, so these labels never
# reach the plots below.
rfm["Segment"] = rfm["Cluster"].map({
    0: "High Value",
    1: "Low Value",
    2: "At Risk",
    3: "Potential"
})


In [None]:
# Name each cluster from its RFM profile rather than from its raw KMeans id.
# KMeans ids are arbitrary integers, so a fixed id -> name table mislabels
# segments whenever the clustering changes (different data, library version
# or feature definition). Clusters are ranked on their mean Recency (lower is
# better), Frequency and Monetary (higher is better); the summed ranks order
# them from best to worst, and the names are assigned in that order.
segment_names = [
    "VIP Customers",
    "Loyal Customers",
    "Regular Customers",
    "At-Risk Customers",
]
cluster_profile = rfm.groupby("Cluster")[["Recency", "Frequency", "Monetary"]].mean()
if len(cluster_profile) != len(segment_names):
    # zip() below would silently drop clusters and leave NaN segments.
    raise ValueError(
        f"expected {len(segment_names)} clusters, found {len(cluster_profile)}"
    )
cluster_score = (
    cluster_profile["Recency"].rank(ascending=False)  # most recent buyers score highest
    + cluster_profile["Frequency"].rank()
    + cluster_profile["Monetary"].rank()
)
# A stable sort keeps the outcome deterministic when two clusters tie.
best_to_worst = cluster_score.sort_values(ascending=False, kind="mergesort").index
rfm["Segment"] = rfm["Cluster"].map(dict(zip(best_to_worst, segment_names)))


In [None]:
# Recency against Monetary, one colour per segment.
fig, ax = plt.subplots(figsize=(7, 5))
sns.scatterplot(data=rfm, x="Recency", y="Monetary", hue="Segment", ax=ax)
ax.set_title("Customer Segments (Recency vs Monetary)")
plt.show()


In [None]:
# Frequency against Monetary, one colour per segment.
fig, ax = plt.subplots(figsize=(7, 5))
sns.scatterplot(data=rfm, x="Frequency", y="Monetary", hue="Segment", ax=ax)
ax.set_title("Customer Segments (Frequency vs Monetary)")
plt.show()


In [None]:
# Annotated heatmap of the per-cluster mean RFM values.
fig, ax = plt.subplots(figsize=(7, 5))
sns.heatmap(cluster_summary, annot=True, fmt=".2f", cmap="Blues", ax=ax)
ax.set_title("Cluster Characteristics Heatmap")
plt.show()


In [None]:
 ## Business Insights & Recommended Actions
Segment 3 — VIP Customers
Characteristics:
Very recent purchases (Recency ~ 6 days)
Very frequent buyers (Freq ~ 80+)
Highest spenders (Monetary ~ 880)
Insights:
These customers form the top revenue group.
They are loyal and willing to spend more.
Business Actions:
Launch a premium loyalty program
Provide early access to new launches
Offer personalized product bundles

Segment 2 — Loyal Customers
Characteristics:
Recent purchases (Recency ~ 24 days)
Good frequency (11+)
High monetary value (~720)

Insights:
Steady and reliable customer base
Strong potential to push toward VIP

Business Actions:
Cross-sell complementary products
Offer “Buy More Save More” bundles
Use email automation to maintain engagement

Segment 0 — Regular Customers
Characteristics:
Mid-recency (47 days)
Low frequency (3)
Low spending (~149)

Insights:
They buy occasionally, not regularly
They need incentives to increase engagement

Business Actions:
Send discount codes
Provide recommendations based on past behavior
Re-engage through social media ads

Segment 1 — At Risk / Churned Customers
Characteristics:
Very high recency (haven’t purchased in 250 days)
Very low frequency (1–2)
Very low spend (~89)

Insights:
These customers have churned
Majority may have made only 1 purchase

Business Actions:
Send win-back campaigns
Offer time-limited discounts (“10% off for 48 hours”)
Re-target via ads or emails