# A Basic Python Example for RFM Segmentation

In [1]:
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

# -------------------------------
# Step 1: Create Sample Customer Data
# -------------------------------
# Seed NumPy's global generator so the synthetic customers are identical on every run.
np.random.seed(42)

# Draw the three RFM columns one after another. The draw order
# (Recency, then Frequency, then Monetary) determines the generated values,
# so it must not be rearranged. randint's upper bound is exclusive.
recency_days = np.random.randint(1, 100, size=100)     # days since last purchase: 1..99
purchase_counts = np.random.randint(1, 20, size=100)   # number of purchases: 1..19
amounts_spent = np.random.randint(10, 1000, size=100)  # amount spent: 10..999

# Assemble the customer table: one row per customer, with IDs 1 through 100.
data = pd.DataFrame({
    'CustomerID': np.arange(1, 101),
    'Recency': recency_days,
    'Frequency': purchase_counts,
    'Monetary': amounts_spent,
})

# Preview the first five rows under a heading.
print("Sample Customer Data:", data.head(), sep="\n")

# -------------------------------
# Step 2: Preprocess the Data (Standardization)
# -------------------------------
# The three RFM columns are the only inputs to clustering; CustomerID is excluded.
features = ['Recency', 'Frequency', 'Monetary']
rfm_values = data[features]

# Learn the per-feature scaling statistics from the training customers, then
# apply them. The fitted scaler is kept so new customers can later be
# transformed with exactly the same statistics.
scaler = StandardScaler()
scaler.fit(rfm_values)
X_scaled = scaler.transform(rfm_values)

# -------------------------------
# Step 3: Apply k-means Clustering
# -------------------------------
# This example uses k = 4 clusters.
# A fixed random_state makes the centroid initialization repeatable across runs.
k = 4
kmeans = KMeans(n_clusters=k, n_init=10, random_state=42)

# Fit on the standardized features and record each customer's cluster label
# as a new column on the customer table.
cluster_labels = kmeans.fit_predict(X_scaled)
data['Cluster'] = cluster_labels

# Preview the first five customers together with their assigned cluster.
print("\nCluster Assignments:", data.head(), sep="\n")

# Optionally, show the fitted cluster centres. These are expressed in the
# scaled feature space, not in the original RFM units.
centroids = kmeans.cluster_centers_
print("\nCluster Centroids (scaled features):", centroids, sep="\n")

# -------------------------------
# Step 4: (Later) Assign New Customers to Existing Clusters
# -------------------------------
# New customers arrive in the same RFM layout; each row below is one customer.
new_data = pd.DataFrame(
    [
        [101, 15, 5, 150],
        [102, 80, 3, 600],
    ],
    columns=['CustomerID', 'Recency', 'Frequency', 'Monetary'],
)

# IMPORTANT: reuse the scaler fitted on the original customers (transform only,
# no refit) so the new rows land in the same scaled space as the centroids.
X_new_scaled = scaler.transform(new_data[features])

# Label each new customer with the already-trained k-means model.
new_cluster_labels = kmeans.predict(X_new_scaled)
new_data['Cluster'] = new_cluster_labels
print("\nNew Data with Cluster Assignments:", new_data, sep="\n")


Sample Customer Data:
   CustomerID  Recency  Frequency  Monetary
0           1       52         13       394
1           2       93          9       412
2           3       15         15       647
3           4       72         13       139
4           5       61          1        62

Cluster Assignments:
   CustomerID  Recency  Frequency  Monetary  Cluster
0           1       52         13       394        2
1           2       93          9       412        2
2           3       15         15       647        0
3           4       72         13       139        2
4           5       61          1        62        2

Cluster Centroids (scaled features):
[[-1.32304009 -0.07656092 -0.08532492]
 [ 0.75699333 -1.03082496  0.70439495]
 [ 0.43420388  0.11282661 -1.02828861]
 [ 0.36825765  1.07185282  0.88705335]]

New Data with Cluster Assignments:
   CustomerID  Recency  Frequency  Monetary  Cluster
0         101       15          5       150        0
1         102       80          3    