# Import necessary libraries

In [None]:

import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
import seaborn as sns



# Step 1: Create Dummy Dataset
# Generating random data for 100 customers with random transaction dates, amounts, and counts

In [None]:
# Seed NumPy's global RNG so the synthetic dataset is identical on every run
np.random.seed(42)
num_customers = 100

# Build the dummy dataset that the RFM aggregation reads as `data`: one row per
# customer with a random 2024 transaction date, a random transaction count and
# a random amount.
# NOTE(review): the value ranges below are illustrative choices, not recovered
# from an earlier version of this cell — adjust as needed.
day_offsets = np.random.randint(0, 365, size=num_customers)
data = pd.DataFrame({
    'CustomerID': np.arange(1, num_customers + 1),
    # Dates fall between 2024-01-01 and 2024-12-30, i.e. never after the
    # 2024-12-31 reference date used for recency
    'TransactionDate': pd.Timestamp(2024, 1, 1) + pd.to_timedelta(day_offsets, unit='D'),
    # Holds a transaction *count* (1-20) despite the column name, because the
    # RFM step sums this column to obtain Frequency
    'TransactionID': np.random.randint(1, 21, size=num_customers),
    'Amount': np.random.uniform(10, 1000, size=num_customers).round(2),
})

# Step 2: Calculate RFM metrics

In [None]:
# Recency is measured backwards from the last day of 2024
reference_date = datetime(2024, 12, 31)

# Collapse the data to one row per CustomerID, naming each output column
# directly in the aggregation:
#   Recency   - whole days between reference_date and the latest TransactionDate
#   Frequency - sum of the TransactionID column
#   Monetary  - sum of the Amount column
# NOTE(review): summing TransactionID is a frequency only if that column holds
# transaction counts; if it holds identifiers a count would be needed — confirm.
rfm = (
    data.groupby('CustomerID')
    .agg(
        Recency=('TransactionDate', lambda dates: (reference_date - dates.max()).days),
        Frequency=('TransactionID', 'sum'),
        Monetary=('Amount', 'sum'),
    )
    .reset_index()
)


# Step 3: Normalize the RFM metrics

In [None]:
# Put Recency, Frequency and Monetary on a common scale: fit a StandardScaler
# on the three columns, then transform those same columns with it
rfm_features = rfm[['Recency', 'Frequency', 'Monetary']]
scaler = StandardScaler().fit(rfm_features)
rfm_normalized = scaler.transform(rfm_features)

# Step 4: Apply the Elbow Method to Find Optimal Number of Clusters

In [None]:
# Elbow Method: fit K-Means for every k from 1 to 10 on the standardized
# metrics and record the inertia of each fitted model
k_values = range(1, 11)
inertia = []
for k in k_values:
    kmeans = KMeans(n_clusters=k, random_state=42).fit(rfm_normalized)
    inertia.append(kmeans.inertia_)

# Draw inertia against k; the bend in this curve suggests the number of clusters
plt.plot(k_values, inertia, marker='o')
plt.title('Elbow Method for Optimal K')
plt.xlabel('Number of Clusters')
plt.ylabel('Inertia')
plt.show()

# Step 5: Apply K-Means Clustering with optimal clusters (k=4)

In [None]:
# Cluster count chosen from the elbow curve
optimal_clusters = 4

# Fit K-Means on the standardized metrics and store each customer's cluster
# label next to its RFM values
kmeans = KMeans(n_clusters=optimal_clusters, random_state=42)
kmeans.fit(rfm_normalized)
rfm['Cluster'] = kmeans.labels_

# Step 6: Analyze the Clusters

In [None]:
# Per-cluster profile: mean Recency, Frequency and Monetary, plus the number of
# customers assigned to each cluster
cluster_summary = rfm.groupby('Cluster').agg({
    'Recency': 'mean',
    'Frequency': 'mean',
    'Monetary': 'mean',
    'CustomerID': 'count'  # Number of customers in each cluster
}).rename(columns={'CustomerID': 'Customer Count'}).reset_index()

# Display cluster summary
print(cluster_summary)

# Heatmap with one column per cluster and one row per summary value.
# fmt='.1f' is set explicitly: seaborn's default annotation format ('.2g')
# prints values above 99 in scientific notation (e.g. 5.1e+02), which makes the
# cell annotations hard to read.
plt.figure(figsize=(10, 6))
sns.heatmap(cluster_summary.set_index('Cluster').T,
            annot=True, fmt='.1f', cmap='coolwarm')
plt.title('Cluster-wise Average RFM Metrics')
plt.show()

# Enhancement 1: Apply Weighted RFM Scoring

In [None]:
# Relative importance of each metric in the combined score (weights sum to 1.0)
recency_weight = 0.5
frequency_weight = 0.3
monetary_weight = 0.2

# Min-Max scale each metric to the [0, 1] range so the weights act on comparable values
rfm_metrics = rfm[['Recency', 'Frequency', 'Monetary']]
rfm_min = rfm_metrics.min()
rfm_max = rfm_metrics.max()
# A metric that is identical for every customer has a zero range; dividing by 1
# instead maps it to 0 rather than filling the column with NaN
rfm_range = (rfm_max - rfm_min).replace(0, 1)
rfm_normalized_weighted = (rfm_metrics - rfm_min) / rfm_range

# Recency counts days since the last transaction, so a SMALLER value is better,
# while larger Frequency and Monetary values are better. Flip the scaled Recency
# so that all three components point the same way and a higher Weighted_Score
# consistently means a more valuable customer.
rfm_normalized_weighted['Recency'] = 1 - rfm_normalized_weighted['Recency']

# Calculating the weighted RFM score
rfm['Weighted_Score'] = (rfm_normalized_weighted['Recency'] * recency_weight +
                         rfm_normalized_weighted['Frequency'] * frequency_weight +
                         rfm_normalized_weighted['Monetary'] * monetary_weight)


# Step 7: Apply K-Means Clustering on Weighted Scores

In [None]:
# Cluster customers on the single weighted score. K-Means expects a 2-D input,
# so the score column is reshaped to (n_customers, 1) before fitting.
weighted_scores = rfm['Weighted_Score'].to_numpy().reshape(-1, 1)
kmeans_weighted = KMeans(n_clusters=3, random_state=42)
kmeans_weighted.fit(weighted_scores)
rfm['Weighted_Cluster'] = kmeans_weighted.labels_

# Step 8: Analyze and Visualize the Clusters Based on Weighted Scores

In [None]:
# Per-cluster profile for the weighted-score clustering: mean Recency, Frequency
# and Monetary, plus the number of customers in each weighted cluster
cluster_summary_weighted = rfm.groupby('Weighted_Cluster').agg({
    'Recency': 'mean',
    'Frequency': 'mean',
    'Monetary': 'mean',
    'CustomerID': 'count'
}).rename(columns={'CustomerID': 'Customer Count'}).reset_index()

# Display cluster summary for weighted clusters
print(cluster_summary_weighted)

# Heatmap with one column per weighted cluster and one row per summary value.
# fmt='.1f' is set explicitly: seaborn's default annotation format ('.2g')
# prints values above 99 in scientific notation (e.g. 5.1e+02), which makes the
# cell annotations hard to read.
plt.figure(figsize=(10, 6))
sns.heatmap(cluster_summary_weighted.set_index('Weighted_Cluster').T,
            annot=True, fmt='.1f', cmap='coolwarm')
plt.title('Cluster-wise Average RFM Metrics (Weighted Clustering)')
plt.show()