# Task 3: Customer Segmentation / Clustering

# In [5]:
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.metrics import davies_bouldin_score
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

# Synthetic customer table: 20 customers whose spend, quantity and
# transaction count each grow linearly with the customer's position.
data = {
    "CustomerID": [f"C{number:04d}" for number in range(1, 21)],
    "TotalSpent": list(range(500, 700, 10)),
    "TotalQuantity": list(range(50, 150, 5)),
    "TransactionCount": list(range(5, 25)),
}
customer_profiles = pd.DataFrame(data)

# Step 1: Preparing Data for Clustering
# Select the three numeric behaviour columns; CustomerID is left out because
# it is an identifier, not a feature.
clustering_data = customer_profiles.loc[
    :, ["TotalSpent", "TotalQuantity", "TransactionCount"]
]

# Step 2: Scaling the Features
# Learn the per-column scaling parameters first, then apply them to the
# same data.
scaler = StandardScaler().fit(clustering_data)
scaled_clustering_data = scaler.transform(clustering_data)

# Step 3: Finding the Optimal Number of Clusters using the Elbow Method
# Candidate cluster counts, defined once so the fitting loop and the plot's
# x-axis always describe the same values.
k_values = range(2, 11)

# Fit one K-Means model per candidate k and record its inertia.
inertia = []
for k in k_values:
    kmeans = KMeans(n_clusters=k, random_state=42, n_init="auto")
    kmeans.fit(scaled_clustering_data)
    inertia.append(kmeans.inertia_)

# Elbow Plot to determine the optimal number of clusters
plt.figure(figsize=(8, 5))
plt.plot(k_values, inertia, marker="o")
plt.title("Elbow Method for Optimal Clusters")
plt.xlabel("Number of Clusters")
plt.ylabel("Inertia")
plt.tight_layout()
plt.show()

# Step 4: Applying K-Means Clustering (Using 4 clusters as an example)
optimal_clusters = 4
kmeans = KMeans(n_clusters=optimal_clusters, random_state=42, n_init="auto")
# Fit the model, then read the per-row assignments from the fitted estimator.
kmeans.fit(scaled_clustering_data)
clusters = kmeans.labels_
customer_profiles["Cluster"] = clusters

# Step 5: Evaluating Clustering with Davies-Bouldin Index
# Score the assignments against the same scaled features the model was
# fitted on.
db_index = davies_bouldin_score(scaled_clustering_data, clusters)
print(f"Davies-Bouldin Index: {db_index}")

# Step 6: Visualizing Clusters using PCA
# Reduce the scaled features to two components so they can be drawn in 2-D.
pca = PCA(n_components=2)
pca_result = pca.fit_transform(scaled_clustering_data)
customer_profiles["PCA1"], customer_profiles["PCA2"] = (
    pca_result[:, 0],
    pca_result[:, 1],
)

# Draw on an explicit Axes; points are coloured by their assigned cluster.
fig, ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(
    data=customer_profiles,
    x="PCA1",
    y="PCA2",
    hue="Cluster",
    palette="viridis",
    s=100,
    ax=ax,
)
ax.set_title("Customer Clusters (PCA Visualization)")
ax.set_xlabel("PCA1")
ax.set_ylabel("PCA2")
ax.legend(title="Cluster")
fig.tight_layout()
plt.show()

# Step 7: Save Clustering Results to CSV
# Export only the customer identifier and its assigned cluster label; the
# DataFrame index is not written.
clustering_results = customer_profiles.loc[:, ["CustomerID", "Cluster"]]
clustering_results.to_csv("Clustering_Results.csv", index=False)
print("Clustering results saved to Clustering_Results.csv.")


# Recorded cell output: ModuleNotFoundError: No module named 'pandas'