# **📘Customer Segmentation Using K-Means**

---



## ***Importing the Dependencies***

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from mpl_toolkits.mplot3d import Axes3D

## ***Uploading the Dataset***

In [None]:
# Colab-only helper: `google.colab` is not importable outside a Colab runtime.
# The call's return value is kept in `uploaded`; nothing else in this notebook
# reads it (the next cell loads the CSV by file name instead).
from google.colab import files
uploaded = files.upload()


## ***Data Collection & Analysis***

In [None]:
import pandas as pd

# Load the mall-customer CSV from the working directory and preview the
# first rows (the bare expression is what the notebook displays).
dataset_path = "Shopping mall.csv"
df = pd.read_csv(dataset_path)
df.head()


In [None]:
# Count missing values per column so gaps are visible before clustering.
null_counts = df.isnull().sum()
print("Null values in each column:\n")
print(null_counts)


## ***Selecting Features for Clustering***


In [None]:
# Columns used as clustering inputs; this order is the column order of X and
# therefore of the scaled matrix and the fitted cluster centres.
feature_columns = ['Age', 'Income', 'Spending', 'Frequency']
X = df[feature_columns]


## ***Scaling Features***

In [None]:
from sklearn.preprocessing import StandardScaler

# Standardise each feature to zero mean / unit variance. The fitted `scaler`
# is kept so cluster centres can later be mapped back to original units.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)


## **Choosing the number of Clusters**

***WCSS = Within-Cluster Sum of Squares***

In [None]:
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
import seaborn as sns

# Elbow method: fit K-Means for k = 1..10 and plot the within-cluster sum of
# squares (WCSS, exposed by scikit-learn as `inertia_`) against k.
cluster_counts = range(1, 11)

# Apply the seaborn theme once, *before* the figure is created, so the whole
# figure is drawn under a single consistent style.
sns.set(style='darkgrid')
plt.figure(figsize=(8, 6))

# Compute WCSS for each candidate number of clusters
wcss = []
for k in cluster_counts:
    kmeans = KMeans(n_clusters=k, random_state=42)
    kmeans.fit(X_scaled)
    wcss.append(kmeans.inertia_)

# Draw the curve exactly once. `marker` must be given for `markersize` to
# have any visible effect.
plt.plot(
    cluster_counts,
    wcss,
    marker='o',
    linestyle='-',
    color='steelblue',
    linewidth=2,
    markersize=6,
)

# Labels and title
plt.title('The Elbow Point Graph')
plt.xlabel('Number of Clusters')
plt.ylabel('WCSS')
plt.show()


*Optimum Number of Clusters = 4*

# ***Training the K-Means Clustering Model***

In [None]:
# Fit the final model with k = 4 (the value read off the elbow plot).
# `fit_predict` returns one integer cluster label per row of X_scaled.
kmeans = KMeans(n_clusters=4, random_state=42)
Y = kmeans.fit_predict(X_scaled)

# Attach the labels to the DataFrame. The plotting, silhouette, summary and
# export cells all read df['Cluster'], which is not created anywhere else.
# X_scaled was built from df without dropping rows, so Y aligns row-for-row.
df['Cluster'] = Y

print(Y)

### *4 clusters, labelled 0, 1, 2 and 3*

# ***Visualizing all the clusters***


### *2D Scatter Plot Visualizations of Customer Groups*


In [None]:
import itertools
import matplotlib.pyplot as plt
import seaborn as sns

# Seaborn grid style for every pairwise scatter plot drawn below
sns.set_style("darkgrid")

# One pastel colour per cluster
custom_palette = ['#8ecae6', '#ffb703', '#a8dadc', '#bdb2ff']

# Cluster centres mapped back from standardised units to the original units
centroids = scaler.inverse_transform(kmeans.cluster_centers_)

# Same column order as the feature matrix the scaler / model were fitted on,
# so a feature's position here is also its column index in `centroids`.
features = ['Age', 'Income', 'Spending', 'Frequency']

# Visit every unordered pair of features, carrying each feature's column
# index along so the matching centroid coordinates can be picked directly.
for (x_idx, x_feat), (y_idx, y_feat) in itertools.combinations(enumerate(features), 2):
    plt.figure(figsize=(8, 6))

    # Customers, coloured by assigned cluster (small borderless dots)
    ax = sns.scatterplot(
        data=df,
        x=x_feat,
        y=y_feat,
        hue='Cluster',
        palette=custom_palette,
        s=50,
        linewidth=0,
    )

    # Cluster centres drawn on top as large black crosses
    ax.scatter(
        centroids[:, x_idx],
        centroids[:, y_idx],
        c='black',
        s=220,
        marker='X',
        label='Centroid',
    )

    # Titles, axis labels, grid and background
    ax.set_title(f'Customer Segments: {x_feat} vs {y_feat}', fontsize=15, weight='bold')
    ax.set_xlabel(x_feat, fontsize=12)
    ax.set_ylabel(y_feat, fontsize=12)
    ax.grid(True, linestyle='--', linewidth=0.5, alpha=0.5)
    ax.set_facecolor('#f4f4f4')  # light gray plot background
    ax.legend(title='Cluster')
    plt.tight_layout()
    plt.show()


### *3D Scatter Plot Visualizations of Customer Groups*

In [None]:
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
import numpy as np
from IPython.display import HTML
!pip install -q pillow


# Prepare the figure
fig = plt.figure(figsize=(10, 7))
ax = fig.add_subplot(111, projection='3d')

# ✅ Custom color palette matching your uploaded 3D plot
colors = ['#8ecae6', '#ffb703', '#a8dadc', '#bdb2ff']


# Plot clusters
for i in range(4):
    cluster = df[df['Cluster'] == i]
    ax.scatter(
        cluster['Income'],
        cluster['Spending'],
        cluster['Frequency'],
        color=colors[i],
        label=f'Cluster {i+1}',
        s=70,
        alpha=0.9,
        edgecolor='white',
        linewidth=0.7
    )

# Plot centroids
centers = scaler.inverse_transform(kmeans.cluster_centers_)
ax.scatter(
    centers[:, 1],
    centers[:, 2],
    centers[:, 3],
    s=250,
    c='black',
    marker='X',
    edgecolors='white',
    linewidth=1.5,
    label='Centroids'
)

# Axis Labels
ax.set_xlabel('Income', fontsize=12)
ax.set_ylabel('Spending', fontsize=12)
ax.set_zlabel('Frequency', fontsize=12)
ax.set_title('Customer Groups', fontsize=16, weight='bold')
ax.legend()

# Animate: rotate around z-axis
def rotate(angle):
    ax.view_init(elev=20, azim=angle)

ani = FuncAnimation(fig, rotate, frames=np.arange(0, 360, 3), interval=100)


# Save to GIF
ani.save("rotating_3d_clusters.gif", writer='pillow', fps=10)


import matplotlib as mpl
mpl.rcParams['animation.embed_limit'] = 50  # MB limit increased from 20 to 50

# Now render the animation
HTML(ani.to_jshtml())






In [None]:
# Colab-only: hand the GIF written by the animation cell to the browser.
from google.colab import files
files.download("rotating_3d_clusters.gif")


## ***Cluster Validation Using Silhouette Score***

In [None]:
from sklearn.metrics import silhouette_score

# Score the assigned labels against the scaled feature matrix the model was
# fitted on, and report the result to three decimals.
cluster_labels = df['Cluster']
score = silhouette_score(X_scaled, cluster_labels)
print(f"Silhouette Score: {score:.3f}")

## ***Cluster Characteristics Overview***

In [None]:
# Group data by cluster and calculate mean for each feature
summary = df.groupby('Cluster')[['Age', 'Income', 'Frequency', 'Spending']].mean().round(1)
summary


## 🧩 Cluster Interpretation

- **Cluster 0: Middle Income, Low Spenders**
  - **Age:** ~41  
  - **Income:** Moderate (~59k)  
  - **Frequency:** Medium  
  - **Spending:** Low  
  - 🟢 **Insights:** Price-sensitive and average engagement. May respond to **bundled deals or discount campaigns**.

- **Cluster 1: Engaged Loyal Customers**
  - **Age:** ~55  
  - **Income:** High (~110k)  
  - **Frequency:** High  
  - **Spending:** High  
  - 🔵 **Insights:** Highly profitable and loyal. Ideal for **loyalty programs, early access, and exclusive offers**.

- **Cluster 2: Elderly Passive Customers**
  - **Age:** ~76  
  - **Income:** High (~114k)  
  - **Frequency:** Low  
  - **Spending:** Low  
  - 🟡 **Insights:** Affluent but not active. Potential for **re-engagement via targeted outreach**, possibly emphasizing value or convenience.

- **Cluster 3: Young Premium Shoppers**
  - **Age:** ~39  
  - **Income:** Very High (~160k)  
  - **Frequency:** Medium  
  - **Spending:** Medium  
  - 🟣 **Insights:** Tech-savvy, aspirational, and brand-conscious. Best suited for **luxury offerings, influencer-driven marketing**, and premium upselling.


## 💼 Strategic Recommendations

1. 🎯 **Targeted Marketing**
   - Personalize promotions based on cluster behavior.
   - Use social media ads for Cluster 3 and email loyalty rewards for Cluster 1.

2. 📈 **Reactivation Campaigns**
   - Re-engage Cluster 2 through limited-time offers, senior-friendly UX, or value-based messaging.

3. 💡 **Product Bundling**
   - For Cluster 0, introduce low-cost bundles and seasonal discounts to increase spending.

4. 🛍️ **VIP Experiences**
   - For Cluster 1, consider premium loyalty tiers, early product access, or personal shopper services.



In [None]:
# Write the DataFrame (all current columns) to CSV; index=False omits the
# row-index column from the file.
df.to_csv("Customer Segments.csv", index=False)


In [None]:
# Colab-only: hand the exported CSV from the previous cell to the browser.
from google.colab import files
files.download("Customer Segments.csv")
