In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import silhouette_score
from sklearn.cluster import KMeans, AgglomerativeClustering, MeanShift, AffinityPropagation, Birch
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np

In [None]:
# Load the customer segmentation CSV from the Kaggle input directory and preview it
csv_path = '/kaggle/input/customer-clustering/segmentation data.csv'
df = pd.read_csv(csv_path)
df

In [None]:
# Dataset dimensions: rows are records, columns are features
n_records, n_features = df.shape
f'Records: {n_records} & Features: {n_features}'

In [None]:
# Exploratory analysis: summary statistics for each numeric column
df.describe()

In [None]:
# Remove irrelevant columns: 'ID' is dropped so it is not used as a clustering feature.
# Rebinding `df` (instead of inplace=True) together with errors='ignore' keeps this
# cell idempotent: re-running it after 'ID' is already gone no longer raises KeyError.
df = df.drop(columns='ID', errors='ignore')
df.head()

In [None]:
# Check each column's data type and non-null count
df.info()

In [None]:
# Count the missing values in each column
df.isna().sum()

In [None]:
# Standardize every column with StandardScaler and keep the original column names;
# the summary below lets us verify the rescaled distribution of each feature
scaler = StandardScaler()
scaled_values = scaler.fit_transform(df)
df_scaled = pd.DataFrame(scaled_values, columns=df.columns)
df_scaled.describe()

In [None]:
# Accumulators for the model comparison: each clustering cell appends one
# algorithm name and one silhouette score; `results` is filled in at the end
Name, Score = [], []
results = pd.DataFrame()

In [None]:
# Apply the "Elbow Method" to find the optimal number of clusters.
# KMeans is distance based, so it is fitted on the standardized features
# (df_scaled); on the raw frame the inertia is dominated by the columns with
# the largest numeric range. A fixed random_state makes the curve reproducible.
k_values = range(1, 11)
inertias = []
for k in k_values:
    km = KMeans(n_clusters=k, n_init=10, random_state=42)
    km.fit(df_scaled)
    inertias.append(km.inertia_)

fig, ax = plt.subplots()
ax.plot(k_values, inertias, marker='o')
ax.set(xlabel='Number of clusters (k)', ylabel='Inertia', title='Elbow method (KMeans)');

In [None]:
# Fit KMeans with three clusters and visualise them.
# The model is fitted and scored on the standardized features (df_scaled) so that
# every feature contributes comparably to the Euclidean distances; random_state
# fixes the centroid initialisation so the labels and the score are reproducible.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
kmeans.fit(df_scaled)
score = silhouette_score(df_scaled, kmeans.labels_)
print(f"Silhouette score: {score:0.3f}")
Name.append('KMeans')
Score.append(score)
# Plot in the original units (Age, Income) so the axes stay interpretable
sns.scatterplot(x=df.Age, y=df.Income, hue=kmeans.labels_, palette='deep');

In [None]:
# Fit Agglomerative Clustering with three clusters and visualise them.
# The model is fitted and scored on the standardized features (df_scaled) so that
# the linkage distances are not dominated by the columns with the largest range.
AC = AgglomerativeClustering(n_clusters=3)
AC.fit(df_scaled)
score = silhouette_score(df_scaled, AC.labels_)
print(f"Silhouette score: {score:0.3f}")
Name.append('Agglomerative Clustering')
Score.append(score)
# Plot in the original units (Age, Income) so the axes stay interpretable
sns.scatterplot(x=df.Age, y=df.Income, hue=AC.labels_, palette='deep');

In [None]:
# Fit Birch with three clusters and visualise them.
# n_clusters=3 is spelled out (it is also Birch's default) so the cell matches the
# other three-cluster models. The model is fitted and scored on the standardized
# features (df_scaled): Birch's subcluster radius threshold is an absolute distance,
# which is only meaningful when the features share a comparable scale.
BCH = Birch(n_clusters=3)
BCH.fit(df_scaled)
score = silhouette_score(df_scaled, BCH.labels_)
print(f"Silhouette score: {score:0.3f}")
Name.append('Birch Clustering')
Score.append(score)
# Plot in the original units (Age, Income) so the axes stay interpretable
sns.scatterplot(x=df.Age, y=df.Income, hue=BCH.labels_, palette='deep');

In [None]:
# Cluster with Mean Shift and visualise the result.
# Mean Shift determines the number of clusters itself (it is not fixed to three),
# so the number found is reported. The model is fitted and scored on the
# standardized features (df_scaled) so every feature contributes comparably.
MS = MeanShift()
MS.fit(df_scaled)
n_found = len(np.unique(MS.labels_))
# silhouette_score requires between 2 and n_samples - 1 distinct labels;
# record NaN instead of raising when the clustering is degenerate
if 2 <= n_found < len(df_scaled):
    score = silhouette_score(df_scaled, MS.labels_)
else:
    score = np.nan
print(f"Clusters found: {n_found}")
print(f"Silhouette score: {score:0.3f}")
Name.append('Mean Shift')
Score.append(score)
# Plot in the original units (Age, Income) so the axes stay interpretable
sns.scatterplot(x=df.Age, y=df.Income, hue=MS.labels_, palette='deep');

In [None]:
# Cluster with Affinity Propagation and visualise the result.
# Affinity Propagation determines the number of clusters itself (it is not fixed
# to three), so the number found is reported. The model is fitted and scored on
# the standardized features (df_scaled); random_state makes the run reproducible.
AP = AffinityPropagation(damping=0.9, random_state=42)
AP.fit(df_scaled)
n_found = len(np.unique(AP.labels_))
# If the algorithm does not converge every sample is labelled -1, and
# silhouette_score requires between 2 and n_samples - 1 distinct labels;
# record NaN instead of raising in those cases
if 2 <= n_found < len(df_scaled):
    score = silhouette_score(df_scaled, AP.labels_)
else:
    score = np.nan
print(f"Clusters found: {n_found}")
print(f"Silhouette score: {score:0.3f}")
Name.append('Affinity Propagation')
Score.append(score)
# Plot in the original units (Age, Income) so the axes stay interpretable
sns.scatterplot(x=df.Age, y=df.Income, hue=AP.labels_, palette='deep');

In [None]:
# Showing the results
# Build the table from scratch on every run instead of assigning columns onto the
# frame created earlier: once a clustering cell has been re-run, Name/Score are
# longer than the existing frame and column assignment raises ValueError.
# Only the most recent score per algorithm is kept, so re-runs do not add duplicate rows.
results = (
    pd.DataFrame({'Name': Name, 'Score': Score})
    .drop_duplicates(subset='Name', keep='last')
    .reset_index(drop=True)
)
results