In [None]:
# Required libraries
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans, DBSCAN
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler


In [None]:
# Toy customer table: annual income, spending score, and age (20 customers)
data = dict(
    AnnualIncome=[15, 16, 17, 18, 19, 20, 22, 25, 30, 35, 40, 45, 50, 60, 65, 70, 75, 80, 85, 90],
    SpendingScore=[39, 81, 6, 77, 40, 76, 94, 5, 82, 56, 44, 33, 62, 27, 10, 15, 99, 60, 72, 20],
    Age=[20, 22, 25, 24, 35, 40, 30, 21, 50, 31, 60, 45, 38, 33, 26, 29, 41, 53, 46, 28],
)

# Build the DataFrame from the column-oriented dict (one key per column)
df = pd.DataFrame.from_dict(data)

# Preview the first five rows
df.head(5)


In [None]:
# Standardize the feature columns with StandardScaler
scaler = StandardScaler()
scaled_values = scaler.fit_transform(df)

# Wrap the scaled array in a DataFrame so the column names are available again
feature_names = ['AnnualIncome', 'SpendingScore', 'Age']
df_scaled = pd.DataFrame(scaled_values, columns=feature_names)
df_scaled.head()


In [None]:
# Apply K-Means clustering with k=3.
# Fit on the three feature columns only: this cell writes its labels back into
# df_scaled, so fitting on the whole frame would feed stored cluster labels in
# as features whenever the cell is re-run or executed out of order.
feature_cols = ['AnnualIncome', 'SpendingScore', 'Age']

# n_init is given explicitly because scikit-learn's default for it differs
# between releases; pinning it keeps the result stable across versions.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
df_scaled['KMeans_Cluster'] = kmeans.fit_predict(df_scaled[feature_cols])

# Display cluster assignments
df_scaled[feature_cols + ['KMeans_Cluster']].head()


In [None]:
# Apply DBSCAN clustering.
# Cluster on the three scaled feature columns only: by this point df_scaled also
# carries the 'KMeans_Cluster' label column, and passing the whole frame would
# make DBSCAN treat those labels as an extra feature in its distance computation.
feature_cols = ['AnnualIncome', 'SpendingScore', 'Age']

dbscan = DBSCAN(eps=0.5, min_samples=2)
# DBSCAN labels points that belong to no cluster as -1 (noise).
df_scaled['DBSCAN_Cluster'] = dbscan.fit_predict(df_scaled[feature_cols])

# Display cluster assignments
df_scaled[feature_cols + ['DBSCAN_Cluster']].head()


In [None]:
# Apply PCA to reduce dimensions from 3 to 2.
# Only the three scaled feature columns are projected: df_scaled also holds the
# 'KMeans_Cluster' and 'DBSCAN_Cluster' label columns, which are not features
# and would otherwise be mixed into the principal components.
feature_cols = ['AnnualIncome', 'SpendingScore', 'Age']

pca = PCA(n_components=2)
pca_values = pca.fit_transform(df_scaled[feature_cols])

# Convert the PCA result back to a DataFrame for easy handling
df_pca = pd.DataFrame(pca_values, columns=['PCA1', 'PCA2'])
df_pca.head()


In [None]:
# Plot K-Means clusters with original data.
# The x/y values come from the unscaled `df` so that they match the axis labels
# (income and spending score in their original units); df_scaled holds
# standardized values, which would not match those labels. The colors still
# come from the cluster labels stored in df_scaled (same row order as df).
fig, ax = plt.subplots(figsize=(8, 6))
points = ax.scatter(
    df['AnnualIncome'],
    df['SpendingScore'],
    c=df_scaled['KMeans_Cluster'],
    cmap='viridis',
)
ax.set_title('K-Means Clustering of Customers')
ax.set_xlabel('Annual Income (in thousands)')
ax.set_ylabel('Spending Score (1-100)')
fig.colorbar(points, ax=ax, label='Cluster')
plt.show()


In [None]:
# Plot DBSCAN clusters with original data.
# The x/y values come from the unscaled `df` so that they match the axis labels
# (income and spending score in their original units); df_scaled holds
# standardized values, which would not match those labels. The colors still
# come from the cluster labels stored in df_scaled (same row order as df);
# a label of -1 marks points DBSCAN classified as noise.
fig, ax = plt.subplots(figsize=(8, 6))
points = ax.scatter(
    df['AnnualIncome'],
    df['SpendingScore'],
    c=df_scaled['DBSCAN_Cluster'],
    cmap='rainbow',
)
ax.set_title('DBSCAN Clustering of Customers')
ax.set_xlabel('Annual Income (in thousands)')
ax.set_ylabel('Spending Score (1-100)')
fig.colorbar(points, ax=ax, label='Cluster')
plt.show()


In [None]:
# Scatter the two PCA components, colored by K-Means cluster label
fig, ax = plt.subplots(figsize=(8, 6))
points = ax.scatter(
    df_pca['PCA1'],
    df_pca['PCA2'],
    c=df_scaled['KMeans_Cluster'],
    cmap='viridis',
)
ax.set_title('PCA - Dimensionality Reduction with K-Means Clusters')
ax.set_xlabel('PCA1')
ax.set_ylabel('PCA2')
fig.colorbar(points, ax=ax, label='Cluster')
plt.show()


In [None]:
# Scatter the two PCA components, colored by DBSCAN cluster label
fig, ax = plt.subplots(figsize=(8, 6))
points = ax.scatter(
    df_pca['PCA1'],
    df_pca['PCA2'],
    c=df_scaled['DBSCAN_Cluster'],
    cmap='rainbow',
)
ax.set_title('PCA - Dimensionality Reduction with DBSCAN Clusters')
ax.set_xlabel('PCA1')
ax.set_ylabel('PCA2')
fig.colorbar(points, ax=ax, label='Cluster')
plt.show()
