In [4]:
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
import plotly.express as px
import plotly.graph_objects as go
import matplotlib.pyplot as plt

In [5]:
customersdata = pd.read_csv("/content/Mall_Customers (1).csv")

In [None]:
customersdata.head()

Unnamed: 0,CustomerID,Gender,Age,Annual Income (k$),Spending Score (1-100)
0,1,Male,19,15,39
1,2,Male,21,15,81
2,3,Female,20,16,6
3,4,Female,23,16,77
4,5,Female,31,17,40


In [6]:
customersdata.shape

(200, 5)

In [7]:
kmeans_model = KMeans(init='k-means++',  max_iter=400, random_state=42)

In [8]:
kmeans_model.fit(customersdata[['Age','Annual Income (k$)',
'Spending Score (1-100)']])

KMeans(max_iter=400, random_state=42)

In [9]:
def try_different_clusters(K, data):

    cluster_values = list(range(1, K+1))
    inertias=[]

    for c in cluster_values:
        model = KMeans(n_clusters = c,init='k-means++',max_iter=400,random_state=42)
        model.fit(data)
        inertias.append(model.inertia_)

    return inertias

In [10]:
outputs = try_different_clusters(12, customersdata[['Age','Annual Income (k$)','Spending Score (1-100)']])
distances = pd.DataFrame({"clusters": list(range(1, 13)),"sum of squared distances": outputs})

In [11]:
figure = go.Figure()
figure.add_trace(go.Scatter(x=distances["clusters"], y=distances["sum of squared distances"]))

figure.update_layout(xaxis = dict(tick0 = 1,dtick = 1,tickmode = 'linear'),
                  xaxis_title="Number of clusters",
                  yaxis_title="Sum of squared distances",
                  title_text="Finding optimal number of clusters using elbow method")
figure.show()

In [12]:
kmeans_model_new = KMeans(n_clusters = 5,init='k-means++',max_iter=400,random_state=42)

kmeans_model_new.fit_predict(customersdata[['Age','Annual Income (k$)','Spending Score (1-100)']])

array([0, 4, 0, 4, 0, 4, 0, 4, 0, 4, 0, 4, 0, 4, 0, 4, 0, 4, 0, 4, 0, 4,
       0, 4, 0, 4, 0, 4, 0, 4, 0, 4, 0, 4, 0, 4, 0, 4, 0, 4, 0, 4, 0, 3,
       0, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 2, 1, 3, 1, 2, 1, 2, 1,
       2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1,
       2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1,
       2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1,
       2, 1], dtype=int32)

In [13]:
cluster_centers = kmeans_model_new.cluster_centers_
data = np.expm1(cluster_centers)
points = np.append(data, cluster_centers, axis=1)
points

array([[4.34173717e+19, 2.65358566e+11, 1.20898074e+09, 4.52173913e+01,
        2.63043478e+01, 2.09130435e+01],
       [1.57793399e+14, 3.82980197e+37, 4.65399911e+35, 3.26923077e+01,
        8.65384615e+01, 8.21282051e+01],
       [3.25560375e+17, 9.36317078e+37, 7.93349385e+07, 4.03243243e+01,
        8.74324324e+01, 1.81891892e+01],
       [5.36582750e+18, 6.44514846e+23, 4.39802724e+21, 4.31265823e+01,
        5.48227848e+01, 4.98354430e+01],
       [9.45814564e+10, 1.49009027e+11, 2.93217129e+34, 2.52727273e+01,
        2.57272727e+01, 7.93636364e+01]])

In [14]:
points = np.append(points, [[0], [1], [2], [3], [4]], axis=1)
customersdata["clusters"] = kmeans_model_new.labels_

In [15]:
customersdata.head()

Unnamed: 0,CustomerID,Gender,Age,Annual Income (k$),Spending Score (1-100),clusters
0,1,Male,19,15,39,0
1,2,Male,21,15,81,4
2,3,Female,20,16,6,0
3,4,Female,23,16,77,4
4,5,Female,31,17,40,0


In [16]:
figure = px.scatter_3d(customersdata,
                    color='clusters',
                    x="Age",
                    y="Annual Income (k$)",
                    z="Spending Score (1-100)",
                    category_orders = {"clusters": ["0", "1", "2", "3", "4"]}
                    )
figure.update_layout()
figure.show()

Finding an optimal number of unique customer groups will help you understand how your customers differ, and help you give them exactly what they want. Customer segmentation improves customer experience and boosts company revenue. That’s why segmentation is a must if you want to surpass your competitors and get more customers. 