In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.cluster import DBSCAN
from itertools import cycle


# --- Load data ---------------------------------------------------------------
dataset = pd.read_csv('Mall_Customers.csv')

# Feature matrix: the columns at positional indices 3 and 4, as a NumPy array.
X = dataset.iloc[:, [3, 4]].values

# --- Cluster -----------------------------------------------------------------
dbscan = DBSCAN(eps=3, min_samples=5)
y_dbscan = dbscan.fit_predict(X)
labels = dbscan.labels_

# The label -1 is treated as noise further down (plotted as 'Outliers'),
# so it is excluded from the cluster count.
n_clusters_ = len(set(labels) - {-1})
print(f"Estimated number of clusters: {n_clusters_}")

# --- Report ------------------------------------------------------------------
# Mean feature vector of all points assigned to each cluster.
for k in range(n_clusters_):
    cluster_center = X[labels == k].mean(axis=0)
    print(f"Cluster {k} center coordinates: {cluster_center}")


# Plot each DBSCAN cluster (core points large, non-core points small, cluster
# mean extra large with a black edge) plus the noise points, then save and
# show the figure.
plt.figure(figsize=(10, 7))

colors = cycle(['#dede00', '#377eb8', '#f781bf', '#4daf4a', '#e41a1c', '#ff7f00'])
markers = cycle(['x', 'o', '^', 's', 'D', '*'])

# Boolean mask of core samples. It does not depend on the cluster, so it is
# built once instead of on every loop iteration.
core_samples_mask = np.zeros_like(labels, dtype=bool)
core_samples_mask[dbscan.core_sample_indices_] = True

for k, col, marker in zip(range(n_clusters_), colors, markers):
    class_members = (labels == k)

    # `color=` is used instead of only `markerfacecolor=` because unfilled
    # markers such as 'x' have no face; with `markerfacecolor` alone they are
    # drawn in the default property-cycle colour rather than the cluster colour.
    core_xy = X[class_members & core_samples_mask]
    plt.plot(core_xy[:, 0], core_xy[:, 1], marker, color=col, markersize=10,
             linestyle='None', label=f'Cluster {k}')

    # Centre = mean of ALL members of the cluster (core and non-core), i.e.
    # the same definition as the "center coordinates" printed for the cluster.
    cluster_center = np.mean(X[class_members], axis=0)
    plt.plot(cluster_center[0], cluster_center[1], marker, color=col,
             markeredgecolor='k', markersize=14)

    # Non-core members of the cluster: same marker and colour, smaller size.
    border_xy = X[class_members & ~core_samples_mask]
    plt.plot(border_xy[:, 0], border_xy[:, 1], marker, color=col, markersize=6)

# Points labelled -1 belong to no cluster; draw them as small black crosses.
if -1 in labels:
    outliers = (labels == -1)
    plt.plot(X[outliers, 0], X[outliers, 1], 'k+', markersize=6, label='Outliers')

plt.title(f'Clusters identified by DBSCAN (Estimated clusters: {n_clusters_})')
plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
plt.legend()
# Save before show(): show() may leave the current figure empty afterwards.
plt.savefig('dbscan_clusters.png')
plt.show()


Estimated number of clusters: 5
Cluster 0 center coordinates: [47.22222222 49.33333333]
Cluster 1 center coordinates: [54.375 53.125]
Cluster 2 center coordinates: [54. 44.]
Cluster 3 center coordinates: [61.91666667 46.29166667]
Cluster 4 center coordinates: [72.2 73.2]


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  method='lar', copy_X=True, eps=np.finfo(np.float).eps,
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  method='lar', copy_X=True, eps=np.finfo(np.float).eps,
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  eps=np.finfo(np.float).eps, copy_Gram=True, verbose=0,
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  eps=np.finfo(np.float).eps, copy_X=True, fit_path=True,
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  eps=np.finfo(np.float).eps, copy_X=True, fit_path=True,
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes

<Figure size 1000x700 with 1 Axes>

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import DBSCAN

# Load the data and cluster the columns at positional indices 3 and 4.
dataset = pd.read_csv('Mall_Customers.csv')
X = dataset.iloc[:, [3, 4]].values

dbscan = DBSCAN(eps=3, min_samples=5)
labels = dbscan.fit_predict(X)

# One scatter series per label; the label -1 (noise) is drawn in black.
plt.figure(figsize=(10, 7))
unique_labels = set(labels)
n_clusters_ = len(unique_labels) - (1 if -1 in unique_labels else 0)
colors = [plt.cm.Spectral(each) for each in np.linspace(0, 1, len(unique_labels))]

# Iterate in sorted order: a set has no guaranteed iteration order, so the
# label -> colour pairing and the legend order would otherwise be arbitrary.
for k, col in zip(sorted(unique_labels), colors):
    is_noise = (k == -1)
    if is_noise:
        col = (0, 0, 0, 1)  # opaque black for noise points

    class_members = (labels == k)
    plt.plot(X[class_members, 0], X[class_members, 1], 'o', markerfacecolor=tuple(col),
             markeredgecolor='k', markersize=6 if is_noise else 10,
             # Noise is not a cluster, so it is labelled 'Outliers', not 'Cluster-1'.
             label='Outliers' if is_noise else f'Cluster {k}')

plt.title(f'Clusters identified by DBSCAN (Estimated clusters: {n_clusters_})')
plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
plt.legend()
plt.savefig('dbscan_clusters.png')
plt.show()


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  method='lar', copy_X=True, eps=np.finfo(np.float).eps,
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  method='lar', copy_X=True, eps=np.finfo(np.float).eps,
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  eps=np.finfo(np.float).eps, copy_Gram=True, verbose=0,
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  eps=np.finfo(np.float).eps, copy_X=True, fit_path=True,
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  eps=np.finfo(np.float).eps, copy_X=True, fit_path=True,
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes

<Figure size 1000x700 with 1 Axes>