### Clustering with DBSCAN

In [37]:
import pandas as pd
import numpy as np

import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px

from sklearn import datasets
from sklearn.cluster import KMeans,DBSCAN
from sklearn.metrics import silhouette_score

In [2]:
# Load the iris feature table as a pandas DataFrame (as_frame=True).
data = datasets.load_iris(as_frame=True).data

In [5]:
# Drop the "(cm)" unit suffix from the iris column names so the plotting cell
# can refer to features by short labels such as 'sepal length'.
# Reassigning (rather than inplace=True) keeps the cell safe to re-run and
# makes the data lineage explicit.
data = data.rename(columns={
    'sepal length (cm)': 'sepal length',
    'sepal width (cm)': 'sepal width',
    'petal length (cm)': 'petal length',
    'petal width (cm)': 'petal width',
})

In [49]:
# Fit a single DBSCAN model (eps=0.6, min_samples=5) and score its labelling.
model = DBSCAN(eps=0.6, min_samples=5).fit(data)
preds = model.labels_

# NOTE(review): any samples labelled -1 (noise) are passed to silhouette_score
# like an ordinary cluster label, so they influence the score.
score = silhouette_score(data, preds)
score

0.5381725940321752

In [50]:
# Display the label DBSCAN assigned to every sample; -1 marks samples that
# DBSCAN left unassigned as noise.
preds

array([ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0, -1,  0,  0,  0,  0,  0,  0,  0,  0,  1,
        1,  1,  1,  1,  1,  1, -1,  1,  1, -1,  1,  1,  1,  1,  1,  1,  1,
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
        1,  1,  1,  1,  1,  1,  1,  1, -1,  1,  1,  1,  1, -1,  1,  1,  1,
        1,  1,  1,  1, -1,  1,  1, -1,  1,  1,  1,  1,  1,  1,  1, -1,  1,
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1, -1,  1,  1,  1,  1,
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1],
      dtype=int64)

In [27]:
# Interactive scatter of sepal length vs. petal length, coloured by the
# DBSCAN label of each sample.
px.scatter(
    data,
    x='sepal length',
    y='petal length',
    color=preds,
)

In [47]:
# Sweep eps from 0.05 to 4.95 in steps of 0.05 (min_samples fixed at 5) and
# record the silhouette score of every DBSCAN run that produces at least two
# distinct labels.
# Note: this loop rebinds `model`, `preds` and `score`; after it finishes they
# hold the values from the last epsilon tried, not from any earlier cell.
epsilons = [step / 100 for step in range(5, 500, 5)]
cluster = {}

for epsilon in epsilons:
    model = DBSCAN(eps=epsilon, min_samples=5)
    model.fit(data)
    preds = pd.Series(model.labels_)

    # NOTE(review): nunique() also counts the noise label -1 when present, so
    # the "number of clusters" reported below can be one higher than the
    # number of real clusters.
    n_labels = preds.nunique()

    # silhouette_score needs at least two distinct labels; skip runs where
    # every sample received the same label.
    if n_labels <= 1:
        continue

    score = silhouette_score(data, preds)
    cluster[f'epsilon = {epsilon}, clusters = {n_labels}'] = score
    print('================================')
    print(f'epsilon: {epsilon}, number of clusters: {n_labels}, silhouette score: {score}')

# max() raises ValueError on an empty collection, so only report the best
# score when at least one run was actually scored.
if cluster:
    print(max(cluster.values()))
else:
    print('No epsilon produced more than one label; nothing to score.')

epsilon: 0.15, number of clusters: 2, silhouette score: 0.09264605166785017
epsilon: 0.2, number of clusters: 3, silhouette score: 0.1825642832387009
epsilon: 0.25, number of clusters: 3, silhouette score: -0.05098771166058551
epsilon: 0.3, number of clusters: 4, silhouette score: -0.052064296410956626
epsilon: 0.35, number of clusters: 7, silhouette score: 0.11618432226093098
epsilon: 0.4, number of clusters: 5, silhouette score: 0.2779400139443995
epsilon: 0.45, number of clusters: 3, silhouette score: 0.4597947157059908
epsilon: 0.5, number of clusters: 3, silhouette score: 0.4860341970345691
epsilon: 0.55, number of clusters: 3, silhouette score: 0.5005124887072625
epsilon: 0.6, number of clusters: 3, silhouette score: 0.5381725940321752
epsilon: 0.65, number of clusters: 3, silhouette score: 0.501833562046432
epsilon: 0.7, number of clusters: 3, silhouette score: 0.501833562046432
epsilon: 0.75, number of clusters: 3, silhouette score: 0.5121107753649314
epsilon: 0.8, number of cl

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int64)