<a href="https://colab.research.google.com/github/Muyiiwaa/machine_learning_notes/blob/master/DBSCAN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from sklearn.datasets import load_iris
from sklearn.cluster import DBSCAN
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import silhouette_score
import plotly.express as px

In [None]:
# Pull the Iris dataset as pandas objects and keep only the feature table
# (the 'data' entry of the returned bunch); the last line previews it.
iris_bunch = load_iris(as_frame=True)
data = iris_bunch['data']
data.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [None]:
# Baseline clustering: DBSCAN with scikit-learn's default hyper-parameters.
# fit_predict fits the estimator and hands back its labels_ array in one call.
model = DBSCAN()
preds = model.fit_predict(data)

In [None]:
# Show the label DBSCAN assigned to each row of `data`
# (scikit-learn uses -1 for points it treats as noise).
preds

array([ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0, -1,  0,  0,  0,  0,  0,  0,  0,  0,  1,
        1,  1,  1,  1,  1,  1, -1,  1,  1, -1,  1,  1,  1,  1,  1,  1,  1,
       -1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
        1,  1, -1,  1,  1,  1,  1,  1, -1,  1,  1,  1,  1, -1,  1,  1,  1,
        1,  1,  1, -1, -1,  1, -1, -1,  1,  1,  1,  1,  1,  1,  1, -1, -1,
        1,  1,  1, -1,  1,  1,  1,  1,  1,  1,  1,  1, -1,  1,  1, -1, -1,
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1])

### MANUAL HYPER-PARAMETER TUNING FOR DBSCAN

In [None]:
# Manual grid search over DBSCAN's `eps` and `min_samples`.
#
# Every combination is fitted on `data` and scored with the silhouette
# coefficient. Candidates are fitted into a throwaway name so the search does
# not leave `model` / `preds` pointing at whichever combination happened to run
# last; after the search, `model` and `preds` are rebound to the best-scoring
# configuration so that later cells inspect and plot that clustering.
#
# NOTE: labels are passed to silhouette_score unchanged, so noise points
# (label -1) are scored as if they formed one more cluster.
epsilons = [x / 100 for x in range(10, 101, 5)]  # 0.10, 0.15, ..., 1.00
samples = list(range(3, 10))                     # min_samples candidates 3..9

tuning_results = []  # (eps, min_samples, score) for each combination that could be scored

for epsilon in epsilons:
  for sample in samples:
    candidate = DBSCAN(eps=epsilon, min_samples=sample).fit(data)
    try:
      # silhouette_score raises ValueError when the labelling has fewer than
      # two distinct labels (e.g. every point was flagged as noise).
      score = silhouette_score(X=data, labels=candidate.labels_)
    except ValueError:
      score = None
    else:
      tuning_results.append((epsilon, sample, score))
    print(f"Epsilon: {epsilon}, Min sample: {sample}.....score: {score}")

if tuning_results:
  # Highest silhouette wins; on ties the first combination tried is kept.
  best_eps, best_min_samples, best_score = max(tuning_results, key=lambda r: r[2])
  model = DBSCAN(eps=best_eps, min_samples=best_min_samples).fit(data)
  preds = model.labels_
  print(f"Best -> Epsilon: {best_eps}, Min sample: {best_min_samples}.....score: {best_score}")
else:
  # Nothing could be scored: `model` / `preds` are deliberately left untouched.
  print("No (eps, min_samples) combination produced a scorable clustering.")

Epsilon: 0.1, Min sample: 3.....score: None
Epsilon: 0.1, Min sample: 4.....score: None
Epsilon: 0.1, Min sample: 5.....score: None
Epsilon: 0.1, Min sample: 6.....score: None
Epsilon: 0.1, Min sample: 7.....score: None
Epsilon: 0.1, Min sample: 8.....score: None
Epsilon: 0.1, Min sample: 9.....score: None
Epsilon: 0.15, Min sample: 3.....score: -0.3176350649922472
Epsilon: 0.15, Min sample: 4.....score: 0.13873657641911902
Epsilon: 0.15, Min sample: 5.....score: 0.09264605166785017
Epsilon: 0.15, Min sample: 6.....score: 0.07957245123544254
Epsilon: 0.15, Min sample: 7.....score: None
Epsilon: 0.15, Min sample: 8.....score: None
Epsilon: 0.15, Min sample: 9.....score: None
Epsilon: 0.2, Min sample: 3.....score: -0.34352957424599945
Epsilon: 0.2, Min sample: 4.....score: -0.32980543833913967
Epsilon: 0.2, Min sample: 5.....score: 0.1825642832387009
Epsilon: 0.2, Min sample: 6.....score: 0.1584293948698758
Epsilon: 0.2, Min sample: 7.....score: 0.12266342750596144
Epsilon: 0.2, Min samp

In [None]:
# Show the current contents of `preds` (one DBSCAN label per row of `data`;
# scikit-learn uses -1 for points it treats as noise).
preds

array([-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1])

In [None]:
# 3-D scatter of three Iris features, coloured by the labels currently in `preds`.
# Cluster ids are categories, not magnitudes: passing the integer array directly
# makes Plotly Express draw a continuous colour bar, so the labels are cast to
# strings to get one discrete colour (and legend entry) per cluster.
cluster_labels = np.asarray(preds).astype(str)

px.scatter_3d(
    data_frame=data,
    x="sepal length (cm)",
    y="sepal width (cm)",
    z="petal length (cm)",
    color=cluster_labels,
    labels={"color": "cluster"},  # legend title for the unnamed array passed as `color`
    title="DBSCAN cluster assignments (Iris)",
)