
# DBSCAN Clustering on the Iris Dataset


The Iris dataset contains measurements for 50 flowers from each of 3 species: Setosa, Versicolor and Virginica.
For each flower, the following variables are recorded in centimeters:

- Sepal length and width
- Petal length and width
![IRIS.png](attachment:IRIS.png)




In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [None]:
# Load the Iris dataset that ships with scikit-learn; the returned object
# exposes the attributes used in the following cells (DESCR, data, target).
from sklearn.datasets import load_iris
iris = load_iris()

In [None]:
# Print the description text bundled with the dataset.
print(iris.DESCR)

In [None]:
# Feature matrix of flower measurements; this is the only input DBSCAN is
# fitted on later.
X = iris.data

In [None]:
# Display the feature matrix (a notebook renders the cell's last expression).
X

In [None]:
# Target values from the dataset (presumably the species of each flower).
# Kept for reference only: none of the clustering cells shown here read y.
y = iris.target

In [None]:
# Display the target values.
y

In [None]:
# Check the dimensions of the feature matrix before clustering.
print(X.shape)

In [None]:
from sklearn.cluster import DBSCAN
# eps is the neighbourhood radius and min_samples the number of points needed
# inside that radius for a core point (see the scikit-learn DBSCAN reference).
# The features are passed unscaled, so eps is in the data's own units
# (centimetres, per the dataset description above).
dbscan = DBSCAN(eps=0.5, min_samples=4)

In [None]:
# Fit DBSCAN on the feature matrix only; the target values in y are not used.
# DBModel holds whatever fit() returns and is the object whose labels_
# attribute is read in the next cell.
DBModel = dbscan.fit(X)
DBModel

In [None]:
# Grab the cluster label assigned to each point by the fitted model.
# A label of -1 marks a point treated as noise rather than a cluster member.
labels = DBModel.labels_
labels

In [None]:
# Distinct label values produced by the fit (includes -1 if any point is noise).
np.unique(labels)

In [None]:
# Count the clusters found: distinct labels with the noise marker (-1)
# excluded, since noise is not a cluster.
n_clusters = len(set(labels) - {-1})
n_clusters

In [None]:
#Plotting Points
plt.scatter(X[:,0],X[:,1])
plt.xlabel('Sepal Length (cm)', fontsize=16)
plt.ylabel('Sepal Width (cm)', fontsize=16)

In [None]:
# Draw one scatter series per label that DBSCAN actually produced instead of
# hard-coding cluster ids 0-2. A different eps/min_samples can yield fewer or
# more clusters; fixed ids would then add legend entries for clusters that do
# not exist, or silently leave whole clusters off the plot.
cluster_colors = ['red', 'blue', 'green', 'orange', 'purple', 'brown']

for cluster_id in np.unique(labels):  # sorted, so noise (-1) is drawn first
    members = labels == cluster_id
    if cluster_id == -1:
        color, name = 'black', 'noise'
    else:
        # Cycle through the palette so any number of clusters gets a colour;
        # ids 0, 1 and 2 keep the original red / blue / green.
        color = cluster_colors[cluster_id % len(cluster_colors)]
        name = f'Cluster {cluster_id + 1}'
    plt.scatter(X[members, 0], X[members, 1], s=30, c=color, label=name)

plt.title('Clusters of IRIS Flowers')
plt.xlabel('Sepal Length (cm)')
plt.ylabel('Sepal Width (cm)')
plt.legend()
plt.show()

In [None]:
# Show the built-in documentation for DBSCAN (parameters and attributes).
help(DBSCAN)