# Clustering

In [1]:
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from collections import Counter
import warnings
warnings.filterwarnings("ignore")

In [2]:
# Load the iris measurements and standardize them before clustering.
# The scaler is fitted first and then applied, so `scaler` holds the
# fitted transformer for later reuse.
iris = datasets.load_iris()
features = iris.data
scaler = StandardScaler().fit(features)
features_std = scaler.transform(features)

In [3]:
# Fit k-means with three clusters on the standardized features.
#
# `n_jobs` is deliberately not passed: KMeans deprecated the argument in
# scikit-learn 0.23 and removed it in 1.0, where passing it raises a
# TypeError.
#
# `n_init=10` pins the number of random restarts to the historical default,
# so the fit does not change on releases where the default became "auto".
cluster = KMeans(n_clusters=3, n_init=10, random_state=0)
model = cluster.fit(features_std)

<u>KMeans clustering</u>

In [4]:
# Tally how many observations k-means assigned to each cluster label.
Counter(model.labels_)

Counter({1: 50, 0: 47, 2: 53})

In [5]:
# A single made-up sample with four feature values, all equal to 0.8.
# NOTE(review): the values are passed to the model as-is, while the model
# was fitted on standardized features — confirm they are meant to be read
# in standardized units rather than raw measurements.
new_observation = [[0.8] * 4]

# Ask the fitted model which cluster this sample falls into.
model.predict(new_observation)

array([0])

In [6]:
# Display the fitted cluster centers (one row per cluster). The model was
# fitted on the standardized features, so the coordinates are in that space.
model.cluster_centers_

array([[ 1.13597027,  0.08842168,  0.99615451,  1.01752612],
       [-1.01457897,  0.85326268, -1.30498732, -1.25489349],
       [-0.05021989, -0.88337647,  0.34773781,  0.2815273 ]])

In [7]:
from sklearn.cluster import MiniBatchKMeans

In [8]:
# Mini-batch variant of k-means: same three clusters and seed, with
# batch_size set to 100.
cluster = MiniBatchKMeans(
    n_clusters=3,
    batch_size=100,
    random_state=0,
)
model = cluster.fit(features_std)

<u>MiniBatchKMeans clustering</u>

In [9]:
# Tally how many observations mini-batch k-means assigned to each cluster label.
Counter(model.labels_)

Counter({1: 50, 2: 41, 0: 59})

In [10]:
from sklearn.cluster import MeanShift

In [11]:
# Mean shift clustering on the standardized features. No number of clusters
# is specified here; n_jobs=-1 asks scikit-learn to use all processors.
cluster = MeanShift(n_jobs=-1)
cluster.fit(features_std)  # fit() returns the estimator itself
model = cluster

<u>MeanShift clustering</u>

In [12]:
# Tally how many observations mean shift assigned to each cluster label.
Counter(model.labels_)

Counter({1: 50, 0: 100})

In [13]:
from sklearn.cluster import DBSCAN

In [14]:
# DBSCAN on the standardized features. Only n_jobs is set (-1 = use all
# processors); every other setting is left at the library default.
cluster = DBSCAN(n_jobs=-1)
cluster.fit(features_std)  # fit() returns the estimator itself
model = cluster

<u>DBSCAN clustering</u>

In [15]:
# Tally how many observations DBSCAN assigned to each label. scikit-learn
# uses the label -1 for samples DBSCAN considers noise, not a real cluster.
Counter(model.labels_)

Counter({0: 45, -1: 34, 1: 71})

In [16]:
from sklearn.cluster import AgglomerativeClustering

In [17]:
# Agglomerative (hierarchical) clustering of the standardized features,
# asking for three clusters.
cluster = AgglomerativeClustering(n_clusters=3)
cluster.fit(features_std)  # fit() returns the estimator itself
model = cluster

<u>Agglomerative clustering</u>

In [18]:
# Tally how many observations agglomerative clustering assigned to each cluster label.
Counter(model.labels_)

Counter({1: 49, 2: 30, 0: 71})