In [16]:
# CLUSTERING

import numpy as np

from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans, MiniBatchKMeans, MeanShift, DBSCAN
from sklearn.cluster import AgglomerativeClustering

In [4]:
# CLUSTERING USING K-MEANS

# load data
iris = datasets.load_iris()
features = iris.data

# standardize features
scaler = StandardScaler()
features_standardized = scaler.fit_transform(features)

# create k-means object
# NOTE: `n_jobs` is deliberately not passed. KMeans deprecated it in
# scikit-learn 0.23 and removed it in 1.0, where passing it raises TypeError.
# `n_init=10` pins the historical default number of initialisations so the
# result does not shift with the version-dependent default ("auto" in >= 1.4).
cluster = KMeans(n_clusters=3,
                 random_state=0,
                 n_init=10)
# train model
model = cluster.fit(features_standardized)
# view the cluster label assigned to each observation
model.labels_



array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 2, 2, 2, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 2,
       0, 0, 0, 0, 2, 0, 0, 0, 0, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 2, 2, 2, 0, 2, 2, 2,
       2, 2, 2, 0, 0, 2, 2, 2, 2, 0, 2, 0, 2, 0, 2, 2, 0, 2, 2, 2, 2, 2,
       2, 0, 0, 2, 2, 2, 0, 2, 2, 2, 0, 2, 2, 2, 0, 2, 2, 0])

In [5]:
# Build one new sample: a single row holding four feature values of 0.8.
# The values are passed to the model as-is, so they are interpreted on the
# standardized scale the model was fitted on.
new_observation = [[0.8] * 4]
# Ask the fitted model which cluster this sample falls into
model.predict(new_observation)

array([2])

In [7]:
# View the fitted cluster centers: one row per cluster, one column per feature.
# The model was fitted on standardized features, so the coordinates are on
# that standardized scale rather than in the original measurement units.
model.cluster_centers_

array([[-0.05021989, -0.88337647,  0.34773781,  0.2815273 ],
       [-1.01457897,  0.85326268, -1.30498732, -1.25489349],
       [ 1.13597027,  0.08842168,  0.99615451,  1.01752612]])

In [9]:
# SPEED UP K-MEANS CLUSTERING WITH MINI-BATCHES

# Fetch the iris dataset and keep only its feature matrix
iris = datasets.load_iris()
features = iris["data"]

# Fit the scaler on the features, then apply it to standardize them
scaler = StandardScaler().fit(features)
features_std = scaler.transform(features)

# Configure mini-batch k-means: three clusters, fixed seed, batches of 100
cluster = MiniBatchKMeans(batch_size=100,
                          random_state=0,
                          n_clusters=3)

# Fit on the standardized features (this cell displays no output)
model = cluster.fit(features_std)



In [11]:
# CLUSTERING USING MEANSHIFT

# Fetch the iris dataset and keep only its feature matrix
iris = datasets.load_iris()
features = iris["data"]

# Fit the scaler on the features, then apply it to standardize them
scaler = StandardScaler().fit(features)
features_std = scaler.transform(features)

# Build the mean-shift clusterer; n_jobs=-1 requests all available CPU cores
cluster = MeanShift(n_jobs=-1)

# Fit on the standardized features (this cell displays no output)
model = cluster.fit(features_std)

In [14]:
# CLUSTERING USING DBSCAN

# Fetch the iris dataset and keep only its feature matrix
iris = datasets.load_iris()
features = iris["data"]

# Fit the scaler on the features, then apply it to standardize them
scaler = StandardScaler().fit(features)
features_std = scaler.transform(features)

# Build the DBSCAN clusterer; n_jobs=-1 requests all available CPU cores
cluster = DBSCAN(n_jobs=-1)

# Fit on the standardized features
model = cluster.fit(features_std)

# Display each observation's cluster label (-1 marks points treated as noise)
model.labels_

array([ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, -1, -1,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, -1, -1,
        0,  0,  0,  0,  0,  0,  0, -1,  0,  0,  0,  0,  0,  0,  0,  0,  1,
        1,  1,  1,  1,  1, -1, -1,  1, -1, -1,  1, -1,  1,  1,  1,  1,  1,
       -1,  1,  1,  1, -1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
       -1,  1, -1,  1,  1,  1,  1,  1, -1,  1,  1,  1,  1, -1,  1, -1,  1,
        1,  1,  1, -1, -1, -1, -1, -1,  1,  1,  1,  1, -1,  1,  1, -1, -1,
       -1,  1,  1, -1,  1,  1, -1,  1,  1,  1, -1, -1, -1,  1,  1,  1, -1,
       -1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1, -1,  1],
      dtype=int64)

In [18]:
# CLUSTERING USING HIERARCHICAL MERGING

# Fetch the iris dataset and keep only its feature matrix
iris = datasets.load_iris()
features = iris["data"]

# Fit the scaler on the features, then apply it to standardize them
scaler = StandardScaler().fit(features)
features_std = scaler.transform(features)

# Build the agglomerative clusterer, asking for three clusters
cluster = AgglomerativeClustering(n_clusters=3)

# Fit on the standardized features
model = cluster.fit(features_std)

# Display the cluster label assigned to each observation
model.labels_

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1,
       1, 1, 1, 1, 1, 1, 0, 0, 0, 2, 0, 2, 0, 2, 0, 2, 2, 0, 2, 0, 2, 0,
       2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 0, 2, 0, 0, 2,
       2, 2, 2, 0, 2, 2, 2, 2, 2, 0, 2, 2, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int64)