<a href="https://colab.research.google.com/github/22Ifeoma22/22Ifeoma22/blob/main/MultipleClusteringModels.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [32]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib


In [33]:
from sklearn import metrics
from sklearn.cluster import KMeans
from sklearn.cluster import AgglomerativeClustering
from sklearn.cluster import DBSCAN
from sklearn.cluster import MeanShift
from sklearn.cluster import Birch
from sklearn.cluster import AffinityPropagation
from sklearn.cluster import MiniBatchKMeans
import warnings
warnings.filterwarnings("ignore")


In [34]:
# Column names to apply; skiprows=1 drops the first line of the file
# (presumably its own header row -- verify against the CSV).
iris_columns = [
    'sepal-length',
    'sepal-width',
    'petal-length',
    'petal-width',
    'class',
]
iris_df = pd.read_csv("/content/sample_data/iris.csv", skiprows=1, names=iris_columns)
iris_df.head()

Unnamed: 0,sepal-length,sepal-width,petal-length,petal-width,class
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [35]:
# Shuffle the rows and renumber the index from 0. A fixed random_state makes
# the row order (and every output derived from it) reproducible on a fresh
# Restart & Run All.
iris_df = iris_df.sample(frac=1, random_state=42).reset_index(drop=True)
iris_df.head()



Unnamed: 0,sepal-length,sepal-width,petal-length,petal-width,class
0,5.0,3.2,1.2,0.2,Iris-setosa
1,6.1,2.8,4.7,1.2,Iris-versicolor
2,7.7,2.8,6.7,2.0,Iris-virginica
3,7.2,3.6,6.1,2.5,Iris-virginica
4,6.7,3.3,5.7,2.1,Iris-virginica


In [36]:
# (rows, columns) of the frame after shuffling.
iris_df.shape

(150, 5)

In [37]:
from sklearn import preprocessing

# Replace the 'class' column with integer codes produced by a LabelEncoder
# fitted on the column's string form; the fitted encoder stays available
# as `label_encoding`.
label_encoding = preprocessing.LabelEncoder()
class_as_text = iris_df['class'].astype(str)
iris_df['class'] = label_encoding.fit_transform(class_as_text)

iris_df.head()


Unnamed: 0,sepal-length,sepal-width,petal-length,petal-width,class
0,5.0,3.2,1.2,0.2,0
1,6.1,2.8,4.7,1.2,1
2,7.7,2.8,6.7,2.0,2
3,7.2,3.6,6.1,2.5,2
4,6.7,3.3,5.7,2.1,2


In [38]:
# Feature matrix: every column except the encoded 'class' target.
iris_features = iris_df.drop(columns=['class'])

iris_features.head()


Unnamed: 0,sepal-length,sepal-width,petal-length,petal-width
0,5.0,3.2,1.2,0.2
1,6.1,2.8,4.7,1.2
2,7.7,2.8,6.7,2.0
3,7.2,3.6,6.1,2.5
4,6.7,3.3,5.7,2.1


In [46]:
# Ground-truth labels: the encoded 'class' column as a Series.
iris_labels = iris_df.loc[:, 'class']
# Peek at five randomly chosen labels.
iris_labels.sample(5)



138    0
146    2
143    2
133    0
50     0
Name: class, dtype: int64

In [51]:
def build_model(clustering_model, data, labels):
    """Fit a clustering model and print a one-row table of quality scores.

    Parameters
    ----------
    clustering_model : callable
        Called as ``clustering_model(data)``; must return an object that
        exposes a ``labels_`` attribute holding the predicted cluster ids.
    data : array-like
        Feature matrix handed to ``clustering_model`` and to the silhouette
        score.
    labels : array-like
        Reference labels used by the label-based scores.

    Returns
    -------
    None
        The scores are printed to stdout: a header line, a dashed rule and
        one line with six values formatted to three decimals (homogeneity,
        completeness, V-measure, adjusted Rand index, adjusted mutual
        information, silhouette).
    """
    model = clustering_model(data)
    predicted = model.labels_

    # Use real tab characters as column separators. The previous strings
    # escaped the backslash, so a literal backslash followed by "t" was
    # printed between the columns instead of a tab.
    print('homo\tcompl\tv-meas\tARI\tAMI\tsilhouette')
    print(50 * '-')

    scores = (
        metrics.homogeneity_score(labels, predicted),
        metrics.completeness_score(labels, predicted),
        metrics.v_measure_score(labels, predicted),
        metrics.adjusted_rand_score(labels, predicted),
        metrics.adjusted_mutual_info_score(labels, predicted),
        # Silhouette is the only score computed from the features rather
        # than from the reference labels.
        metrics.silhouette_score(data, predicted),
    )
    print('\t'.join('%.3f' % score for score in scores))



In [54]:
def k_means(data, n_clusters=3, max_iter=1000, random_state=None):
    """Fit a KMeans model on ``data`` and return the fitted estimator.

    Parameters
    ----------
    data : array-like
        Feature matrix to cluster.
    n_clusters : int, default 3
        Number of clusters to form.
    max_iter : int, default 1000
        Iteration cap forwarded to ``KMeans``.
    random_state : int or None, default None
        Seed forwarded to ``KMeans``. The default ``None`` keeps the previous
        unseeded behaviour; pass an int for repeatable results.

    Returns
    -------
    The value returned by ``KMeans(...).fit(data)``.
    """
    estimator = KMeans(
        n_clusters=n_clusters,
        max_iter=max_iter,
        random_state=random_state,
    )
    return estimator.fit(data)



In [55]:
# Score the k_means wrapper (default arguments) against the iris labels.
build_model(clustering_model=k_means, data=iris_features, labels=iris_labels)

homo\tcompl\tv-meas\tARI\tAMI\tsilhouette
--------------------------------------------------
0.751\t0.765\t0.758\t0.730\t0.755\t0.553


In [57]:
def agglomerative_fn(data, n_clusters=3):
    """Fit AgglomerativeClustering on ``data`` and return the fit result.

    ``n_clusters`` is forwarded to the estimator; every other setting is
    left at the estimator's default.
    """
    clusterer = AgglomerativeClustering(n_clusters=n_clusters)
    return clusterer.fit(data)


In [59]:
# Score the agglomerative_fn wrapper (default arguments) against the iris labels.
build_model(clustering_model=agglomerative_fn, data=iris_features, labels=iris_labels)

homo\tcompl\tv-meas\tARI\tAMI\tsilhouette
--------------------------------------------------
0.761\t0.780\t0.770\t0.731\t0.767\t0.554


In [60]:
def dbscan_fn(data, eps=0.45, min_samples=4):
    """Fit DBSCAN on ``data`` and return the fit result.

    ``eps`` and ``min_samples`` are forwarded to the estimator unchanged.
    """
    params = {'eps': eps, 'min_samples': min_samples}
    return DBSCAN(**params).fit(data)



In [61]:
# Score the dbscan_fn wrapper (default arguments) against the iris labels.
build_model(clustering_model=dbscan_fn, data=iris_features, labels=iris_labels)

homo\tcompl\tv-meas\tARI\tAMI\tsilhouette
--------------------------------------------------
0.577\t0.609\t0.593\t0.508\t0.584\t0.372


In [62]:
def mean_shift_fn(data, bandwidth=0.85):
    """Fit MeanShift on ``data`` and return the fit result.

    ``bandwidth`` is forwarded to the estimator unchanged.
    """
    shifter = MeanShift(bandwidth=bandwidth)
    return shifter.fit(data)


In [63]:
# Score the mean_shift_fn wrapper (default arguments) against the iris labels.
build_model(clustering_model=mean_shift_fn, data=iris_features, labels=iris_labels)

homo\tcompl\tv-meas\tARI\tAMI\tsilhouette
--------------------------------------------------
0.760\t0.772\t0.766\t0.744\t0.763\t0.551


In [64]:
def birch_fn(data, n_clusters=3):
    """Fit Birch on ``data`` and return the fit result.

    ``n_clusters`` is forwarded to the estimator unchanged.
    """
    birch = Birch(n_clusters=n_clusters)
    return birch.fit(data)



In [70]:
# Score the birch_fn wrapper (default arguments) against the iris labels.
build_model(clustering_model=birch_fn, data=iris_features, labels=iris_labels)

homo\tcompl\tv-meas\tARI\tAMI\tsilhouette
--------------------------------------------------
0.700\t0.745\t0.722\t0.642\t0.718\t0.513


In [71]:
def affinity_propagation_fn(data, damping=0.6, max_iter=1000):
    """Fit AffinityPropagation on ``data`` and return the fit result.

    ``damping`` and ``max_iter`` are forwarded to the estimator unchanged.
    """
    settings = dict(damping=damping, max_iter=max_iter)
    propagator = AffinityPropagation(**settings)
    return propagator.fit(data)



In [72]:
# Score the affinity_propagation_fn wrapper (default arguments) against the iris labels.
build_model(clustering_model=affinity_propagation_fn, data=iris_features, labels=iris_labels)

homo\tcompl\tv-meas\tARI\tAMI\tsilhouette
--------------------------------------------------
0.851\t0.492\t0.623\t0.437\t0.612\t0.349


In [73]:
def mini_batch_kmeans_fn(data, n_clusters=3, max_iter=1000, batch_size=20,
                         random_state=None):
    """Fit a MiniBatchKMeans model on ``data`` and return the fitted estimator.

    Parameters
    ----------
    data : array-like
        Feature matrix to cluster.
    n_clusters : int, default 3
        Number of clusters to form.
    max_iter : int, default 1000
        Iteration cap forwarded to ``MiniBatchKMeans``.
    batch_size : int, default 20
        Mini-batch size forwarded to ``MiniBatchKMeans``. Previously fixed at
        20 inside the function; the default keeps that value.
    random_state : int or None, default None
        Seed forwarded to ``MiniBatchKMeans``. The default ``None`` keeps the
        previous unseeded behaviour; pass an int for repeatable results.

    Returns
    -------
    The value returned by ``MiniBatchKMeans(...).fit(data)``.
    """
    estimator = MiniBatchKMeans(
        n_clusters=n_clusters,
        max_iter=max_iter,
        batch_size=batch_size,
        random_state=random_state,
    )
    return estimator.fit(data)



In [74]:
# Score the mini_batch_kmeans_fn wrapper (default arguments) against the iris labels.
build_model(clustering_model=mini_batch_kmeans_fn, data=iris_features, labels=iris_labels)

homo\tcompl\tv-meas\tARI\tAMI\tsilhouette
--------------------------------------------------
0.787\t0.809\t0.798\t0.746\t0.795\t0.555


In [69]:
# Section label: this bare string expression is echoed as the cell's output.
"Spectral Clustering"

'Spectral Clustering'

In [65]:
from sklearn.cluster import SpectralClustering

In [75]:
SS = 1000  # self-similarity: affinity of a point with itself (matrix diagonal)

In [76]:
IS = 10  # intra-group similarity: affinity between two points of the same group

In [77]:
LS = 0.01  # low similarity: affinity between two points of different groups

In [78]:
# 9 x 9 affinity matrix for nine points in three consecutive groups of three:
# SS on the diagonal, IS between distinct points of the same group (same
# index // 3), LS everywhere else.
similarity_mat = [
    [
        SS if row == col else (IS if row // 3 == col // 3 else LS)
        for col in range(9)
    ]
    for row in range(9)
]


In [79]:
# Cluster directly on the hand-built affinity matrix ('precomputed' means the
# input is treated as similarities, not features). A fixed random_state makes
# the resulting label array reproducible across runs.
spectral_model = SpectralClustering(
    n_clusters=3,
    affinity='precomputed',
    random_state=42,
).fit(similarity_mat)

In [80]:
# Cluster id assigned to each of the nine rows of similarity_mat.
spectral_model.labels_

array([1, 1, 1, 2, 2, 2, 0, 0, 0], dtype=int32)