#**PETAL OF IRIS** 


##KMEANS CLUSTERING
---

###Import Library K-Means
---
Import terlebih dahulu library yang dibutuhkan

In [None]:
import pandas as pd
import seaborn as sns
import numpy as np
import plotly as py
import matplotlib.pyplot as plt
from sklearn import metrics
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import silhouette_score

###Read CSV Files
---
Membaca file .csv yang akan di*cluster*kan.

In [None]:
# Load the dataset from Iris.csv (path is relative to the working directory)
# into a DataFrame, then preview its first rows.
iris = pd.read_csv('Iris.csv')
iris.head()

###File Shape
---
Menunjukkan banyak kolom dan baris dari data yang sudah di*upload*

In [None]:
# Dimensions of the loaded DataFrame as a (rows, columns) tuple.
iris.shape

###Deskripsi Data
---
Menunjukkan deskripsi dari data yang digunakan.

In [None]:
# Summary statistics for the DataFrame's columns.
iris.describe()

###Data Checking
---
Mengecek apakah isi dari data terdapat null atau tidak.

In [None]:
# Count missing values per column; all zeros means no nulls to handle.
iris.isnull().sum()

###Data Scatter Plot
---
Menyajikan hasil dari scatter plot dari data Iris.

In [None]:
# Scatter petal length against petal width, one semi-transparent layer per
# species so that each class gets its own colour and legend entry.
plt.figure(1, figsize=(15, 6))
species_names = ('Iris-setosa', 'Iris-versicolor', 'Iris-virginica')
for species in species_names:
    subset = iris[iris['Species'] == species]  # rows belonging to this species only
    plt.scatter(x='PetalLengthCm', y='PetalWidthCm', data=subset,
                s=200, alpha=0.5, label=species)
plt.xlabel('Petal Length (cm)')
plt.ylabel('Petal Width (cm)')
plt.title('Petal Of Iris')
plt.legend()
plt.show()

###Elbow Method
---
Menyajikan Elbow Method dari data Iris

In [None]:
# Feature matrix used for clustering: the two petal measurements as an array.
iris_x = iris[['PetalLengthCm', 'PetalWidthCm']].values

# Elbow method: fit K-Means for k = 1..10 and record the inertia of each fit.
inertia = []
for n in range(1, 11):
    kmeans = KMeans(
        n_clusters=n,
        init='k-means++',
        n_init=10,
        max_iter=300,
        tol=0.0001,
        random_state=200,  # fixed seed so the curve is reproducible
        algorithm='elkan',
    ).fit(iris_x)
    inertia.append(kmeans.inertia_)


In [None]:
# Plot inertia against the number of clusters: markers for the individual
# fits plus a faint connecting line to make the "elbow" easier to spot.
cluster_counts = np.arange(1, 11)
plt.figure(1, figsize=(15, 6))
plt.plot(cluster_counts, inertia, 'o')
plt.plot(cluster_counts, inertia, '-', alpha=0.5)
plt.xlabel('Number of Clusters')
plt.ylabel('Inertia')
plt.title('Petal Of Iris')
plt.show()

###Pengubahan Data menjadi Array Numpy
---
Data yang ada diubah menjadi Array Numpy untuk mempermudah clustering 

In [None]:
# Build a NumPy array from the petal feature matrix and print it for inspection.
x_array =  np.array(iris_x)
print(x_array)

###Data Preprocessing
---
Melakukan standardisasi pada nilai array menggunakan StandardScaler

In [None]:
# Standardize the features with StandardScaler (fit and transform in one step)
# and display the scaled array.
scaler = StandardScaler()
x_scaled = scaler.fit_transform(x_array)
x_scaled

###K-Means Clustering
---
Melakukan clustering pada data Iris dan menentukan centroidnya.

In [None]:
# Final K-Means model with 5 clusters, using the same hyperparameters as the
# elbow search.
# NOTE(review): the model is fit on the unscaled iris_x, not on x_scaled —
# confirm this is intended.
kmeans = KMeans(
    n_clusters=5,
    init='k-means++',
    n_init=10,
    max_iter=300,
    tol=0.0001,
    random_state=200,
    algorithm='elkan',
).fit(iris_x)

centroid = kmeans.cluster_centers_  # one (petal length, petal width) row per cluster
iris["Cluster"] = kmeans.labels_    # attach each row's cluster id to the DataFrame
iris.head()

###Scatter Plot setelah Clustering
---
Scatter plot hasil clustering menggunakan K-Means.

In [None]:
# Build a dense grid covering the feature range (padded by 1 on every side)
# and predict a cluster for every grid point; used as the plot background.
h = 0.02  # grid step size
petal_length, petal_width = iris_x[:, 0], iris_x[:, 1]
x_min, x_max = petal_length.min() - 1, petal_length.max() + 1
y_min, y_max = petal_width.min() - 1, petal_width.max() + 1
grid_x = np.arange(x_min, x_max, h)
grid_y = np.arange(y_min, y_max, h)
xx, yy = np.meshgrid(grid_x, grid_y)
grid_points = np.c_[xx.ravel(), yy.ravel()]  # one (x, y) row per grid point
Z = kmeans.predict(grid_points)

In [None]:
# Draw the K-Means decision regions as a pastel background, then overlay the
# observations (coloured by assigned cluster) and the centroids in red.
plt.figure(1, figsize=(15, 7))
plt.clf()
Z = Z.reshape(xx.shape)  # flat grid predictions -> 2-D image matching the mesh
plt.imshow(Z, interpolation='nearest',
           extent=(xx.min(), xx.max(), yy.min(), yy.max()),
           cmap=plt.cm.Pastel2, aspect='auto', origin='lower')

plt.scatter(x='PetalLengthCm', y='PetalWidthCm', data=iris, c=iris["Cluster"], s=200)
plt.scatter(x=centroid[:, 0], y=centroid[:, 1], s=300, c='red', alpha=0.5)
# Axis labels name the plotted petal features (x = length, y = width).
plt.xlabel('Petal Length (cm)')
plt.ylabel('Petal Width (cm)')
plt.show()

###Silhouette Score
---
Menampilkan silhouette score dari hasil clustering.

In [None]:
# Silhouette score of the K-Means labelling. The score is evaluated on iris_x,
# the same feature space the model was fit on, so the distances used here match
# the ones that produced the labels.
# NOTE(review): any score recorded from an earlier run against x_scaled will
# differ from this value.
score = silhouette_score(iris_x, kmeans.labels_, metric="euclidean")

score

0.5306451277220161

##AGGLOMERATIVE HIERARCHY CLUSTERING
---

###Import Library AHC
---
Import library yang digunakan untuk Agglomerative Hierarchy Clustering.

In [None]:
from scipy.cluster import hierarchy
from scipy.spatial import distance_matrix
from sklearn.cluster import AgglomerativeClustering
from sklearn.preprocessing import StandardScaler
import scipy.cluster.hierarchy
from scipy.cluster.hierarchy import fcluster

###Data Preprocessing
---
Melakukan standardisasi kembali untuk AHC.

In [None]:
# Standardize the petal features with a fresh StandardScaler for the
# hierarchical clustering steps, and display the result.
aglo_iris = StandardScaler().fit_transform(iris_x)
aglo_iris

###Agglomerative Hierarchy Clustering Linkage Type

####Clustering dengan Complete Linkage AHC
---
Melakukan hierarchy clustering dengan metode complete linkage.

#####Penentuan Cluster
---
Menentukan cluster dalam data Iris.

In [None]:
# Agglomerative clustering into 4 clusters with complete linkage, fit on the
# standardized features; the last line displays the label of every row.
agglom = AgglomerativeClustering(linkage='complete', n_clusters=4).fit(aglo_iris)
agglom.labels_

#####Distance Matrix 
---
Menghitung matriks jarak antar titik data Iris.

In [None]:
# Pairwise distance matrix between all rows of the standardized features
# (square, one row and one column per observation), displayed for inspection.
dist_matrix = distance_matrix(aglo_iris,aglo_iris)
dist_matrix

#####AHC Complete Linkage Dendrogram
---
Menampilkan dendrogram hasil AHC complete linkage.

In [None]:
# Complete-linkage hierarchy on the standardized observations.
# linkage() accepts either a condensed 1-D distance vector or a 2-D matrix of
# observations; a square n x n distance matrix would be read as n observations
# with n features, so the observations themselves are passed here and linkage
# computes the Euclidean distances internally.
Z = hierarchy.linkage(aglo_iris, method='complete')
dendro = hierarchy.dendrogram(Z)

#####Silhouette Score
---
Menunjukkan silhouette score dari complete linkage.


In [None]:
# Cut the complete-linkage tree into at most 4 flat clusters and score them.
# The hierarchy was built from the standardized features, so the silhouette is
# evaluated on aglo_iris to keep clustering and scoring in the same space.
clusters = fcluster(Z, 4, criterion='maxclust')
metrics.silhouette_score(aglo_iris, clusters)

####Clustering dengan Single Linkage AHC
---
Melakukan hierarchy clustering dengan metode single linkage.

#####Penentuan Cluster
---
Menentukan cluster untuk AHC

In [None]:
# Agglomerative clustering into 4 clusters with single linkage, fit on the
# standardized features; the last line displays the label of every row.
aggloms = AgglomerativeClustering(linkage='single', n_clusters=4).fit(aglo_iris)
aggloms.labels_

#####AHC Single Linkage Dendrogram
---
Menampilkan dendrogram dari hasil AHC Single Linkage

In [None]:
# Single-linkage hierarchy on the standardized observations.
# linkage() treats a 2-D input as observation vectors, so the observations are
# passed directly instead of a square distance matrix (which would be read as
# n observations with n features).
Zs = hierarchy.linkage(aglo_iris, method='single')
dendros = hierarchy.dendrogram(Zs)

#####Silhouette Score
---
Menampilkan silhouette score untuk AHC Single Linkage

In [None]:
# Cut the single-linkage tree into at most 4 flat clusters and score them.
# The hierarchy was built from the standardized features, so the silhouette is
# evaluated on aglo_iris to keep clustering and scoring in the same space.
clusters = fcluster(Zs, 4, criterion='maxclust')
metrics.silhouette_score(aglo_iris, clusters)

####Clustering dengan Average Linkage AHC
---
Melakukan hierarchy clustering dengan metode average linkage.

#####Penentuan Cluster
---
Menentukan cluster untuk AHC Average Linkage.

In [None]:
# Agglomerative clustering into 4 clusters with average linkage, fit on the
# standardized features; the last line displays the label of every row.
aggloavg = AgglomerativeClustering(linkage='average', n_clusters=4).fit(aglo_iris)
aggloavg.labels_

#####AHC Average Linkage Dendrogram
---
Menampilkan dendrogram hasil AHC Average Linkage.

In [None]:
# Average-linkage hierarchy on the standardized observations.
# linkage() treats a 2-D input as observation vectors, so the observations are
# passed directly instead of a square distance matrix (which would be read as
# n observations with n features).
Zavg = hierarchy.linkage(aglo_iris, method='average')
dendravg = hierarchy.dendrogram(Zavg)

#####Silhouette Score
---
Menampilkan silhouette score untuk AHC Average Linkage

In [None]:
# Cut the average-linkage tree into at most 4 flat clusters and score them.
# The hierarchy was built from the standardized features, so the silhouette is
# evaluated on aglo_iris to keep clustering and scoring in the same space.
clusters = fcluster(Zavg, 4, criterion='maxclust')
metrics.silhouette_score(aglo_iris, clusters)