# Clarans on the IRIS Dataset

<div class="alert alert-block alert-info">
<b>Content:</b> In this notebook, we test and time Clarans using the IRIS dataset.
</div>



In [None]:
import time
import pandas as pd
import matplotlib.pyplot as plt

from sklearn import datasets
from pyclustering.cluster.clarans import clarans

# Load IRIS
Again we will use only the petal features. 
This feature selection serves only to create a 2D dataset that can be visualized.
This is not something one would normally do!

In [None]:
# Read the IRIS table from the local CSV file.
iris = pd.read_csv("data/iris.csv")

# Keep only the two petal columns so the data is 2D and can be plotted.
petal_columns = ['petal_length', 'petal_width']
X = iris[petal_columns].to_numpy()

Run Clarans on the IRIS Dataset, using only features 2 and 3 (i.e. petal length and petal width)

In [None]:
# The clarans class offers no random-seed parameter; internally it initializes
# `random` from the system time, so repeated runs presumably give different results.
cla = clarans(
    X,
    number_clusters=3,  # number of clusters (medoids) to find
    numlocal=10,        # per pyclustering docs: number of local minima to obtain
    maxneighbor=10,     # per pyclustering docs: maximum number of neighbors examined
)

In [None]:
# Measure how long the clustering takes.
# time.perf_counter() is used instead of time.time(): it is a monotonic,
# high-resolution clock meant for elapsed-time measurement, whereas the wall
# clock can jump (e.g. on system clock adjustments) and distort the result.
time_start = time.perf_counter()
cla.process()
print(time.perf_counter() - time_start)  # elapsed time in seconds

In [None]:
# Fetch the clustering result: one entry per cluster, each holding the row
# indices (into X) of the instances assigned to that cluster.
cluster_array = cla.get_clusters()

# Number of clusters that were produced.
len(cluster_array)

Show the distribution of instances over clusters

In [None]:
# Report the size of every cluster.
# An f-string is used because print() already separates its arguments with a
# space, so the previously padded literals produced doubled spaces in the output.
for i, members in enumerate(cluster_array):
    print(f"Cluster {i} has {len(members)} data instances.")

Print the resulting clustering and medoids

In [None]:
# Row indices (into X) of the instances chosen as cluster medoids.
medoid_indices = cla.get_medoids()
medoid_indices

In [None]:
# Look up the feature values (petal length, petal width) of the medoid instances.
medoid_rows = cla.get_medoids()
X[medoid_rows, :]

In [None]:
# Scatter plot of the clustering: one colour per cluster, medoids circled in black.
colors = ['darkorange', 'darkmagenta', 'dodgerblue']
medoid_idx = cla.get_medoids()  # fetched once and reused for both coordinates

fig, ax = plt.subplots()

for i, members in enumerate(cluster_array):
    ax.scatter(
        X[members, 0],
        X[members, 1],
        c=colors[i % len(colors)],  # cycle colours instead of failing if there are more clusters
        label=f"Cluster {i}",
    )

# Highlight the medoids with large hollow markers drawn on top of the points.
ax.scatter(
    X[medoid_idx, 0],
    X[medoid_idx, 1],
    s=200,
    linewidth=1,
    facecolors='none',
    edgecolors='black',
    label="Medoid",
)

# The columns of X are petal_length and petal_width, in that order.
ax.set_xlabel("petal_length")
ax.set_ylabel("petal_width")
ax.set_title("Clarans clustering of the IRIS petal features")
ax.legend();  # labels were set before but never displayed without a legend

<div class="alert alert-block alert-info">
<b>Takeaways:</b>

* In this notebook, we have run Clarans.
* The resulting cluster representatives are indeed members of the dataset -- thus prototypical instances for their clusters.
* Runtime is much higher compared to k-means!
</div>