In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as mpl
%matplotlib inline
import seaborn as sns
from sklearn import datasets
from sklearn.cluster import KMeans

import warnings as wg
# Blanket filter: with no category or module given, every warning raised from
# here on is ignored -- including deprecation notices from the plotting and
# clustering calls in the cells below.
wg.filterwarnings("ignore")

In [None]:
# Load the Iris dataset shipped with scikit-learn and wrap its feature matrix
# in a DataFrame whose columns carry the feature names.
iris = datasets.load_iris()
data = pd.DataFrame(
    data=iris["data"],
    columns=iris["feature_names"],
)
# Preview the first rows as the cell output.
data.head()

In [None]:
# Print a summary of the frame (columns, dtypes, non-null counts) as a quick sanity check.
data.info()

In [None]:
# Feature matrix handed to K-Means: the first four columns of `data`.
# Despite the `_df` suffix this is a plain NumPy array, not a DataFrame.
iris_df = data.iloc[:, :4].to_numpy()

In [None]:
# Elbow method: fit K-Means for 1..14 clusters and record the within-cluster
# sum of squares of each fit (scikit-learn exposes it as `inertia_`).
wcss = []

for k in range(1, 15):
    # `kmeans` is rebound on every pass, so once the loop finishes it refers
    # to the last (14-cluster) fit.
    kmeans = KMeans(
        n_clusters=k,
        init='k-means++',
        max_iter=300,
        n_init=10,
        random_state=0,
    ).fit(iris_df)
    wcss += [kmeans.inertia_]

In [None]:
# Elbow plot: WCSS against the number of clusters tried in the previous cell.
# The figure size is set through rcParams, so it also applies to every figure
# created afterwards that does not pass its own `figsize`.
mpl.rcParams["figure.figsize"] = [5,3]

# Derive the x values from `wcss` itself so the plot cannot drift out of sync
# with the range used when fitting.
k_values = range(1, len(wcss) + 1)

# The colour is given once, via the keyword; the format string only selects
# the marker and line style (passing both 'g...' and color= makes matplotlib
# warn about a redundant colour definition).
mpl.plot(k_values, wcss, 'o--', color='green')
mpl.title('The elbow method')
mpl.xlabel('Number of clusters', size=8)
mpl.ylabel('WCSS', size=8)  # within-cluster sum of squares
mpl.grid()
mpl.show()

In [None]:
# Final clustering with three clusters; `y_kmeans` holds the integer cluster
# index assigned to each row of `iris_df`.
kmeans_params = dict(
    n_clusters=3,
    init='k-means++',
    max_iter=300,
    n_init=10,
    random_state=0,
)
model = KMeans(**kmeans_params)
y_kmeans = model.fit_predict(iris_df)

In [None]:
# 2D view of the clusters using the first two feature columns.
# NOTE(review): K-Means cluster indices are arbitrary; the species names below
# are this notebook's assumed mapping (0 -> setosa, 1 -> versicolour,
# 2 -> virginica) and are not checked against `iris.target`.
cluster_styles = (
    (0, 'orange', 'Iris-setosa'),
    (1, 'blue', 'Iris-versicolour'),
    (2, 'green', 'Iris-virginica'),
)
for cluster_id, colour, species in cluster_styles:
    members = iris_df[y_kmeans == cluster_id]
    mpl.scatter(members[:, 0], members[:, 1], s=30, c=colour, label=species)

# Centroids of the fitted 3-cluster model, projected onto the same two features.
mpl.scatter(model.cluster_centers_[:, 0], model.cluster_centers_[:, 1], s=50, c='red', label='Centroids')

# Label the axes with the feature names so the figure stands on its own.
mpl.xlabel(data.columns[0])
mpl.ylabel(data.columns[1])
mpl.title('K-Means clusters')
mpl.grid()
mpl.legend()
mpl.show()

In [None]:
# 3D view of the clusters using the first three feature columns.
fig = mpl.figure(figsize = (7,7))
ax = fig.add_subplot(111, projection='3d')

# NOTE(review): the species names are this notebook's assumed mapping of the
# arbitrary K-Means cluster indices; they are not checked against `iris.target`.
cluster_styles_3d = (
    (0, 'purple', 'Iris-setosa'),
    (1, 'orange', 'Iris-versicolour'),
    (2, 'green', 'Iris-virginica'),
)
for cluster_id, colour, species in cluster_styles_3d:
    members = iris_df[y_kmeans == cluster_id]
    # Draw on the 3D axes with an explicit z coordinate; without it every
    # point would be placed on the z=0 plane.
    ax.scatter(members[:, 0], members[:, 1], members[:, 2], s=50, c=colour, label=species)

# Centroids must come from `model` (the 3-cluster fit). `kmeans` is the
# leftover 14-cluster model from the elbow loop and would draw 14 centroids.
centres = model.cluster_centers_
ax.scatter(centres[:, 0], centres[:, 1], centres[:, 2], s=100, c='red', label='Centroids')

ax.set_xlabel(data.columns[0])
ax.set_ylabel(data.columns[1])
ax.set_zlabel(data.columns[2])
ax.legend()
mpl.show()

In [None]:
# Attach a species name to every sample based on its cluster index.
# Assumed mapping (cluster indices are arbitrary, so this is the notebook's
# working assumption rather than something verified against `iris.target`):
#   0 -> 'Iris-setosa', 1 -> 'Iris-versicolour', 2 -> 'Iris-virginica'
cluster_species = np.array(['Iris-setosa', 'Iris-versicolour', 'Iris-virginica'])

# Start from the DataFrame (`data`), not the NumPy array (`iris_df`): a new
# named column can only be added to a DataFrame, and the plots further down
# look features up by column name.
data_with_clusters = data.copy()

# `y_kmeans` holds integer labels, so index the name table with it directly.
# (Comparing the integers with the strings '0'/'1'/'2' never matches.)
# `y_kmeans` itself is left untouched so the plotting cells above can be
# re-run and this cell stays idempotent.
data_with_clusters["Cluster"] = cluster_species[y_kmeans]
data_with_clusters.head(5)

In [None]:
# Bar chart of how many samples fell into each cluster.
sns.set_style('darkgrid')

# Take labels and heights from the same Series. `.unique()` lists labels in
# order of first appearance while `.value_counts()` sorts by descending count,
# so pairing the two can put the wrong label under a bar.
cluster_sizes = data_with_clusters["Cluster"].value_counts()
sns.barplot(x = cluster_sizes.index,
            y = cluster_sizes.values,
            palette=sns.color_palette(["#e74c3c", "#34495e", "#2ecc71"]));

In [None]:
# One violin plot per feature, showing its distribution within each cluster.
violin_features = (
    "petal width (cm)",
    "sepal width (cm)",
    "petal length (cm)",
    "sepal length (cm)",
)
for feature in violin_features:
    sns.violinplot(x="Cluster", y=feature, data=data_with_clusters)
    # pyplot is imported as `mpl` in this notebook; `plt` is never defined.
    mpl.show()