## Importing The Dependencies

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns

# Loading the data

In [None]:
# Load the Iris table from the working directory and remove the 'Id' column
# in the same expression, so the frame is never mutated in place.
iris = pd.read_csv('iris.csv').drop(columns='Id')

In [None]:
# Show the first five rows as a quick look at the loaded frame.
iris.head(n=5)

In [None]:
# Column-wise split: every column except the last becomes X,
# and the last column becomes y.
X, y = iris.iloc[:, :-1], iris.iloc[:, -1]

In [None]:
# First rows of the frame, shaded with a seaborn cubehelix colormap.
(
    iris.head()
    .style
    .background_gradient(cmap=sns.cubehelix_palette(as_cmap=True))
)

# EDA

In [None]:
# Pie chart with one slice per distinct value of the 'Species' column.
fig = px.pie(
    iris,
    names='Species',
    color_discrete_sequence=['#491D8B', '#7D3AC1', '#EB548C'],
    title='data_distribution',
    template='plotly',
)
fig.show()

# Sepal Length

In [None]:
# Vertical box plot of sepal length, one box per species.
# The palette is the same three colours used by every other species chart in
# this notebook, so each species keeps one colour across all figures (the
# first entry was previously '#29066B', unlike the other charts).
fig = px.box(
    iris,
    x='Species',
    y='SepalLengthCm',
    color='Species',
    color_discrete_sequence=['#491D8B', '#7D3AC1', '#EB548C'],
    orientation='v',
)
fig.show()

In [None]:
# Number of rows for each distinct value in the 'Species' column.
iris['Species'].value_counts()

In [None]:
# Histogram of sepal length, coloured by species (nbins=50).
fig = px.histogram(
    iris,
    x='SepalLengthCm',
    color='Species',
    color_discrete_sequence=['#491D8B', '#7D3AC1', '#EB548C'],
    nbins=50,
)
fig.show()

# Sepal Width

In [None]:
# Vertical box plot of sepal width, one box per species.
# Species goes on x and the measurement on y, matching the other box plots in
# this notebook. The previous call passed the measurement as x with no y while
# still asking for a vertical orientation, so it did not draw per-species
# vertical boxes.
fig = px.box(
    iris,
    x='Species',
    y='SepalWidthCm',
    color='Species',
    color_discrete_sequence=['#491D8B', '#7D3AC1', '#EB548C'],
    orientation='v',
)
fig.show()

In [None]:
# Histogram of sepal width, coloured by species (nbins=50).
fig = px.histogram(
    iris,
    x='SepalWidthCm',
    color='Species',
    color_discrete_sequence=['#491D8B', '#7D3AC1', '#EB548C'],
    nbins=50,
)
fig.show()

# Petal Length

In [None]:
# Vertical box plot of petal length, one box per species.
fig = px.box(
    data_frame=iris,
    x='Species',
    y='PetalLengthCm',
    color='Species',
    color_discrete_sequence=['#491D8B', '#7D3AC1', '#EB548C'],
    orientation='v',
)
fig.show()

In [None]:
# Histogram of petal length, coloured by species (nbins=50).
fig = px.histogram(
    data_frame=iris,
    x='PetalLengthCm',
    color='Species',
    color_discrete_sequence=['#491D8B', '#7D3AC1', '#EB548C'],
    nbins=50,
)
fig.show()

# Petal Width


In [None]:
# Vertical box plot of petal width, one box per species.
fig = px.box(
    data_frame=iris,
    x='Species',
    y='PetalWidthCm',
    color='Species',
    color_discrete_sequence=['#491D8B', '#7D3AC1', '#EB548C'],
    orientation='v',
)
fig.show()

In [None]:
# Histogram of petal width, coloured by species (nbins=30).
fig = px.histogram(
    iris,
    x='PetalWidthCm',
    color='Species',
    color_discrete_sequence=['#491D8B', '#7D3AC1', '#EB548C'],
    nbins=30,
)
fig.show()

### From these plots we conclude that: 

* **Setosa has much smaller PetalWidth than the other 2 classes**

* **This difference is less clear between Virginica and Versicolor**

* **Overall, PetalWidth seems like an interesting feature**

In [None]:
# Sepal length vs. sepal width, coloured by species; marker size is driven by
# the 'PetalLengthCm' column.
fig = px.scatter(
    iris,
    x='SepalLengthCm',
    y='SepalWidthCm',
    color='Species',
    size='PetalLengthCm',
    template='seaborn',
    color_discrete_sequence=['#491D8B', '#7D3AC1', '#EB548C'],
)

# Fixed canvas size and a purple tint on both axes.
fig.update_layout(
    width=800,
    height=600,
    xaxis={'color': '#BF40BF'},
    yaxis={'color': '#BF40BF'},
)
fig.show()

In [None]:
# Petal length vs. petal width, coloured by species; marker size is driven by
# the 'SepalLengthCm' column.
fig = px.scatter(
    iris,
    x='PetalLengthCm',
    y='PetalWidthCm',
    color='Species',
    size='SepalLengthCm',
    template='seaborn',
    color_discrete_sequence=['#491D8B', '#7D3AC1', '#EB548C'],
)

# Fixed canvas size and a purple tint on both axes.
fig.update_layout(
    width=800,
    height=600,
    xaxis={'color': '#BF40BF'},
    yaxis={'color': '#BF40BF'},
)
fig.show()

# KMeans
## Using the elbow method to find the optimal number of clusters for k-means clustering

In [None]:
from sklearn.cluster import KMeans

In [None]:
# Elbow method: fit KMeans for k = 1..8 and record each model's inertia_
# (collected here as SSE) so the bend in the curve can be read off the plot.
cluster_counts = list(range(1, 9))

sse = []
for k in cluster_counts:
    # random_state and n_init are pinned so the curve is the same on every
    # run; the values match the final KMeans model fitted in this notebook.
    kmeans = KMeans(n_clusters=k, max_iter=300, n_init=10, random_state=0)
    kmeans.fit(X)
    sse.append(kmeans.inertia_)

# Pass the cluster counts explicitly as x. With only y given, the x-axis is
# the 0-based position in the list, which labels k=1 as 0 and shifts the
# whole "Clusters" axis by one.
fig = px.line(x=cluster_counts, y=sse, template="seaborn", title='Elbow Method')
fig.update_layout(
    width=800,
    height=600,
    title_font_color="#BF40BF",
    xaxis=dict(color="#BF40BF", title="Clusters"),
    yaxis=dict(color="#BF40BF", title="SSE"),
)

In [None]:
# Final model: three clusters with k-means++ initialisation and a fixed seed.
kmeans = KMeans(
    n_clusters=3,
    init='k-means++',
    max_iter=300,
    n_init=10,
    random_state=0,
)
# Fit on X and keep the cluster label assigned to each row.
clusters = kmeans.fit_predict(X)

# Evaluation

In [None]:
# Scatter of the first two feature columns, coloured by KMeans cluster label,
# with the fitted centroids drawn on top.

# X is a pandas DataFrame, which does not accept NumPy-style `X[mask, col]`
# indexing (it raises), so slice the underlying array instead.
points = X.to_numpy()
centroids = kmeans.cluster_centers_

fig = go.Figure()

# KMeans label numbers are arbitrary and are not tied to a species, so traces
# are named by cluster id rather than by a species name.
for label, colour in enumerate(('#DB4CB2', '#c9e9f6', '#7D3AC1')):
    members = points[clusters == label]
    fig.add_trace(go.Scatter(
        x=members[:, 0], y=members[:, 1],
        mode='markers', marker_color=colour, name=f'Cluster {label}'
    ))

fig.add_trace(go.Scatter(
    x=centroids[:, 0], y=centroids[:, 1],
    mode='markers', marker_color='#CAC9CD', marker_symbol=4, marker_size=13, name='Centroids'
))

# Axis titles come from the names of the two plotted columns so the figure
# can be read on its own.
fig.update_layout(
    template='plotly_dark', width=1000, height=500, title='Kmean Clustering Results',
    xaxis_title=str(X.columns[0]), yaxis_title=str(X.columns[1])
)