# Author : Subas Gupta

## Task 2 : Prediction using Unsupervised ML

In [1]:
# importing the required libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
# loading our dataset using pandas
df = pd.read_csv('Iris.csv')

In [3]:
df.shape

(150, 6)

In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 6 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Id             150 non-null    int64  
 1   SepalLengthCm  150 non-null    float64
 2   SepalWidthCm   150 non-null    float64
 3   PetalLengthCm  150 non-null    float64
 4   PetalWidthCm   150 non-null    float64
 5   Species        150 non-null    object 
dtypes: float64(4), int64(1), object(1)
memory usage: 7.2+ KB


In [5]:
# Visualising the dataset
import plotly.express as px
import plotly.offline as py
py.init_notebook_mode(connected=True)
import plotly.graph_objs as go
from plotly.figure_factory import create_table

In [6]:
table = create_table(df.head())
py.iplot(table)

In [7]:
fig = px.scatter_matrix(df,
    dimensions=["SepalLengthCm", "SepalWidthCm", "PetalLengthCm", "PetalWidthCm"],
    color="Species")
fig.show()

In [8]:
import plotly.graph_objects as go

PLOT = go.Figure()


for C in list(df['Species'].unique()):
    
    PLOT.add_trace(go.Scatter3d(x = df[df['Species'] == C]['SepalLengthCm'],
                                y =df[df['Species'] == C]['SepalWidthCm'],
                                z = df[df['Species'] == C]['PetalLengthCm'],
                                mode = 'markers', marker_size = 8, marker_line_width = 1,
                                name = 'Cluster ' + str(C)))


PLOT.update_layout(width = 1000, height = 1000, autosize = True, showlegend = True,
                   scene = dict(xaxis=dict(title = 'SepalLengthCm', titlefont_color = 'black'),
                                yaxis=dict(title = 'SepalWidthCm',  titlefont_color = 'black'),
                                zaxis=dict(title = 'PetalLengthCm', titlefont_color = 'black')),
                   font = dict(family = "Gilroy", color  = 'black', size = 12))

In [None]:
# since our task is to cluster, Id column is not necessary while clustering so we can drop it.

In [9]:
!pip3 install chart_studio



In [10]:
X = df.iloc[:,[1, 2, 3, 4]].values

In [11]:
X

array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       [4.6, 3.1, 1.5, 0.2],
       [5. , 3.6, 1.4, 0.2],
       [5.4, 3.9, 1.7, 0.4],
       [4.6, 3.4, 1.4, 0.3],
       [5. , 3.4, 1.5, 0.2],
       [4.4, 2.9, 1.4, 0.2],
       [4.9, 3.1, 1.5, 0.1],
       [5.4, 3.7, 1.5, 0.2],
       [4.8, 3.4, 1.6, 0.2],
       [4.8, 3. , 1.4, 0.1],
       [4.3, 3. , 1.1, 0.1],
       [5.8, 4. , 1.2, 0.2],
       [5.7, 4.4, 1.5, 0.4],
       [5.4, 3.9, 1.3, 0.4],
       [5.1, 3.5, 1.4, 0.3],
       [5.7, 3.8, 1.7, 0.3],
       [5.1, 3.8, 1.5, 0.3],
       [5.4, 3.4, 1.7, 0.2],
       [5.1, 3.7, 1.5, 0.4],
       [4.6, 3.6, 1. , 0.2],
       [5.1, 3.3, 1.7, 0.5],
       [4.8, 3.4, 1.9, 0.2],
       [5. , 3. , 1.6, 0.2],
       [5. , 3.4, 1.6, 0.4],
       [5.2, 3.5, 1.5, 0.2],
       [5.2, 3.4, 1.4, 0.2],
       [4.7, 3.2, 1.6, 0.2],
       [4.8, 3.1, 1.6, 0.2],
       [5.4, 3.4, 1.5, 0.4],
       [5.2, 4.1, 1.5, 0.1],
       [5.5, 4.2, 1.4, 0.2],
       [4.9, 3

In [12]:
from sklearn.cluster import KMeans
wcss = []

for i in range(1, 11):
    kmeans = KMeans(n_clusters = i, init = 'k-means++', 
                    max_iter = 300, n_init = 10, random_state = 0)
    kmeans.fit(X)
    wcss.append(kmeans.inertia_)

In [13]:

px.line(x = range(1,11), 
        y= wcss,
       title="The Elbow Method",
       labels={'x': "Number of clusters",'y':"WCSS"})


In [14]:
# Applying kmeans to the dataset / Creating the kmeans classifier
kmeans = KMeans(n_clusters = 3, init = 'k-means++',
                max_iter = 300, n_init = 10, random_state = 0)
y_kmeans = kmeans.fit_predict(X)

In [15]:
y_kmeans

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 0, 0, 0, 0, 2, 0, 0, 0,
       0, 0, 0, 2, 2, 0, 0, 0, 0, 2, 0, 2, 0, 2, 0, 0, 2, 2, 0, 0, 0, 0,
       0, 2, 0, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 2], dtype=int32)

In [16]:
# Represent neighborhoods as in previous bubble chart, adding cluster information under color.
trace0 = go.Scatter(x=X[y_kmeans == 0, 0],
                     y = X[y_kmeans == 0, 1],
                    
                     mode='markers',
                     marker=go.Marker(sizemode='diameter',
                                       opacity=0.9,
                                     size = 12,
                                     color = 'red'),
                     name='Iris-setosa',
                     showlegend=True
)

trace1 = go.Scatter(x=X[y_kmeans == 1, 0],
                     y = X[y_kmeans == 1, 1],
                     mode='markers',
                     marker=go.Marker(sizemode='diameter',
                                       opacity=0.9,
                                     size = 12,
                                     color = 'blue'),
                    name='Iris-versicolour',
                     showlegend=True
)
trace2 = go.Scatter(x=X[y_kmeans == 2, 0],
                     y = X[y_kmeans == 2, 1],
                     mode='markers',
                     marker=go.Marker(sizemode='diameter',
                                       opacity=0.9,
                                     size = 12,
                                     color = 'green'),
                    name='Iris-virginica',
                     showlegend=True
)


plotly.graph_objs.Marker is deprecated.
Please replace it with one of the following more specific types
  - plotly.graph_objs.scatter.Marker
  - plotly.graph_objs.histogram.selected.Marker
  - etc.




In [17]:
# Represent cluster centers.
trace3 = go.Scatter(x=kmeans.cluster_centers_[:, 0],
                     y=kmeans.cluster_centers_[:,1],
                     name='Centroid',
                     mode='markers',
                     marker=go.Marker(symbol='circle',
                                       size=20,
                                      color='yellow'),
                     showlegend=True)
data1 = go.Data([trace0, trace1,trace2,trace3])

fig1 = go.Figure(data=data1)


plotly.graph_objs.Data is deprecated.
Please replace it with a list or tuple of instances of the following types
  - plotly.graph_objs.Scatter
  - plotly.graph_objs.Bar
  - plotly.graph_objs.Area
  - plotly.graph_objs.Histogram
  - etc.




In [18]:
py.iplot(fig1, filename='baltimore-2dim')