# Cluster visualization of the (fuzzy) Iris dataset
This notebook provides 3D visualizations of the Iris dataset, comparing its original species labels with a fuzzy clustering of the same samples.

In [1]:
import plotly.plotly as py
import plotly.graph_objs as go
import pandas as pd

## Data preparation

In [2]:
# Run the Ammonite script that generates the predictions CSV, discarding both
# stdout and stderr. The redirection order matters: stdout is pointed at
# /dev/null first, then stderr is duplicated onto it.
!ammonite/iris.amm > /dev/null 2>&1

The following code imports the CSV file with the fuzzy partitioned dataset generated by the `iris.amm` script.

In [3]:
# Labels for the header-less predictions CSV, in file column order.
prediction_columns = [
    '_SepalLength', '_SepalWidth', '_PetalLength', '_PetalWidth',
    'Cluster0', 'Cluster1', 'Cluster2', 'Closest',
]


def _cluster_label(idx):
    """Turn a numeric cluster index (e.g. 0 or 0.0) into a label such as 'Cluster0'."""
    return 'Cluster' + str(int(idx))


predictions = pd.read_csv('ammonite/iris_predictions.csv', header=None)
predictions.columns = prediction_columns

# Replace the numeric index in `Closest` with the name of the matching cluster column.
predictions['Closest'] = predictions['Closest'].map(_cluster_label)

Next, we read the original Iris data provided by Plotly and append our `predictions` to it column-wise, so that rows are matched by their row index.

In [4]:
# Reference Iris measurements from Plotly's sample datasets; kept under its own
# name so the raw frame stays available after the concatenation below.
iris_raw = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/iris.csv')

# `pd.concat(axis=1)` aligns on the row index and silently pads with NaN when the
# frames differ in length, so fail fast if the two inputs do not line up.
assert len(iris_raw) == len(predictions), (
    'row count mismatch: %d Iris rows vs %d prediction rows'
    % (len(iris_raw), len(predictions))
)

df = pd.concat([iris_raw, predictions], axis=1)

# Preview only the first rows instead of dumping the whole frame into the output.
df.head(10)

Unnamed: 0,SepalLength,SepalWidth,PetalLength,PetalWidth,Name,_SepalLength,_SepalWidth,_PetalLength,_PetalWidth,Cluster0,Cluster1,Cluster2,Closest
0,5.1,3.5,1.4,0.2,Iris-setosa,5.1,3.5,1.4,0.2,0.996336,0.001163,0.002501,Cluster0
1,4.9,3.0,1.4,0.2,Iris-setosa,4.9,3.0,1.4,0.2,0.976945,0.007159,0.015896,Cluster0
2,4.7,3.2,1.3,0.2,Iris-setosa,4.7,3.2,1.3,0.2,0.980304,0.006263,0.013433,Cluster0
3,4.6,3.1,1.5,0.2,Iris-setosa,4.6,3.1,1.5,0.2,0.968301,0.009837,0.021862,Cluster0
4,5.0,3.6,1.4,0.2,Iris-setosa,5.0,3.6,1.4,0.2,0.993927,0.001942,0.004131,Cluster0
5,5.4,3.9,1.7,0.4,Iris-setosa,5.4,3.9,1.7,0.4,0.933123,0.021077,0.045800,Cluster0
6,4.6,3.4,1.4,0.3,Iris-setosa,4.6,3.4,1.4,0.3,0.979482,0.006508,0.014010,Cluster0
7,5.0,3.4,1.5,0.2,Iris-setosa,5.0,3.4,1.5,0.2,0.999611,0.000121,0.000268,Cluster0
8,4.4,2.9,1.4,0.2,Iris-setosa,4.4,2.9,1.4,0.2,0.931535,0.021539,0.046926,Cluster0
9,4.9,3.1,1.5,0.1,Iris-setosa,4.9,3.1,1.5,0.1,0.983684,0.005044,0.011272,Cluster0


## Plot configuration
In this section we configure the plot layout and provide a convenience function for plotting the data.

In [5]:
def _axis_style():
    """Return a fresh style dict for one scene axis (white grid on a light grey backdrop)."""
    return {
        'gridcolor': 'rgb(255, 255, 255)',
        'zerolinecolor': 'rgb(255, 255, 255)',
        'showbackground': True,
        'backgroundcolor': 'rgb(230, 230,230)',
    }


# Figure layout used by the plots: fixed canvas size and a 3D scene whose three
# axes use the same styling (each axis gets its own dict instance).
layout = {
    'width': 800,
    'height': 550,
    'autosize': False,
    'scene': dict(
        [(axis, _axis_style()) for axis in ('xaxis', 'yaxis', 'zaxis')],
        aspectratio={'x': 1, 'y': 1, 'z': 0.7},
        aspectmode='manual',
    ),
}

In [6]:
def plot(df, column_name, plot_name):
    """Plot sepal length/width and petal length in 3D, grouped by `column_name`.

    For every distinct value of ``df[column_name]`` (in order of first
    appearance) two traces are added: a ``scatter3d`` marker trace whose
    ``text`` lists each row's ``Cluster0``/``Cluster1``/``Cluster2`` values,
    and a translucent ``mesh3d`` over the same points in the same color.
    The figure uses the module-level ``layout``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``SepalLength``, ``SepalWidth``,
        ``PetalLength``, ``Cluster0``, ``Cluster1``, ``Cluster2`` and
        `column_name`.
    column_name : str
        Column whose distinct values define the groups.
    plot_name : str
        Passed to ``py.iplot`` as ``filename``.

    Returns
    -------
    Whatever ``py.iplot`` returns for the assembled figure.
    """
    colors = ['rgb(228,26,28)', 'rgb(55,126,184)', 'rgb(77,175,74)']
    data = []
    # `unique()` is evaluated once; it keeps first-appearance order, which
    # determines which color each group receives.
    for i, name in enumerate(df[column_name].unique()):
        # Cycle through the palette so more than three groups do not raise IndexError.
        color = colors[i % len(colors)]

        # Select the group's rows once and reuse the selection for every field.
        group = df[df[column_name] == name]
        x = group['SepalLength']
        y = group['SepalWidth']
        z = group['PetalLength']

        trace = dict(
            name = name,
            x = x, y = y, z = z,
            type = "scatter3d",
            mode = 'markers',
            marker = dict(size=3, color=color, line=dict(width=0)),
            text = \
                'Cluster0: ' + \
                group['Cluster0'].astype('str') + \
                '<br>Cluster1: ' + \
                group['Cluster1'].astype('str') + \
                '<br>Cluster2: ' + \
                group['Cluster2'].astype('str'),
            # NOTE(review): "top" does not look like a documented scatter3d
            # textposition value (e.g. "top center"); it is kept unchanged and
            # passes only because validate=False below. Confirm against the
            # Plotly reference.
            textposition = "top"
        )
        data.append(trace)

        cluster = dict(
            color = color,
            opacity = 0.3,
            type = "mesh3d",
            x = x, y = y, z = z
        )
        data.append(cluster)
    fig = dict(data=data, layout=layout)
    return py.iplot(fig, filename=plot_name, validate=False)

## The hard partitioned clusters

In [7]:
plot(df, 'Name', 'iris-hard-partitioned-clusters')

## The fuzzy partitioned clusters

In [8]:
plot(df, 'Closest', 'iris-fuzzy-partitioned-clusters')