In [0]:
import pandas as pd
import numpy as np

## Plotly Configuration

In [0]:
def dist_color_gen(totPart, curPart):
    '''Return an "rgba(r, g, b, a)" colour string for one of several partitions.

    The hue is spread evenly around the colour wheel (curPart / totPart) with a
    fixed saturation of 0.5, value of 0.4 and alpha of 0.8. The string is used
    as the marker colour of the plotly traces.

    Parameters
    ----------
    totPart : int
        Total number of partitions (clusters) that need distinct colours.
    curPart : int
        Index of the partition to colour.

    Returns
    -------
    str
        Colour such as "rgba(102, 51, 51, 0.8)": red/green/blue are integers
        in 0..255, alpha is a float in 0..1.

    Raises
    ------
    ZeroDivisionError
        If totPart is 0.
    '''
    import colorsys

    hue = curPart / float(totPart)
    # colorsys works with channels in 0..1, whereas CSS-style rgb()/rgba()
    # strings expect 0..255; without this scaling every colour is near-black.
    channels = tuple(
        int(round(component * 255))
        for component in colorsys.hsv_to_rgb(hue, 0.5, 0.4)
    )
    return "rgba" + str(channels + (0.8,))


def configure_plotly_browser_state():
    '''Emit the RequireJS configuration that lets the notebook front end load plotly.js.

    Displays an HTML snippet in the current cell output that loads require.js
    from the notebook's static files and maps the `plotly` module name to the
    plotly CDN. Call it at the top of every cell that renders a plotly figure.

    Returns
    -------
    None
    '''
    # Import display/HTML from the public IPython.display API instead of
    # relying on the `display` name being injected into the notebook namespace.
    from IPython.display import HTML, display

    # NOTE(review): the CDN URL below pins plotly.js 1.5.1 - confirm that this
    # version matches the installed plotly Python package.
    display(HTML('''
        <script src="/static/components/requirejs/require.js"></script>
        <script>
          requirejs.config({
            paths: {
              base: '/static/base',
              plotly: 'https://cdn.plot.ly/plotly-1.5.1.min.js?noext',
            },
          });
        </script>
        '''))
    
# Emit the RequireJS/plotly.js configuration once for this cell's output
configure_plotly_browser_state()



import plotly as py
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot

# Initialise plotly's offline notebook mode before any iplot() call.
# NOTE(review): per the plotly docs, connected=False embeds plotly.js in the
# notebook instead of loading it from the CDN - confirm for the installed version.
init_notebook_mode(connected=False)



# Reading and Preprocessing the Embeddings
Using pandas data structures

In [0]:
from sklearn.preprocessing import normalize, StandardScaler

# Input CSV. It must contain a header row with a "TileID" column: read_csv
# takes the first row as column names and TileID is dropped by name below.
# Alternative embedding file: "/content/mini_auto-7lands.csv"
EMBEDDINGS_CSV = "/content/midnet-resnet-7lands.csv"

# Drop TileID (presumably a tile identifier, not a feature - verify against
# the CSV) so that only the remaining columns are used as features.
df = pd.read_csv(EMBEDDINGS_CSV).drop(columns="TileID")

# Standardize every column so that all attributes are on a comparable scale
scaler = StandardScaler()
df_scaled = scaler.fit_transform(df)

# Rescale every row (sample) to unit norm; sklearn's normalize() defaults to
# the L2 norm. This does not make the data Gaussian - it only fixes the
# length of each sample vector.
df_normalized = normalize(df_scaled)
df_normalized = pd.DataFrame(df_normalized)

# Clustering the Embedding


## OPTICS clustering
using scikit-learn

In [0]:
from sklearn.cluster import OPTICS, cluster_optics_dbscan

optics_model = OPTICS(min_samples = 10, xi = 0.05, min_cluster_size = 0.0001)

# Cluster on the feature columns only. On a re-run of this cell df_normalized
# already carries the "Cluster" column from the previous run; it must not be
# fed back into the model as a feature.
optics_features = df_normalized.drop(columns="Cluster", errors="ignore")

# fit_predict() fits the model and returns the labels in a single pass, so a
# separate fit() call beforehand would only repeat the expensive fit.
opticCluster = optics_model.fit_predict(optics_features)
df_normalized["Cluster"] = opticCluster

In [0]:
# Display the normalized embeddings together with the "Cluster" label column
df_normalized

## t-SNE dimension reduction of clustered data
using scikit-learn

In [0]:
from sklearn.manifold import TSNE

perplexity = 40
# t-SNE is stochastic; a fixed seed makes the embedding (and therefore the
# plots) reproducible across "Restart & Run All".
TSNE_RANDOM_STATE = 42

# Work on a copy so the clustered frame itself is left untouched
plotX = df_normalized.copy()

# Features for t-SNE: everything except the cluster label (built once and
# shared by the 3-D and 2-D projections)
tsne_features = plotX.drop(columns=["Cluster"])

# 3-component projection
tsne_3d = TSNE(n_components=3, perplexity=perplexity, random_state=TSNE_RANDOM_STATE)
TCs_3d = pd.DataFrame(tsne_3d.fit_transform(tsne_features),
                      columns=["TC1_3d","TC2_3d","TC3_3d"],
                      index=plotX.index)

# 2-component projection
tsne_2d = TSNE(n_components=2, perplexity=perplexity, random_state=TSNE_RANDOM_STATE)
TCs_2d = pd.DataFrame(tsne_2d.fit_transform(tsne_features),
                      columns=["TC1_2d","TC2_2d"],
                      index=plotX.index)

# Attach the t-SNE coordinates to the features + cluster labels (aligned on index)
plotX = pd.concat([plotX,TCs_3d,TCs_2d], axis=1, join='inner')

# cluster[i] holds the rows of the i-th cluster label (labels in sorted order);
# clustID_dict[i] is that label as a string, used for the plot legend.
cluster_labels = sorted(plotX['Cluster'].unique())
cluster = [plotX[plotX["Cluster"] == label] for label in cluster_labels]
clustID_dict = {position: str(label) for position, label in enumerate(cluster_labels)}

## Plot t-SNE
using plotly

### 3d plot

In [0]:
configure_plotly_browser_state()

# One Scatter3d trace per cluster so that each cluster gets its own colour
# and legend entry.
trace3d = []
for ci in range(len(cluster)):
    trace3d.append( go.Scatter3d(
                    x = cluster[ci]["TC1_3d"],
                    y = cluster[ci]["TC2_3d"],
                    z = cluster[ci]["TC3_3d"],
                    mode = "markers",
                    name = "Cluster"+ clustID_dict[ci] ,
                    marker = dict(color = dist_color_gen(len(cluster), ci)),
                    text = None)
                )


title = "Visualizing Clusters in THREE Dimensions Using T-SNE (perplexity=" + str(perplexity) + ")"

# 3-D traces are drawn in a "scene": their axes are configured under
# layout.scene (xaxis/yaxis/zaxis). Top-level layout.xaxis/yaxis only affect
# 2-D cartesian plots and would be ignored here.
layout = dict(title = title,
              scene = dict(
                  xaxis= dict(title= 'TC1',ticklen= 5,zeroline= False),
                  yaxis= dict(title= 'TC2',ticklen= 5,zeroline= False),
                  zaxis= dict(title= 'TC3',ticklen= 5,zeroline= False)
              )
             )

fig = dict(data = trace3d, layout = layout)

iplot(fig)

In [0]:
configure_plotly_browser_state()

# Build one 2-D scatter trace per cluster (own colour and legend entry each)
n_clusters = len(cluster)
trace2d = [
    go.Scatter(
        x=cluster[idx]["TC1_2d"],
        y=cluster[idx]["TC2_2d"],
        mode="markers",
        name="Cluster" + clustID_dict[idx],
        marker={"color": dist_color_gen(n_clusters, idx)},
        text=None,
    )
    for idx in range(n_clusters)
]

title = "Visualizing Clusters in TWO Dimensions Using T-SNE (perplexity=" + str(perplexity) + ")"

# Cartesian axes of the 2-D figure, labelled with the two t-SNE components
axis_style = {"ticklen": 5, "zeroline": False}
layout = {
    "title": title,
    "xaxis": dict(title='TC1', **axis_style),
    "yaxis": dict(title='TC2', **axis_style),
}

fig = {"data": trace2d, "layout": layout}

iplot(fig)