# Rendering Pokémon Data in 3D With Plotly

by Max Woolf (@minimaxir)

*This notebook is licensed under the MIT License. If you use the code or data visualization designs contained within this notebook, it would be greatly appreciated if proper attribution is given back to this notebook and/or myself. Thanks! :)*

In [3]:
import pandas as pd

df = pd.read_csv('pokemon_pca.csv')
print df[0:10]

   id                                       pca_features
0   1  [-1.23288451991,1.25751301158,-0.154036296149,...
1   2  [-1.25876948186,1.20488144835,-0.0856727220402...
2   3  [-1.86888535992,1.10924763187,0.233794649552,-...
3   4  [-5.5015619452,1.63057365516,1.16733920449,-2....
4   5  [-5.36193529818,1.750917732,1.16426992864,-2.6...
5   6  [-5.73002790529,1.6082049465,1.48118672493,-2....
6   7  [-5.37877428895,1.59894486609,-3.62901043204,-...
7   8  [-5.31107974746,1.57686776226,-3.24631491838,-...
8   9  [-6.27443209163,1.57820192626,-2.7857660322,0....
9  10  [0.166795762526,-0.134710812065,0.033508421016...

[10 rows x 2 columns]


Split the 50 PCA features into separate columns

In [4]:
# http://stackoverflow.com/a/29370709
# [map(lambda x: 'pc' + str(x) ,xrange(50))]
#zip(*df['pca_features'].map(lambda x: x.strip('[]').split(',')))

from pandas import *

df_pca = df['pca_features'].str[1:-1].str.split(',').apply(pd.Series).astype(float)

#df_pca['id'] = df['id']

#print df['pca_features'].str[1:-1].str.split(',').apply(pd.Series).astype(float)

print df_pca[0:10]

         0         1         2         3         4         5         6   \
0 -1.232885  1.257513 -0.154036 -4.462953  2.949458 -0.061759 -1.821282   
1 -1.258769  1.204881 -0.085673 -4.173804  2.705234  0.026046 -1.802777   
2 -1.868885  1.109248  0.233795 -3.873805  2.857019  0.704600 -2.314641   
3 -5.501562  1.630574  1.167339 -2.732808 -1.072408  0.795293 -0.984745   
4 -5.361935  1.750918  1.164270 -2.683356 -0.862958  0.575582 -0.910651   
5 -5.730028  1.608205  1.481187 -2.952475 -2.066053  1.758801 -2.425852   
6 -5.378774  1.598945 -3.629010 -0.296878  0.657976 -1.027818 -1.448678   
7 -5.311080  1.576868 -3.246315 -0.424035  0.625853 -0.880972 -1.390879   
8 -6.274432  1.578202 -2.785766  0.340570  0.726097  0.069230 -2.196737   
9  0.166796 -0.134711  0.033508 -0.320916  0.125463  0.100083  0.033249   

         7         8         9         10        11        12        13  \
0  1.412679 -3.568611  0.597976  0.109155  0.942337  1.429673 -0.622190   
1  1.305795 -3.386331  0

In [5]:
# https://github.com/danielfrg/tsne

from tsne import bh_sne
pca_3d = bh_sne(df_pca.values, d=3, perplexity=5., theta=0.0)   # completes in ~10 seconds

print pca_3d

[[  9.06698516 -58.91983405  56.96302973]
 [  9.34021358 -58.54332754  56.83858773]
 [ 10.27025974 -58.98115704  56.33810829]
 ..., 
 [  8.48132973   3.557443    56.01212604]
 [ -2.29944518  58.67083507  31.62355597]
 [ 48.23427541 -16.31896487  13.59286847]]


Prototype 3D chart in `matplotlib`.

In [6]:
# http://matplotlib.org/examples/mplot3d/scatter3d_demo.html

from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt

fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')

ax.scatter(pca_3d[:,0], pca_3d[:,1], pca_3d[:,2])

ax.set_xlabel('X Label')
ax.set_ylabel('Y Label')
ax.set_zlabel('Z Label')

fig.savefig("pokemon_pca_test.png")

![](pokemon_pca_test.png)

## Plotly

Based on the official [3D Clustering](https://plot.ly/python/3d-point-clustering/) tutorial.

In [151]:
df_3d = DataFrame(pca_3d)
df_3d.columns = ['x','y','z']

print df_3d.head(11)

            x          y          z
0    9.066985 -58.919834  56.963030
1    9.340214 -58.543328  56.838588
2   10.270260 -58.981157  56.338108
3   54.954869  -0.859728  46.132409
4   54.806082  -0.826325  46.299707
5   55.869840  -0.123644  46.561305
6    5.320937  -1.948512 -73.548244
7    5.641952  -2.042390 -73.338820
8    6.575312  -2.306488 -72.760295
9  -71.424219  17.908584  45.587366
10 -67.754186  17.425071  46.059058

[11 rows x 3 columns]


In [152]:
df_pokemon = pd.read_csv("pokemon_data/pokemon.csv")
df_pokemon = df_pokemon[(df_pokemon.id < 10000)]
df_pokemon['identifier'] = df_pokemon['identifier'].str.title()

df_types = pd.read_csv("pokemon_data/pokemon_types.csv")
df_types.columns = ['id', 'type_id', 'slot']
df_types = df_types[(df_types.slot == 1)]

df_type_names = pd.read_csv("pokemon_data/types.csv")
df_type_names.columns = ['type_id','type','generation_id','damage_class_id']

#print df_types.head()
#print df_type_names.head()

df_types = df_types.merge(df_type_names, on='type_id')
df_pokemon = df_pokemon.merge(df_types, on='id')
df_plot = concat([df_pokemon, df_3d], axis=1)
df_plot = df_plot[['id', 'identifier', 'type', 'x', 'y', 'z']]

print df_plot.head(12)

    id  identifier   type          x          y          z
0    1   Bulbasaur  grass   9.066985 -58.919834  56.963030
1    2     Ivysaur  grass   9.340214 -58.543328  56.838588
2    3    Venusaur  grass  10.270260 -58.981157  56.338108
3    4  Charmander   fire  54.954869  -0.859728  46.132409
4    5  Charmeleon   fire  54.806082  -0.826325  46.299707
5    6   Charizard   fire  55.869840  -0.123644  46.561305
6    7    Squirtle  water   5.320937  -1.948512 -73.548244
7    8   Wartortle  water   5.641952  -2.042390 -73.338820
8    9   Blastoise  water   6.575312  -2.306488 -72.760295
9   10    Caterpie    bug -71.424219  17.908584  45.587366
10  11     Metapod    bug -67.754186  17.425071  46.059058
11  12  Butterfree    bug -28.071288 -50.971519  -8.339898

[12 rows x 6 columns]



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_index,col_indexer] = value instead



Add a color palette manually for each type. Colors derived from palette used on [Bulbapedia](http://bulbapedia.bulbagarden.net/wiki/Type).

In [154]:
df_palette = DataFrame([
            ['normal', '#A8A878'],
            ['fire', '#F08030'],
            ['fighting','#C03028'],
            ['water','#6890F0'],
            ['flying','#A890F0'],
            ['grass','#78C850'],
            ['poison', '#A040A0'],
            ['electric', '#F8D030'],
            ['ground', '#E0C068'],
            ['psychic', '#F85888'],
            ['rock', '#B8A038'],
            ['ice', '#98D8D8'],
            ['bug', '#A8B820'],
            ['dragon', '#7038F8'],
            ['ghost', '#705898'],
            ['dark', '#705848'],
            ['steel', '#B8B8D0'],
            ['fairy', '#EE99AC']
        ])

print df_palette

df_palette.columns = ['type', 'typecolor']
df_plot = df_plot.merge(df_palette, on='type').sort('id')

print df_plot.head(12)

           0        1
0     normal  #A8A878
1       fire  #F08030
2   fighting  #C03028
3      water  #6890F0
4     flying  #A890F0
5      grass  #78C850
6     poison  #A040A0
7   electric  #F8D030
8     ground  #E0C068
9    psychic  #F85888
10      rock  #B8A038
11       ice  #98D8D8
12       bug  #A8B820
13    dragon  #7038F8
14     ghost  #705898
15      dark  #705848
16     steel  #B8B8D0
17     fairy  #EE99AC

[18 rows x 2 columns]
     id  identifier   type          x          y          z typecolor
0     1   Bulbasaur  grass   9.066985 -58.919834  56.963030   #78C850
1     2     Ivysaur  grass   9.340214 -58.543328  56.838588   #78C850
2     3    Venusaur  grass  10.270260 -58.981157  56.338108   #78C850
66    4  Charmander   fire  54.954869  -0.859728  46.132409   #F08030
67    5  Charmeleon   fire  54.806082  -0.826325  46.299707   #F08030
68    6   Charizard   fire  55.869840  -0.123644  46.561305   #F08030
113   7    Squirtle  water   5.320937  -1.948512 -73.548244   #6890F0

Special case: Normal/Flying Pokemon should be classified as Flying Pokemon, as the omission is notable.

In [155]:
df_normfly = pd.read_csv("pokemon_data/pokemon_types.csv")
df_normfly = df_normfly[(df_normfly.pokemon_id < 10000)]
normal1 = set(df_normfly.query("type_id == 1 & slot == 1")['pokemon_id'])
flying2 = set(df_normfly.query("type_id == 3 & slot == 2")['pokemon_id'])
              
normfly = normal1.intersection(flying2)
              
print normfly

for pokemon_id in normfly:
    df_plot.loc[df_plot.id == pokemon_id, 'type'] = 'flying'

print df_plot.head(20)

set([519, 520, 521, 396, 397, 398, 16, 17, 18, 276, 277, 22, 163, 164, 441, 333, 83, 84, 85, 627, 628, 21, 661])
     id  identifier    type          x          y          z typecolor
0     1   Bulbasaur   grass   9.066985 -58.919834  56.963030   #78C850
1     2     Ivysaur   grass   9.340214 -58.543328  56.838588   #78C850
2     3    Venusaur   grass  10.270260 -58.981157  56.338108   #78C850
66    4  Charmander    fire  54.954869  -0.859728  46.132409   #F08030
67    5  Charmeleon    fire  54.806082  -0.826325  46.299707   #F08030
68    6   Charizard    fire  55.869840  -0.123644  46.561305   #F08030
113   7    Squirtle   water   5.320937  -1.948512 -73.548244   #6890F0
114   8   Wartortle   water   5.641952  -2.042390 -73.338820   #6890F0
115   9   Blastoise   water   6.575312  -2.306488 -72.760295   #6890F0
218  10    Caterpie     bug -71.424219  17.908584  45.587366   #A8B820
219  11     Metapod     bug -67.754186  17.425071  46.059058   #A8B820
220  12  Butterfree     bug -28.071

In [156]:
# Checkpoint, for safety
df_plot.to_csv('pokemon_pca_plot.csv', index=False)

In [157]:
import plotly.plotly as py
import plotly.graph_objs as go
from plotly.offline import plot

scatter = dict(
    mode = "markers",
    name = "y",
    type = "scatter3d",
    text = df_plot['identifier'],
    hoverinfo='text',
    #color=df_plot['typecolor'],
    showlegend = True,
    legendgroup = df_plot['typecolor'],
    x = df_plot['x'], y = df_plot['y'], z = df_plot['z'],
    marker = dict( size=10, color=df_plot['typecolor'], 
                 line = dict(
                    width = 3,
                    
        ))
)

empty_axis = dict(zeroline=False, showaxeslabels=False, showticklabels=False, title='')

clusters = dict(
    alphahull = 10,
    name = "y",
    opacity = 0.1,
    color = "#1a1a1a",
    type = "mesh3d",
    hoverinfo='none',
    x = df_plot['x'], y = df_plot['y'], z = df_plot['z']
)
layout = dict(
    #title = 
    scene = dict(
        xaxis = empty_axis,
        yaxis = empty_axis,
        zaxis = empty_axis
    ),
       margin = dict(     
        l=0,
        r=0,
        t=0,
        b=0
        ),
    height=600,
    showlegend=True
)

#fig = dict( data=[scatter, clusters], layout=layout )
fig = dict( data=[scatter], layout=layout )

# Use py.iplot() for IPython notebook
plot(fig, filename = 'pokemon-3d-1.html', auto_open=False)

'file:///Users/maxwoolf/Dropbox/Spark/pokemon-3d-1.html'

The correct way to implement the plot is to render each `trace` separately. This fixes bot the font color issue and the legend issue.

In [158]:
plots_list = []

for idx, (pokemon_type, color) in df_palette.iterrows():
    #print pokemon_type

    df_filter = df_plot[df_plot['type'].str.contains(pokemon_type)]
    
    scatter = dict(
        mode = "markers",
        name = pokemon_type.title(),
        type = "scatter3d",
        text =  df_filter['identifier'],
        hoverinfo='text',
        #color=df_plot['typecolor'],
        showlegend = True,
        #legendgroup = pokemon_type.title(),
        x =  df_filter['x'], y =  df_filter['y'], z = df_filter['z'],
        marker = dict( size=10, color=color, 
                     line = dict(
                        width = 3,

            ))
    )
    
    plots_list.append(scatter)
    
#    clusters = dict(
#    alphahull = 5,
#    name = pokemon_type.title(),
#    opacity = 0.1,
#    color = color,
#    type = "mesh3d",
#    hoverinfo='none',
#    legendgroup = pokemon_type.title(),
#    x =  df_filter['x'], y =  df_filter['y'], z = df_filter['z'],
#        )
#    
#    plots_list.append(clusters)

layout = dict(
    #title = 
    scene = dict(
        xaxis = empty_axis,
        yaxis = empty_axis,
        zaxis = empty_axis
    ),
       margin = dict(     
        l=0,
        r=0,
        t=0,
        b=0
        ),
    font=dict(
        family='Source Sans Pro, Arial, sans-serif',
    ),
    height=600,
    showlegend=True
)

fig = dict( data=plots_list, layout=layout )

# Use py.iplot() for IPython notebook
plot(fig, filename = 'pokemon-3d-2.html', auto_open=False, show_link=False)

'file:///Users/maxwoolf/Dropbox/Spark/pokemon-3d-2.html'

Use [hack from Stack Overflow](http://stackoverflow.com/a/36610966) to remove plot.ly buttons.

In [159]:
HTMLlink = 'pokemon-3d-2.html'

with open(HTMLlink, 'r') as file:
    tempHTML = file.read()
# Replace the target strings
tempHTML = tempHTML.replace('displaylogo:!0', 'displaylogo:!1')
tempHTML = tempHTML.replace('modeBarButtonsToRemove:[]', 'modeBarButtonsToRemove:["sendDataToCloud"]')
with open(HTMLlink, 'w') as file:
    file.write(tempHTML)
del tempHTML

Final plots for real. Need 3:

* Plot with light-blue background for post, not standalone
* Plot with fill screen, standalone
* Plot w/ clusters standalone

In [160]:
plots_list = []

for idx, (pokemon_type, color) in df_palette.iterrows():
    #print pokemon_type

    df_filter = df_plot[df_plot['type'].str.contains(pokemon_type)]
    
    scatter = dict(
        mode = "markers",
        name = pokemon_type.title(),
        type = "scatter3d",
        text =  df_filter['identifier'],
        hoverinfo='text',
        #color=df_plot['typecolor'],
        showlegend = True,
        #legendgroup = pokemon_type.title(),
        x =  df_filter['x'], y =  df_filter['y'], z = df_filter['z'],
        marker = dict( size=10, color=color, 
                     line = dict(
                        width = 3,

            ))
    )
    
    plots_list.append(scatter)
    

layout_web = dict(
    #title = 
    scene = dict(
        xaxis = empty_axis,
        yaxis = empty_axis,
        zaxis = empty_axis,
        camera = dict(eye=dict(x=0.75, y=0.75, z=0.75))
    ),
       margin = dict(     
        l=0,
        r=0,
        t=0,
        b=0
        ),
    font=dict(
        family='Source Sans Pro, Arial, sans-serif',
    ),
    
    height=400,
    showlegend=True,
    paper_bgcolor='#f7f8fa',
    plot_bgcolor='#f7f8fa'
)

layout_standalone = dict(
    #title = 
    scene = dict(
        xaxis = empty_axis,
        yaxis = empty_axis,
        zaxis = empty_axis
    ),
       margin = dict(     
        l=0,
        r=0,
        t=0,
        b=0
        ),
    font=dict(
        family='Source Sans Pro, Arial, sans-serif',
    ),
    showlegend=True
)



fig = dict( data=plots_list, layout=layout_web)
plot(fig, filename = 'pokemon-3d-web.html', auto_open=False, show_link=False, include_plotlyjs=False)

fig = dict( data=plots_list, layout=layout_standalone)
plot(fig, filename = 'pokemon-3d-standalone.html', auto_open=False, show_link=False)

'file:///Users/maxwoolf/Dropbox/Spark/pokemon-3d-standalone.html'

In [161]:
plots_list = []

for idx, (pokemon_type, color) in df_palette.iterrows():
    #print pokemon_type

    df_filter = df_plot[df_plot['type'].str.contains(pokemon_type)]
    
    scatter = dict(
        mode = "markers",
        name = pokemon_type.title(),
        type = "scatter3d",
        text =  df_filter['identifier'],
        hoverinfo='text',
        #color=df_plot['typecolor'],
        showlegend = True,
        #legendgroup = pokemon_type.title(),
        x =  df_filter['x'], y =  df_filter['y'], z = df_filter['z'],
        marker = dict( size=10, color=color, 
                     line = dict(
                        width = 3,

            ))
    )
    
    plots_list.append(scatter)
    
    clusters = dict(
    alphahull = 5,
    name = pokemon_type.title(),
    opacity = 0.1,
    color = color,
    type = "mesh3d",
    hoverinfo='none',
    legendgroup = pokemon_type.title(),
    x =  df_filter['x'], y =  df_filter['y'], z = df_filter['z'],
        )
    
    plots_list.append(clusters)

fig = dict( data=plots_list, layout=layout_standalone)
plot(fig, filename = 'pokemon-3d-standalone-cluster.html', auto_open=False, show_link=False)

'file:///Users/maxwoolf/Dropbox/Spark/pokemon-3d-standalone-cluster.html'

In [150]:
for HTMLlink in ['pokemon-3d-web.html', 'pokemon-3d-standalone.html', 'pokemon-3d-standalone-cluster.html']:
    with open(HTMLlink, 'r') as file:
        tempHTML = file.read()
    # Replace the target strings
    tempHTML = tempHTML.replace('displaylogo:!0', 'displaylogo:!1')
    tempHTML = tempHTML.replace('modeBarButtonsToRemove:[]', 'modeBarButtonsToRemove:["sendDataToCloud"]')
    with open(HTMLlink, 'w') as file:
        file.write(tempHTML)
    del tempHTML

# The MIT License (MIT)

Copyright (c) 2016 Max Woolf

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.