This notebook creates a Sankey chart explaining how the project uses its data sets. It does not currently render on GitHub. To view the interactive Sankey charts, you can download the notebook and run it in your local environment, or view it in NBviewer here: https://nbviewer.jupyter.org/github/DataCircles/traffic_collisions_viz_team/blob/master/notebooks/JD-Intro_Data_Sankey.ipynb

The HTML link is also provided below.


In [1]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly.io as pio
import webcolors
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
from webcolors import hex_to_rgb

%matplotlib inline

# For notebooks: initialise plotly's offline notebook mode before any figure is shown.
# NOTE(review): connected=True is documented as loading plotly.js from a CDN
# instead of embedding it in the notebook - confirm that suits how this is shared.
init_notebook_mode(connected=True)

In [2]:
# Defining nodes: the position of a label in this list is the node's index.
node_label = [
    "collision_orig",
    "location_xy",
    "Census_Tracts_2010",
    "location_neighborhood",
    "2018_Traffic_Flow",
    "Used",
    "Feature Engineering",
    "Not Used",
    "Separate",
    "Final Master",
]

# Reverse lookup from label to index, e.g. 'collision_orig' -> 0 and
# 'Final Master' -> 9.
node_dict = {label: index for index, label in enumerate(node_label)}
node_dict

{'collision_orig': 0,
 'location_xy': 1,
 'Census_Tracts_2010': 2,
 'location_neighborhood': 3,
 '2018_Traffic_Flow': 4,
 'Used': 5,
 'Feature Engineering': 6,
 'Not Used': 7,
 'Separate': 8,
 'Final Master': 9}

In [3]:
# Defining paths: one row per link, written as (source label, target label, value).
link_rows = [
    ('collision_orig', 'Used', 19),
    ('collision_orig', 'Feature Engineering', 4),
    ('collision_orig', 'Not Used', 21),
    ('location_xy', 'Used', 2),
    ('location_xy', 'Not Used', 2),
    ('Census_Tracts_2010', 'Used', 1),
    ('Census_Tracts_2010', 'Not Used', 10),
    ('location_neighborhood', 'Feature Engineering', 1),
    ('location_neighborhood', 'Not Used', 1),
    ('2018_Traffic_Flow', 'Not Used', 12),
    ('2018_Traffic_Flow', 'Separate', 2),
    ('Used', 'Final Master', 22),
    ('Feature Engineering', 'Final Master', 5),
]

# Split the rows back into the three parallel lists used by the later cells.
source, target, values = (list(column) for column in zip(*link_rows))

In [4]:
# Connecting sources and targets: swap every link endpoint's label for its
# integer index from node_dict.
source_node = [node_dict[start_label] for start_label in source]
target_node = [node_dict[end_label] for end_label in target]
# Expected result:
#   source_node -> [0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 6]
#   target_node -> [5, 6, 7, 5, 7, 5, 7, 6, 7, 7, 8, 9, 9]

In [5]:
# Creating the basic figure: a single Sankey trace with default colors.
fig = go.Figure(
    data=[
        go.Sankey(
            # Node information: one node per label.
            node={"label": node_label},
            # Link information: endpoints are node indices, value is the link's size.
            link={
                "source": source_node,
                "target": target_node,
                "value": values,
            },
        )
    ]
)

# Export through plotly.offline.plot, requesting a 1000x600 PNG named 'sankey_plot_1'.
plot(fig, image_filename='sankey_plot_1', image='png', image_width=1000, image_height=600)

# Display the figure.
fig.show()

In [6]:
# Customizing node color. Colors follow the order of node_label, NOT the order
# in which the nodes end up being drawn in the chart.
node_color = [
    '#018571',  # collision_orig
    '#35978F',  # location_xy
    '#5AB4AC',  # Census_Tracts_2010
    '#80CDC1',  # location_neighborhood
    '#BF812D',  # 2018_Traffic_Flow
    '#01665E',  # Used
    '#01665E',  # Feature Engineering
    '#8c8c8c',  # Not Used
    '#A6611A',  # Separate
    '#003c30',  # Final Master
]

fig = go.Figure(
    data=[
        go.Sankey(
            node={"label": node_label, "color": node_color},
            link={
                "source": source_node,
                "target": target_node,
                "value": values,
            },
        )
    ]
)

# Export through plotly.offline.plot, requesting a 1000x600 PNG named 'sankey_plot_2'.
plot(fig, image_filename='sankey_plot_2', image='png', image_width=1000, image_height=600)
fig.show()

In [7]:
# Customizing path colors: every link takes the color of its target node,
# rewritten from hex to an rgba() string with alpha 0.4.
node_label_color = dict(zip(node_label, node_color))

# hex_to_rgb runs once per link and its three channels are unpacked straight
# into the format string, so link_color is built in a single pass.
# Example: a link into 'Used' ('#01665E') becomes 'rgba(1,102,94, 0.4)'.
link_color = [
    'rgba({},{},{}, 0.4)'.format(*hex_to_rgb(node_label_color[target_label]))
    for target_label in target
]
link_color

['rgba(1,102,94, 0.4)',
 'rgba(1,102,94, 0.4)',
 'rgba(140,140,140, 0.4)',
 'rgba(1,102,94, 0.4)',
 'rgba(140,140,140, 0.4)',
 'rgba(1,102,94, 0.4)',
 'rgba(140,140,140, 0.4)',
 'rgba(1,102,94, 0.4)',
 'rgba(140,140,140, 0.4)',
 'rgba(140,140,140, 0.4)',
 'rgba(166,97,26, 0.4)',
 'rgba(0,60,48, 0.4)',
 'rgba(0,60,48, 0.4)']

In [14]:
# Final plot: colored nodes, with each link colored by the matching entry of link_color.
fig = go.Figure(
    data=[go.Sankey(
        node = dict(
            label = node_label,
            color = node_color
        ),
        link = dict(
            source = source_node,
            target = target_node,
            value = values,
            color = link_color,
        ))])

# Export through plotly.offline.plot, requesting a 1000x600 PNG named 'sankey_plot_3'.
plot(fig,
     image_filename='sankey_plot_3',
     image='png',
     image_width=1000,
     image_height=600
)
fig.show()

# Write the figure to 'data_sankey.html'; auto_open=True asks plotly to open
# the file once it has been written. `pio` comes from the notebook's import
# cell rather than being imported here in the middle of the cell.
pio.write_html(fig, file='data_sankey.html', auto_open=True)

https://nbviewer.jupyter.org/github/DataCircles/traffic_collisions_viz_team/blob/master/notebooks/JD-Intro_Data_Sankey.ipynb
