In [8]:
# Import required library
import pandas as pd

In [9]:
# Load the complaint records (parked vehicles, abandoned vehicles, and transportation)
# from the CSV file stored under the notebook's relative Data/ folder
vehicle_comps = pd.read_csv('Data/vehicle_complaints.csv')

# Leave the frame as the last expression so the notebook renders it as the cell output
vehicle_comps

Unnamed: 0.1,Unnamed: 0,open_dt,Year,ontime,case_status,closure_reason,subject,reason,type,department,submittedphoto,pwd_district,city_council_district,neighborhood,neighborhood_services_district,location_street_name,location_zipcode,latitude,longitude,source
0,0,1/1/2021 1:46,2021,ONTIME,Closed,Case Closed. Closed date : 2021-01-02 12:53:10...,Transportation - Traffic Division,Enforcement & Abandoned Vehicles,Parking Enforcement,BTDT,,10B,9,Roxbury,13,INTERSECTION Elmore St & Kensington St,,42.3594,-71.0587,Constituent Call
1,2,1/1/2021 4:58,2021,ONTIME,Closed,Case Closed. Closed date : 2021-01-02 12:56:14...,Transportation - Traffic Division,Enforcement & Abandoned Vehicles,Parking Enforcement,BTDT,,7,4,Dorchester,9,27 W Tremlett St,2124.0,42.2944,-71.0734,Citizens Connect App
2,3,1/1/2021 6:11,2021,ONTIME,Closed,Case Closed. Closed date : 2021-01-02 08:11:45...,Transportation - Traffic Division,Enforcement & Abandoned Vehicles,Parking Enforcement,BTDT,https://311.boston.gov/media/boston/report/pho...,1C,2,South End,6,INTERSECTION Chandler St & Cazenove St,,42.3594,-71.0587,Citizens Connect App
3,4,1/1/2021 7:06,2021,ONTIME,Closed,Case Closed. Closed date : 2021-01-02 12:46:24...,Transportation - Traffic Division,Enforcement & Abandoned Vehicles,Parking Enforcement,BTDT,https://311.boston.gov/media/boston/report/pho...,3,2,Dorchester,5,50 Dorset St,2125.0,42.3239,-71.0592,Citizens Connect App
4,6,1/1/2021 7:55,2021,ONTIME,Closed,Case Closed. Closed date : 2021-01-02 12:53:56...,Transportation - Traffic Division,Enforcement & Abandoned Vehicles,Parking Enforcement,BTDT,,3,3,Dorchester,7,18 Hinckley St,2125.0,42.3173,-71.0603,Citizens Connect App
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
49521,60947,12/31/2019 19:06,2019,ONTIME,Closed,Case Closed. Closed date : 2019-12-31 20:20:49...,Transportation - Traffic Division,Enforcement & Abandoned Vehicles,Parking Enforcement,BTDT,,1C,2,South End,6,42 Paul Pl,2118.0,42.3467,-71.0678,Citizens Connect App
49522,60948,12/31/2019 19:19,2019,ONTIME,Closed,Case Closed. Closed date : 2020-01-02 03:43:44...,Transportation - Traffic Division,Enforcement & Abandoned Vehicles,Parking Enforcement,BTDT,https://311.boston.gov/media/boston/report/pho...,3,3,Dorchester,7,22 Treadway Rd,2125.0,42.3137,-71.0585,Citizens Connect App
49523,60949,12/31/2019 20:55,2019,ONTIME,Closed,Case Closed. Closed date : 2020-01-02 03:44:35...,Transportation - Traffic Division,Enforcement & Abandoned Vehicles,Parking Enforcement,BTDT,,3,4,Dorchester,8,INTERSECTION Nottingham St & Bowdoin Ave,,42.3594,-71.0587,Citizens Connect App
49524,60950,12/31/2019 22:13,2019,ONTIME,Closed,Case Closed. Closed date : 2020-01-02 03:18:29...,Transportation - Traffic Division,Enforcement & Abandoned Vehicles,Parking Enforcement,BTDT,https://311.boston.gov/media/boston/report/pho...,7,3,Dorchester,7,59 Minot St,2122.0,42.2857,-71.0479,Citizens Connect App


In [10]:
# Import plotly library
import plotly.graph_objects as go

# Create necessary functions to create the sankey diagram
def code_mapping(df, src, targ):
    '''
    Maps labels in src and targ columns to integers

    Every distinct label found in either column is given the integer equal to
    its position in the returned label list, so the codes can be used directly
    as node indices (labels[code] is the label that code stands for).

    :param df: A Dataframe containing at least a source and target column
    :param src: The column containing labels you want for the source nodes
    :param targ: The column containing labels you want for the target nodes
    :return: a tuple (coded_df, labels) - a copy of df whose src and targ columns
             hold integer codes instead of labels, and the list of distinct labels
    '''
    # Extract distinct labels for the Sankey nodes. dict.fromkeys drops the
    # duplicates while keeping first-appearance order (source labels first),
    # so the same input always produces the same codes.
    labels = list(dict.fromkeys(list(df[src]) + list(df[targ])))

    # Create a dictionary mapping each label to its index in the label list
    lc_map = {label: code for code, label in enumerate(labels)}

    # Substitute the codes for the labels on a copy so the caller's frame is
    # left untouched. Both mapped columns are computed from the original df
    # before assignment, which keeps the result correct even if src == targ.
    coded_columns = {col: df[col].map(lc_map) for col in dict.fromkeys((src, targ))}
    df = df.copy()
    for col, coded in coded_columns.items():
        df[col] = coded

    return df, labels

def make_sankey(df, src, targ, title, vals=None, **kwargs):
    '''
    Builds a plotly Sankey figure from a Dataframe of links

    :param df: a Dataframe with one row per link
    :param src: the column name that has the labels for the source nodes
    :param targ: the column name that has the labels for the target nodes
    :param title: the title for the Sankey diagram
    :param vals: the column name that has the value (weight) of each link;
                 when omitted, every link is given a value of 1
    :param kwargs: optional node styling - pad (default 10), thickness (default 70),
                   line_color (default 'black') and line_width (default 1)
    :return: the plotly Figure containing the Sankey diagram
    '''
    # Swap the node labels for integer codes; the label list becomes the node labels
    coded_df, node_labels = code_mapping(df, src, targ)

    # Use the requested column as link values, or weight every link equally with 1
    link_values = coded_df[vals] if vals else [1] * len(coded_df)

    # Describe the links (who connects to whom, and how strongly)
    link = {
        'source': coded_df[src],
        'target': coded_df[targ],
        'value': link_values,
    }

    # Describe the nodes, taking any styling overrides from kwargs
    node = {
        'label': node_labels,
        'pad': kwargs.get('pad', 10),
        'thickness': kwargs.get('thickness', 70),
        'line': {
            'color': kwargs.get('line_color', 'black'),
            'width': kwargs.get('line_width', 1),
        },
    }

    # Assemble the figure and apply the title
    fig = go.Figure(go.Sankey(link=link, node=node))
    fig.update_layout(title_text=title, font_size=10)

    return fig

In [11]:
# Keep only the two columns the Sankey diagram is built from
vehicle_comps = vehicle_comps.filter(items=['type', 'neighborhood'], axis='columns')

# Count the rows for each (type, neighborhood) combination and store the
# count in a 'Value' column
sankey_df = (
    vehicle_comps
    .groupby(list(vehicle_comps.columns), as_index=False)
    .size()
    .rename(columns={'size': 'Value'})
)

In [12]:
# Build the Sankey figure: neighborhoods are the source nodes, complaint types
# the target nodes, and the 'Value' column sets the width of each link
sankey = make_sankey(
    sankey_df,
    src='neighborhood',
    targ='type',
    title=(
        'Sankey Diagram Mapping Neighborhoods to Complaints Regarding Parked Vehicles, Abandoned Vehicles,'
        ' and the Flow of Traffic'
    ),
    vals='Value',
)

# Leave the figure as the last expression so the notebook renders it
sankey

In [13]:
# Install and import chart_studio
!pip install chart_studio 
import chart_studio



In [14]:
# Upload the sankey diagram to plotly chart studio so it can be embedded into the webpage
import os
from getpass import getpass

import chart_studio.plotly as py

# Credentials must never be stored in the notebook itself: they are read from
# the environment, with an interactive prompt as a fallback for the API key.
# SECURITY: an API key was previously hardcoded in this cell and has to be
# treated as leaked - regenerate it (profile > settings > regenerate key).
username = os.environ.get('CHART_STUDIO_USERNAME', 'miahuebscher') # your username
api_key = os.environ.get('CHART_STUDIO_API_KEY') or getpass('Chart Studio API key: ')
chart_studio.tools.set_credentials_file(username=username, api_key=api_key)

# Upload the figure; the call is the last expression so the returned URL is shown
py.plot(sankey, filename = 'complaints_sankey', auto_open=True)

'https://plotly.com/~miahuebscher/1/'