In [2]:
import json
import urllib
import urllib.request

import geopandas
import matplotlib.colors as colors
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.graph_objects as go

In [3]:
# Prepare "nodes" dataframe
def node_table(df):
    """Collect every node label of the diagram into a one-column frame.

    The labels are the values of the ``Applications`` column, in row order,
    followed by the names of the last three columns of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with an ``Applications`` column.

    Returns
    -------
    pandas.DataFrame
        Single column ``labels`` with a default integer index; other
        functions in this file use a label's row position as its node id.
    """
    application_names = df.Applications.values
    trailing_headers = df.columns[-3:]
    all_labels = [*application_names, *trailing_headers]

    return pd.DataFrame(all_labels, columns=['labels'])


# Re-key the matrix: replace application names and column headers with numeric node ids
def numeric_source_target(df):
    """Return a copy of ``df`` with node labels replaced by numeric node ids.

    The id of a label is the row position of its first occurrence in
    ``node_table(df)``.  The values of the ``Applications`` column and every
    column header after the first column are replaced by their ids; the cell
    values themselves are left untouched.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with an ``Applications`` column.  The code treats the first
        column as the id column and every other header as a node label.

    Returns
    -------
    pandas.DataFrame
        A new frame; ``df`` itself is not modified, so the same frame can be
        processed (and plotted) repeatedly.

    Raises
    ------
    ValueError
        If a column header after the first column is not a node label.
    """
    df_labels = node_table(df)

    # Label -> id lookup.  setdefault keeps the FIRST occurrence, so a label
    # that appears more than once still resolves to exactly one id.
    first_position = {}
    for position, label in zip(df_labels.index, df_labels['labels']):
        first_position.setdefault(label, position)

    target_headers = list(df.columns[1:])
    unknown = [header for header in target_headers if header not in first_position]
    if unknown:
        # Fail loudly: pairing headers with ids positionally would silently
        # attach the wrong id to every column after the first unknown one.
        raise ValueError(
            'column headers are not node labels: {}'.format(unknown)
        )

    # Work on a copy so the caller's frame keeps its original labels.
    indexed = df.copy()
    indexed['Applications'] = indexed['Applications'].map(first_position)
    indexed = indexed.rename(
        columns={header: first_position[header] for header in target_headers}
    )

    return indexed


def source_target_table(df):
    """Turn the labelled matrix into a long table of Sankey links.

    The frame is first passed through ``numeric_source_target``; the result
    is unpivoted so that each (first-column value, column header, cell)
    triple becomes one row.  All three columns are cast to ``int`` and only
    rows whose ``value`` equals 1 are kept.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame accepted by ``numeric_source_target``.

    Returns
    -------
    pandas.DataFrame
        Columns ``source``, ``target`` and ``value``; the index is whatever
        the unpivot produced for the surviving rows (it is not reset).
    """
    indexed = numeric_source_target(df)
    id_column = indexed.columns[0]
    target_columns = list(indexed.columns[1:])

    links = (
        indexed
        .melt(
            id_vars=[id_column],
            value_vars=target_columns,
            var_name='target',
            value_name='value',
        )
        .rename(columns={id_column: 'source'})
        .astype('int')
    )
    is_link = links['value'] == 1

    return links[is_link]


# plot sankey diagram
def sankey_plot(df):
    """Build a Plotly Sankey figure from a labelled application matrix.

    Node labels come from ``node_table(df)`` and links from
    ``source_target_table``.  Node colours are downloaded from the
    ``sankey_energy.json`` mock in the plotly.js repository; each link takes
    the colour of its source node.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame accepted by ``node_table`` and ``source_target_table``.  It is
        not modified.

    Returns
    -------
    plotly.graph_objects.Figure
        Figure holding a single Sankey trace.

    Raises
    ------
    ValueError
        If the downloaded palette contains no colours.

    Notes
    -----
    Requires network access for the palette.  As a side effect the pandas
    option ``display.max_rows`` is set to ``None`` for the whole session.
    """
    df_labels = node_table(df)
    # Work on a copy: the numeric re-indexing step may rewrite the frame it is
    # given, and the caller's frame must keep its labels so that the plot can
    # be regenerated from it.
    melt_df = source_target_table(df.copy())
    pd.set_option('display.max_rows', None)

    url = 'https://raw.githubusercontent.com/plotly/plotly.js/master/test/image/mocks/sankey_energy.json'
    # Close the HTTP response as soon as the payload has been read.
    with urllib.request.urlopen(url) as response:
        data = json.loads(response.read())
    palette = data['data'][0]['node']['color']
    if not palette:
        raise ValueError('no node colours found at {}'.format(url))

    source = melt_df.source.values.tolist()
    target = melt_df.target.values.tolist()
    value = melt_df.value.values.tolist()
    labels = df_labels['labels'].tolist()

    # The palette has a fixed length; cycle through it so a diagram with more
    # nodes than palette entries still gets a colour for every node.
    node_color = [palette[i % len(palette)] for i in range(len(labels))]
    # Make sure link color corresponds to source node color
    link_color = [palette[i % len(palette)] for i in source]

    link = dict(source=source, target=target, value=value, color=link_color)
    node = dict(label=labels, pad=15, thickness=5, color=node_color)
    sankey = go.Sankey(link=link, node=node, arrangement='snap')
    fig = go.Figure(sankey)

    return fig


In [6]:
# Load the 'apps' sheet and replace missing cells with 0.
raw_matrix = pd.read_excel('apps_transform.xlsx', sheet_name='apps')
df = raw_matrix.fillna(0)

# Build the Sankey figure and apply the display size and font settings.
fig = sankey_plot(df)
layout_options = dict(width=1000, height=1200, font_size=12)
fig.update_layout(**layout_options)

# Show the figure, then save it as an HTML file.
fig.show()
fig.write_html('AV_facilitated_transformations.html')