
# Import necessary modules

In [4]:
# Inject a CSS rule that stretches the notebook's `.container` element to the
# full browser width.
# `display` and `HTML` come from the public `IPython.display` module; importing
# them from `IPython.core.display` is deprecated in recent IPython releases.
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

import re
import dash
from jupyter_dash import JupyterDash
import dash_bootstrap_components as dbc
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output, State
import plotly.express as px
import networkx as nx
import plotly.graph_objects as go
import _pickle as cPickle
import pandas as pd
import numpy as np

# User Configurations

In [5]:
# Stylesheet list for a stand-alone `dash.Dash` deployment. In the visible
# notebook it is only referenced by the commented-out app constructor.
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']

# Colour used for each entity type in the plots (keys are `Entity_type` values).
color_dict = dict(
    VENDOR_ID='#87CEFA',
    ACCOUNT='#00CC96',
    Full_Address='#EF553B',
    BUSINESS_UNIT='#AB63FA',
)

# Utility functions 

## Compute Quarterwise PageRank Variation

In [6]:
def QuarterWise_PageRankVariation(PageRank_df: 'pd.DataFrame'):
    '''
    Compute the quarter-over-quarter PageRank variation (%) for every entity.

    Parameters
    ----------
    PageRank_df : pd.DataFrame
        Long-format frame with the columns 'id', 'Entity_type', 'Quarter'
        and 'pagerank'.

    Returns
    -------
    pd.DataFrame
        One row per ('id', 'Entity_type') pair containing
        * a 'PageRank_<quarter>' column for every quarter in the input, and
        * a 'PGRK_PrctCng_<quarter>' column for every quarter except the
          first, holding (current - previous) / previous * 100.
        Quarters follow the column order produced by `pivot_table`.
        A missing PageRank on either side yields NaN; a previous value of 0
        yields inf/NaN, as NumPy division does.
    '''
    key_cols = ['id', 'Entity_type']

    # Pivot table to create quarterwise pagerank columns for each entity
    wide_df = (PageRank_df.pivot_table(index=key_cols,
                                       columns='Quarter', values='pagerank',
                                       aggfunc='first')
               .reset_index(drop=False))

    # Every non-key column of the pivot is a quarter label.
    quarters = [str(col) for col in wide_df.columns if col not in key_cols]
    pgrk_cols = ['PageRank_' + quarter for quarter in quarters]
    wide_df.columns = key_cols + pgrk_cols

    # Vectorised percentage change between each pair of consecutive quarters.
    pr_values = wide_df[pgrk_cols].to_numpy(dtype=float)
    previous, current = pr_values[:, :-1], pr_values[:, 1:]
    prct_change_arr = (current - previous) / previous * 100

    # Derive the output names from the quarters actually present instead of
    # assuming the data always spans exactly Q1..Q4.
    prct_cng_cols = ['PGRK_PrctCng_' + quarter for quarter in quarters[1:]]
    prct_cng_df = pd.DataFrame(prct_change_arr, columns=prct_cng_cols)

    return pd.concat([wide_df, prct_cng_df], axis=1)

## Create Vendor Graph Plot

In [129]:
def Plot_VenGraph(Vendor, quarter, layout_seed=42):
    '''
    Build the network figure of a vendor and the entities it is linked to.

    Reads the module-level `Edges_df` (edges with 'src', 'dst', 'Quarter'),
    `df` (per-entity PageRank columns) and `color_dict`.

    Parameters
    ----------
    Vendor : value compared against `Edges_df.src`.
    quarter : quarter label; selects the edges and the 'PageRank_<quarter>' /
        'PGRK_PrctCng_<quarter>' columns shown on hover.
    layout_seed : seed passed to `nx.spring_layout` so the same vendor/quarter
        always gets the same node positions instead of a new random layout on
        every callback. Pass None to restore a random layout.

    Returns
    -------
    A plotly figure. If the vendor has no edges in `quarter`, an empty figure
    with an explanatory annotation is returned instead of raising.
    '''
    Edges_df_sub = Edges_df[(Edges_df.src == Vendor) &
                            (Edges_df.Quarter == quarter)].copy()

    graph_title = f'Graph of vendor {Vendor} in {quarter}<br>'
    hidden_axis = dict(visible=False, showgrid=False,
                       zeroline=False, showticklabels=False)

    # No edges means no node positions: the frame built from `pos` below would
    # not have the three columns being assigned and would raise.
    if Edges_df_sub.empty:
        fig = go.Figure()
        fig.update_layout(
            title=graph_title,
            xaxis=hidden_axis,
            yaxis=hidden_axis,
            plot_bgcolor='rgba(0,0,0,0)',
            margin=dict(t=75, l=30, r=10),
            height=1000, width=1250,
            annotations=[dict(
                text='No connections found for this vendor in the selected quarter',
                xref='paper', yref='paper', x=0.5, y=0.5,
                showarrow=False)],
        )
        return fig

    G = nx.from_pandas_edgelist(Edges_df_sub, 'src', 'dst')
    pos = nx.spring_layout(G, seed=layout_seed)

    # One row per node: node id plus its layout coordinates.
    pos_df = pd.DataFrame(pos).T.reset_index(drop=False)
    pos_df.columns = ['Entity', 'x', 'y']

    # Every edge contributes (start, end, None); the None breaks the line so
    # all edges can be drawn with a single Scatter trace.
    edge_x = []
    edge_y = []
    for src_node, dst_node in G.edges():
        x0, y0 = pos[src_node]
        x1, y1 = pos[dst_node]
        edge_x += [x0, x1, None]
        edge_y += [y0, y1, None]

    PageRank_df_sub = df[(df.id.isin(
        G.nodes))]
    PageRank_df_sub = PageRank_df_sub.merge(
        pos_df, how='left', left_on='id', right_on='Entity')
    PageRank_df_sub['size'] = 10

    fig = (px.scatter(
        PageRank_df_sub, x="x", y="y",
        color="Entity_type",
        color_discrete_map=color_dict,
        custom_data=[f'PageRank_{quarter}', f'PGRK_PrctCng_{quarter}'],
        size='size',
        opacity=0.6,
        size_max=50,
        text='id',
        labels={'VENDOR_ID': 'Vendor',
                'ACCOUNT': 'Account',
                'Full_Address': 'Address',
                'BUSINESS_UNIT': 'Bussiness Unit'},
        hover_data=['id'],
    ))
    fig.update_traces(
        hovertemplate="<br>".join([
            "PageRank: %{customdata[0]}",
            "PageRank Variation: %{customdata[1]}",
        ]))

    fig.add_trace(go.Scatter(
        x=edge_x, y=edge_y,
        line=dict(width=0.5, color='#888'),
        hoverinfo='none',
        mode='lines',
        showlegend=False))
    fig.update_layout(
        transition_duration=500,
        title=graph_title,
        xaxis=hidden_axis,
        yaxis=hidden_axis,
        legend=dict(orientation="h",
                    title_text='',
                    yanchor="top",
                    y=1.0,
                    xanchor="right",
                    x=0.99),
        plot_bgcolor='rgba(0,0,0,0)',
        margin=dict(t=75, l=30, r=10),
        height=1000, width=1250,
    )
    return fig

## Create Bar chart for top vendors

In [100]:
def plot_TopVendor_bar(quarter: 'Quarter'):
    '''
    Horizontal bar chart of the ten vendors with the largest PageRank
    variation (%) in `quarter`.

    Reads the module-level `df`; rows whose variation for the quarter is NaN
    are left out. Bars are ordered ascending by variation.
    '''
    pagerank_col = f'PageRank_{quarter}'
    variation_col = f'PGRK_PrctCng_{quarter}'

    is_vendor = df.Entity_type == 'VENDOR_ID'
    has_variation = ~df[variation_col].isna()

    top_vendors = df[is_vendor & has_variation][['id', pagerank_col, variation_col]]
    # Ascending sort + tail(10) keeps the ten largest variations.
    top_vendors = top_vendors.sort_values(variation_col, ascending=True).tail(10)

    axis_labels = {'id': 'Vendor',
                   variation_col: 'PageRank Variation (%)',
                   pagerank_col: 'PageRank'}
    fig = px.bar(top_vendors,
                 y="id",
                 x=variation_col,
                 text='id',
                 hover_data=[pagerank_col],
                 labels=axis_labels,
                 orientation='h')

    fig.update_layout(
        transition_duration=500,
        autosize=True,
        title_xanchor='left',
        title=f"Top Vendors by PageRank variation in {quarter}",
        xaxis=dict(side='top', title_standoff=0),
        yaxis=dict(visible=False, showgrid=False, zeroline=False, showticklabels=False),
        margin=dict(t=80, l=0, r=0, b=0),
        height=400,
    )
    return fig

## PageRank Variation by Entity

In [117]:
def Entity_PRvar_bar(Vendor, quarter):
    '''
    Bar chart of the mean PageRank variation (%) per entity type for the
    entities linked to `Vendor` in `quarter`.

    Reads the module-level `Edges_df` (expects 'src', 'dst', 'Quarter' and
    'Entity_type' columns), `df` and `color_dict`.
    '''
    variation_col = f'PGRK_PrctCng_{quarter}'

    Edges_df_sub = Edges_df[(Edges_df.src == Vendor) & (Edges_df.Quarter == quarter)]

    # Attach each destination entity's variation for the quarter.
    Edges_df_sub = Edges_df_sub[['dst', 'Entity_type']].merge(
        df[['id', variation_col]], left_on='dst', right_on='id', how='left')

    # 'mean' skips NaN just like np.nanmean; passing the NumPy callable to
    # `agg` is deprecated in recent pandas releases.
    agg_Ent_PR_var = (Edges_df_sub.groupby('Entity_type')
                      .agg({variation_col: 'mean'})
                      .reset_index())

    fig = px.bar(agg_Ent_PR_var,
                 x="Entity_type",
                 y=variation_col,
                 hover_data=[variation_col],
                 color="Entity_type",
                 color_discrete_map=color_dict,
                 labels={'Entity_type': 'Entity',
                         variation_col: 'PageRank Variation (%)'})
    fig.update_layout(transition_duration=500,
                      autosize=True,
                      title_xanchor='left',
                      yaxis=dict(visible=True, title=None, showgrid=True, zeroline=True, showticklabels=True),
                      xaxis=dict(visible=True, title=None, showgrid=False, zeroline=False, showticklabels=True),
                      title=f"Entity PageRank variation(%) for<br>{Vendor} in {quarter}",
                      margin=dict(t=75, l=10, r=0),
                      title_font_size=15,
                      showlegend=False)
    return fig

# Loading Datasets

In [24]:
# Load data
# NOTE(review): unpickling can execute arbitrary code; only load pickle files
# that come from a trusted source.
with open(r"./static/PageRank_df.pkl", "rb") as input_file:
    PageRank_df = cPickle.load(input_file)
# Replace the first two ', ' separators of every id with ',<br>'.
# `count` is passed by keyword: the positional form is deprecated in Python 3.13.
PageRank_df['id'] = PageRank_df.id.apply(lambda x: re.sub(', ', ',<br>', x, count=2))

with open(r"./static/Edges_df.pkl", "rb") as input_file:
    Edges_df = cPickle.load(input_file)
# Apply the same rewrite to edge destinations so they still match the ids above.
Edges_df['dst'] = Edges_df.dst.apply(lambda x: re.sub(', ', ',<br>', x, count=2))

with open(r"./static/Vendor_IDandName_df.pkl", "rb") as input_file:
    Vend_idName_df = cPickle.load(input_file)

# Preparing Inputs

In [25]:
# Map entity id -> entity type.
# NOTE(review): `ent_type_dict` is not referenced anywhere else in the visible
# notebook — confirm whether it is still needed.
ent_type_dict = pd.Series(PageRank_df.Entity_type.values,
                          index=PageRank_df.id).to_dict()

# Wide per-entity table (PageRank and % variation per quarter), rounded to 2 decimals.
df = QuarterWise_PageRankVariation(PageRank_df).round(2)
# Attach the entity type of each edge destination.
# NOTE(review): this rebinds `Edges_df` to the merged frame, so the cell is not
# idempotent — re-running it without reloading the data merges 'id' and
# 'Entity_type' a second time (pandas would presumably suffix the clashing
# columns, breaking later `Entity_type` look-ups). Verify before re-running.
Edges_df = Edges_df.merge(df[['id','Entity_type']],left_on='dst',right_on='id',how='left')

# App HTML Layout 

In [130]:
# App initialization

# external deployment
# app = dash.Dash(__name__, external_stylesheets=external_stylesheets)

# deployment within notebook (uses the Bootstrap theme, not `external_stylesheets`)
app = JupyterDash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])

# Layout design
# One row with two columns:
#   * left  (width 3): title, quarter dropdown, top-vendor bar chart and
#     per-entity variation bar chart;
#   * right (width 9): heading + quarter radio buttons above the vendor graph.
# A dcc.Store holds the currently selected vendor for the callbacks.

app.layout = html.Div([
    dbc.Container([
        dbc.Row([
            # Left column: quarter selection and the two bar charts
            dbc.Col([
                html.Div([
                    html.H2(children='PageRank Analysis for top Vendors'),
                    html.Label('Select Quarter'),
                    # Quarter driving both bar charts and the default vendor
                    dcc.Dropdown(
                        id='select-quarter',
                        options=[
                            {'label': 'Q2', 'value': 'Q2'},
                            {'label': 'Q3', 'value': 'Q3'},
                            {'label': 'Q4', 'value': 'Q4'}],
                        value='Q2',
                        clearable=False),
                    dcc.Graph(id='top-vendor-bar'),
                    dcc.Graph(id='entity-prv-bar')])
            ], id='top-vendors', width=3),

            # Right column: vendor network graph with its own quarter selector
            dbc.Col([
                html.Div([
                    dbc.Row([
                        # Heading text is filled in by `update_vendor_radiotit`
                        dbc.Col([html.H5(id='select-quarter-vendor-graph',
                                          style={"margin-top": "10px",
                                                 "margin-right": "10px",
                                                 "margin-left": "45px"})],width=6),
                        # Quarter shown in the vendor graph; reset by
                        # `update_quarter_radio` to the dropdown value
                        dbc.Col([dcc.RadioItems(
                            id='user-selected-quarter',
                            options=[
                                {'label': 'Q2 ', 'value': 'Q2'},
                                {'label': 'Q3 ', 'value': 'Q3'},
                                {'label': 'Q4 ', 'value': 'Q4'}],
                            value='Q2',
                            labelStyle={'display': 'inline-block',
                                        "margin-top": "10px",
                                        "margin-right": "15px",
                                        "margin-left": "15px"}
                        )],width=6)
                    ]),
                    dcc.Graph(id='vendor-graph')
                ])
            ], width=9)
        ])
    ]),
    # Holds {'vendor': <id>} written by `update_vendor`
    dcc.Store(id='selected-vendor')
])


@app.callback(
    Output('select-quarter-vendor-graph', 'children'),
    Input('selected-vendor', 'data'))
def update_vendor_radiotit(Vendor):
    '''
    Heading shown next to the quarter radio buttons.

    `Vendor` is the content of the 'selected-vendor' store; its 'vendor'
    entry is interpolated into the heading text.
    '''
    vendor_name = Vendor.get('vendor')
    heading = f"View {vendor_name} graph in: "
    return heading


@app.callback(
    Output('selected-vendor', 'data'),
    Input('top-vendor-bar', 'hoverData'),
    Input('select-quarter', 'value'))
def update_vendor(Vendor, quarter):
    '''
    Decide which vendor is currently selected and store it.

    Parameters
    ----------
    Vendor : hoverData of the top-vendor bar chart, or None when nothing is
        hovered.
    quarter : quarter chosen in the dropdown.

    Returns
    -------
    dict
        {'vendor': <id>} — the hovered bar's label when hover data exists,
        otherwise the vendor with the largest PageRank variation in `quarter`
        (read from the module-level `df`).
    '''
    if Vendor is not None:
        return {'vendor': Vendor['points'][0]['label']}

    variation_col = f'PGRK_PrctCng_{quarter}'
    candidates = df[(df.Entity_type == 'VENDOR_ID') & df[variation_col].notna()]
    # `.iloc[0]` replaces `.head(1)` + `Series.item()`, which emitted the
    # "`item` has been deprecated" warning seen in the notebook output.
    top_vendor = candidates.sort_values(variation_col, ascending=False)['id'].iloc[0]
    return {'vendor': top_vendor}


@app.callback(
    Output('user-selected-quarter', 'value'),
    Input('select-quarter', 'value'),
    Input('top-vendor-bar', 'hoverData'))
def update_quarter_radio(quarter: 'Quarter',Vendor: 'Vendor'):
    '''
    Set the radio-button quarter to the quarter chosen in the dropdown.

    `Vendor` (the bar chart's hoverData) is not used in the body; because it is
    registered as an Input, the callback also fires when the hover data
    changes, which sets the radio buttons back to the dropdown quarter.
    '''
    return quarter


@app.callback(
    Output('top-vendor-bar', 'figure'),
    Input('select-quarter', 'value'))
def create_TopVendor_bar(quarter: 'Quarter'):
    '''Redraw the top-vendor bar chart for the quarter chosen in the dropdown.'''
    return plot_TopVendor_bar(quarter)


@app.callback(
    Output('top-vendor-bar', 'hoverData'),
    Input('select-quarter', 'value'))
def update_hover_data(quarter):
    '''
    Clear the bar chart's hoverData whenever the dropdown quarter changes.

    With hoverData set to None, `update_vendor` takes its fallback branch and
    selects the top vendor of the quarter.
    '''
    return None

@app.callback(
    Output('entity-prv-bar', 'figure'),
    Input('selected-vendor', 'data'),
    Input('select-quarter', 'value'))
def create_Ent_bar(Vendor,quarter):
    '''
    Redraw the per-entity PageRank variation chart for the vendor held in the
    'selected-vendor' store and the quarter chosen in the dropdown.
    '''
    selected_vendor = Vendor.get('vendor')
    figure = Entity_PRvar_bar(selected_vendor, quarter)
    return figure


@app.callback(
    Output('vendor-graph', 'figure'),
    Input('selected-vendor', 'data'),
    Input('user-selected-quarter', 'value'))
def Create_graph(Vendor, quarter):
    '''
    Redraw the vendor network graph for the vendor held in the
    'selected-vendor' store and the quarter chosen with the radio buttons.
    '''
    selected_vendor = Vendor.get('vendor')
    figure = Plot_VenGraph(selected_vendor, quarter)
    return figure


# Start the app server. With mode='external' the app is presumably served at
# its own URL rather than inline (the captured output reports
# http://127.0.0.1:8050/).
app.run_server(mode='external')

Dash app running on http://127.0.0.1:8050/



`item` has been deprecated and will be removed in a future version



### PageRank Variation by Entity

In [54]:
# NOTE(review): presumably ad-hoc inputs for trying the function below by hand.
# The app callbacks receive their own `Vendor`/`quarter` arguments and do not
# read these module-level names.
Vendor = 'ACEUS0000032144'
quarter = 'Q3'
# PageRank Variation by Entity
def Entity_PRvar_bar(Vendor, quarter):
    '''
    Bar chart of the mean PageRank variation (%) per entity type for the
    entities linked to `Vendor` in `quarter`.

    Reads the module-level `Edges_df`, `df` and `color_dict`.

    NOTE(review): a function with this name is also defined earlier in the
    notebook with a different title and margins. On a top-to-bottom run this
    later definition replaces the earlier one; keep only one of the two.
    '''
    variation_col = f'PGRK_PrctCng_{quarter}'

    Edges_df_sub = Edges_df[(Edges_df.src == Vendor) & (Edges_df.Quarter == quarter)]

    # Attach each destination entity's variation for the quarter.
    Edges_df_sub = Edges_df_sub[['dst', 'Entity_type']].merge(
        df[['id', variation_col]], left_on='dst', right_on='id', how='left')

    # 'mean' skips NaN just like np.nanmean; passing the NumPy callable to
    # `agg` is deprecated in recent pandas releases.
    agg_Ent_PR_var = (Edges_df_sub.groupby('Entity_type')
                      .agg({variation_col: 'mean'})
                      .reset_index())

    fig = px.bar(agg_Ent_PR_var,
                 x="Entity_type",
                 y=variation_col,
                 hover_data=[variation_col],
                 color="Entity_type",
                 color_discrete_map=color_dict,
                 labels={'Entity_type': 'Entity',
                         variation_col: 'PageRank Variation (%)'})
    fig.update_layout(transition_duration=500,
                      autosize=True,
                      title_xanchor='left',
                      title=f"Entity PageRank variation for {Vendor} in {quarter}",
                      margin=dict(t=100, l=0, r=0),
                      showlegend=False)
    return fig

In [37]:
# Show the rows whose entity type is 'Full_Address'.
# NOTE(review): in the visible cells `Edges_df_sub` is only ever a local variable
# inside functions, so this cell appears to rely on leftover kernel state and
# would fail after Restart & Run All — confirm and define it here or drop the cell.
Edges_df_sub[Edges_df_sub.Entity_type == 'Full_Address']

Unnamed: 0,dst,Entity_type,id,PGRK_PrctCng_Q3
0,"LA QUINTA INN & SUITES,<br>THOUSAND OAKS NEWBU...",Full_Address,"LA QUINTA INN & SUITES,<br>THOUSAND OAKS NEWBU...",
3,"LA QUINTA INN & SUITES,<br>CHARLESTON RIVERVIE...",Full_Address,"LA QUINTA INN & SUITES,<br>CHARLESTON RIVERVIE...",
4,"LA QUINTA INN & SUITES,<br>COLUMBIA JESSUP,<br...",Full_Address,"LA QUINTA INN & SUITES,<br>COLUMBIA JESSUP,<br...",


In [11]:
def plot_TopVendor_bar(quarter: 'Quarter'):
    '''
    Horizontal bar chart of the ten vendors with the largest PageRank
    variation (%) in `quarter`.

    Reads the module-level `df`; rows whose variation for the quarter is NaN
    are left out. Bars are ordered ascending by variation.

    NOTE(review): a function with this name is also defined earlier in the
    notebook with different margin/height settings. On a top-to-bottom run
    this later definition replaces the earlier one; keep only one of the two.
    '''
    pagerank_col = f'PageRank_{quarter}'
    variation_col = f'PGRK_PrctCng_{quarter}'

    is_vendor = df.Entity_type == 'VENDOR_ID'
    has_variation = ~df[variation_col].isna()

    top_vendors = df[is_vendor & has_variation][['id', pagerank_col, variation_col]]
    # Ascending sort + tail(10) keeps the ten largest variations.
    top_vendors = top_vendors.sort_values(variation_col, ascending=True).tail(10)

    axis_labels = {'id': 'Vendor',
                   variation_col: 'PageRank Variation (%)',
                   pagerank_col: 'PageRank'}
    fig = px.bar(top_vendors,
                 y="id",
                 x=variation_col,
                 text='id',
                 hover_data=[pagerank_col],
                 labels=axis_labels,
                 orientation='h')

    fig.update_layout(
        transition_duration=500,
        autosize=True,
        title_xanchor='left',
        title=f"Top Vendors by PageRank variation in {quarter}",
        xaxis=dict(side='top', title_standoff=0),
        yaxis=dict(visible=False, showgrid=False, zeroline=False, showticklabels=False),
        margin=dict(t=100, l=0, r=0),
        height=500,
    )
    return fig

Unnamed: 0,id,Entity_type,PageRank_Q1,PageRank_Q2,PageRank_Q3,PageRank_Q4,PGRK_PrctCng_Q2,PGRK_PrctCng_Q3,PGRK_PrctCng_Q4
0,,Full_Address,177.13,140.41,76.53,85.71,-20.73,-45.5,12.01
1,",<br>BLVD DE LA PLAINE 11,<br>B-1050, BRUSSEL...",Full_Address,0.44,0.45,0.45,0.41,0.77,0.21,-8.79
2,",<br>FORTIUS 7 2134,<br>HOOFDDROP, NLD",Full_Address,,0.33,,,,,
3,",<br>SANTIAGO DE LOS CABALLEROS,<br>DOM",Full_Address,,,0.28,,,,
4,",<br>VOLKSPLEIN 70,<br>MAASTRICT, NLD",Full_Address,,0.33,,,,,
