In [1]:
import datetime
import numpy as np
import pandas as pd

import plotly.graph_objects as go
from ipywidgets import widgets

In [2]:
import os
import json
import requests
import pandas as pd
from pyspark.sql import SparkSession

In [3]:
# Relative path to a CSV file. None of the cells visible in this notebook
# reference FILE1; the data actually used is read from parquet further down.
FILE1 = 'data-vol-1/medicare-geo/MUP_PHY_R20_P04_V10_D18_Prov_Svc.csv'

In [4]:
# Column used as the first grouping key by total(); the dropdown below
# filters on the same column.
STRATIFY = 'Rndrng_Prvdr_Zip5'

In [5]:
# Create the Spark session for this notebook, or reuse one that already exists.
spark = SparkSession.builder.appName("PhysiciansProvider").getOrCreate()
# Load a single sub-directory of the parquet dataset.
# NOTE(review): the directory name looks like a partition for state 'CA' -- confirm
# that only California rows are expected downstream.
pardf = spark.read.parquet('state.parquet/Rndrng_Prvdr_State_Abrvtn=CA')

In [6]:
def total(lookup, df=pardf):
    """Count rows per (STRATIFY, lookup) combination and return them as pandas.

    Parameters
    ----------
    lookup : str
        Name of the second grouping column (the first is ``STRATIFY``).
    df : pyspark.sql.DataFrame, optional
        Spark DataFrame to aggregate. Defaults to ``pardf`` as it was bound
        when this cell was executed; re-run the cell after reloading ``pardf``
        if the default should pick up the new frame.

    Returns
    -------
    pandas.DataFrame
        One row per distinct (STRATIFY, lookup) pair with a ``count`` column.
        The whole aggregate is collected to the driver by ``toPandas()``.
    """
    # Aggregate the frame that was passed in; previously the argument was
    # ignored and the global ``pardf`` was always used.
    return df.groupby(STRATIFY, lookup).count().toPandas()

In [7]:
# Shared layout for the horizontal bar chart: counts on the x axis and the
# looked-up provider name on the y axis.
# Axis titles use the nested ``title=dict(text=..., font=...)`` form; the flat
# ``titlefont`` attribute is deprecated and rejected by newer plotly releases.
layout = go.Layout(
    xaxis=dict(
        title=dict(
            text='Count',
            font=dict(
                family='Arial, sans-serif',
                size=12,
                color='lightgrey'
            ),
        ),
        showticklabels=True,
        tickangle=45,
        tickfont=dict(
            family='Old Standard TT, serif',
            size=12,
            color='black'
        ),
    ),
    yaxis=dict(
        title=dict(
            text='Provider',
            font=dict(
                family='Arial, sans-serif',
                size=8,
                color='lightgrey'
            ),
        ),
        showticklabels=True,
        tickangle=45,
        tickfont=dict(
            family='Old Standard TT, serif',
            size=8,
            color='black'
        )
    )
)

# Static Variables

In [8]:
# Second grouping column passed to total(); its values label the bars
# on the chart's y axis.
LOOKUP = 'Rndrng_Prvdr_Last_Org_Name'

# Lookup by zip

In [9]:
# Per-(zip, provider) row counts, ordered from the smallest count to the largest.
agg_df = total(LOOKUP).sort_values(by='count', ascending=True)

In [10]:
# Distinct ZIP codes available in the aggregate. Nulls are dropped so that
# sorting never has to compare None against a string.
zip_options = sorted(agg_df[STRATIFY].dropna().unique())

# Preferred initial selection. Dropdown rejects a value that is not one of its
# options, so fall back to the first available ZIP (or no selection when the
# aggregate is empty) instead of failing.
preferred_zip = '94536'
if preferred_zip in zip_options:
    initial_zip = preferred_zip
elif zip_options:
    initial_zip = zip_options[0]
else:
    initial_zip = None

# NOTE: despite its name, this widget selects a ZIP code; the name is kept
# because the callback cells below refer to it.
state = widgets.Dropdown(
    options=zip_options,
    value=initial_zip,
    description='Zip Code:',
)

# Start the chart on the same ZIP the dropdown shows, so the figure and the
# control agree before the first user interaction.
initial_df = agg_df[agg_df[STRATIFY] == state.value]

trace = go.Bar(x=initial_df['count'], y=initial_df[LOOKUP],
               name='Count', orientation='h')

g = go.FigureWidget(data=[trace], layout=layout)

In [11]:
def validate():
    """Return True when the dropdown's current value is a ZIP present in ``agg_df``."""
    known_zips = agg_df['Rndrng_Prvdr_Zip5'].unique()
    return state.value in known_zips


def response(change):
    """Redraw the bar chart for the ZIP code currently selected in ``state``.

    Intended to be registered as an observer on the dropdown's ``value``.

    Parameters
    ----------
    change : dict-like
        Change notification supplied by the widget. It is not read; the
        current selection is taken from ``state.value`` instead.

    Returns
    -------
    None
        The figure widget ``g`` is updated in place. Nothing happens when the
        selection does not pass ``validate()``.
    """
    if not validate():
        return

    # Index with the boolean mask directly rather than first copying it
    # element by element into a Python list.
    selected = agg_df.loc[agg_df[STRATIFY] == state.value]

    # Apply both axis updates in one batch so the figure redraws once.
    with g.batch_update():
        g.data[0].x = selected['count']
        g.data[0].y = selected[LOOKUP]

# Call response() whenever the dropdown's selected value changes.
# Re-running this cell calls observe() again with the current response object.
state.observe(response, names="value")

In [12]:
# Put the dropdown in its own row, then stack that row above the figure.
container = widgets.HBox([state])
# Last expression of the cell, so the notebook renders the combined widget.
widgets.VBox([container, g])

VBox(children=(HBox(children=(Dropdown(description='Zip Code:', index=915, options=('29425', '33136', '62677',…