# Add LA County neighborhood data
* Countywide statistical areas

In [1]:
import altair as alt
import pandas as pd

In [2]:
URL = "https://raw.githubusercontent.com/ANRGUSC/lacounty_covid19_data/master/data/Covid-19.csv"
S3_FILE_PATH = "s3://public-health-dashboard/jhu_covid19/"

def clean_data():
    df = pd.read_csv(URL)
    df = (df.assign(
            date = pd.to_datetime(df["Time Stamp"]).dt.date,
            date2 = pd.to_datetime(df["Time Stamp"]),
            cases = df["Number of cases"].fillna(0).astype(int),
        ).drop(columns = ["Time Stamp", "Number of cases"])
          .sort_values(["date", "Region"])
          .reset_index(drop=True)
    )
    
    # Derive columns
    group_cols = ["Region"]
    sort_cols = ["Region", "date"]
    
    df = (df.assign(
            new_cases = df.sort_values(sort_cols).groupby(group_cols)["cases"].diff(periods=1),
        )
    )
    
    df = (df.assign(
            cases_avg7=df.cases.rolling(window=7).mean(),
            new_cases_avg7=df.new_cases.rolling(window=7).mean(),
        )   
    )
    
    quartiles = (df.groupby("date")["cases"].describe()[["25%", "50%", "75%"]]
                 .rename(columns = {"25%": "ptile25",
                                    "50%": "ptile50",
                                    "75%" :"ptile75"})
                 .reset_index()
                )
    
    df2 = pd.merge(df, quartiles, on = "date", how = "left", validate = "m:1")
    
    df2.to_parquet(f"{S3_FILE_PATH}lacounty-neighborhood-time-series.parquet")
    
    return df2

clean_data()

Unnamed: 0,Region,Latitude,Longitude,date,date2,cases,new_cases,cases_avg7,new_cases_avg7,ptile25,ptile50,ptile75
0,Alhambra,34.093042,-118.127060,2020-03-16,2020-03-16,2,,,,1.00,2.0,2.0
1,Arcadia,34.136208,-118.040150,2020-03-16,2020-03-16,1,,,,1.00,2.0,2.0
2,Beverly Hills,34.069650,-118.396306,2020-03-16,2020-03-16,1,,,,1.00,2.0,2.0
3,Boyle Heights,34.043689,-118.209768,2020-03-16,2020-03-16,5,,,,1.00,2.0,2.0
4,Carson,33.832204,-118.251755,2020-03-16,2020-03-16,1,,,,1.00,2.0,2.0
...,...,...,...,...,...,...,...,...,...,...,...,...
34146,Wilmington,33.780016,-118.262509,2020-07-02,2020-07-02,621,17.0,478.857143,6.857143,21.25,116.5,356.5
34147,Wilshire Center,34.061515,-118.432771,2020-07-02,2020-07-02,481,4.0,528.000000,7.285714,21.25,116.5,356.5
34148,Winnetka,34.205883,-118.570934,2020-07-02,2020-07-02,506,10.0,491.571429,-96.857143,21.25,116.5,356.5
34149,Wiseburn,33.913173,-118.375051,2020-07-02,2020-07-02,43,0.0,494.571429,8.714286,21.25,116.5,356.5


In [3]:
df = pd.read_parquet(f"{S3_FILE_PATH}lacounty-neighborhood-time-series.parquet")

In [4]:
import default_parameters 

def show_svg(image_name):
    image_path = f"../notebooks/{image_name}.svg"
    altair_saver.save(image_name, image_path)
    display(SVG(filename = image_path))
    os.remove(image_path)

    
#---------------------------------------------------------------#
# Chart parameters
#---------------------------------------------------------------#
navy = "#0A4C6A"
gray = "#797C7C"

# These colors are used for the shading on cases/deaths
light_gray = "#EAEBEB"


title_font_size = 10
font_name = "Arial"
grid_opacity = 0.4
domain_opacity = 0.4
stroke_opacity = 0
time_unit = "monthdate"
chart_width = 250
chart_height = 200
bin_spacing = 100
fulldate_format = "%-m/%-d/%y"
monthdate_format = "%-m/%-d"

two_weeks_ago = default_parameters.two_weeks_ago

In [None]:
#make_chart(test, "Alhambra", "cases")
make_chart(test, "Alhambra", "new_cases")

In [5]:
def make_chart(df, neighborhood, chart_type):
    if chart_type == "cases":
        plot_col = "cases_avg7"
        chart_title = f"{neighborhood}: Cases"
    if chart_type == "new_cases":
        plot_col = "new_cases_avg7"
        chart_title = f"{neighborhood}: New Cases"

    # Make cases charts
    cases_line = (
        alt.Chart(df.drop(columns = "date"))
        .mark_line()
        .encode(
            x=alt.X("date2", timeUnit=time_unit, 
                    title="date", axis=alt.Axis(format=monthdate_format)
                   ),
            y=alt.Y(plot_col, title="7-day avg"),
            color=alt.value(navy),
        )
    )
    
    cases_shaded = (
        alt.Chart(df[df.date >= two_weeks_ago].drop(columns = "date"))
        .mark_area()
        .encode(
            x=alt.X("date2", timeUnit = time_unit,
                   title = "date", axis=alt.Axis(format=monthdate_format)
                   ),
            y=alt.Y(plot_col, title="7-day avg"),
            color=alt.value(light_gray)
        )
    )
    
    ptile25_line = (
        alt.Chart(df.drop(columns = "date"))
        .mark_line()
        .encode(
            x=alt.X("date2", timeUnit=time_unit, 
                    title="date", axis=alt.Axis(format=monthdate_format)
                   ),
            y=alt.Y("ptile25", title="7-day avg"),
            color=alt.value(gray),
        )
    )
    
    ptile75_line = (
        alt.Chart(df.drop(columns = "date"))
        .mark_line()
        .encode(
            x=alt.X("date2", timeUnit=time_unit, 
                    title="date", axis=alt.Axis(format=monthdate_format)
                   ),
            y=alt.Y("ptile75", title="7-day avg"),
            color=alt.value(gray),
        )
    )
    
    if chart_type == "cases":
        cases_chart = (
            (cases_shaded + cases_line + ptile25_line + ptile75_line)
              .properties(
                  title=chart_title, width=chart_width, height=chart_height
                ).configure_title(
                fontSize=title_font_size, font=font_name, anchor="middle", color="black"
            )
            .configure_axis(gridOpacity=grid_opacity, domainOpacity=domain_opacity)
            .configure_view(strokeOpacity=stroke_opacity)
            )   
            
    
    if chart_type == "new_cases":
        cases_chart = (
            (cases_shaded + cases_line)
            .properties(
                  title=chart_title, width=chart_width, height=chart_height
                )
            .configure_title(
                fontSize=title_font_size, font=font_name, anchor="middle", color="black"
            )
            .configure_axis(gridOpacity=grid_opacity, domainOpacity=domain_opacity)
            .configure_view(strokeOpacity=stroke_opacity)
            )   
    
    #show_svg(cases_chart)
    
    return cases_chart


In [10]:
import ipywidgets

neigborhood_dropdown = ipywidgets.Dropdown(description="Neighborhood")
output = ipywidgets.Output()

display(neigborhood_dropdown)
display(output)

change_guard = False

def on_neighborhood_selection(*args):
    global change_guard
    if change_guard:
        return
    output.clear_output(wait=True)
    neighborhood = neigborhood_dropdown.value
    
    df = pd.read_parquet(f"{S3_FILE_PATH}lacounty-neighborhood-time-series.parquet")
    df = df[df.Region == neighborhood]
    
    with output:
        display(make_chart(df, neighborhood, "cases"))
        display(make_chart(df, neighborhood, "new_cases"))


on_neighborhood_selection()
neigborhood_dropdown.observe(on_neighborhood_selection, names="value")

Dropdown(description='Neighborhood', options=(), value=None)

Output()