# LA County COVID-19 Cases by Neighborhoods

Use LA County's [countywide statistical areas](http://geohub.lacity.org/datasets/lacounty::countywide-statistical-areas-csa) and look at trends in cases and new cases by neighborhood.

In [1]:
import altair as alt
import ipywidgets
import numpy as np
import pandas as pd
import pytz

import default_parameters 
import neighborhood_utils

from datetime import date, datetime, timedelta
from IPython.display import Markdown 

#---------------------------------------------------------------#
# Chart parameters
#---------------------------------------------------------------#
navy = "#1696D2"
gray = "#515252"

# These colors are used for the shading on cases/deaths
light_gray = "#EAEBEB"

title_font_size = 10
font_name = "Arial"
grid_opacity = 0.4
domain_opacity = 0.4
stroke_opacity = 0
time_unit = "monthdate"
chart_width = 250
chart_height = 200
bin_spacing = 100

fulldate_format = default_parameters.fulldate_format
monthdate_format = default_parameters.monthdate_format

time_zone = default_parameters.time_zone
yesterday_date = default_parameters.yesterday_date
two_days_ago = default_parameters.two_days_ago
two_weeks_ago = default_parameters.two_weeks_ago
one_week_ago = (
    (datetime.today()
                .astimezone(pytz.timezone(f'{time_zone}'))
                .date()
        - timedelta(days=8)
    )
)

one_month_ago = (
    (datetime.today()
                .astimezone(pytz.timezone(f'{time_zone}'))
                .date()
        - timedelta(days=31)
    )
)

start_date = date(2020, 4, 1)

In [2]:
# Prepare neighborhood case data, testing data, and merge
df1 = neighborhood_utils.clean_data()

df1 = (df1[(df1.date >= start_date) & (df1.aggregate_region != "Long Beach")]
      .sort_values(["aggregate_region", "date"])
      .reset_index(drop=True)
     )

df2 = neighborhood_utils.clean_testing_data()


df = pd.merge(df1, df2.drop(columns = "region_num"), 
              on = ["aggregate_region", "population", "date", "date2"], 
              how = "left", validate = "1:1")

In [3]:
def top_5_bottom_5(df, yesterday_date):
    keep_cols = ["aggregate_region", "cases", "cases_per100k", "rank", "max_rank"]
    
    df = (df[df.date == yesterday_date][keep_cols]
          .assign(
             cases = df.cases.map("{:,}".format),
             rank = df["rank"].astype(int),
             max_rank = df.max_rank.astype(int),
             cases_per100k = df.cases_per100k.round(2).map("{:,.2f}".format),
          )
          .rename(columns = {"aggregate_region": "Neighborhood"})
          .sort_values("rank")
         )
    
    
    cols_to_show = ["Neighborhood", "cases", "cases_per100k"]
    
    top_5 = (df[df["rank"] <= 5][cols_to_show]
             .reset_index(drop=True)
            )
    
    bottom_5 = (df[(df["rank"] >= df["max_rank"] - 5)]
                .sort_values("rank", ascending=False)
                [cols_to_show]
                .reset_index(drop=True)
               )
    
    
    display(Markdown(
        "5 Neighborhoods with <strong>Worst Outbreak</strong> (ranked on cases per 100k)"
        )
    )
    
    display(top_5)
    
    display(Markdown(
        "5 Neighborhoods with <strong>Least Severe</strong> Outbreak (ranked on cases per 100k)"
        )
    )
    display(bottom_5)


In [4]:
def summary_sentence(df, neighborhood):
    extract_col = "cases"
    cases_1month = df[df.date == one_month_ago][extract_col].iloc[0]
    cases_2weeks = df[df.date == two_weeks_ago][extract_col].iloc[0]
    cases_1week = df[df.date == one_week_ago][extract_col].iloc[0]
    cases_yesterday = df[df.date == yesterday_date][extract_col].iloc[0]
    
    pct_positive_2days = (df[df.date == two_days_ago]["pct_positive"].iloc[0] * 100).round(1)
    positive_per1k_2days = df[df.date == two_days_ago]["positive_per1k"].iloc[0].round(2)
    
    try:
        extract_col2 = "cases_per100k"
        n_cases_1week = df[df.date == one_week_ago][extract_col2].iloc[0].round(2)
        n_cases_yesterday = df[df.date == yesterday_date][extract_col2].iloc[0].round(2)
        
        pct_change = (((n_cases_yesterday - n_cases_1week) / n_cases_1week) * 100).round(1)
                     
        ranking = df[df.date == yesterday_date]["rank"].iloc[0].astype(int)
        max_rank = df[df.date == yesterday_date]["max_rank"].iloc[0].astype(int)
        
        
        display(Markdown(
            f"Cumulative cases reported in {neighborhood}: "
            f"{cases_1month:,} cases a month ago, {cases_2weeks:,} cases 2 weeks ago, " 
            f"{cases_1week:,} cases 1 week ago, and {cases_yesterday:,} cases yesterday. "
            f"This translates to a <strong>{pct_change}% </strong> change in the past week. "
            f"Of those tested so far, {pct_positive_2days}% tested positive, with persons testing positive at a "
            f"rate of {positive_per1k_2days:,} per 1k. "
            f"As of {yesterday_date.strftime(fulldate_format)}, "
            f"{neighborhood} ranked <strong> {ranking} out of {max_rank} </strong> neighborhoods "
            "on cases per 100k <i>(1 being the most severely hit)</i>."
            )
        )   
    except AttributeError:
         display(Markdown(
            f"Cumulative cases reported in {neighborhood}: "
            f"{cases_1month} cases a month ago, {cases_2weeks} cases 2 weeks ago, and " 
            f"{cases_1week} cases 1 week ago. "
            f"{neighborhood} has missing data; cases per 100k and rankings based on cases per 100k "
             "cannot be calculated. "
            )
        )

In [5]:
def make_chart(df, neighborhood, chart_type):
    if chart_type == "cases":
        plot_col = "cases_avg7"
        p25_col = "cases_p25"
        p75_col = "cases_p75"
        chart_title = f"{neighborhood}: Cases"
    
    if chart_type == "new_cases":
        plot_col = "new_cases_avg7"
        p25_col = ""
        p75_col = ""
        chart_title = f"{neighborhood}: New Cases"
    
    if chart_type == "normalized_cases":
        plot_col = "cases_per100k_avg7"
        chart_title = f"{neighborhood}: Cases per 100k"
        p25_col = "ncases_p25"
        p75_col = "ncases_p75"
        
    # Make cases charts
    cases_line = (
        alt.Chart(df.drop(columns = "date"))
        .mark_line()
        .encode(
            x=alt.X("date2", timeUnit=time_unit, 
                    title="date", axis=alt.Axis(format=monthdate_format)
                   ),
            y=alt.Y(plot_col, title="7-day avg"),
            color=alt.value(navy),
        )
    )
    
    cases_shaded = (
        alt.Chart(df[df.date >= two_weeks_ago].drop(columns = "date"))
        .mark_area()
        .encode(
            x=alt.X("date2", timeUnit = time_unit,
                   title = "date", axis=alt.Axis(format=monthdate_format)
                   ),
            y=alt.Y(plot_col, title="7-day avg"),
            color=alt.value(light_gray)
        )
    )
    
    ptile25_line = (
        alt.Chart(df.drop(columns = "date"))
        .mark_line()
        .encode(
            x=alt.X("date2", timeUnit=time_unit, 
                    title="date", axis=alt.Axis(format=monthdate_format)
                   ),
            y=alt.Y(p25_col, title="7-day avg"),
            color=alt.value(gray),
        )
    )
    
    ptile75_line = (
        alt.Chart(df.drop(columns = "date"))
        .mark_line()
        .encode(
            x=alt.X("date2", timeUnit=time_unit, 
                    title="date", axis=alt.Axis(format=monthdate_format)
                   ),
            y=alt.Y(p75_col, title="7-day avg"),
            color=alt.value(gray),
        )
    )
    
    if (chart_type == "cases") or (chart_type == "normalized_cases"):
        chart = (cases_shaded + cases_line + ptile25_line + ptile75_line)
                
    
    if chart_type == "new_cases":
        chart = (cases_shaded + cases_line)
    
    chart = (chart
        .properties(
              title=chart_title, width=chart_width, height=chart_height
            )
        .configure_title(
            fontSize=title_font_size, font=font_name, anchor="middle", color="black"
        )
        .configure_axis(gridOpacity=grid_opacity, domainOpacity=domain_opacity)
        .configure_view(strokeOpacity=stroke_opacity)
        )   
    
    return chart

In [6]:
top_5_bottom_5(df, yesterday_date)

5 Neighborhoods with <strong>Worst Outbreak</strong> (ranked on cases per 100k)

Unnamed: 0,Neighborhood,cases,cases_per100k
0,Castaic,2153,7601.06
1,Wholesale District,2518,6593.18
2,Vernon Central,3215,5940.39
3,Central,2285,5842.94
4,South Park,2152,5504.82


5 Neighborhoods with <strong>Least Severe</strong> Outbreak (ranked on cases per 100k)

Unnamed: 0,Neighborhood,cases,cases_per100k
0,La Canada Flintridge,188,467.05
1,Miracle Mile,218,639.88
2,Pacific Palisades,201,723.88
3,Santa Monica Mountains,240,760.65
4,Rancho Palos Verdes,329,766.76
5,Westwood,434,797.49


<strong><span style='color:f"#515252"'>The 25th and 75th percentile of cases and cases per 100k are calculated each day and drawn in gray.</span></strong>


<strong><span style='color:#1696D2'>The neighborhood's own trend is drawn in blue.</span></strong>


The percentiles provide context to whether a neighborhood's cases are relatively high or lower compared to other parts of the county. Data from the past 2 weeks are shaded in gray.

**Note:** Some neighborhoods report cases, but do not have population data. Rankings are calculated using cases per 100k to normalize across population differences. Neighborhoods without population data are excluded from the rankings.

In [7]:
# Make interactive chart
neigborhood_dropdown = ipywidgets.Dropdown(
    description="Neighborhood",
    style = {"description_width":"initial"},
    options=df.aggregate_region.unique()
)

output = ipywidgets.Output()

display(neigborhood_dropdown)
display(output)

def on_neighborhood_selection(*args):
    output.clear_output(wait=True)
    neighborhood = neigborhood_dropdown.value
    
    subset_df = df[df.aggregate_region == neighborhood]
    
    with output:
        summary_sentence(subset_df, neighborhood)
        display(make_chart(subset_df, neighborhood, "cases"))
        display(make_chart(subset_df, neighborhood, "normalized_cases"))
        display(make_chart(subset_df, neighborhood, "new_cases"))


on_neighborhood_selection()
neigborhood_dropdown.observe(on_neighborhood_selection, names="value")

Dropdown(description='Neighborhood', options=('Acton', 'Agoura Hills', 'Alhambra', 'Altadena', 'Arcadia', 'Arl…

Output()