# LA County COVID-19 Cases by Neighborhoods

Use LA County's [countywide statistical areas](http://geohub.lacity.org/datasets/lacounty::countywide-statistical-areas-csa) and look at trends in cases and new cases by neighborhood.

**Related daily reports:** 
1. **[US counties report on cases and deaths for select major cities](https://cityoflosangeles.github.io/covid19-indicators/us-county-trends.html)**
1. **[CA counties report on cases, deaths, and hospitalizations](https://cityoflosangeles.github.io/covid19-indicators/ca-county-trends.html)**
1. **[Los Angeles County, detailed indicators](https://cityoflosangeles.github.io/covid19-indicators/coronavirus-stats.html)**

Code available in GitHub: [https://github.com/CityOfLosAngeles/covid19-indicators](https://github.com/CityOfLosAngeles/covid19-indicators)
<br>
Get informed with [public health research](https://github.com/CityOfLosAngeles/covid19-indicators/blob/master/reopening-sources.md)

In [None]:
import altair as alt
import geopandas as gpd
import pandas as pd

import default_parameters
import make_charts
import neighborhood_charts
import neighborhood_utils

from datetime import date
from IPython.display import Markdown, HTML, display_html

# Default parameters shared with the other reports
today_date = default_parameters.today_date
fulldate_format = default_parameters.fulldate_format

# This report uses its own start date (4/1/2020) instead of
# default_parameters.start_date, so the default is not read here.
start_date = date(2020, 4, 1)

# Use Altair's 'html' renderer for charts in this notebook
alt.renderers.enable('html')

In [None]:
# Get vaccinations by zipcode, and do spatial join
def vax_by_zipcode():
    """
    Attach a neighborhood (Region) and its aggregate_region to the
    zipcode vaccination data.

    Steps:
    1. Load the frame returned by neighborhood_utils.clean_zipcode_vax_data().
    2. Load the Region -> aggregate_region crosswalk.
    3. Load the countywide statistical areas and derive Region from LABEL
       by removing the "City of ", "Unincorporated - " and "Los Angeles - "
       text and stripping whitespace.
    4. Spatially join the vaccination rows to every statistical area they
       intersect (inner join, so one output row per intersecting pair).
    5. Inner-merge the crosswalk on Region, so rows whose Region is not in
       the crosswalk are dropped.

    Returns the merged frame.
    """
    COUNTYWIDE_STATISTICAL_AREAS = (
        "https://github.com/CityOfLosAngeles/covid19-indicators/raw/master/"
        "data/la_countywide_statistical_areas.geojson"
    )

    zipcode_vax = neighborhood_utils.clean_zipcode_vax_data()

    region_lookup = pd.read_parquet(neighborhood_utils.CROSSWALK_URL)
    region_lookup = region_lookup[["Region", "aggregate_region"]]

    csa = gpd.read_file(COUNTYWIDE_STATISTICAL_AREAS)

    # Reduce LABEL to the bare neighborhood name, one prefix at a time
    region_names = csa.LABEL
    for label_text in ("City of ", "Unincorporated - ", "Los Angeles - "):
        region_names = region_names.str.replace(label_text, "")
    csa = csa.assign(Region=region_names.str.strip())[["Region", "geometry"]]

    # Spatial join neighborhoods with zipcodes
    # NOTE(review): newer geopandas releases name this keyword `predicate`
    # instead of `op` -- confirm against the pinned geopandas version.
    joined = gpd.sjoin(zipcode_vax, csa, how="inner", op="intersects")
    joined = joined.drop(columns="index_right")

    # Merge in crosswalk to put neighborhoods into their aggregate_region
    return pd.merge(joined, region_lookup, on="Region", how="inner")


df = neighborhood_charts.prep_data(start_date)
vaccinations = vax_by_zipcode()

# Latest date present in the prepped data, and every aggregate_region in it
data_through = df.date.max()
neighborhoods = list(df.aggregate_region.unique())

# Construct list again, because we change the "/" into "-"
# Each list holds the alphabetized aggregate_region names of one group_name.
la_neighborhoods = sorted(
    df.loc[df.group_name == "City of LA", "aggregate_region"].unique()
)
unincorporated_neighborhoods = sorted(
    df.loc[df.group_name == "Unincorporated", "aggregate_region"].unique()
)
incorporated_neighborhoods = sorted(
    df.loc[df.group_name == "Incorporated", "aggregate_region"].unique()
)

In [None]:
def calculate_county_avg(df, group_by = "county", output_col = "fully_vaccinated_percent"):
    """
    Population-weighted average of a vaccination rate column, as a percent.

    Only rows on the most recent date found in `df` and with population > 0
    are used. Each remaining row contributes output_col * population; the
    sum of those is divided by the (integer-truncated) total population,
    rounded to 3 decimals, multiplied by 100 and rounded to 1 decimal.

    Parameters
    ----------
    df: pandas.DataFrame with columns date, population,
        at_least_one_dose_percent, fully_vaccinated_percent, and, when
        group_by is "county", zipcode.
    group_by: "county" or "aggregate_region".
        "county" de-duplicates rows on zipcode plus the population / rate
        columns; "aggregate_region" de-duplicates on the population / rate
        columns only.
    output_col: rate column to average; must be one of
        at_least_one_dose_percent or fully_vaccinated_percent.

    Returns
    -------
    float percent (e.g. 37.5), or NaN when no population remains after
    filtering.

    Raises
    ------
    ValueError if group_by is not one of the two supported values.
    """
    group_cols = ["population", "at_least_one_dose_percent",
                  "fully_vaccinated_percent"]

    if group_by == "county":
        keep_cols = ["zipcode"] + group_cols
    elif group_by == "aggregate_region":
        # NOTE(review): zipcode is not part of the de-duplication key here,
        # so two zipcodes with identical population and rates collapse into
        # one row -- confirm this is intended.
        keep_cols = group_cols
    else:
        raise ValueError(
            f"group_by must be 'county' or 'aggregate_region', got {group_by!r}"
        )

    # Filter on the frame that was passed in, not on a notebook-level global
    is_latest = df.date == df.date.max()
    has_population = df.population > 0
    latest = (df[is_latest & has_population]
              [keep_cols]
              .drop_duplicates()
              .reset_index(drop=True)
             )

    # Column arithmetic (instead of a row-wise apply) also works when no
    # rows are left after filtering.
    colname = f"{output_col.replace('percent', 'num')}"
    latest = latest.assign(**{colname: latest[output_col] * latest.population})

    # Calculate the population-weighted rate
    denominator = int(latest.population.sum())
    if denominator == 0:
        # Nothing to average; avoid dividing by zero
        return float("nan")
    numerator = float(latest[colname].sum())

    vax_rate = round(numerator / denominator, 3)

    return round(vax_rate * 100, 1)

COUNTY_FULLY_VAX_PERCENT = calculate_county_avg(vaccinations, 
                                                group_by = "county", 
                                                output_col = "fully_vaccinated_percent"
                                               )

In [None]:
# Display a caption summarizing how the zipcodes in an aggregate_region
# compare with the county's full vaccination rate
def display_vax_by_zipcode(df, neighborhood, COUNTY_FULLY_VAX_PERCENT):
    """
    Display a Markdown caption counting the neighborhood's zipcodes that are
    at/above vs. below the county's full vaccination rate.

    A per-zipcode table is deliberately not shown: some neighborhoods have
    20+ zipcodes, which makes the page very long, so results are summarized.

    Parameters
    ----------
    df: pandas.DataFrame with columns aggregate_region, date, zipcode,
        population, at_least_one_dose_percent, fully_vaccinated_percent.
        fully_vaccinated_percent is multiplied by 100 before it is compared
        with COUNTY_FULLY_VAX_PERCENT.
    neighborhood: aggregate_region value to summarize.
    COUNTY_FULLY_VAX_PERCENT: county rate on a 0-100 scale; zipcodes at or
        above it are counted as "at or above" average.

    Returns None; the caption is emitted with display(Markdown(...)).
    """
    keep_cols = ["date", "zipcode", "population",
                "at_least_one_dose_percent", "fully_vaccinated_percent"]

    # Keep the neighborhood's rows on the latest date in df, one per
    # distinct combination of keep_cols.
    df = (df[(df.aggregate_region==neighborhood) &
           (df.date == df.date.max())]
          [keep_cols]
          .drop_duplicates()
          .assign(
              # The callable is evaluated on the filtered rows only
              fully_vaccinated_percent_scaled = lambda x: x.fully_vaccinated_percent * 100
          )
         )

    num_zipcodes = len(df)
    above_avg = int(
        (df.fully_vaccinated_percent_scaled >= COUNTY_FULLY_VAX_PERCENT).sum()
    )
    below_avg = num_zipcodes - above_avg

    # Subject-verb agreement: "1 is", otherwise "N are" (including 0)
    verb_tense1 = "is" if above_avg == 1 else "are"
    verb_tense2 = "is" if below_avg == 1 else "are"

    # Write caption
    display(Markdown(
        f"Out of {num_zipcodes} zipcodes included in {neighborhood}, "
        f"{above_avg} {verb_tense1} at or above LA County's average full vaccination rate "
        f"of {COUNTY_FULLY_VAX_PERCENT}%, and "
        f"{below_avg} {verb_tense2} below average. "
        )
    )

In [None]:
# Caption with the report's run date and the latest date present in the data
report_date = default_parameters.today_date.strftime(fulldate_format)
latest_data_date = data_through.strftime(fulldate_format)

display(Markdown(
    f"Report updated: {report_date}; "
    f"data available through {latest_data_date}."
))

<strong><span style='color:#515252'>The 25th and 75th percentile of cases and cases per 100k are calculated each day and drawn in gray.</span></strong>

<strong><span style='color:#1696D2'>The neighborhood's own trend is drawn in blue.</span></strong>

The percentiles provide context for whether a neighborhood's cases are relatively high or low compared to other parts of the county. Data from the past 2 weeks are shaded in gray.

**Note:** Some neighborhoods report cases, but do not have population data. Rankings are calculated using cases per 100k to normalize across population differences. Neighborhoods without population data are excluded from the rankings.
Also, since rolling averages are used, decimals are possible, and are rounded to 1 decimal place. 

In [None]:
#References
#https://stackoverflow.com/questions/35160256/how-do-i-output-lists-as-a-table-in-jupyter-notebook
#https://stackoverflow.com/questions/7572901/python-html-combine-multiple-htmls-into-one

<a id='top'></a>

## Neighborhoods by Group:
Select neighborhoods within the City of LA, other incorporated cities in LA County, and unincorporated areas in LA County.

In [None]:
# Pair each heading with its neighborhoods, in display order. Pairing them
# explicitly (rather than comparing lists for equality) keeps the heading
# right even when two groups happen to hold equal lists, e.g. both empty.
neighborhood_groups = [
    ("City of LA: ", la_neighborhoods),
    ("LA County - Unincorporated: ", unincorporated_neighborhoods),
    ("LA County - Incorporated Cities: ", incorporated_neighborhoods),
]

for group_title, list_name in neighborhood_groups:
    links = []
    for i, name in enumerate(list_name):
        # Link target: same transformation used for the anchor ids
        # emitted alongside each neighborhood's chart.
        new_name = name.replace(" - ", "-").replace(" ", "-")

        # Every link except the last carries a trailing ", " in its text
        separator = ", " if i < len(list_name) - 1 else ""
        links.append(f"<a href=#{new_name}>{name}{separator}</a>")

    combined_html = "".join(links)

    display(Markdown(f"<strong>{group_title}</strong>"))
    display(HTML(combined_html))

In [None]:
# For every neighborhood: emit its anchor, its chart, the vaccination
# caption, and a link back to the index at the top of the page.
for c in neighborhoods:
    # Anchor id: " - " collapses to "-", then remaining spaces become "-"
    id_anchor = c.replace(" - ", "-")
    id_anchor = id_anchor.replace(" ", "-")
    display(HTML(f"<a id={id_anchor}></a>"))

    chart = neighborhood_charts.make_chart(df, c)
    make_charts.show_svg(chart)

    display_vax_by_zipcode(vaccinations, c, COUNTY_FULLY_VAX_PERCENT)

    display(HTML("<br><a href=#top>Return to top</a><br>"))