# Add LA County neighborhood data
* Countywide statistical areas

In [7]:
import altair as alt
import pandas as pd

In [22]:
# Raw CSV of LA County COVID-19 counts, served from the ANRGUSC GitHub repository.
URL = "https://raw.githubusercontent.com/ANRGUSC/lacounty_covid19_data/master/data/Covid-19.csv"

# Load the unprocessed table directly from the remote file.
df = pd.read_csv(filepath_or_buffer=URL)

In [50]:
def clean_data(df):
    """Tidy the raw case table and attach derived per-region and per-date columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns "Time Stamp", "Number of cases" and "Region".
        Any other columns are carried through unchanged.

    Returns
    -------
    pandas.DataFrame
        One row per input row, ordered by date and then Region, where:

        * "Time Stamp" is replaced by ``date`` (parsed with ``pd.to_datetime``);
        * "Number of cases" is replaced by ``cases`` (missing values become 0,
          then cast to int);
        * ``new_cases`` is the change in ``cases`` from the previous date
          within the same Region (NaN for each Region's first row);
        * ``ptile25`` / ``ptile50`` / ``ptile75`` are the quartiles of
          ``cases`` across all rows sharing the same date.
    """
    timestamp_col = "Time Stamp"
    count_col = "Number of cases"

    parsed_dates = pd.to_datetime(df[timestamp_col])
    case_counts = df[count_col].fillna(0).astype(int)

    tidy = df.assign(date=parsed_dates, cases=case_counts)
    tidy = tidy.drop(columns=[timestamp_col, count_col])
    tidy = tidy.sort_values(["date", "Region"]).reset_index(drop=True)

    # Difference consecutive dates inside each Region. The result keeps the
    # index labels of `tidy`, so assigning it back aligns row-for-row even
    # though it was computed in Region/date order.
    region_ordered = tidy.sort_values(["Region", "date"])
    day_over_day = region_ordered.groupby(["Region"])["cases"].diff(periods=1)
    tidy = tidy.assign(new_cases=day_over_day)

    # Per-date distribution of case counts; keep only the quartile columns.
    ptile_names = {"25%": "ptile25", "50%": "ptile50", "75%": "ptile75"}
    per_date_stats = tidy.groupby("date")["cases"].describe()
    quartiles = (
        per_date_stats[list(ptile_names)]
        .rename(columns=ptile_names)
        .reset_index()
    )

    # Each date appears once in `quartiles`; validate guards that assumption.
    return pd.merge(tidy, quartiles, on="date", how="left", validate="m:1")

In [51]:
# Re-download the raw table and run it through clean_data in one step.
df = clean_data(pd.read_csv(URL))

In [52]:
# Preview the first five rows of the cleaned table.
df.head(n=5)

Unnamed: 0,Region,Latitude,Longitude,date,cases,new_cases,ptile25,ptile75
0,Alhambra,34.093042,-118.12706,2020-03-16,2,,1.0,2.0
1,Arcadia,34.136208,-118.04015,2020-03-16,1,,1.0,2.0
2,Beverly Hills,34.06965,-118.396306,2020-03-16,1,,1.0,2.0
3,Boyle Heights,34.043689,-118.209768,2020-03-16,5,,1.0,2.0
4,Carson,33.832204,-118.251755,2020-03-16,1,,1.0,2.0


In [54]:
# Show every row whose Region is exactly "Hacienda Heights".
df.loc[df["Region"] == "Hacienda Heights"]

Unnamed: 0,Region,Latitude,Longitude,date,cases,new_cases,ptile25,ptile75
878,Hacienda Heights,33.993068,-117.968676,2020-03-26,2,,2.00,9.00
1076,Hacienda Heights,33.993068,-117.968676,2020-03-27,2,0.0,0.00,5.00
1402,Hacienda Heights,33.993068,-117.968676,2020-03-28,2,0.0,0.00,5.00
1748,Hacienda Heights,33.993068,-117.968676,2020-03-29,2,0.0,0.00,6.00
2094,Hacienda Heights,33.993068,-117.968676,2020-03-30,2,0.0,0.00,8.00
...,...,...,...,...,...,...,...,...
32573,Hacienda Heights,33.993068,-117.968676,2020-06-28,324,17.0,19.25,325.75
32911,Hacienda Heights,33.993068,-117.968676,2020-06-29,337,13.0,20.00,336.75
33249,Hacienda Heights,33.993068,-117.968676,2020-06-30,353,16.0,20.00,348.50
33587,Hacienda Heights,33.993068,-117.968676,2020-07-01,363,10.0,20.00,352.00


In [4]:
# Keep rows whose Region contains "Hacienda Heights" (pattern match, not equality).
df2 = df.loc[df["Region"].str.contains("Hacienda Heights")]

# Line chart of cases over date for the selected rows, drawn in a fixed navy colour.
alt.Chart(df2).mark_line().encode(
    x=alt.X("date"),
    y=alt.Y("cases"),
    color=alt.value("navy"),
)