# LA County coronavirus vaccine administration by city/community

### Load Python tools

In [1]:
%load_ext lab_black

In [2]:
import pandas as pd
import geopandas as gpd
import altair as alt
import altair_latimes as lat
import glob
import path
import os

In [3]:
alt.themes.register("latimes", lat.theme)
alt.themes.enable("latimes")
pd.options.display.max_columns = 50
pd.options.display.max_rows = 1000
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

### Create a timeseries from coronavirus-tracker data

In [4]:
path = "/Users/mhustiles/data/github/coronavirus-tracker/_notebooks/data/raw/vaccines/los-angeles/"
all_files = glob.glob(os.path.join(path, "*.csv"))

In [5]:
df_from_each_file = (
    pd.read_csv(f, encoding="ISO-8859-1", low_memory=False) for f in all_files
)
concatenated_df = pd.concat(df_from_each_file, ignore_index=True)

In [6]:
df = concatenated_df.copy()

In [7]:
population = df[df["update_date"] == df["update_date"].max()][["area", "pop_2019"]]

In [8]:
population.head()

Unnamed: 0,area,pop_2019
4176,City of Agoura Hills,18019
4177,City of Alhambra,72940
4178,City of Arcadia,51103
4179,City of Artesia,14266
4180,City of Avalon,3077


### Strip out the strings placed in columns by the county

In [9]:
df = df[
    (df.vaccinations.str.strip() != "Unreliable Data")
    & (df.pop_2019.str.strip() != "No Pop Data")
    & (df.vaccinations_pct.str.strip() != "Unreliable Data")
    & (df.vaccinations_pct.str.strip() != "No Pop Data")
    & (df.vaccinations.str.strip() != "<5")
]

### Clean up places names

In [10]:
df.head()

Unnamed: 0,area,vaccinations,pop_2019,vaccinations_pct,update_date
0,City of Agoura Hills,5008,17447,28.7,2021-03-25
1,City of Alhambra,18947,71313,26.6,2021-03-25
2,City of Arcadia,13777,49666,27.7,2021-03-25
3,City of Artesia,3571,13877,25.7,2021-03-25
4,City of Avalon,317,3000,10.6,2021-03-25


In [11]:
df.area = df.area.str.replace("City of", "", regex=False)

In [12]:
df.head()

Unnamed: 0,area,vaccinations,pop_2019,vaccinations_pct,update_date
0,Agoura Hills,5008,17447,28.7,2021-03-25
1,Alhambra,18947,71313,26.6,2021-03-25
2,Arcadia,13777,49666,27.7,2021-03-25
3,Artesia,3571,13877,25.7,2021-03-25
4,Avalon,317,3000,10.6,2021-03-25


### Merge with population figures/rates from vaccine disparity project

In [13]:
disparity = pd.read_csv("../../vaccine-disparity/_workspace/neighborhoods_grouped.csv")

In [14]:
disparity_clean = disparity[
    [
        "name",
        "total_pop",
        "totalpop_17andunder",
        "totalpop_18to64",
        "totalpop_65andup",
        "totalpop_17andunder_percent",
        "totalpop_65andup_percent",
        "totalpop_65andup_percent",
        "majority_white",
        "majority_hispanic",
        "median_householdincome",
    ]
]

In [15]:
df_pop_merge = pd.merge(
    disparity_clean, df, left_on="name", right_on="area", indicator=True
)

In [16]:
df_pop_merge._merge.value_counts()

both          11924
right_only        0
left_only         0
Name: _merge, dtype: int64

In [17]:
df["week"] = pd.to_datetime(df["update_date"]).dt.week

In [18]:
df.week.value_counts()

13    2352
12    2352
11    2352
10    2342
14    2340
9     2338
8     1670
15     332
Name: week, dtype: int64

In [19]:
df.sort_values("update_date", ascending=False).head(10)

Unnamed: 0,area,vaccinations,pop_2019,vaccinations_pct,update_date,week
4259,West Covina,38583,88873,43.4,2021-04-12,15
4406,Unincorporated - Angeles National Forest,296,1096,27.0,2021-04-12,15
4404,Unincorporated - Altadena,17205,36812,46.7,2021-04-12,15
4403,Unincorporated - Agua Dulce,958,3736,25.6,2021-04-12,15
4402,Unincorporated - Acton,1652,6961,23.7,2021-04-12,15
4401,Los Angeles - Woodland Hills,27387,57284,47.8,2021-04-12,15
4400,Los Angeles - Winnetka,15821,41471,38.1,2021-04-12,15
4399,Los Angeles - Wilshire Center,13370,40599,32.9,2021-04-12,15
4398,Los Angeles - Wilmington,11236,42449,26.5,2021-04-12,15
4397,Los Angeles - Wholesale District,12332,32528,37.9,2021-04-12,15


### Clean up data types

In [20]:
df["vaccinations"] = df["vaccinations"].astype(float)

In [21]:
df["pop_2019"] = df["pop_2019"].astype(float)

In [22]:
df["vaccinations_pct"] = ((df["vaccinations"] / df["pop_2019"]) * 100).round(2)

### Which week of the year? 

In [23]:
df["week"] = pd.to_datetime(df["update_date"]).dt.week

In [24]:
df["weekday"] = pd.to_datetime(df["update_date"]).dt.day_name()

### Just get the updates each Monday

In [25]:
df = df[df["weekday"] == "Monday"]

In [26]:
df.sort_values("week", ascending=False).head()

Unnamed: 0,area,vaccinations,pop_2019,vaccinations_pct,update_date,week,weekday
4345,Los Angeles - Palms,14793.0,36835.0,40.16,2021-04-12,15,Monday
4282,Los Angeles - Chatsworth,14390.0,31934.0,45.06,2021-04-12,15,Monday
4284,Los Angeles - Chinatown,2707.0,7441.0,36.38,2021-04-12,15,Monday
4285,Los Angeles - Cloverdale/Cochran,3649.0,11658.0,31.3,2021-04-12,15,Monday
4286,Los Angeles - Country Club Park,5057.0,12607.0,40.11,2021-04-12,15,Monday


### Widen the data for weekly average columns

In [27]:
df_pivot = (
    pd.pivot_table(
        df,
        values="vaccinations_pct",
        index="area",
        columns="week",
        aggfunc="mean",
    )
    .round(2)
    .reset_index()
)

In [28]:
df_pivot.head()

week,area,9,10,11,12,13,14,15
0,Agoura Hills,21.82,21.82,28.7,28.7,38.91,38.91,47.18
1,Alhambra,18.68,18.68,26.57,26.57,37.35,37.35,45.1
2,Arcadia,20.9,20.9,27.74,27.74,37.66,37.66,46.08
3,Artesia,20.83,20.83,25.73,25.73,35.27,35.27,42.27
4,Avalon,10.0,10.0,10.57,10.57,10.27,10.27,10.43


In [29]:
df_pivot.columns = ["pct_week_" + str(col) for col in df_pivot.columns]

In [30]:
df_pivot.rename(
    columns={"pct_week_area": "name", "week_pop_2019": "population"}, inplace=True
)

In [31]:
df_pivot.head()

Unnamed: 0,name,pct_week_9,pct_week_10,pct_week_11,pct_week_12,pct_week_13,pct_week_14,pct_week_15
0,Agoura Hills,21.82,21.82,28.7,28.7,38.91,38.91,47.18
1,Alhambra,18.68,18.68,26.57,26.57,37.35,37.35,45.1
2,Arcadia,20.9,20.9,27.74,27.74,37.66,37.66,46.08
3,Artesia,20.83,20.83,25.73,25.73,35.27,35.27,42.27
4,Avalon,10.0,10.0,10.57,10.57,10.27,10.27,10.43


### Merge with population figures/rates from vaccine disparity project

In [32]:
disparity = pd.read_csv("../../vaccine-disparity/_workspace/neighborhoods_grouped.csv")

In [33]:
disparity.head()

Unnamed: 0.1,Unnamed: 0,id,name,tract,tract_percent,Unnamed: 0_x,total_white,white_17andunder,white_18to64,white_65andup,total_hispanic,hispanic_17andunder,hispanic_18to64,hispanic_65andup,total_black,black_17andunder,black_18to64,black_65andup,total_native_american,native_american_17andunder,native_american_18to64,native_american_65andup,total_other,other_17andunder,other_18to64,...,total_asian_percent,asian_17andunder_percent,asian_18to64_percent,asian_65andup_percent,total_pacificislander_percent,pacificislander_17andunder_percent,pacificislander_18to64_percent,pacificislander_65andup_percent,nonwhite_percentage,totalpop_17andunder_percent,totalpop_18to64_percent,totalpop_65andup_percent,healthcare_workers_percent,majority_nonwhite,majority_black,majority_asian,majority_white,majority_american_indian_and_alaska_native,majority_hispanic,majority_18to64,majority_65andup,Unnamed: 0_y,neighborhood,median_householdincome,majorityhigherthanmedianincomeLA
0,0,1,Agoura Hills,56022.97,3.015807,15128,14294.386712,2565.000554,9240.747705,2488.638453,2060.384429,637.837458,1224.50195,2.1e-05,475.768824,167.211468,235.017585,73.539771,29.80088,16.466653,13.334227,0.0,1360.190418,599.562387,692.68505,...,0.0841,0.076485,0.085944,0.084423,0.0,0.0,0.0,0.0,0.256824,0.215044,0.630619,0.159371,0.058344,True,False,False,True,False,False,True,False,0.0,Agoura Hills,121292.2827,True
1,1,2,Alhambra,161868.09,20.001823,47335,7500.64187,595.828265,4755.725927,2149.087677,30438.112581,7119.58294,19780.332201,1.5e-05,1892.355281,466.04967,1181.319618,244.985992,400.171514,26.149556,248.238252,125.783706,19460.555741,5174.416696,12865.429555,...,0.511584,0.393858,0.517814,0.598009,0.002586,0.005788,0.001722,0.002694,0.911412,0.169381,0.651545,0.179751,0.098254,False,False,True,False,False,False,True,False,1.0,Alhambra,61897.39551,False
2,2,3,Arcadia,126258.98,12.228262,37169,13381.530815,2115.148625,6685.762725,4580.619465,8147.091902,2441.806335,4856.81538,6e-06,990.881657,334.528273,525.148452,131.204932,70.730779,0.017364,24.079301,46.634115,5085.192956,1626.462948,3048.032363,...,0.60552,0.596204,0.642072,0.495743,0.001375,0.000157,0.001546,0.002277,0.783141,0.222551,0.592985,0.185774,0.103864,False,False,True,False,False,False,True,False,2.0,Arcadia,92543.61666,True
3,3,4,Artesia,55463.91,3.000606,18036,2543.198368,249.844904,1424.499924,868.85354,6684.617774,1596.709299,4444.111268,9.7e-05,852.042661,301.822462,497.79144,52.42876,59.913886,0.003356,0.049661,59.860869,4909.63036,1361.455844,3131.351644,...,0.367121,0.321175,0.387681,0.341434,0.002986,2.4e-05,0.001575,0.01278,0.848194,0.201502,0.644574,0.153806,0.181542,False,False,False,False,False,False,True,False,3.0,Artesia,67486.31903,False
4,4,5,Avalon,11981.0,0.830664,3881,1298.646327,95.240319,830.798525,372.607483,1704.840285,714.150321,926.143353,0.000333,3.354092,0.0,3.354092,0.0,0.0,0.0,0.0,0.0,583.530943,294.597351,263.280451,...,0.000558,0.0,2.7e-05,0.003757,0.000324,0.0,0.000558,0.0,0.574229,0.265365,0.580198,0.14441,0.067722,False,False,False,False,False,True,True,False,4.0,Avalon,73854.46872,True


In [34]:
disparity_clean = disparity[
    [
        "name",
        "total_pop",
        #         "totalpop_17andunder_percent",
        #         "totalpop_18to64",
        #         "totalpop_65andup",
        "totalpop_17andunder_percent",
        "totalpop_65andup_percent",
        "nonwhite_percentage",
        "majority_white",
        "majority_hispanic",
        "median_householdincome",
    ]
]

In [35]:
df_merge = pd.merge(df_pivot, disparity_clean, on="name")

In [37]:
df_merge["change_week9_to_15"] = (
    ((df_merge["pct_week_15"] - df_merge["pct_week_9"]) / df_merge["pct_week_9"]) * 100
).round()

In [None]:
df_merge_slim = df_merge[
    [
        "name",
        "pct_week_8",
        "pct_week_15",
        "total_pop",
        "totalpop_17andunder_percent",
        "totalpop_65andup_percent",
        "nonwhite_percentage",
        "majority_white",
        "majority_hispanic",
        "median_householdincome",
        "change_week8_to_15",
    ]
]

In [None]:
df_merge_slim.sort_values("change_week8_to_15", ascending=False).tail(10)

In [None]:
larger_places = df_merge_slim[df_merge_slim["total_pop"] > 5000]

### Which places with populations of more than 5,000 saw the largest pct increase from week 8 to week 15?

In [None]:
larger_places.sort_values("change_week8_to_15", ascending=False).head(10)

### Mean income and older population share in places with largest pct increases

In [None]:
round(
    larger_places.sort_values("change_week8_to_15", ascending=False)
    .head(10)["median_householdincome"]
    .mean()
)

In [None]:
round(
    larger_places.sort_values("change_week8_to_15", ascending=False)
    .head(10)["totalpop_65andup_percent"]
    .mean()
    * 100,
    2,
)

### Which places with populations of more than 5,000 saw the smallest pct increase from week 8 to week 14?

In [None]:
larger_places.sort_values("change_week8_to_15", ascending=False).tail(10)

### Mean income and older population share in places with smalles pct increases

In [None]:
round(
    larger_places.sort_values("change_week8_to_15", ascending=False)
    .tail(10)["median_householdincome"]
    .mean()
)

In [None]:
round(
    larger_places.sort_values("change_week8_to_15", ascending=False)
    .tail(10)["totalpop_65andup_percent"]
    .mean()
    * 100,
    2,
)

### Which places have the highest vaccination coverage right now? 

In [None]:
larger_places[["name", "pct_week_15"]].sort_values("pct_week_15", ascending=False).head(
    10
)

### What's the relationship between these variables and vaccination rate increases?

In [None]:
df_corr = larger_places[
    [
        "name",
        "pct_week_15",
        "change_week8_to_15",
        "nonwhite_percentage",
        "totalpop_65andup_percent",
        "median_householdincome",
    ]
]

In [None]:
df_corr.corr(method="pearson")

---

### Merge with geography

In [None]:
places_geo = gpd.read_file("../../vaccine-disparity/assets/lac-areas.json")

In [None]:
larger_places.head()

---

### Export to csv

In [None]:
import datetime as dt

today = dt.datetime.today().strftime("%m-%d-%Y")

In [None]:
concatenated_df.to_csv(
    "output/vaccine-community/lac_vax_by_place_" + today + ".csv",
    index=False,
)

In [None]:
df_pivot.to_csv(
    "output/vaccine-community/lac_vax_by_place_weeks_" + today + ".csv",
    index=False,
)

In [None]:
larger_places.sort_values("change_week8_to_15", ascending=False).head(10).to_csv(
    "output/vaccine-community/largest_vax_coverage_change" + today + ".csv",
    index=False,
)

In [None]:
larger_places.sort_values("change_week8_to_15", ascending=False).tail(10).to_csv(
    "output/vaccine-community/smallest_vax_coverage_change" + today + ".csv",
    index=False,
)