In [1]:
import pandas as pd
import plotly.express as px

In [2]:
# County names classified as Republican-majority (from earlier analysis).
rep_counties = [
    "Amador", "Calaveras", "Colusa", "Del Norte", "El Dorado", "Glenn",
    "Kern", "Kings", "Lassen", "Madera", "Mariposa", "Modoc",
    "Placer", "Plumas", "Shasta", "Sierra", "Siskiyou", "Sutter",
    "Tehama", "Trinity", "Tulare", "Tuolumne", "Yuba",
]

In [3]:
# County names classified as Democrat-majority (from earlier analysis).
dem_counties = [
    "Alameda", "Alpine", "Butte", "Contra Costa", "Fresno",
    "Humboldt", "Imperial", "Inyo", "Lake", "Los Angeles",
    "Marin", "Mendocino", "Merced", "Mono", "Monterey",
    "Napa", "Nevada", "Orange", "Riverside", "Sacramento",
    "San Benito", "San Bernardino", "San Diego", "San Francisco", "San Joaquin",
    "San Luis Obispo", "San Mateo", "Santa Barbara", "Santa Clara", "Santa Cruz",
    "Solano", "Sonoma", "Stanislaus", "Ventura", "Yolo",
]

In [4]:
def political_majority(area):
    """Label a county by the party list it appears in.

    `area` is looked up in the notebook-level `dem_counties` list first,
    then in `rep_counties`.

    Returns:
        "Democrat" or "Republican", or None when `area` is in neither list.
    """
    if area in dem_counties:
        return "Democrat"
    if area in rep_counties:
        return "Republican"
    # Not present in either list.
    return None

In [5]:
#fig.write_html("vaccine_rates_by_political.html")

# Cases By County

In [6]:
# All county names from both party lists (Democrat list first), as a
# one-column frame so it can be joined against the case data on "area".
CA_counties = [*dem_counties, *rep_counties]
county = pd.DataFrame({"area": CA_counties})

In [7]:
# Load the case table, keeping only the columns used in this analysis.
cases = pd.read_csv("covid_cases_county.csv")[["date","area","population","cumulative_cases","cumulative_deaths"]]

# Parse the date column. A `.dropna()` on the parsed Series would have no effect
# here: assigning back to the column realigns on the index and reinserts NaT for
# any dropped row. Rows without a valid date are excluded by the equality filter
# on the next line instead.
cases["date"] = pd.to_datetime(cases["date"])
latest_cases = cases[cases["date"] == "2022-02-22 00:00:00"]

# Inner join keeps only areas listed in `county`; "area" is then renamed to
# "county" so the result can be merged with the vaccine data later.
CA_cases_only = pd.merge(latest_cases, county, on="area").rename(columns={"area": "county"})

In [8]:
# Load the vaccination table and keep only the relevant columns.
vax_columns = ["county", "administered_date", "cumulative_fully_vaccinated"]
vax_counties = pd.read_csv("vax_counties.csv")[vax_columns]

In [9]:
# Convert administered_date to datetime so it can be compared against a calendar date.
vax_counties = vax_counties.assign(
    administered_date=pd.to_datetime(vax_counties["administered_date"])
)

In [10]:
# Vaccination rows for the same date used for the case data. Rows with any
# missing value are dropped, and the date column is discarded because it is
# no longer relevant once the filter has been applied.
on_snapshot_date = vax_counties["administered_date"] == "2022-02-22"
latest_vax_counties = vax_counties.loc[on_snapshot_date].dropna()[
    ["county", "cumulative_fully_vaccinated"]
]

# Merge cases with vaccine data by county, then add:
#   - case_prop / death_prop / vax_prop: cases, deaths and fully vaccinated
#     counts, each divided by the county population
#   - majority: label assigned by political_majority()
CA_vax_cases = CA_cases_only.merge(latest_vax_counties, on="county").assign(
    case_prop=lambda df: df["cumulative_cases"] / df["population"],
    death_prop=lambda df: df["cumulative_deaths"] / df["population"],
    vax_prop=lambda df: df["cumulative_fully_vaccinated"] / df["population"],
    majority=lambda df: df["county"].apply(political_majority),
)

CA_vax_cases

Unnamed: 0,date,county,population,cumulative_cases,cumulative_deaths,cumulative_fully_vaccinated,case_prop,death_prop,vax_prop,majority
0,2022-02-22,Alameda,1685886.0,250792.0,1751.0,1344161,0.14876,0.001039,0.797302,Democrat
1,2022-02-22,Alpine,1117.0,125.0,0.0,717,0.111907,0.0,0.641898,Democrat
2,2022-02-22,Amador,38531.0,8311.0,83.0,20602,0.215696,0.002154,0.534686,Republican
3,2022-02-22,Butte,217769.0,32523.0,371.0,114774,0.149346,0.001704,0.527045,Democrat
4,2022-02-22,Calaveras,44289.0,7198.0,115.0,24108,0.162523,0.002597,0.544334,Republican
5,2022-02-22,Colusa,22593.0,3421.0,21.0,12746,0.151419,0.000929,0.564157,Republican
6,2022-02-22,Contra Costa,1160099.0,185073.0,1231.0,936053,0.159532,0.001061,0.806873,Democrat
7,2022-02-22,Del Norte,27558.0,5671.0,41.0,12973,0.205784,0.001488,0.470753,Republican
8,2022-02-22,El Dorado,193098.0,26377.0,193.0,116107,0.136599,0.000999,0.601285,Republican
9,2022-02-22,Fresno,1032227.0,226048.0,2635.0,597328,0.218991,0.002553,0.578679,Democrat


In [22]:
# Box plot of the fully-vaccinated proportion per county, one box per political
# majority, with every county also drawn as an individual point.
fig = px.box(
    CA_vax_cases,
    y="vax_prop",
    color="majority",
    points="all",
    hover_data=["county"],
    title="Proportion Fully Vaccinated by Political Majority of Counties in CA",
    labels={"vax_prop": "Proportion Fully Vaccinated",
            "majority": "Majority"},
)

fig.update_traces(
    boxpoints="all",
    jitter=0.2,
    # vax_prop is a ratio (fully vaccinated / population), so it is formatted as
    # a percentage to agree with the "Percent" label. Columns passed through
    # hover_data are exposed as the customdata array, hence the [0] index.
    hovertemplate="Percent Fully Vaccinated: %{y:.1%} <br>County Name: %{customdata[0]}",
)

fig.show()

In [12]:
# Scatter of fully-vaccinated proportion (x) against death proportion (y),
# one point per county, coloured by political majority.
vax_death_fig = px.scatter(
    CA_vax_cases,
    x="vax_prop",
    y="death_prop",
    color="majority",
    title="Scatter Plot of Proportions of Vaccines vs Proportions of Deaths by County",
    hover_data=["county"],
    labels={"vax_prop": "Proportion Fully Vaccinated",
            "death_prop": "Proportion of Deaths",
            "majority": "Political Majority of Area"},
)

vax_death_fig.update_traces(
    # vax_prop is a ratio, so it is formatted as a percentage to agree with the
    # "Percent" label. Columns passed through hover_data are exposed as the
    # customdata array, hence the [0] index.
    hovertemplate=(
        "Percent Fully Vaccinated: %{x:.1%} <br> "
        "Proportion of Deaths: %{y} <br> "
        "County: %{customdata[0]}"
    )
)

vax_death_fig.show()

In [13]:
#vax_death_fig.write_html("vaccine_vs_death_scatter.html")

# Miscellaneous Data - Zip Code Level

We decided not to use this data because the variance at the zip code level made it hard to interpret, and we could not match the number of deaths by zip code to our other data. We also did not find much information about how the vaccine equity metrics were measured, even though they were included in our dataset.

In [14]:
# Load the zip-code-level vaccination table.
big_table = pd.read_csv(filepath_or_buffer="vax_by_zip.csv")

In [15]:
# Check the Python type of the first as_of_date value as loaded from the CSV.
type(big_table.at[0, "as_of_date"])

str

In [16]:
# Convert as_of_date to datetime format.
big_table = big_table.assign(as_of_date=pd.to_datetime(big_table["as_of_date"]))

In [17]:
# Latest date present in the table as of when it was acquired.
big_table.loc[:, "as_of_date"].max()

Timestamp('2022-02-22 00:00:00')

In [18]:
# Keep only the rows for the latest date, then drop rows with any missing value.
is_latest = big_table["as_of_date"] == "2022-02-22 00:00:00"
latest = big_table.loc[is_latest].dropna()
latest.head()

Unnamed: 0,as_of_date,zip_code_tabulation_area,local_health_jurisdiction,county,vaccine_equity_metric_quartile,vem_source,age12_plus_population,age5_plus_population,persons_fully_vaccinated,persons_partially_vaccinated,percent_of_population_fully_vaccinated,percent_of_population_partially_vaccinated,percent_of_population_with_1_plus_dose,booster_recip_count,redacted
104076,2022-02-22,94582,Contra Costa,Contra Costa,4.0,Healthy Places Index Score,34809.5,40433,42744.0,2755.0,1.0,0.068137,1.0,27798.0,No
104077,2022-02-22,92108,San Diego,San Diego,3.0,Healthy Places Index Score,20384.0,21182,18074.0,7012.0,0.853272,0.331036,1.0,7087.0,No
104079,2022-02-22,93461,San Luis Obispo,San Luis Obispo,3.0,Healthy Places Index Score,1202.9,1289,731.0,123.0,0.567106,0.095423,0.662529,251.0,No
104080,2022-02-22,96121,Lassen,Lassen,2.0,CDPH-Derived ZCTA Score,309.0,319,135.0,15.0,0.423197,0.047022,0.470219,62.0,No
104081,2022-02-22,92277,San Bernardino,San Bernardino,1.0,Healthy Places Index Score,17633.7,20363,7165.0,1149.0,0.351864,0.056426,0.40829,2621.0,No


In [19]:
# Box plot of the fully-vaccinated share per zip code tabulation area, one box
# per vaccine equity metric quartile, with every area also drawn as a point.
fig = px.box(
    latest,
    y="percent_of_population_fully_vaccinated",
    color="vaccine_equity_metric_quartile",
    points="all",
    hover_data=["zip_code_tabulation_area"],
    title="Proportion Fully Vaccinated by Vaccine Equity Metric Quartiles",
    labels={"percent_of_population_fully_vaccinated": "Proportion Fully Vaccinated",
            "vaccine_equity_metric_quartile": "Vaccine Equity Metric Quartiles"},
)

fig.update_traces(
    boxpoints="all",
    jitter=0.2,
    # The sample rows displayed for `latest` show this column as a fraction
    # (e.g. 0.853272), so it is formatted as a percentage to agree with the
    # "Percent" label. Columns passed through hover_data are exposed as the
    # customdata array, hence the [0] index.
    hovertemplate="Percent Fully Vaccinated: %{y:.1%} <br>Zipcode: %{customdata[0]}",
)

fig.show()