In [1]:
import pandas as pd
import os
import requests

In [2]:
# Load the raw vaccination records CSV. low_memory=False disables pandas'
# chunked parsing so each column's dtype is inferred from the whole file.
df = pd.read_csv(
    "Resources/COVID19VaccineRecods.csv",
    encoding="utf-8",
    low_memory=False,
)

In [3]:
# Preview the first five rows of the freshly loaded frame.
df.head(n=5)

Unnamed: 0,_id,as_of_date,zip_code_tabulation_area,local_health_jurisdiction,county,vaccine_equity_metric_quartile,vem_source,age12_plus_population,age5_plus_population,tot_population,persons_fully_vaccinated,persons_partially_vaccinated,percent_of_population_fully_vaccinated,percent_of_population_partially_vaccinated,percent_of_population_with_1_plus_dose,up_to_date_count,redacted
0,1,2021-01-12,93618,Tulare,Tulare,1.0,Healthy Places Index Score,24482.3,28588,31470.0,51.0,252.0,0.001621,0.008008,0.009629,0,Information redacted in accordance with CA sta...
1,2,2021-01-12,95437,Mendocino,Mendocino,2.0,Healthy Places Index Score,12595.5,13932,14859.0,66.0,317.0,0.004442,0.021334,0.025776,0,Information redacted in accordance with CA sta...
2,3,2021-01-12,95991,Sutter,Sutter,1.0,Healthy Places Index Score,33300.8,37870,40861.0,114.0,835.0,0.00279,0.020435,0.023225,0,Information redacted in accordance with CA sta...
3,4,2021-01-12,93444,San Luis Obispo,San Luis Obispo,3.0,Healthy Places Index Score,18951.8,20522,21331.0,155.0,441.0,0.007266,0.020674,0.02794,0,Information redacted in accordance with CA sta...
4,5,2021-01-12,95039,Monterey,Monterey,1.0,CDPH-Derived ZCTA Score,860.0,1032,1074.0,,,,,,0,Information redacted in accordance with CA sta...


In [4]:
# Mapping from the raw snake_case column names in the CSV to the
# human-readable labels used from this cell onward.
renamed_columns = {
    '_id': 'ID',
    'as_of_date': 'Date',
    'zip_code_tabulation_area': 'Zip Code',
    'local_health_jurisdiction': 'Health Jurisdiction',
    'county': 'County Name',
    'vaccine_equity_metric_quartile': 'Vaccine Quartile',
    'vem_source': 'Vaccine Source',
    'age12_plus_population': 'Age 12+ Population',
    'age5_plus_population': 'Age 5+ Population',
    'tot_population': 'Total Population',
    'persons_fully_vaccinated': 'Fully Vaccinated',
    'persons_partially_vaccinated': 'Partially Vaccinated',
    'percent_of_population_fully_vaccinated': 'Percent Fully Vaccinated',
    'percent_of_population_partially_vaccinated': 'Percent Partially Vaccinated',
    'percent_of_population_with_1_plus_dose': 'Percent With 1+ Dose',
    'up_to_date_count': 'Up To Date Count',
    'redacted': 'Redacted Info'
}

# Rebind to the renamed copy rather than mutating with inplace=True, so the
# frame object displayed by the earlier preview cell is not altered behind
# the reader's back. Keys that are absent from the frame are ignored by
# rename(), which keeps this cell safe to re-run.
df = df.rename(columns=renamed_columns)

# Display the DataFrame with renamed columns
df.head()


Unnamed: 0,ID,Date,Zip Code,Health Jurisdiction,County Name,Vaccine Quartile,Vaccine Source,Age 12+ Population,Age 5+ Population,Total Population,Fully Vaccinated,Partially Vaccinated,Percent Fully Vaccinated,Percent Partially Vaccinated,Percent With 1+ Dose,Up To Date Count,Redacted Info
0,1,2021-01-12,93618,Tulare,Tulare,1.0,Healthy Places Index Score,24482.3,28588,31470.0,51.0,252.0,0.001621,0.008008,0.009629,0,Information redacted in accordance with CA sta...
1,2,2021-01-12,95437,Mendocino,Mendocino,2.0,Healthy Places Index Score,12595.5,13932,14859.0,66.0,317.0,0.004442,0.021334,0.025776,0,Information redacted in accordance with CA sta...
2,3,2021-01-12,95991,Sutter,Sutter,1.0,Healthy Places Index Score,33300.8,37870,40861.0,114.0,835.0,0.00279,0.020435,0.023225,0,Information redacted in accordance with CA sta...
3,4,2021-01-12,93444,San Luis Obispo,San Luis Obispo,3.0,Healthy Places Index Score,18951.8,20522,21331.0,155.0,441.0,0.007266,0.020674,0.02794,0,Information redacted in accordance with CA sta...
4,5,2021-01-12,95039,Monterey,Monterey,1.0,CDPH-Derived ZCTA Score,860.0,1032,1074.0,,,,,,0,Information redacted in accordance with CA sta...


In [5]:
# Drop the redaction-notice column (the last column of the loaded frame).
# It is removed by label instead of by position: a positional
# df.columns[-1] drop would delete one more real data column every time
# this cell is re-run. Both the renamed and the raw label are listed, and
# errors="ignore" skips whichever one is not present, so the cell is
# idempotent.
df = df.drop(columns=["Redacted Info", "redacted"], errors="ignore")

In [6]:
# Preview the first five rows to confirm the column drop.
df.head(n=5)

Unnamed: 0,ID,Date,Zip Code,Health Jurisdiction,County Name,Vaccine Quartile,Vaccine Source,Age 12+ Population,Age 5+ Population,Total Population,Fully Vaccinated,Partially Vaccinated,Percent Fully Vaccinated,Percent Partially Vaccinated,Percent With 1+ Dose,Up To Date Count
0,1,2021-01-12,93618,Tulare,Tulare,1.0,Healthy Places Index Score,24482.3,28588,31470.0,51.0,252.0,0.001621,0.008008,0.009629,0
1,2,2021-01-12,95437,Mendocino,Mendocino,2.0,Healthy Places Index Score,12595.5,13932,14859.0,66.0,317.0,0.004442,0.021334,0.025776,0
2,3,2021-01-12,95991,Sutter,Sutter,1.0,Healthy Places Index Score,33300.8,37870,40861.0,114.0,835.0,0.00279,0.020435,0.023225,0
3,4,2021-01-12,93444,San Luis Obispo,San Luis Obispo,3.0,Healthy Places Index Score,18951.8,20522,21331.0,155.0,441.0,0.007266,0.020674,0.02794,0
4,5,2021-01-12,95039,Monterey,Monterey,1.0,CDPH-Derived ZCTA Score,860.0,1032,1074.0,,,,,,0


In [7]:
# HPI file: fetch the 2022 `hpi2score` indicator by ZIP code from the
# Healthy Places Index API and load the JSON payload into a DataFrame.
#
# The API key is read from the HPI_API_KEY environment variable instead of
# being hardcoded in the notebook, so it is not committed or shared with the
# file. NOTE(review): the key that was previously embedded here should be
# treated as exposed and rotated.
hpi_api_key = os.environ.get("HPI_API_KEY")
if not hpi_api_key:
    raise RuntimeError(
        "HPI_API_KEY is not set; export your Healthy Places Index API key "
        "in the environment before running this cell."
    )

# `url` is now the bare endpoint; the query string is supplied via `params`
# so requests handles the encoding.
url = "https://api.healthyplacesindex.org/api/hpi"
hpi_params = {
    "geography": "zips",
    "year": "2022",
    "indicator": "hpi2score",
    "format": "json",
    "key": hpi_api_key,
}

# A timeout keeps the kernel from hanging indefinitely on a stalled
# connection; raise_for_status() surfaces HTTP errors (bad key, rate limit)
# here rather than as a confusing failure while parsing an error body.
response = requests.get(url, params=hpi_params, timeout=30)
response.raise_for_status()
data = response.json()
hpi_df = pd.DataFrame(data)
hpi_df

Unnamed: 0,geoid,name,population,value,percentile,numerator,denominator
0,94601,94601,53039,-0.380256,0.246002,,
1,94501,94501,63821,0.584656,0.860625,,
2,94560,94560,47171,0.389073,0.740289,,
3,94587,94587,74722,0.448256,0.783701,,
4,94580,94580,30488,0.246554,0.654227,,
...,...,...,...,...,...,...,...
1308,93274,93274,74000,-0.566216,0.150038,,
1309,95370,95370,28096,0.040532,0.525514,,
1310,95372,95372,2056,-0.000992,0.501904,,
1311,95627,95627,3802,-0.119862,0.418126,,


In [8]:
# Mapping from the HPI API field names to human-readable column labels.
# NOTE(review): the vaccine frame labels its ZIP column 'Zip Code' (singular)
# while this uses 'Zip Codes' — confirm which label any later merge expects
# before unifying them.
column_name_mapping = {
    'geoid': 'Zip Codes',
    'name': 'Location Name',
    'population': 'Population',
    'value': 'HPI Value',
    'percentile': 'HPI Percentile',
    'numerator': 'Numerator',
    'denominator': 'Denominator'
}

# Rename the columns using the dictionary. Rebinding (rather than
# inplace=True) avoids mutating the frame object an earlier cell already
# displayed; absent keys are ignored, so the cell is safe to re-run.
hpi_df = hpi_df.rename(columns=column_name_mapping)

In [9]:
# Preview the first five rows of the HPI frame with its renamed columns.
hpi_df.head(n=5)

Unnamed: 0,Zip Codes,Location Name,Population,HPI Value,HPI Percentile,Numerator,Denominator
0,94601,94601,53039,-0.380256,0.246002,,
1,94501,94501,63821,0.584656,0.860625,,
2,94560,94560,47171,0.389073,0.740289,,
3,94587,94587,74722,0.448256,0.783701,,
4,94580,94580,30488,0.246554,0.654227,,
