In [1]:
# import package requirements
import pandas as pd

### COVID-19 Vaccination Demographics (USA,National)
<p>https://data.cdc.gov/Vaccinations/COVID-19-Vaccination-Demographics-in-the-United-St/km4m-vcsb<p>

In [154]:
# import dataset via cdc api
dfv = pd.read_csv('https://data.cdc.gov/resource/km4m-vcsb.csv?$limit=50000')

In [155]:
# filter dataset to only show: Ages_25-64_yrs
age_list = ['Ages_25-39_yrs', 'Ages_40-49_yrs', 'Ages_50-64_yrs']
dfv = dfv[dfv['demographic_category'].isin(age_list)].reset_index(drop=True)

In [156]:
# change column to datetime
dfv['date'] = pd.to_datetime(dfv['date'])

# maintain original date column
dfv['raw_date'] = dfv['date']

In [157]:
# get month_year column
dfv['year'] = pd.DatetimeIndex(dfv['date']).year.tolist() # extract year
dfv['month'] = pd.DatetimeIndex(dfv['date']).month.tolist() # extract month
dfv['week'] = pd.DatetimeIndex(dfv['date']).isocalendar().week.reset_index()['week'].tolist() # extract month

dfv['dateByMonth'] =  dfv['month'].astype(str) + '/' +  dfv['year'].astype(str)

dfv['date'] = dfv['year'].astype(str) + ' week ' + dfv['week'].astype(str)

In [158]:
# percent of administered who are boosted
dfv['percent_boosted'] = (dfv['booster_doses_yes']/dfv['administered_dose1'])

# change decimal place
dfv['percent_boosted'] = round(dfv['percent_boosted'], 3)

# create list of columns to keep
cols_to_keep = ['date', 'administered_dose1', 'booster_doses_yes', 'percent_boosted', 'raw_date']
dfv = dfv[cols_to_keep]

In [159]:
dfv.shape

(1455, 5)

In [160]:
# preview dataset
dfv.head()

Unnamed: 0,date,administered_dose1,booster_doses_yes,percent_boosted,raw_date
0,2022 week 15,35194201,13084173,0.372,2022-04-11
1,2022 week 15,58979500,26946110,0.457,2022-04-11
2,2022 week 15,54459825,16060122,0.295,2022-04-11
3,2022 week 14,58979500,26946110,0.457,2022-04-10
4,2022 week 14,54459825,16060122,0.295,2022-04-10


In [170]:
dfv.groupby('date')['administered_dose1'].sum().reset_index()

Unnamed: 0,date,administered_dose1
0,2020 week 50,15830
1,2020 week 51,3493372
2,2020 week 52,15122214
3,2020 week 53,15194656
4,2021 week 1,45846996
...,...,...
68,2022 week 52,285934017
69,2022 week 6,1028913969
70,2022 week 7,1031217220
71,2022 week 8,1033143536


In [166]:
import plotly.express as px

fig = px.line(dfv, x="date", y="administered_dose1")
fig.update_layout(title_text="USA Administered Dose")
fig.show()

### All-Cause Mortality United States: 25-65 years (USA)
<p>https://www.usmortality.com/#unitedstates<p>

In [141]:
# import dataset via csv file
df1 = pd.read_csv('data/usmortality_com-United States_25_44.csv')
df2 = pd.read_csv('data/usmortality_com-United States_45_64.csv')

In [148]:
dfm = pd.concat([df1, df2]).reset_index(drop=True)

In [150]:
dfm.shape

(234, 12)

In [149]:
dfm.head()

Unnamed: 0,date,state,baseline,normalLower,normalUpper,treshold,deaths,zscore,deathsReported,deathsCovid,deathsNonCovid,dateByMonth
0,2020 week 1,United States,2785,2413,3157,3529,3034,1.33871,,0,0,1/2020
1,2020 week 2,United States,2689,2337,3041,3393,2920,1.3125,,0,0,1/2020
2,2020 week 3,United States,2654,2326,2982,3310,2858,1.243902,,0,0,1/2020
3,2020 week 4,United States,2624,2302,2946,3268,2855,1.434783,,0,0,1/2020
4,2020 week 5,United States,2660,2480,2840,3020,2799,1.544444,,0,0,1/2020


In [152]:
import plotly.express as px

fig = px.line(dfm, x="date", y="deaths")
fig.update_layout(title_text="USA Mortality Ages 25-64")
fig.show()

### Combine Datasets

In [87]:
df = dfm.merge(dfv, on='date', how='inner')

In [88]:
df.head()

Unnamed: 0,date,state,baseline,normalLower,normalUpper,treshold,deaths,zscore,deathsReported,deathsCovid,deathsNonCovid,dateByMonth,administered_dose1,booster_doses_yes,percent_boosted
0,2020 week 50,United States,2703,2461,2945,3187,3644,7.77686,,0,0,12/2020,4592,0,0.0
1,2020 week 51,United States,2756,2466,3046,3336,3695,6.475862,,0,0,12/2020,427771,0,0.0
2,2020 week 51,United States,2756,2466,3046,3336,3695,6.475862,,0,0,12/2020,388437,0,0.0
3,2020 week 51,United States,2756,2466,3046,3336,3695,6.475862,,0,0,12/2020,322463,0,0.0
4,2020 week 51,United States,2756,2466,3046,3336,3695,6.475862,,0,0,12/2020,175820,0,0.0


In [91]:
fig = px.line(df, x="date", y="deaths")
fig.update_layout(title_text="USA Mortality Ages 25-44")
fig.show()

In [93]:
fig = px.line(df, x="date", y="administered_dose1")
fig.update_layout(title_text="USA Administered Doses 25-39")
fig.show()