In [296]:
# import package requirements
import pandas as pd
import plotly.express as px

### COVID-19 Vaccination Demographics (USA, National)
<p>https://data.cdc.gov/Vaccinations/COVID-19-Vaccination-Demographics-in-the-United-St/km4m-vcsb</p>

In [297]:
# Download the vaccination-demographics table as CSV from the CDC data API.
# NOTE(review): the `$limit=50000` query parameter presumably caps the number of
# rows returned -- confirm the dataset has not outgrown it.
cdc_vaccination_url = 'https://data.cdc.gov/resource/km4m-vcsb.csv?$limit=50000'
dfv = pd.read_csv(cdc_vaccination_url)

In [298]:
# Keep only the rows whose demographic category is one of the three age
# brackets below (together they cover ages 25-64), then renumber the index.
age_list = ['Ages_25-39_yrs', 'Ages_40-49_yrs', 'Ages_50-64_yrs']
in_age_range = dfv['demographic_category'].isin(age_list)
dfv = dfv.loc[in_age_range].reset_index(drop=True)

In [299]:
# Parse the `date` column into pandas datetimes (column position is unchanged).
dfv = dfv.assign(date=pd.to_datetime(dfv['date']))

In [300]:
# Derive calendar parts from `date` and build an 'M/YYYY' label per row.
# NOTE(review): `year` and `month` are calendar values while `week` is the ISO
# week number, so early-January rows can carry week 52/53 -- do not treat
# (year, week) as a chronological key.
date_index = pd.DatetimeIndex(dfv['date'])
dfv = dfv.assign(
    year=date_index.year.tolist(),           # calendar year
    month=date_index.month.tolist(),         # calendar month (1-12)
    week=dfv['date'].dt.isocalendar().week,  # ISO week number
    # month/year label, e.g. '1/2021' (month is not zero-padded)
    dateByMonth=lambda frame: frame['month'].astype(str) + '/' + frame['year'].astype(str),
)

In [301]:
# Order rows chronologically by the actual timestamp, then by age bracket.
# Sorting on (year, month, week) is not chronological: `week` is the ISO week
# number, so early-January dates that belong to ISO week 52/53 would be placed
# after the rest of that January.
dfv = dfv.sort_values(by=['date', 'demographic_category'])

In [302]:
# Line chart of the `administered_dose1` column over time, one line per
# demographic category.
# The x axis uses the datetime `date` column: `range_x` is expressed as date
# strings, which only applies to a date axis, whereas the 'M/YYYY' strings in
# `dateByMonth` are not dates. The frame is sorted by date for plotting so each
# line is drawn left to right regardless of the frame's current row order.
fig = px.line(dfv.sort_values('date'), x="date", y="administered_dose1", color='demographic_category',
              title='Vaccines Administered by Age Group (USA)', range_x=['2020-12-01','2022-05-01'])
fig.show()

### All-Cause Mortality United States: 25-64 years (USA)
<p>https://www.usmortality.com/#unitedstates</p>

In [303]:
# Read the two mortality CSV exports and tag every row of each file with the
# age bracket it covers, so the frames can be told apart once combined.
df1 = pd.read_csv('data/usmortality_com-United States_25_44.csv').assign(
    demographic_category='Ages_25-44_yrs'
)
df2 = pd.read_csv('data/usmortality_com-United States_45_64.csv').assign(
    demographic_category='Ages_45-64_yrs'
)

In [304]:
# Stack both age brackets into one frame with a fresh 0..n-1 index.
dfm = pd.concat([df1, df2], ignore_index=True)

In [305]:
# Preview the first five rows of the combined frame.
dfm.head(n=5)

Unnamed: 0,date,state,baseline,normalLower,normalUpper,treshold,deaths,zscore,deathsReported,deathsCovid,deathsNonCovid,dateByMonth,demographic_category
0,2020 week 1,United States,2785,2413,3157,3529,3034,1.33871,,0,0,1/2020,Ages_25-44_yrs
1,2020 week 2,United States,2689,2337,3041,3393,2920,1.3125,,0,0,1/2020,Ages_25-44_yrs
2,2020 week 3,United States,2654,2326,2982,3310,2858,1.243902,,0,0,1/2020,Ages_25-44_yrs
3,2020 week 4,United States,2624,2302,2946,3268,2855,1.434783,,0,0,1/2020,Ages_25-44_yrs
4,2020 week 5,United States,2660,2480,2840,3020,2799,1.544444,,0,0,1/2020,Ages_25-44_yrs


In [306]:
# Break the 'YYYY week W' label in `date` and the 'M/YYYY' label in
# `dateByMonth` into separate year / week / month columns.
# The pieces are converted to numbers so that later sorts are numeric; left as
# strings they sort lexicographically (week '10' before week '2', month '10'
# before month '2'), which scrambles the chronological order.
date_parts = dfm['date'].str.split(' ', expand=True)  # columns: year, 'week', week number
dfm['year'] = pd.to_numeric(date_parts[0])
dfm['week'] = pd.to_numeric(date_parts[2])
dfm['month'] = pd.to_numeric(dfm['dateByMonth'].str.split('/', expand=True)[0])

In [307]:
# Order rows by year, then month, then week, and renumber the index.
dfm = dfm.sort_values(['year', 'month', 'week'], ignore_index=True)

In [308]:
# Display the sorted frame to check the row order and the new
# year / week / month columns (the notebook shows a truncated head/tail view).
dfm

Unnamed: 0,date,state,baseline,normalLower,normalUpper,treshold,deaths,zscore,deathsReported,deathsCovid,deathsNonCovid,dateByMonth,demographic_category,year,week,month
0,2020 week 1,United States,2785,2413,3157,3529,3034,1.338710,,0,0,1/2020,Ages_25-44_yrs,2020,1,1
1,2020 week 1,United States,11392,10408,12376,13360,11223,-0.343496,,0,0,1/2020,Ages_45-64_yrs,2020,1,1
2,2020 week 2,United States,2689,2337,3041,3393,2920,1.312500,,0,0,1/2020,Ages_25-44_yrs,2020,2,1
3,2020 week 2,United States,11298,10110,12486,13674,11128,-0.286195,,0,0,1/2020,Ages_45-64_yrs,2020,2,1
4,2020 week 3,United States,2654,2326,2982,3310,2858,1.243902,,0,0,1/2020,Ages_25-44_yrs,2020,3,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
229,2022 week 10,United States,10937,10699,11175,11413,11990,8.848739,10328.0,0,0,3/2022,Ages_45-64_yrs,2022,10,3
230,2022 week 11,United States,2695,2533,2857,3019,3814,13.814815,2818.0,0,0,3/2022,Ages_25-44_yrs,2022,11,3
231,2022 week 11,United States,10784,10488,11080,11376,11892,7.486486,9234.0,0,0,3/2022,Ages_45-64_yrs,2022,11,3
232,2022 week 12,United States,2648,2372,2924,3200,3811,8.427536,2305.0,0,0,3/2022,Ages_25-44_yrs,2022,12,3


In [312]:
# Line chart of weekly `deaths` against the 'M/YYYY' month label, one line per
# demographic category. Only the title is set on the layout; no x-axis range is
# applied.
fig = px.line(
    dfm,
    x='dateByMonth',
    y='deaths',
    color='demographic_category',
).update_layout(title_text="Mortality Count by Age Group (USA)")
fig.show()