In [1]:
import os
import pandas as pd 
import numpy as np   
import matplotlib as plt
import seaborn as sns
import warnings
warnings.filterwarnings("ignore")


vax_df = pd.read_json('https://data.cdc.gov/resource/d6p8-wqjm.json') 
#Link to this Data Set:  
#https://data.cdc.gov/Public-Health-Surveillance/Rates-of-COVID-19-Cases-or-Deaths-by-Age-Group-and/d6p8-wqjm   

dropbox_folder_path = ('/Users/BaileyGimpel/Dropbox/Visualization Work/Covid-19 Vaccine Efficacy Dashboard/')

In [2]:
# Dropping Columns I do not plan to use
vax_df = vax_df.drop(columns=['crude_booster_ir', 'crude_primary_series_only_ir','crude_unvax_ir', 
                              'crude_booster_irr', 'crude_irr', 'continuity_correction','age_adj_booster_ir', 
                              'age_adj_vax_ir', 'age_adj_unvax_ir','age_adj_booster_irr', 'age_adj_irr','month',
                             'vaccine_product'],axis=1)   


# Turing the Format Month/Number of Week in Year to a Single Date within that Week
vax_df['mmwr_week'] = vax_df['mmwr_week']*10 + 0  
vax_df['date'] = pd.to_datetime(vax_df['mmwr_week'], format='%Y%W%w') 
vax_df = vax_df.drop(columns=['mmwr_week'],axis=1)
vax_df = vax_df[(vax_df['age_group']!= '5-11')&(vax_df['age_group']!='12-17')]

print(vax_df.shape)
vax_df.head()     

(721, 9)


Unnamed: 0,outcome,age_group,boosted_with_outcome,boosted_population,primary_series_only_with_outcome,primary_series_only_population,unvaccinated_with_outcome,unvaccinated_population,date
2,death,18-49,11,25207882,12,37239463,39,23658679.75,2022-07-31
3,case,18-49,119,307241,52609,52473497,170089,34939594.0,2021-09-26
4,case,50-64,147,376834,28091,30068369,46391,9714056.0,2021-09-26
5,case,65+,280,878779,27243,29106029,24190,4820712.8,2021-09-26
6,case,all_ages,546,1562854,107943,111647895,240670,49474362.8,2021-09-26


## Creating DFs for Change OverTime (Line Graphs) 

In [3]:
temp_df = vax_df.copy() 
case_df = temp_df[temp_df['outcome']=='case'] 
death_df = temp_df[temp_df['outcome']=='death'] 

caseGrouped_df = case_df.set_index('date').groupby([pd.Grouper(freq='W'),'age_group']).sum() 
deathGrouped_df = death_df.set_index('date').groupby([pd.Grouper(freq='W'),'age_group']).sum() 

In [4]:
caseName_df = pd.DataFrame(caseGrouped_df).rename(columns = {'boosted_with_outcome':'Boosted Positive Case',
                                 'primary_series_only_with_outcome':'Primary Series Positive Case',
                                'unvaccinated_with_outcome':'Unvaccinated Positive Case'},inplace=True)

In [5]:
deathName_df = pd.DataFrame(deathGrouped_df).rename(columns = {'boosted_with_outcome':'Boosted Covid Mortalities',
                                 'primary_series_only_with_outcome':'Primary Series Covid Mortalities',
                                'unvaccinated_with_outcome':'Unvaccinated Covid Mortalities'},inplace=True)

In [6]:
# Reset index to create separate columns for `date`, `age_group`, `outcome`, and `vaccine_product`
caseName_df = caseGrouped_df.reset_index()

# Melt the dataframe to transform it into a long format
melted_case_data = pd.melt(caseName_df, id_vars=['date', 'age_group'], 
                           value_vars=['Boosted Positive Case','Primary Series Positive Case',
                                       'Unvaccinated Positive Case'],
                           var_name='Vaccination Status')
melted_case_data.rename(columns = {'value':'Weekly Covid Positive Case Totals'},inplace=True)  
melted_case_data.head()   

Unnamed: 0,date,age_group,Vaccination Status,Weekly Covid Positive Case Totals
0,2021-09-26,18-49,Boosted Positive Case,119
1,2021-09-26,50-64,Boosted Positive Case,147
2,2021-09-26,65+,Boosted Positive Case,280
3,2021-09-26,all_ages,Boosted Positive Case,1089
4,2021-10-03,18-49,Boosted Positive Case,153


In [7]:
filename_1 = 'Time-Series Case Data.csv'
csv_file_path_1 = os.path.join(dropbox_folder_path, filename_1)
melted_case_data.to_csv(csv_file_path_1, index=False)

In [8]:
# Reset index to create separate columns for `date`, `age_group`, `outcome`, and `vaccine_product`
deathName_df = deathGrouped_df.reset_index()

# Melt the dataframe to transform it into a long format
melted_death_data = pd.melt(deathName_df, id_vars=['date', 'age_group'], 
                           value_vars=['Boosted Covid Mortalities','Primary Series Covid Mortalities',
                                       'Unvaccinated Covid Mortalities'],
                           var_name='Vaccination Status')
melted_death_data.rename(columns = {'value':'Weekly Covid Mortality Totals'},inplace=True) 
melted_death_data = melted_death_data.iloc[:-3]
melted_death_data.head()  

Unnamed: 0,date,age_group,Vaccination Status,Weekly Covid Mortality Totals
0,2021-09-26,18-49,Boosted Covid Mortalities,0
1,2021-09-26,50-64,Boosted Covid Mortalities,2
2,2021-09-26,65+,Boosted Covid Mortalities,18
3,2021-09-26,all_ages,Boosted Covid Mortalities,39
4,2021-10-03,18-49,Boosted Covid Mortalities,2


In [9]:
filename_2 = 'Time-Series Death Data.csv'
csv_file_path_2 = os.path.join(dropbox_folder_path, filename_2)
melted_death_data.to_csv(csv_file_path_2, index=False)

### Population Time Series DF for Histogram

In [10]:
temp_df5 = vax_df.copy()
pop_data = temp_df5.set_index('date').groupby([pd.Grouper(freq='W'),'age_group']).sum()   
pop_data = pop_data.reset_index()    
pop_data = pop_data.drop(columns = ['boosted_with_outcome','primary_series_only_with_outcome','unvaccinated_with_outcome'],
                        axis = 1)
melted_pop_data = pd.melt(pop_data,id_vars=['date','age_group'], 
                        value_vars = ['boosted_population','primary_series_only_population','unvaccinated_population'], 
                          var_name = 'Vaccination Status') 

melted_pop_data.rename(columns={'value':'Percentage of Total Population by Vaccination Status'},inplace=True) 
popData_ = melted_pop_data[melted_pop_data['age_group'] == '18-49'] 
popData_.head()   



popData_.head()

Unnamed: 0,date,age_group,Vaccination Status,Percentage of Total Population by Vaccination Status
0,2021-09-26,18-49,boosted_population,604928.0
4,2021-10-03,18-49,boosted_population,735997.0
8,2021-10-10,18-49,boosted_population,870428.0
12,2021-10-17,18-49,boosted_population,1381241.0
16,2021-10-24,18-49,boosted_population,2099061.0


In [11]:
filename_5 = 'Time-Series Population Data.csv'
csv_file_path_5 = os.path.join(dropbox_folder_path, filename_5)
popData_.to_csv(csv_file_path_5, index=False)