In [1]:
import pandas as pd
import numpy as np
import requests #api
import json #json file
from datetime import datetime

import matplotlib.pyplot as plt
from scipy.stats import linregress #stastistical analysis

from pathlib import Path #reading csv - connecting to the csv merges mac and windows

In [2]:
# Read the COVID-19 vaccine "up to date" status CSV from the working directory.
# Path keeps the location portable between macOS and Windows.
covid_vaccines_path = Path("COVID-19_Vaccines_Up_to_Date_Status.csv")
covid_vaccines_df = pd.read_csv(filepath_or_buffer=covid_vaccines_path)
covid_vaccines_df.head(n=5)

Unnamed: 0,Date,Location,Demographic_Category,census,Up_to_date,Up_to_date_pct_agegroup
0,09/13/2023,AK,Female_Ages_12-17_yrs,27938.0,2697.0,9.7
1,09/13/2023,AK,Female_Ages_75+_yrs,16346.0,7344.0,44.9
2,09/13/2023,AK,Male_Ages_12-17_yrs,29377.0,2598.0,8.8
3,09/13/2023,CA,Male_Ages_65+_yrs,2602393.0,1331083.0,51.1
4,09/13/2023,CT,Male_Ages_65+_yrs,276002.0,175897.0,63.7


In [3]:
# Download case-level records (JSON) from the data.cdc.gov endpoint.
hospitalizations_path = "https://data.cdc.gov/resource/n8mc-b4w4.json"

# A timeout keeps a stalled connection from hanging the kernel indefinitely, and
# raise_for_status() surfaces HTTP errors here instead of letting an error payload
# flow into the DataFrame built from this response.
hospitalization_response = requests.get(hospitalizations_path, timeout=30)
hospitalization_response.raise_for_status()
hospitalization_data = hospitalization_response.json()

# NOTE(review): the age_group counts recorded in this notebook add up to exactly
# 1000 rows, which looks like the API's default page size rather than the full
# dataset -- confirm, and pass "$limit"/"$offset" query params if more rows are needed.

In [4]:
# Turn the parsed JSON payload into a DataFrame and preview the first rows.
hospitalization_df = pd.DataFrame(data=hospitalization_data)
hospitalization_df.head(n=5)

Unnamed: 0,case_month,res_state,state_fips_code,res_county,county_fips_code,age_group,sex,race,ethnicity,case_positive_specimen,process,exposure_yn,current_status,symptom_status,hosp_yn,icu_yn,death_yn,case_onset_interval,underlying_conditions_yn
0,2022-08,TN,47,COCKE,47029,18 to 49 years,Female,,,0.0,Missing,Missing,Probable Case,Missing,Missing,Missing,Missing,,
1,2022-01,NM,35,LEA,35025,18 to 49 years,Male,,,,Missing,Missing,Laboratory-confirmed case,Missing,Unknown,Missing,Missing,,
2,2020-12,KY,21,MERCER,21167,18 to 49 years,Male,Unknown,Unknown,0.0,Clinical evaluation,Yes,Laboratory-confirmed case,Unknown,Unknown,Missing,No,,
3,2020-09,MN,27,PINE,27115,18 to 49 years,Male,,,0.0,Missing,Yes,Laboratory-confirmed case,Asymptomatic,Missing,Missing,No,,
4,2021-08,TN,47,COFFEE,47031,50 to 64 years,Male,,,0.0,Missing,Missing,Probable Case,Missing,Missing,Missing,Missing,,


In [5]:
# How many case records fall in each age bucket (including "NA" / "Missing" labels)?
hospitalization_df["age_group"].value_counts()

18 to 49 years    490
50 to 64 years    193
0 - 17 years      139
65+ years         130
NA                 34
Missing            14
Name: age_group, dtype: int64

In [6]:
# List every demographic label in the vaccine data together with its row count.
covid_vaccines_df["Demographic_Category"].value_counts()

Female_Ages_12-17_yrs    236
Ages_50-64_yrs           236
Male_Ages_2-4_yrs        236
Ages_25-39_yrs           236
Male_Ages_25-39_yrs      236
Sex_Unknown              236
Female_Ages_25-49_yrs    236
Female_Ages_18-24_yrs    236
Male_Ages_65-74_yrs      236
Ages_25-49_yrs           236
Female_Ages_25-39_yrs    236
Ages_18-24_yrs           236
Ages_65-74_yrs           236
Female_Ages_40-49_yrs    236
Male_Ages_25-49_yrs      236
Male_Ages_<2yrs          236
Sex_Female               236
Male_Ages_75+_yrs        236
Male_Ages_50-64_yrs      236
Age_Unknown              236
Male_Ages_<5yrs          236
Female_Ages_50-64_yrs    236
Female_Ages_75+_yrs      236
Ages_<2yrs               236
Male_Ages_12-17_yrs      236
Male_Ages_65+_yrs        236
Ages_75+_yrs             236
Female_Ages_2-4_yrs      236
Female_Ages_65-74_yrs    236
Ages_12-17_yrs           236
Female_Ages_<2yrs        236
Sex_Male                 236
Female_Ages_<5yrs        236
Ages_65+_yrs             236
Male_Ages_18-2

In [7]:
# Preview the first 20 rows with Location as the index, ordered by that index.
(
    covid_vaccines_df
    .set_index("Location")
    .sort_index()
    .head(20)
)

Unnamed: 0_level_0,Date,Demographic_Category,census,Up_to_date,Up_to_date_pct_agegroup
Location,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
AK,09/13/2023,Female_Ages_12-17_yrs,27938.0,2697.0,9.7
AK,09/13/2023,Female_Ages_25-39_yrs,80186.0,9014.0,11.2
AK,07/12/2023,Male_Ages_2-4_yrs,15880.0,53.0,0.3
AK,07/12/2023,Female_Ages_25-49_yrs,121096.0,15552.0,12.8
AK,07/12/2023,Ages_2-4_yrs,31090.0,94.0,0.3
AK,09/13/2023,Female_Ages_65-74_yrs,29793.0,13587.0,45.6
AK,07/12/2023,Sex_Female,350127.0,60375.0,17.2
AK,07/12/2023,Male_Ages_65-74_yrs,31507.0,13996.0,44.4
AK,07/12/2023,Female_Ages_40-49_yrs,40910.0,6725.0,16.4
AK,07/12/2023,Ages_12-17_yrs,57315.0,4992.0,8.7


In [8]:
# Keep only the age-banded categories: every such label contains "yrs", while the
# aggregate labels (Sex_Female, Sex_Male, Sex_Unknown, Age_Unknown) do not.
no_general_gender = covid_vaccines_df[
    covid_vaccines_df["Demographic_Category"].str.contains("yrs")
]
no_general_gender

Unnamed: 0,Date,Location,Demographic_Category,census,Up_to_date,Up_to_date_pct_agegroup
0,09/13/2023,AK,Female_Ages_12-17_yrs,27938.0,2697.0,9.7
1,09/13/2023,AK,Female_Ages_75+_yrs,16346.0,7344.0,44.9
2,09/13/2023,AK,Male_Ages_12-17_yrs,29377.0,2598.0,8.8
3,09/13/2023,CA,Male_Ages_65+_yrs,2602393.0,1331083.0,51.1
4,09/13/2023,CT,Male_Ages_65+_yrs,276002.0,175897.0,63.7
...,...,...,...,...,...,...
10141,06/14/2023,TX,Male_Ages_65-74_yrs,1046971.0,,
10142,06/14/2023,WI,Ages_25-49_yrs,1797281.0,285589.0,15.9
10143,06/14/2023,VI,Male_Ages_5-11_yrs,5225.0,12.0,0.2
10146,06/14/2023,WI,Male_Ages_<2yrs,65295.0,1663.0,2.5


In [9]:
# Spot-check one state on one reporting date: Alaska rows dated 09/13/2023.
no_general_gender[
    no_general_gender["Location"].eq("AK")
    & no_general_gender["Date"].eq("09/13/2023")
]

Unnamed: 0,Date,Location,Demographic_Category,census,Up_to_date,Up_to_date_pct_agegroup
0,09/13/2023,AK,Female_Ages_12-17_yrs,27938.0,2697.0,9.7
1,09/13/2023,AK,Female_Ages_75+_yrs,16346.0,7344.0,44.9
2,09/13/2023,AK,Male_Ages_12-17_yrs,29377.0,2598.0,8.8
30,09/13/2023,AK,Ages_<5yrs,51080.0,301.0,0.6
117,09/13/2023,AK,Female_Ages_<2yrs,9717.0,73.0,0.8
160,09/13/2023,AK,Male_Ages_25-49_yrs,134560.0,11458.0,8.5
234,09/13/2023,AK,Ages_50-64_yrs,136166.0,27624.0,20.3
335,09/13/2023,AK,Ages_40-49_yrs,85647.0,12070.0,14.1
416,09/13/2023,AK,Ages_25-49_yrs,255656.0,27595.0,10.8
489,09/13/2023,AK,Ages_2-4_yrs,31090.0,160.0,0.5


In [10]:
# Drop the male-specific rows. The match is case-sensitive, so "Female_..." labels
# (lowercase "male") are NOT removed by this step.
minus_male = no_general_gender[
    ~no_general_gender["Demographic_Category"].str.contains("Male")
]
minus_male

Unnamed: 0,Date,Location,Demographic_Category,census,Up_to_date,Up_to_date_pct_agegroup
0,09/13/2023,AK,Female_Ages_12-17_yrs,27938.0,2697.0,9.7
1,09/13/2023,AK,Female_Ages_75+_yrs,16346.0,7344.0,44.9
5,09/13/2023,DE,Ages_75+_yrs,76048.0,,
6,09/13/2023,HI,Female_Ages_2-4_yrs,25076.0,519.0,2.1
7,09/13/2023,IA,Female_Ages_65-74_yrs,161188.0,89956.0,55.8
...,...,...,...,...,...,...
10134,06/14/2023,PA,Ages_75+_yrs,1039269.0,516640.0,49.7
10136,06/14/2023,WA,Ages_2-4_yrs,280089.0,11859.0,4.2
10137,06/14/2023,TN,Ages_65+_yrs,1143393.0,,
10142,06/14/2023,WI,Ages_25-49_yrs,1797281.0,285589.0,15.9


In [11]:
# Drop the female-specific rows as well, leaving only the sex-agnostic
# "Ages_..." categories.
minus_female = minus_male[
    ~minus_male["Demographic_Category"].str.contains("Female")
]
minus_female

Unnamed: 0,Date,Location,Demographic_Category,census,Up_to_date,Up_to_date_pct_agegroup
5,09/13/2023,DE,Ages_75+_yrs,76048.0,,
8,09/13/2023,ID,Ages_12-17_yrs,156543.0,,
11,09/13/2023,KY,Ages_<2yrs,107266.0,812.0,0.8
14,09/13/2023,MA,Ages_<5yrs,357362.0,15956.0,4.5
20,09/13/2023,ND,Ages_5-11_yrs,70715.0,3728.0,5.3
...,...,...,...,...,...,...
10133,06/14/2023,PR,Ages_<5yrs,117482.0,240.0,0.2
10134,06/14/2023,PA,Ages_75+_yrs,1039269.0,516640.0,49.7
10136,06/14/2023,WA,Ages_2-4_yrs,280089.0,11859.0,4.2
10137,06/14/2023,TN,Ages_65+_yrs,1143393.0,,


In [14]:
# Parse the MM/DD/YYYY date strings into real datetimes before inspecting dtypes.
# No other cell performs this conversion, so without it a fresh
# Restart & Run All leaves "Date" as plain strings. assign() builds a new frame
# (no SettingWithCopyWarning on the filtered slice), and re-running the cell is
# harmless because an already-datetime column is returned unchanged.
minus_female = minus_female.assign(
    Date=pd.to_datetime(minus_female["Date"], format="%m/%d/%Y")
)
minus_female.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 3068 entries, 5 to 10142
Data columns (total 6 columns):
 #   Column                   Non-Null Count  Dtype         
---  ------                   --------------  -----         
 0   Date                     3068 non-null   datetime64[ns]
 1   Location                 3068 non-null   object        
 2   Demographic_Category     3068 non-null   object        
 3   census                   3068 non-null   float64       
 4   Up_to_date               2385 non-null   float64       
 5   Up_to_date_pct_agegroup  2385 non-null   float64       
dtypes: datetime64[ns](1), float64(3), object(2)
memory usage: 167.8+ KB


In [13]:
# Preview the sex-agnostic, age-banded rows.
# If month/year parts are needed, derive them into NEW columns from a datetime
# "Date" (.dt.month / .dt.year) rather than writing them back into "Date":
# overwriting "Date" with the month would leave no real date for a later
# year extraction to parse.
minus_female.head(n=5)

Unnamed: 0,Date,Location,Demographic_Category,census,Up_to_date,Up_to_date_pct_agegroup
5,2023-09-13,DE,Ages_75+_yrs,76048.0,,
8,2023-09-13,ID,Ages_12-17_yrs,156543.0,,
11,2023-09-13,KY,Ages_<2yrs,107266.0,812.0,0.8
14,2023-09-13,MA,Ages_<5yrs,357362.0,15956.0,4.5
20,2023-09-13,ND,Ages_5-11_yrs,70715.0,3728.0,5.3
