# Import Packages

In [35]:
import os
import glob
import tqdm
import warnings
import us

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.figure_factory as ff
import plotly.express as px
import plotly.graph_objects as go
import statsmodels.api as sm

from statsmodels.tsa.arima.model import ARIMA

warnings.simplefilter("ignore")

# 1. Vaccination by Case Rate 

## Case rate per 100,000 population shows a county-level view of COVID-19 vaccination coverage and the number of COVID-19 cases for every 100,000 people over the last 7 days

In [2]:
data = pd.read_csv('vaccination_by_case_rate.csv')

In [3]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3219 entries, 0 to 3218
Data columns (total 6 columns):
 #   Column                                     Non-Null Count  Dtype  
---  ------                                     --------------  -----  
 0   County                                     3219 non-null   object 
 1   State                                      3219 non-null   object 
 2   Cases - last 7 days per 100k               3209 non-null   object 
 3   % total pop fully vaccinated               3193 non-null   float64
 4   Vaccination county reporting completeness  3219 non-null   float64
 5   Data as of date                            3219 non-null   object 
dtypes: float64(2), object(4)
memory usage: 151.0+ KB


In [4]:
data.head()

Unnamed: 0,County,State,Cases - last 7 days per 100k,% total pop fully vaccinated,Vaccination county reporting completeness,Data as of date
0,Aleutians East Borough,AK,suppressed,71.3,96.7,2022-01-08
1,Aleutians West Census Area,AK,1295.70,58.9,96.7,2022-01-08
2,Anchorage Municipality,AK,1661.81,60.3,96.7,2022-01-08
3,Bethel Census Area,AK,565.65,64.4,96.7,2022-01-08
4,Bristol Bay Borough,AK,,95.0,96.7,2022-01-08


### Data Cleaning

In [5]:
# Pick only useful columns.
datacr = data.drop('Data as of date', axis=1)
datacr = datacr.drop('County', axis=1)
datacr = datacr.drop('Vaccination county reporting completeness', axis=1)


In [6]:
# Check for null values
print('Are there any null rows for the essential reference data?')
print(datacr.isnull().sum())

Are there any null rows for the essential reference data?
State                            0
Cases - last 7 days per 100k    10
% total pop fully vaccinated    26
dtype: int64


In [7]:
# Replace Null and suppressed values
datacr = datacr.fillna(0)
datacr = datacr.replace(to_replace="suppressed", value=0)

# Verification
print(datacr.isnull().sum())

State                           0
Cases - last 7 days per 100k    0
% total pop fully vaccinated    0
dtype: int64


In [8]:

# Cast to float64
columns = ["Cases - last 7 days per 100k", "% total pop fully vaccinated"]
datacr[columns] = datacr[columns].astype(np.float64)


In [9]:
states = datacr['State'].unique()
states.sort()
print(f'A total of {len(states)} states:')
print(states)

A total of 52 states:
['AK' 'AL' 'AR' 'AZ' 'CA' 'CO' 'CT' 'DC' 'DE' 'FL' 'GA' 'HI' 'IA' 'ID'
 'IL' 'IN' 'KS' 'KY' 'LA' 'MA' 'MD' 'ME' 'MI' 'MN' 'MO' 'MS' 'MT' 'NC'
 'ND' 'NE' 'NH' 'NJ' 'NM' 'NV' 'NY' 'OH' 'OK' 'OR' 'PA' 'PR' 'RI' 'SC'
 'SD' 'TN' 'TX' 'UT' 'VA' 'VT' 'WA' 'WI' 'WV' 'WY']


In [10]:
# Replace State by FIPS code
#mappings = {v: k for k, v in us.states.mapping('fips', 'abbr').items()}
#print(mappings)
#datacr['State'] = datacr['State'].replace(mappings)
#datacr.head()

In [50]:
# Group by State
grouped_df = datacr.groupby('State')
mean_df = grouped_df.mean()
mean_df.describe()

Unnamed: 0,Cases - last 7 days per 100k,% total pop fully vaccinated
count,52.0,52.0
mean,1030.148605,51.270454
std,500.508271,12.229851
min,257.689643,0.0
25%,685.515192,44.20563
50%,957.851135,51.210955
75%,1266.377713,58.325947
max,2294.215238,73.98


## Plot data by rate cases

In [73]:
fig = px.choropleth(locations=mean_df.index.values, 
                    locationmode="USA-states", 
                    color=mean_df['Cases - last 7 days per 100k'],
                    color_continuous_scale="Jet",
                    range_color=(mean_df.min()[0], mean_df.max()[0]),
                    labels={'color': 'Case Rate', 'locations': 'Code'},
                    title='Case Rate at 100k in last 7 days',
                    scope="usa")
fig.show()

In [77]:
fig = px.choropleth(locations=mean_df.index.values, 
                    locationmode="USA-states", 
                    color=mean_df['% total pop fully vaccinated'],
                    color_continuous_scale="Jet",
                    range_color=(mean_df.min()[1], mean_df.max()[1]),
                    labels={'color': '% vaccinated', 'locations': 'Code'},
                    title='Percent of fully vaccinated population',
                    scope="usa")
fig.show()

# 2. Vaccination by Social Vulnerability Index

## Social Vulnerability Index shows a county-level view of COVID-19 vaccination coverage and social vulnerability measured by CDC Social Vulnerability Index (SVI)


In [None]:
datasvi = pd.read_csv('vaccination_by_svi.csv')


# 3. Vaccination by Test positivity

In [None]:
#Testing percent positivity shows a county-level view of COVID-19 vaccination coverage and the percent of Nucleic Acid Amplification Tests (NAATs) conducted in the last 7 days that were positive for SARs-CoV-2
datatp = pd.read_csv('vaccination_by_test_positivity.csv')


# Data Cleaning