In [1]:
import numpy as np
import pandas as pd
import csv
import requests
import io

In [4]:
# Read the local per-state vaccination export and parse its date strings.
data = pd.read_csv('us_state_vaccinations.csv')
data['date'] = pd.to_datetime(data['date'], format='%Y-%m-%d')

# Expose 'location' and 'people_fully_vaccinated_per_hundred' under the shorter
# names the later merges use; assign() appends them after the existing columns.
data = data.assign(
    state=data['location'],
    vaxxed_per_hundred=data['people_fully_vaccinated_per_hundred'],
)

# Rows whose fully-vaccinated-per-hundred value exceeds 45.
# NOTE(review): `safe` is not referenced by any cell visible here - confirm it is still needed.
safe = data.loc[data['people_fully_vaccinated_per_hundred'].gt(45)]

# Trim to the columns needed downstream; the two aliased source columns are
# dropped here because their values now live in 'state' / 'vaxxed_per_hundred'.
unused_columns = [
    'total_distributed',
    'people_fully_vaccinated_per_hundred',
    'location',
    'people_vaccinated',
    'total_vaccinations_per_hundred',
    'people_vaccinated_per_hundred',
    'distributed_per_hundred',
    'daily_vaccinations_raw',
    'daily_vaccinations_per_million',
    'share_doses_used',
]
cut = data.drop(columns=unused_columns)

# Single-day snapshot used for the example merge.
new = cut[cut['date'] == '2021-07-06']
new.head()

Unnamed: 0,date,total_vaccinations,people_fully_vaccinated,daily_vaccinations,state,vaxxed_per_hundred
175,2021-07-06,3392366.0,1617584.0,6066.0,Alabama,32.99
351,2021-07-06,678029.0,322988.0,3219.0,Alaska,44.15
527,2021-07-06,47310.0,21583.0,172.0,American Samoa,38.76
703,2021-07-06,6826215.0,3172380.0,48692.0,Arizona,43.58
879,2021-07-06,2261649.0,1043217.0,5162.0,Arkansas,34.57


In [5]:
# Rolling-average case/death figures per state, fetched from the nytimes/covid-19-data repository.
url = "https://raw.githubusercontent.com/nytimes/covid-19-data/master/rolling-averages/us-states.csv"

# Fail loudly on an HTTP error instead of handing an error page to read_csv
# (which would only surface later as a confusing KeyError on 'date').
# The timeout keeps a stalled connection from hanging the kernel indefinitely.
response = requests.get(url, timeout=30)
response.raise_for_status()
download = response.content

# Build the rolling-average dataframe from the downloaded bytes and parse its dates.
rolling_avg = pd.read_csv(io.StringIO(download.decode('utf-8')))
rolling_avg['date'] = pd.to_datetime(rolling_avg['date'], format='%Y-%m-%d')

# Drop the columns that are not used by the merge below.
rolling_avg = rolling_avg.drop(columns=['geoid', 'cases', 'deaths', 'deaths_avg_per_100k'])

# Single-day snapshot, matching the date used for the vaccination frame.
raNew = rolling_avg.loc[rolling_avg['date'] == '2021-07-06']
raNew.head()

Unnamed: 0,date,state,cases_avg,cases_avg_per_100k,deaths_avg
26964,2021-07-06,Northern Mariana Islands,0.0,0.0,0.0
26965,2021-07-06,Guam,6.57,3.9,0.0
26966,2021-07-06,Puerto Rico,55.0,1.62,0.71
26967,2021-07-06,Virgin Islands,5.29,4.98,0.0
26968,2021-07-06,Wyoming,63.86,11.03,0.57


In [13]:
# Daily US report fetched from the CSSEGISandData/COVID-19 repository.
# NOTE(review): this is the 07-05-2021 report while the other frames are
# filtered to 2021-07-06 - confirm the one-day offset is intended.
url_2 = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports_us/07-05-2021.csv"

# Fail loudly on an HTTP error instead of parsing an error page as CSV;
# the timeout keeps a stalled connection from hanging the kernel indefinitely.
response = requests.get(url_2, timeout=30)
response.raise_for_status()
dl = response.content

# Load the master covid dataframe from the downloaded bytes.
jH = pd.read_csv(io.StringIO(dl.decode('utf-8')))

# Alias 'Province_State' as 'state' so this frame can be joined on that key.
jH['state'] = jH['Province_State']

# Keep only the columns needed downstream. 'Last_Update' is dropped too, so
# this frame carries no date column of its own.
jHnew = jH.drop(columns=[
    'Last_Update', 'Province_State', 'Country_Region', 'Lat', 'Long_', 'FIPS',
    'Total_Test_Results', 'People_Hospitalized', 'UID', 'ISO3',
    'Testing_Rate', 'Hospitalization_Rate',
])

jHnew.head()

Unnamed: 0,Confirmed,Deaths,Recovered,Active,Incident_Rate,Case_Fatality_Ratio,state
0,551298,11358,,,11243.671206,2.060229,Alabama
1,71384,377,,,9757.977978,0.52813,Alaska
2,0,0,,,0.0,,American Samoa
3,897010,17979,,,12323.737824,2.004325,Arizona
4,351825,5920,,,11658.311806,1.682655,Arkansas


In [14]:
# Join the vaccination snapshot with the rolling averages on state + date.
# pd.merge defaults to an inner join, so states absent from either frame are dropped.
dos = new.merge(raNew, on=['state', 'date'])
dos['date'] = pd.to_datetime(dos['date'], format='%Y-%m-%d')

# Columns of the final table, in display order.
report_columns = [
    'date', 'state', 'vaxxed_per_hundred', 'people_fully_vaccinated',
    'Confirmed', 'cases_avg', 'deaths_avg', 'Deaths', 'Recovered',
    'Active', 'Incident_Rate', 'Case_Fatality_Ratio',
    'daily_vaccinations', 'total_vaccinations',
]

# Attach the master covid figures (joined on state only) and keep the report columns.
all3 = dos.merge(jHnew, on='state')[report_columns]

# Overwrite 'Recovered' with Confirmed minus Deaths.
# NOTE(review): this counts every non-fatal confirmed case, including ones
# that may still be active - confirm that is the intended definition.
all3['Recovered'] = all3['Confirmed'] - all3['Deaths']

# Preview the merged dataframe.
all3.head()

Unnamed: 0,date,state,vaxxed_per_hundred,people_fully_vaccinated,Confirmed,cases_avg,deaths_avg,Deaths,Recovered,Active,Incident_Rate,Case_Fatality_Ratio,daily_vaccinations,total_vaccinations
0,2021-07-06,Alabama,32.99,1617584.0,551298,121.0,2.86,11358,539940,,11243.671206,2.060229,6066.0,3392366.0
1,2021-07-06,Alaska,44.15,322988.0,71384,27.57,0.43,377,71007,,9757.977978,0.52813,3219.0,678029.0
2,2021-07-06,Arizona,43.58,3172380.0,897010,543.43,10.0,17979,879031,,12323.737824,2.004325,48692.0,6826215.0
3,2021-07-06,Arkansas,34.57,1043217.0,351825,485.14,3.0,5920,345905,,11658.311806,1.682655,5162.0,2261649.0
4,2021-07-06,California,50.49,19949682.0,3821572,1285.33,32.0,63655,3757917,,9671.872929,1.665545,95566.0,43182509.0


In [122]:
# Third data source: confirmed-case time series from the datasets/covid-19 repository.
url_3 = "https://raw.githubusercontent.com/datasets/covid-19/main/data/us_confirmed.csv"

# Fail loudly on an HTTP error instead of parsing an error page as CSV;
# the timeout keeps a stalled connection from hanging the kernel indefinitely.
response = requests.get(url_3, timeout=30)
response.raise_for_status()
dl3 = response.content

# Load the third dataframe. Malformed rows are skipped with a warning:
# on_bad_lines='warn' is the replacement for error_bad_lines=False, which was
# deprecated in pandas 1.3 and removed in pandas 2.0 (requires pandas >= 1.3).
confirmed = pd.read_csv(io.StringIO(dl3.decode('utf-8')), on_bad_lines='warn')

In [124]:
# Normalise the confirmed-cases frame: rename columns to match the other
# frames, drop the ones that are not needed, and parse the dates.
#
# Written so the cell can be re-run safely: rename() ignores names that are
# already gone and drop(errors='ignore') tolerates already-dropped columns,
# whereas mutating in place raised KeyError on a second run.
confirmed_renames = {'Date': 'date', 'Province/State': 'state', 'Case': 'Active'}
confirmed_tail = ['date', 'state', 'Active']

confirmed = (
    confirmed
    .rename(columns=confirmed_renames)
    .drop(columns=['Admin2', 'Country/Region'], errors='ignore')
    .assign(date=lambda frame: pd.to_datetime(frame['date'], format='%Y-%m-%d'))
)

# Keep any remaining columns first and place date/state/Active last, which is
# the layout produced by appending those three columns to the raw frame.
confirmed = confirmed.reindex(
    columns=[col for col in confirmed.columns if col not in confirmed_tail] + confirmed_tail
)
confirmed.head()

Unnamed: 0,date,state,Active
0,2020-01-22,Alabama,0
1,2020-01-23,Alabama,0
2,2020-01-24,Alabama,0
3,2020-01-25,Alabama,0
4,2020-01-26,Alabama,0


In [126]:
# Restrict the confirmed frame to the 2021-07-06 snapshot used for the example.
on_example_day = confirmed['date'] == '2021-07-06'
result = confirmed[on_example_day]

# For each state keep the single row holding the largest 'Active' value,
# then renumber the rows from 0.
# NOTE(review): presumably there are several rows per state per day (an
# 'Admin2' column is dropped upstream), so this is the largest single row
# rather than a state-wide total - confirm that is intended.
top_row_labels = result.groupby('state')['Active'].idxmax()
result = result.loc[top_row_labels, :].reset_index(drop=True)
result.head()

Unnamed: 0,date,state,Active
0,2021-07-06,Alabama,81328
1,2021-07-06,Alaska,31286
2,2021-07-06,American Samoa,0
3,2021-07-06,Arizona,562308
4,2021-07-06,Arkansas,41703


In [128]:
# Look up each state's value in `result` and write it into all3['Active'];
# states with no entry in `result` end up as NaN.
active_by_state = dict(zip(result['state'], result['Active']))
all3['Active'] = all3['state'].map(active_by_state)

# Write the merged table to an example csv (index column included) and preview it.
all3.to_csv('example.csv')
all3.head()

Unnamed: 0,date,state,vaxxed_per_hundred,people_fully_vaccinated,Confirmed,cases_avg,deaths_avg,Deaths,Recovered,Active,Incident_Rate,Case_Fatality_Ratio,daily_vaccinations,total_vaccinations
0,2021-07-06,Alabama,32.99,1617584.0,551298,121.0,2.86,11358,539940,81328,11243.671206,2.060229,6066.0,3392366.0
1,2021-07-06,Alaska,44.15,322988.0,71384,27.57,0.43,377,71007,31286,9757.977978,0.52813,3219.0,678029.0
2,2021-07-06,Arizona,43.58,3172380.0,897010,543.43,10.0,17979,879031,562308,12323.737824,2.004325,48692.0,6826215.0
3,2021-07-06,Arkansas,34.57,1043217.0,351825,485.14,3.0,5920,345905,41703,11658.311806,1.682655,5162.0,2261649.0
4,2021-07-06,California,50.49,19949682.0,3821572,1285.33,32.0,63655,3757917,1253106,9671.872929,1.665545,95566.0,43182509.0
