In [183]:
import numpy as np
import pandas as pd
import csv
import requests
import io

In [219]:
# Read the per-state vaccination CSV and add the alias columns
# ('state', 'vaxxed_per_hundred') that the later merge cells key on.
data = pd.read_csv('us_state_vaccinations.csv')
data = data.assign(
    date=data['date'].astype('datetime64[ns]'),
    state=data['location'],
    vaxxed_per_hundred=data['people_fully_vaccinated_per_hundred'],
)

# Rows whose fully-vaccinated rate is above 45 per hundred.
safe = data[data['people_fully_vaccinated_per_hundred'].gt(45)]

# Extract only the needed columns into a new dataframe by dropping the rest.
vax_unused_columns = [
    'total_distributed',
    'people_fully_vaccinated_per_hundred',
    'location',
    'people_vaccinated',
    'total_vaccinations_per_hundred',
    'people_vaccinated_per_hundred',
    'distributed_per_hundred',
    'daily_vaccinations_raw',
    'daily_vaccinations_per_million',
    'share_doses_used',
]
cut = data.drop(columns=vax_unused_columns)

# Single-day snapshot used as the worked example in the following cells.
new = cut[cut['date'] == '2021-07-06']
new.head()

Unnamed: 0,date,total_vaccinations,people_fully_vaccinated,daily_vaccinations,state,vaxxed_per_hundred
175,2021-07-06,3392366.0,1617584.0,6066.0,Alabama,32.99
351,2021-07-06,678029.0,322988.0,3219.0,Alaska,44.15
527,2021-07-06,47310.0,21583.0,172.0,American Samoa,38.76
703,2021-07-06,6826215.0,3172380.0,48692.0,Arizona,43.58
879,2021-07-06,2261649.0,1043217.0,5162.0,Arkansas,34.57


In [185]:
# Download the NYT per-state rolling-average CSV.
url = "https://raw.githubusercontent.com/nytimes/covid-19-data/master/rolling-averages/us-states.csv"
# A timeout keeps the cell from hanging forever, and raise_for_status() stops
# an HTTP error page from being silently parsed as if it were the CSV.
response = requests.get(url, timeout=30)
response.raise_for_status()
download = response.content

# Create a new dataframe for the rolling average values.
rolling_avg = pd.read_csv(io.StringIO(download.decode('utf-8')))
rolling_avg['date'] = pd.to_datetime(rolling_avg['date'], format='%Y-%m-%d')

# Keep only the needed columns by dropping the ones not used downstream.
rolling_avg = rolling_avg.drop(columns=['geoid', 'cases', 'deaths', 'deaths_avg_per_100k'])

# Single-day snapshot that is merged with the vaccination snapshot later on.
raNew = rolling_avg.loc[rolling_avg['date'] == '2021-07-06']
raNew.head()

Unnamed: 0,date,state,cases_avg,cases_avg_per_100k,deaths_avg
26964,2021-07-06,Northern Mariana Islands,0.0,0.0,0.0
26965,2021-07-06,Guam,6.57,3.9,0.0
26966,2021-07-06,Puerto Rico,55.0,1.62,0.71
26967,2021-07-06,Virgin Islands,12.0,11.3,0.0
26968,2021-07-06,Wyoming,63.86,11.03,0.57


In [181]:
# Download the JHU CSSE US daily report.
# NOTE(review): the report file is dated 07-05-2021 while the other sources in
# this notebook are filtered to 2021-07-06 -- confirm this is the intended snapshot.
url_2 = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports_us/07-05-2021.csv"
# A timeout keeps the cell from hanging forever, and raise_for_status() stops
# an HTTP error page from being silently parsed as if it were the CSV.
response_2 = requests.get(url_2, timeout=30)
response_2.raise_for_status()
dl = response_2.content

# Load the initial dataframe for the master covid data.
jH = pd.read_csv(io.StringIO(dl.decode('utf-8')))

# Alias the state column so it matches the merge key used by the other frames.
jH['state'] = jH['Province_State']

# Keep only the needed columns by dropping the ones not used downstream.
jHnew = jH.drop(columns=['Last_Update', 'Province_State', 'Country_Region', 'Lat', 'Long_', 'FIPS',
                         'Total_Test_Results', 'People_Hospitalized', 'UID', 'ISO3',
                         'Testing_Rate', 'Hospitalization_Rate'])

jHnew.head()

Unnamed: 0,Confirmed,Deaths,Recovered,Active,Incident_Rate,Case_Fatality_Ratio,state
0,551298,11358,,,11243.671206,2.060229,Alabama
1,71384,377,,,9757.977978,0.52813,Alaska
2,0,0,,,0.0,,American Samoa
3,897010,17979,,,12323.737824,2.004325,Arizona
4,351825,5920,,,11658.311806,1.682655,Arkansas


In [182]:
# Join the vaccination snapshot with the rolling-average snapshot on state + date.
dos = new.merge(raNew, on=['state', 'date'])
dos['date'] = pd.to_datetime(dos['date'], format='%Y-%m-%d')

# Columns kept in the combined frame, in display order.
merged_columns = [
    'date', 'state', 'vaxxed_per_hundred', 'people_fully_vaccinated',
    'Confirmed', 'cases_avg', 'deaths_avg', 'Deaths', 'Recovered',
    'Active', 'Incident_Rate', 'Case_Fatality_Ratio',
    'daily_vaccinations', 'total_vaccinations',
]

# Attach the master covid data by state, then overwrite 'Recovered' with
# Confirmed minus Deaths.
# NOTE(review): Confirmed - Deaths also counts people who are still ill --
# confirm this is the intended definition of 'Recovered'.
all3 = (
    dos.merge(jHnew, on='state')[merged_columns]
    .assign(Recovered=lambda frame: frame['Confirmed'] - frame['Deaths'])
)

# Display the new merged dataframe.
all3.head()

Unnamed: 0,date,state,vaxxed_per_hundred,people_fully_vaccinated,Confirmed,cases_avg,deaths_avg,Deaths,Recovered,Active,Incident_Rate,Case_Fatality_Ratio,daily_vaccinations,total_vaccinations
0,2021-07-06,Alabama,32.99,1617584.0,551298,121.0,2.86,11358,539940,,11243.671206,2.060229,6066.0,3392366.0
1,2021-07-06,Alaska,44.15,322988.0,71384,27.57,0.43,377,71007,,9757.977978,0.52813,3219.0,678029.0
2,2021-07-06,Arizona,43.58,3172380.0,897010,543.43,10.0,17979,879031,,12323.737824,2.004325,48692.0,6826215.0
3,2021-07-06,Arkansas,34.57,1043217.0,351825,485.14,3.0,5920,345905,,11658.311806,1.682655,5162.0,2261649.0
4,2021-07-06,California,50.49,19949682.0,3821572,1285.33,32.2,63655,3757917,,9671.872929,1.665545,95566.0,43182509.0


In [97]:
# Download a third data source: confirmed covid cases by state.
url_3 = "https://raw.githubusercontent.com/datasets/covid-19/main/data/us_confirmed.csv"
# A timeout keeps the cell from hanging forever, and raise_for_status() stops
# an HTTP error page from being silently parsed as if it were the CSV.
response_3 = requests.get(url_3, timeout=30)
response_3.raise_for_status()
dl3 = response_3.content

# Malformed lines are skipped with a warning rather than aborting the load.
# `error_bad_lines` was deprecated in pandas 1.3 and removed in 2.0, so use
# `on_bad_lines` and only fall back to the old keyword on older pandas
# (which rejects the unknown keyword with a TypeError).
try:
    confirmed = pd.read_csv(io.StringIO(dl3.decode('utf-8')), on_bad_lines='warn')
except TypeError:
    confirmed = pd.read_csv(io.StringIO(dl3.decode('utf-8')), error_bad_lines=False)



  exec(code_obj, self.user_global_ns, self.user_ns)


In [98]:
# Reshape the frame: add 'date' / 'state' / 'Active' columns named to match the
# other frames, then drop the source columns and the ones that are not needed.
# NOTE(review): 'Active' is copied straight from 'Case' -- confirm that column
# really holds active cases and not another kind of count.
confirmed = confirmed.assign(
    date=pd.to_datetime(confirmed['Date'], format='%Y-%m-%d'),
    state=confirmed['Province/State'],
    Active=confirmed['Case'],
).drop(columns=['Admin2', 'Date', 'Province/State', 'Country/Region', 'Case'])
confirmed.head()

Unnamed: 0,date,state,Active
0,2020-01-22,Alabama,0
1,2020-01-23,Alabama,0
2,2020-01-24,Alabama,0
3,2020-01-25,Alabama,0
4,2020-01-26,Alabama,0


In [99]:
# Take the rows of the confirmed dataframe for the 6th, to be used as an example.
on_example_date = confirmed['date'] == '2021-07-06'
result = confirmed[on_example_date]

# For every state keep the single row with the highest 'Active' value that day.
# NOTE(review): this picks the largest row per state rather than summing the
# rows of a state -- confirm that is the intended aggregation.
top_row_labels = result.groupby('state')['Active'].idxmax()
result = result.loc[top_row_labels].reset_index(drop=True)
result.head()

Unnamed: 0,date,state,Active
0,2021-07-06,Alabama,81328
1,2021-07-06,Alaska,31286
2,2021-07-06,American Samoa,0
3,2021-07-06,Arizona,562308
4,2021-07-06,Arkansas,41703


In [100]:
# Build a state -> highest 'Active' value lookup from `result`, then fill
# all3['Active'] by looking up each row's state in it.
active_by_state = dict(zip(result['state'], result['Active']))
all3['Active'] = all3['state'].map(active_by_state)
all3.head()

Unnamed: 0,date,state,vaxxed_per_hundred,people_fully_vaccinated,Confirmed,cases_avg,deaths_avg,Deaths,Recovered,Active,Incident_Rate,Case_Fatality_Ratio,daily_vaccinations,total_vaccinations
0,2021-07-06,Alabama,32.99,1617584.0,551298,121.0,2.86,11358,539940,81328,11243.671206,2.060229,6066.0,3392366.0
1,2021-07-06,Alaska,44.15,322988.0,71384,27.57,0.43,377,71007,31286,9757.977978,0.52813,3219.0,678029.0
2,2021-07-06,Arizona,43.58,3172380.0,897010,543.43,10.0,17979,879031,562308,12323.737824,2.004325,48692.0,6826215.0
3,2021-07-06,Arkansas,34.57,1043217.0,351825,485.14,3.0,5920,345905,41703,11658.311806,1.682655,5162.0,2261649.0
4,2021-07-06,California,50.49,19949682.0,3821572,1285.33,32.2,63655,3757917,1253106,9671.872929,1.665545,95566.0,43182509.0


In [101]:
# Additional data source for population estimates; used later to work out the
# % of the population infected and the % of the population vaccinated.
population_raw = pd.read_csv('Population_of_US.csv', delimiter=',')
m1, m2 = population_raw['POPESTIMATE2019'], population_raw['NAME']

# Two-column frame (state name, 2019 population estimate) on a 0..n-1 index.
state_df = pd.DataFrame(
    {'state': m2, 'population_est': m1},
    index=np.arange(len(population_raw)),
)
state_df.head()

Unnamed: 0,state,population_est
0,Alabama,4903185
1,Alaska,731545
2,Arizona,7278717
3,Arkansas,3017804
4,California,39512223


In [102]:
# Look up each state's population estimate.
population_by_state = dict(zip(state_df['state'], state_df['population_est']))
all3['total_state_pop'] = all3['state'].map(population_by_state)

# Discard every row that has a missing value in any column.
# NOTE(review): this also removes rows that are only missing an unrelated
# column -- confirm that dropping on all columns is intended.
all3.dropna(inplace=True)

# Shares of the state population, as percentages rounded to 2 decimals.
state_pop = all3['total_state_pop']
all3['%_pop_vaxxed'] = (all3['people_fully_vaccinated'] / state_pop * 100).round(2)
all3['%_pop_infected'] = (all3['Active'] / state_pop * 100).round(2)

# Write the example csv and preview the frame.
all3.to_csv('example.csv')
all3.head()

Unnamed: 0,date,state,vaxxed_per_hundred,people_fully_vaccinated,Confirmed,cases_avg,deaths_avg,Deaths,Recovered,Active,Incident_Rate,Case_Fatality_Ratio,daily_vaccinations,total_vaccinations,total_state_pop,%_pop_vaxxed,%_pop_infected
0,2021-07-06,Alabama,32.99,1617584.0,551298,121.0,2.86,11358,539940,81328,11243.671206,2.060229,6066.0,3392366.0,4903185.0,32.99,1.66
1,2021-07-06,Alaska,44.15,322988.0,71384,27.57,0.43,377,71007,31286,9757.977978,0.52813,3219.0,678029.0,731545.0,44.15,4.28
2,2021-07-06,Arizona,43.58,3172380.0,897010,543.43,10.0,17979,879031,562308,12323.737824,2.004325,48692.0,6826215.0,7278717.0,43.58,7.73
3,2021-07-06,Arkansas,34.57,1043217.0,351825,485.14,3.0,5920,345905,41703,11658.311806,1.682655,5162.0,2261649.0,3017804.0,34.57,1.38
4,2021-07-06,California,50.49,19949682.0,3821572,1285.33,32.2,63655,3757917,1253106,9671.872929,1.665545,95566.0,43182509.0,39512223.0,50.49,3.17


In [229]:
# Reload the state vaccination CSV into a fresh frame for the time-series work
# and show the column dtypes.
# The discarded `data.head()` / `data['date'].unique()` expressions and the
# commented-out line were removed: they displayed nothing and made this cell
# depend on `data` from another cell.
xdf = pd.read_csv('us_state_vaccinations.csv')
xdf['date'] = xdf['date'].astype('datetime64[ns]')
xdf.dtypes

date                                   datetime64[ns]
location                                       object
total_vaccinations                            float64
total_distributed                             float64
people_vaccinated                             float64
people_fully_vaccinated_per_hundred           float64
total_vaccinations_per_hundred                float64
people_fully_vaccinated                       float64
people_vaccinated_per_hundred                 float64
distributed_per_hundred                       float64
daily_vaccinations_raw                        float64
daily_vaccinations                            float64
daily_vaccinations_per_million                float64
share_doses_used                              float64
dtype: object

In [230]:
# Keep rows dated 2021-1-12 or later, then drop the columns not used below.
on_or_after_start = xdf['date'] >= '2021-1-12'
xdf = xdf[on_or_after_start].drop(columns=[
    'total_distributed',
    'people_vaccinated',
    'total_vaccinations_per_hundred',
    'people_vaccinated_per_hundred',
    'distributed_per_hundred',
    'daily_vaccinations_raw',
    'daily_vaccinations_per_million',
    'share_doses_used',
])

In [231]:
# Locations removed from the frame before the per-state analysis.
EXCLUDED_LOCATIONS = [
    'Puerto Rico',
    'Bureau of Prisons',
    'Republic of Palau',
    'District of Columbia',
    'Veterans Health',
    'United States',
    'American Samoa',
    'Dept of Defense',
    'Federated States of Micronesia',
    'Indian Health Svc',
    'Guam',
    'Long Term Care',
    'Marshall Islands',
    'Northern Mariana Islands',
]

# One boolean filter replaces the fourteen copy-pasted in-place drops: the same
# rows are removed, the list is easy to edit, and the cell no longer mutates
# `xdf` in place.
xdf = xdf[~xdf['location'].isin(EXCLUDED_LOCATIONS)]

xdf.head()

Unnamed: 0,date,location,total_vaccinations,people_fully_vaccinated_per_hundred,people_fully_vaccinated,daily_vaccinations
0,2021-01-12,Alabama,78134.0,0.15,7270.0,
1,2021-01-13,Alabama,84040.0,0.19,9245.0,5906.0
2,2021-01-14,Alabama,92300.0,,,7083.0
3,2021-01-15,Alabama,100567.0,0.28,13488.0,7478.0
4,2021-01-16,Alabama,,,,7498.0


In [232]:
# Order rows by date, then by location (both ascending, the sort_values default).
xdf = xdf.sort_values(by=["date", "location"])

In [237]:
# Spot-check: show every remaining location's row for a single day.
xdf[xdf['date'] == '2021-1-15']



Unnamed: 0,date,location,total_vaccinations,people_fully_vaccinated_per_hundred,people_fully_vaccinated,daily_vaccinations
3,2021-01-15,Alabama,100567.0,0.28,13488.0,7478.0
179,2021-01-15,Alaska,49039.0,1.29,9406.0,4400.0
531,2021-01-15,Arizona,197086.0,0.29,20837.0,18577.0
707,2021-01-15,Arkansas,118338.0,0.62,18781.0,25820.0
1059,2021-01-15,California,1072959.0,0.52,204374.0,85553.0
1235,2021-01-15,Colorado,272233.0,0.82,47021.0,15935.0
1411,2021-01-15,Connecticut,185866.0,0.48,17162.0,11490.0
1587,2021-01-15,Delaware,31090.0,0.55,5354.0,1560.0
2291,2021-01-15,Florida,853081.0,0.35,74776.0,73214.0
2467,2021-01-15,Georgia,231305.0,0.21,22324.0,20245.0


In [14]:
# Read the travel change data.
tch = pd.read_csv('TravelChangeData.csv')

# Drop military airports, identified by these tokens in the airport name.
# na=False treats a missing airport name as "not military"; without it
# str.contains yields NaN for such rows and the `~` negation raises.
is_military = tch['Airport'].str.contains(r'NAF|AFS|AFB|AAF|MCAS', na=False)
tch = tch[~is_military]

# Drop the columns that carry no passenger info.
tch = tch.drop(columns=['Airport Country Name', 'Origin Airport Id', 'Origin State Abr', 'Freight tons',
                        'Latitude', 'Longitude', 'Mail tons', 'Display City Market Name Full'])

# Create a separate dataframe for each year's data, with the passenger column
# renamed so the two years can sit side by side after the merge.
ptd = tch[tch['Year'] == 2019].drop(columns='Year')
ptd = ptd.rename(columns={"Passengers": "Passengers 2019"})
ntd = tch[tch['Year'] == 2020].drop(columns='Year')
ntd = ntd.rename(columns={"Passengers": "Passengers 2020"})

# Merge to compare years (inner join: only airports present in both years remain).
ychd = pd.merge(ptd, ntd, on=['State', 'Code', 'Airport', 'City'])
ychd = ychd.sort_values(by='State')
ychd


Unnamed: 0,State,Airport,City,Code,Passengers 2019,Passengers 2020
394,Alabama,Mobile Regional,Mobile,MOB,273000.0,89265.0
488,Alabama,Mobile Aerospace,Mobile,BFM,22824.0,4681.0
186,Alabama,Dannelly Field,Montgomery,MGM,159046.0,50512.0
28,Alabama,Birmingham Airport,Birmingham,BHM,1484301.0,449020.0
571,Alabama,Dothan Regional,Dothan,DHN,58833.0,17112.0
...,...,...,...,...,...,...
85,Wyoming,Yellowstone Regional,Cody,COD,16322.0,10412.0
483,Wyoming,Cheyenne Regional/Jerry Olson Field,Cheyenne,CYS,16682.0,2789.0
91,Wyoming,Gillette Campbell County,Gillette,GCC,29480.0,10214.0
610,Wyoming,Rock Springs Sweetwater County,Rock Springs,RKS,24056.0,7447.0


In [17]:
# Read the travel purpose data and keep only the rows for 2017.
tpd = pd.read_csv('TravelPurposeData.csv')
tpd = tpd.loc[tpd['Year'].eq(2017)]
tpd


Unnamed: 0,State,Year,Trip Purpose,Number of Records,Number of Trips,Person miles (millions),Person trips (millions),Persons (thousands)
528,Alabama,2017,Family/Personal business,1,1972.0,79025,5302,4466
529,Alabama,2017,Other,1,189.0,79025,5302,4466
530,Alabama,2017,School/Church,1,985.0,79025,5302,4466
531,Alabama,2017,Social/Recreational,1,1114.0,79025,5302,4466
532,Alabama,2017,Work-related,1,67.0,79025,5302,4466
...,...,...,...,...,...,...,...,...
835,Wyoming,2017,Other,1,27.0,8145,761,553
836,Wyoming,2017,School/Church,1,103.0,8145,761,553
837,Wyoming,2017,Social/Recreational,1,193.0,8145,761,553
838,Wyoming,2017,Work-related,1,15.0,8145,761,553
