In [1]:
#Dependencies
import pandas as pd
import requests
import json
import datetime
from config import api_key


In [2]:
# Airport reference table (airport name, city, state abbreviation, state, IATA code).
states_abbrev_file_path = "Resources/states_abbrev.csv"
states_abbrev_df = pd.read_csv(
    filepath_or_buffer=states_abbrev_file_path,
    encoding='ISO-8859-1',
)
states_abbrev_df

Unnamed: 0,Airport,City,Abbrev,State,IATA
0,Aberdeen Regional Airport,Aberdeen,SD,South Dakota,ABR
1,Abilene Regional Airport,Abilene,TX,Texas,ABI
2,Abraham Lincoln Capital Airport,Springfield,IL,Illinois,SPI
3,Akron-Canton Regional Airport,Akron / Canton,OH,Ohio,CAK
4,Albany International Airport,Albany,NY,New York,ALB
5,Albert J. Ellis Airport,Jacksonville,NC,North Carolina,OAJ
6,Albuquerque International Sunport,Albuquerque,NM,New Mexico,ABQ
7,Alexandria International Airport,Alexandria,LA,Louisiana,AEX
8,Alpena County Regional Airport,Alpena,MI,Michigan,APN
9,Aniak Airport,Aniak,AL,Alaska,ANI


In [3]:
# IATA codes of the 25 airports whose arrivals are queried, five per row.
top_25_airports = [
    'ATL', 'LAX', 'ORD', 'DFW', 'DEN',
    'JFK', 'SFO', 'SEA', 'LAS', 'MCO',
    'EWR', 'CLT', 'PHX', 'IAH', 'MIA',
    'BOS', 'MSP', 'FLL', 'DTW', 'PHL',
    'LGA', 'BWI', 'SLC', 'SAN', 'IAD',
]


In [4]:
# Give the state-abbreviation column the same name the COVID table uses,
# so the two tables can later be joined on it.
airport_column_renames = {'Abbrev': 'state_abbrev'}
states_abbrev_df = states_abbrev_df.rename(columns=airport_column_renames)
states_abbrev_df.head()

Unnamed: 0,Airport,City,state_abbrev,State,IATA
0,Aberdeen Regional Airport,Aberdeen,SD,South Dakota,ABR
1,Abilene Regional Airport,Abilene,TX,Texas,ABI
2,Abraham Lincoln Capital Airport,Springfield,IL,Illinois,SPI
3,Akron-Canton Regional Airport,Akron / Canton,OH,Ohio,CAK
4,Albany International Airport,Albany,NY,New York,ALB


In [5]:
# API call for COVID data (daily per-state history from the COVID Tracking Project).
covid_base_url = "https://api.covidtracking.com/v1/states/daily.json"

# Bound the wait so a stalled connection cannot hang the notebook, and fail
# loudly on an HTTP error instead of trying to parse an error page as data.
covid_http_response = requests.get(covid_base_url, timeout=60)
covid_http_response.raise_for_status()
response = covid_http_response.json()

# Build the working frame from the raw records, keeping only the columns
# that the rest of the notebook uses.
covid_df = pd.DataFrame(
    response,
    columns=['date', 'state', 'positive', 'death', 'hospitalized', 'totalTestResults'],
)

# Only 'hospitalized' has its NaN values replaced with zero; the other
# columns keep whatever missing values the API returned.
covid_df['hospitalized'] = covid_df['hospitalized'].fillna(0)

covid_df.head()


Unnamed: 0,date,state,positive,death,hospitalized,totalTestResults
0,20201022,AK,12877.0,68.0,0.0,548709.0
1,20201022,AL,177064.0,2843.0,19448.0,1280695.0
2,20201022,AR,102798.0,1772.0,6526.0,1255534.0
3,20201022,AS,0.0,0.0,0.0,1616.0
4,20201022,AZ,234906.0,5859.0,20938.0,1668562.0


In [6]:
# Rename the two-letter state code column to 'state_abbrev', the join key
# name used when this table is merged with the flight data.
covid_column_renames = {'state': 'state_abbrev'}
covid_df = covid_df.rename(columns=covid_column_renames)

covid_df.head()


Unnamed: 0,date,state_abbrev,positive,death,hospitalized,totalTestResults
0,20201022,AK,12877.0,68.0,0.0,548709.0
1,20201022,AL,177064.0,2843.0,19448.0,1280695.0
2,20201022,AR,102798.0,1772.0,6526.0,1255534.0
3,20201022,AS,0.0,0.0,0.0,1616.0
4,20201022,AZ,234906.0,5859.0,20938.0,1668562.0


In [7]:
# Add an empty-string 'Date' column as a placeholder for the formatted date.
covid_df = covid_df.assign(Date='')
covid_df.head()

        

Unnamed: 0,date,state_abbrev,positive,death,hospitalized,totalTestResults,Date
0,20201022,AK,12877.0,68.0,0.0,548709.0,
1,20201022,AL,177064.0,2843.0,19448.0,1280695.0,
2,20201022,AR,102798.0,1772.0,6526.0,1255534.0,
3,20201022,AS,0.0,0.0,0.0,1616.0,
4,20201022,AZ,234906.0,5859.0,20938.0,1668562.0,


In [8]:

# Turn the numeric YYYYMMDD 'date' column into a 'YYYY-MM-DD' string column
# named 'Date', then drop the original column.
#
# The slicing is done on the whole column at once rather than row by row.
# The guard makes the cell safe to re-run: once 'date' has been dropped
# there is nothing left to convert, so 'Date' is left untouched.
if 'date' in covid_df.columns:
    raw_date_text = covid_df['date'].astype(str)
    covid_df['Date'] = (
        raw_date_text.str[0:4] + '-' + raw_date_text.str[4:6] + '-' + raw_date_text.str[6:8]
    )
    covid_df = covid_df.drop('date', axis=1)

covid_df.head(20)


Unnamed: 0,state_abbrev,positive,death,hospitalized,totalTestResults,Date
0,AK,12877.0,68.0,0.0,548709.0,2020-10-22
1,AL,177064.0,2843.0,19448.0,1280695.0,2020-10-22
2,AR,102798.0,1772.0,6526.0,1255534.0,2020-10-22
3,AS,0.0,0.0,0.0,1616.0,2020-10-22
4,AZ,234906.0,5859.0,20938.0,1668562.0,2020-10-22
5,CA,880724.0,17189.0,0.0,17358770.0,2020-10-22
6,CO,88849.0,2066.0,8380.0,1742583.0,2020-10-22
7,CT,65373.0,4569.0,12257.0,2092263.0,2020-10-22
8,DC,16537.0,642.0,0.0,479435.0,2020-10-22
9,DE,23528.0,670.0,0.0,333059.0,2020-10-22


In [20]:


# Length of the look-back window, in days.
no_of_days = 60

# Today's date at midnight, as a pandas Timestamp.
curr_date = pd.to_datetime(pd.to_datetime('today').date())

# NOTE(review): as written, start_date is the most recent day (today) and
# end_date is the oldest day (today minus no_of_days) -- confirm the naming
# is intentional before relying on these two values.
start_date = str(curr_date).split(' ')[0]
end_date = str(datetime.datetime.today() - datetime.timedelta(days=no_of_days)).split(' ')[0]

print(start_date)
print(end_date)


2020-10-22
2020-08-23


In [21]:
# Build the list of flight dates to query: one 'YYYY-MM-DD' string per day,
# starting the day after (today - no_of_days) and ending with today.
window_origin = datetime.datetime.today() - datetime.timedelta(days=no_of_days)

query_dates = [
    str(pd.to_datetime(window_origin + datetime.timedelta(days=day_offset))).split(' ')[0]
    for day_offset in range(1, no_of_days + 1)
]

# curr_date ends up on the last generated day, as it did when the dates
# were produced by stepping forward one day at a time.
curr_date = window_origin + datetime.timedelta(days=no_of_days)

query_dates
   

['2020-08-24',
 '2020-08-25',
 '2020-08-26',
 '2020-08-27',
 '2020-08-28',
 '2020-08-29',
 '2020-08-30',
 '2020-08-31',
 '2020-09-01',
 '2020-09-02',
 '2020-09-03',
 '2020-09-04',
 '2020-09-05',
 '2020-09-06',
 '2020-09-07',
 '2020-09-08',
 '2020-09-09',
 '2020-09-10',
 '2020-09-11',
 '2020-09-12',
 '2020-09-13',
 '2020-09-14',
 '2020-09-15',
 '2020-09-16',
 '2020-09-17',
 '2020-09-18',
 '2020-09-19',
 '2020-09-20',
 '2020-09-21',
 '2020-09-22',
 '2020-09-23',
 '2020-09-24',
 '2020-09-25',
 '2020-09-26',
 '2020-09-27',
 '2020-09-28',
 '2020-09-29',
 '2020-09-30',
 '2020-10-01',
 '2020-10-02',
 '2020-10-03',
 '2020-10-04',
 '2020-10-05',
 '2020-10-06',
 '2020-10-07',
 '2020-10-08',
 '2020-10-09',
 '2020-10-10',
 '2020-10-11',
 '2020-10-12',
 '2020-10-13',
 '2020-10-14',
 '2020-10-15',
 '2020-10-16',
 '2020-10-17',
 '2020-10-18',
 '2020-10-19',
 '2020-10-20',
 '2020-10-21',
 '2020-10-22']

In [None]:
# API call for flights data.
#
# One request is sent per (date, airport) pair with limit=1: only the
# 'pagination.total' count of landed flights is needed, not the flights
# themselves. The result is one row per pair in flights_df.

url = f"http://api.aviationstack.com/v1/flights?access_key={api_key}&"
query_iata = top_25_airports
value_list = []

for q_date in query_dates:
    for q_iata in query_iata:
        flights_base_url = f"{url}flight_date={q_date}&arr_iata={q_iata}&flight_status=landed&limit=1"
        # The timeout keeps a single stalled connection from hanging the whole run.
        flights_response = requests.get(flights_base_url, timeout=30).json()

        # A payload without 'pagination' is treated as a failed request
        # (presumably an API error object -- confirm against the API docs).
        # Report the API's own message, never the URL, which embeds the access key.
        if 'pagination' not in flights_response:
            raise RuntimeError(
                f"Flights request failed for {q_iata} on {q_date}: {flights_response.get('error')}"
            )
        landed_flights = flights_response['pagination']['total']

        # When no flights landed, 'data' is empty and has no first element;
        # fall back to the airport code that was requested.
        flight_rows = flights_response.get('data') or []
        arr_IATA = flight_rows[0]['arrival']['iata'] if flight_rows else q_iata

        value_list.append({'Date': q_date, 'IATA': arr_IATA, 'Total Landed Flights': landed_flights})

flights_df = pd.DataFrame(value_list)
flights_df


In [12]:
# Attach airport/state details to every flight-count row by matching IATA codes.
# Only codes present in both tables are kept (inner join).
merged_df = pd.merge(states_abbrev_df, flights_df, on='IATA')
merged_df.head(50)

Unnamed: 0,Airport,City,state_abbrev,State,IATA,Date,Total Landed Flights
0,Baltimore/Washington International Thurgood Ma...,Baltimore / Glen Burnie,MD,Maryland,BWI,2020-10-13,233
1,Baltimore/Washington International Thurgood Ma...,Baltimore / Glen Burnie,MD,Maryland,BWI,2020-10-14,238
2,Baltimore/Washington International Thurgood Ma...,Baltimore / Glen Burnie,MD,Maryland,BWI,2020-10-15,270
3,Baltimore/Washington International Thurgood Ma...,Baltimore / Glen Burnie,MD,Maryland,BWI,2020-10-16,265
4,Baltimore/Washington International Thurgood Ma...,Baltimore / Glen Burnie,MD,Maryland,BWI,2020-10-17,12
5,Baltimore/Washington International Thurgood Ma...,Baltimore / Glen Burnie,MD,Maryland,BWI,2020-10-18,258
6,Baltimore/Washington International Thurgood Ma...,Baltimore / Glen Burnie,MD,Maryland,BWI,2020-10-19,240
7,Baltimore/Washington International Thurgood Ma...,Baltimore / Glen Burnie,MD,Maryland,BWI,2020-10-20,232
8,Baltimore/Washington International Thurgood Ma...,Baltimore / Glen Burnie,MD,Maryland,BWI,2020-10-21,223
9,Baltimore/Washington International Thurgood Ma...,Baltimore / Glen Burnie,MD,Maryland,BWI,2020-10-22,198


In [13]:
# Add the COVID figures for the matching state and day to each flight row.
# Rows without a match on both keys are dropped (inner join).
covid_join_keys = ["Date", "state_abbrev"]
merged_df = pd.merge(
    merged_df,
    covid_df,
    how='inner',
    left_on=covid_join_keys,
    right_on=covid_join_keys,
)
merged_df

Unnamed: 0,Airport,City,state_abbrev,State,IATA,Date,Total Landed Flights,positive,death,hospitalized,totalTestResults
0,Baltimore/Washington International Thurgood Ma...,Baltimore / Glen Burnie,MD,Maryland,BWI,2020-10-13,233,132343.0,4012.0,16150.0,2934933.0
1,Baltimore/Washington International Thurgood Ma...,Baltimore / Glen Burnie,MD,Maryland,BWI,2020-10-14,238,132918.0,4022.0,16215.0,2956824.0
2,Baltimore/Washington International Thurgood Ma...,Baltimore / Glen Burnie,MD,Maryland,BWI,2020-10-15,270,133548.0,4028.0,16255.0,2985460.0
3,Baltimore/Washington International Thurgood Ma...,Baltimore / Glen Burnie,MD,Maryland,BWI,2020-10-16,265,134329.0,4032.0,16288.0,3013107.0
4,Baltimore/Washington International Thurgood Ma...,Baltimore / Glen Burnie,MD,Maryland,BWI,2020-10-17,12,135127.0,4036.0,16352.0,3049634.0
5,Baltimore/Washington International Thurgood Ma...,Baltimore / Glen Burnie,MD,Maryland,BWI,2020-10-18,258,135657.0,4037.0,16435.0,3079162.0
6,Baltimore/Washington International Thurgood Ma...,Baltimore / Glen Burnie,MD,Maryland,BWI,2020-10-19,240,136154.0,4041.0,16475.0,3103833.0
7,Baltimore/Washington International Thurgood Ma...,Baltimore / Glen Burnie,MD,Maryland,BWI,2020-10-20,232,136744.0,4050.0,16516.0,3121799.0
8,Baltimore/Washington International Thurgood Ma...,Baltimore / Glen Burnie,MD,Maryland,BWI,2020-10-21,223,137236.0,4058.0,16549.0,3138875.0
9,Baltimore/Washington International Thurgood Ma...,Baltimore / Glen Burnie,MD,Maryland,BWI,2020-10-22,198,137979.0,4070.0,16603.0,3169302.0


In [14]:
# Cast the count columns from float to int; 'hospitalized' is left as float.
# Note: astype(int) raises if a column still contains NaN.
for count_column in ("positive", "death", "totalTestResults"):
    merged_df[count_column] = merged_df[count_column].astype(int)
merged_df

Unnamed: 0,Airport,City,state_abbrev,State,IATA,Date,Total Landed Flights,positive,death,hospitalized,totalTestResults
0,Baltimore/Washington International Thurgood Ma...,Baltimore / Glen Burnie,MD,Maryland,BWI,2020-10-13,233,132343,4012,16150.0,2934933
1,Baltimore/Washington International Thurgood Ma...,Baltimore / Glen Burnie,MD,Maryland,BWI,2020-10-14,238,132918,4022,16215.0,2956824
2,Baltimore/Washington International Thurgood Ma...,Baltimore / Glen Burnie,MD,Maryland,BWI,2020-10-15,270,133548,4028,16255.0,2985460
3,Baltimore/Washington International Thurgood Ma...,Baltimore / Glen Burnie,MD,Maryland,BWI,2020-10-16,265,134329,4032,16288.0,3013107
4,Baltimore/Washington International Thurgood Ma...,Baltimore / Glen Burnie,MD,Maryland,BWI,2020-10-17,12,135127,4036,16352.0,3049634
5,Baltimore/Washington International Thurgood Ma...,Baltimore / Glen Burnie,MD,Maryland,BWI,2020-10-18,258,135657,4037,16435.0,3079162
6,Baltimore/Washington International Thurgood Ma...,Baltimore / Glen Burnie,MD,Maryland,BWI,2020-10-19,240,136154,4041,16475.0,3103833
7,Baltimore/Washington International Thurgood Ma...,Baltimore / Glen Burnie,MD,Maryland,BWI,2020-10-20,232,136744,4050,16516.0,3121799
8,Baltimore/Washington International Thurgood Ma...,Baltimore / Glen Burnie,MD,Maryland,BWI,2020-10-21,223,137236,4058,16549.0,3138875
9,Baltimore/Washington International Thurgood Ma...,Baltimore / Glen Burnie,MD,Maryland,BWI,2020-10-22,198,137979,4070,16603.0,3169302


In [15]:
# Parse 'Date' into real datetimes, then add a 'Month' column holding the
# month number of each date.
parsed_dates = pd.to_datetime(merged_df['Date'])
merged_df['Date'] = parsed_dates
merged_df["Month"] = parsed_dates.dt.month
merged_df


Unnamed: 0,Airport,City,state_abbrev,State,IATA,Date,Total Landed Flights,positive,death,hospitalized,totalTestResults,Month
0,Baltimore/Washington International Thurgood Ma...,Baltimore / Glen Burnie,MD,Maryland,BWI,2020-10-13,233,132343,4012,16150.0,2934933,10
1,Baltimore/Washington International Thurgood Ma...,Baltimore / Glen Burnie,MD,Maryland,BWI,2020-10-14,238,132918,4022,16215.0,2956824,10
2,Baltimore/Washington International Thurgood Ma...,Baltimore / Glen Burnie,MD,Maryland,BWI,2020-10-15,270,133548,4028,16255.0,2985460,10
3,Baltimore/Washington International Thurgood Ma...,Baltimore / Glen Burnie,MD,Maryland,BWI,2020-10-16,265,134329,4032,16288.0,3013107,10
4,Baltimore/Washington International Thurgood Ma...,Baltimore / Glen Burnie,MD,Maryland,BWI,2020-10-17,12,135127,4036,16352.0,3049634,10
5,Baltimore/Washington International Thurgood Ma...,Baltimore / Glen Burnie,MD,Maryland,BWI,2020-10-18,258,135657,4037,16435.0,3079162,10
6,Baltimore/Washington International Thurgood Ma...,Baltimore / Glen Burnie,MD,Maryland,BWI,2020-10-19,240,136154,4041,16475.0,3103833,10
7,Baltimore/Washington International Thurgood Ma...,Baltimore / Glen Burnie,MD,Maryland,BWI,2020-10-20,232,136744,4050,16516.0,3121799,10
8,Baltimore/Washington International Thurgood Ma...,Baltimore / Glen Burnie,MD,Maryland,BWI,2020-10-21,223,137236,4058,16549.0,3138875,10
9,Baltimore/Washington International Thurgood Ma...,Baltimore / Glen Burnie,MD,Maryland,BWI,2020-10-22,198,137979,4070,16603.0,3169302,10


In [16]:
# Final analysis table: pick the columns of interest from merged_df, in
# display order, and give them presentation-friendly names.
project_column_names = {
    "Date": "Date",
    "Month": "Month",
    "State": "State",
    "Total Landed Flights": "Number of Flights",
    "positive": "Positive",
    "death": "Deaths",
    "totalTestResults": "Total Tested",
}
project_df = merged_df[list(project_column_names)].rename(columns=project_column_names)
project_df

Unnamed: 0,Date,Month,State,Number of Flights,Positive,Deaths,Total Tested
0,2020-10-13,10,Maryland,233,132343,4012,2934933
1,2020-10-14,10,Maryland,238,132918,4022,2956824
2,2020-10-15,10,Maryland,270,133548,4028,2985460
3,2020-10-16,10,Maryland,265,134329,4032,3013107
4,2020-10-17,10,Maryland,12,135127,4036,3049634
5,2020-10-18,10,Maryland,258,135657,4037,3079162
6,2020-10-19,10,Maryland,240,136154,4041,3103833
7,2020-10-20,10,Maryland,232,136744,4050,3121799
8,2020-10-21,10,Maryland,223,137236,4058,3138875
9,2020-10-22,10,Maryland,198,137979,4070,3169302


In [17]:
# Monthly totals of positive counts and landed flights.
#
# The two numeric columns are selected BEFORE summing. Summing the whole
# frame would also try to add up 'Date' (datetime) and 'State' (text),
# which raises a TypeError on recent pandas versions and is wasted work
# on older ones.
#
# NOTE(review): the 'Positive' values shown above grow from day to day, so
# they look cumulative; adding them across days and airports may overstate
# infections -- confirm this is the intended metric.
monthly_totals = project_df.groupby(["Month"])[["Positive", "Number of Flights"]].sum()
total_infection = monthly_totals["Positive"]
landing_flights = monthly_totals["Number of Flights"]
month_data_df = pd.DataFrame({"Total Infection": total_infection, "Landed Flights": landing_flights})
month_data_df


Unnamed: 0_level_0,Total Infection,Landed Flights
Month,Unnamed: 1_level_1,Unnamed: 2_level_1
10,101184502,146808


In [18]:
# Save the final table into the Resources folder, without the index column.
# A forward slash is used (as for the input CSV) so the path works on every
# OS and contains no backslash escape sequence.
project_df.to_csv('Resources/FullDataSet.csv', index=False)