 # Notebook description

In this notebook the data is downloaded from the API.

For the purpose of this project these are the endpoints which are needed:
 - `airport` - airports data,
 - `airportWeather` - information about the weather at a given airport on a given day,
 - `aircraft` - aircraft data,
 - `flight` - information about departures from a given airport on a given date

To retrieve data from the `airport` endpoint an airport id is needed; the ids are taken from the `origin_airport_id` column of the airports.csv file.

--------------------------------------------------------------------------------------

 Importing libraries and loading environment variables

In [1]:
import requests
import json
import pandas as pd
import time
from dotenv import load_dotenv
from os import environ

In [2]:
# Location of the .env file, relative to the notebook's working directory.
dotenv_path = '../.env'
# Last expression of the cell: the value returned by load_dotenv is displayed as the cell output.
load_dotenv(dotenv_path=dotenv_path)

True

 API parameters

In [3]:
# API credentials and base URL are read from environment variables;
# either name is None when the corresponding variable is not set.
apikey, api = (environ.get(variable) for variable in ('API_KEY', 'API_URL'))

 Loading the airports.csv file

In [4]:
# Read the list of airport ids used to query the API.
# Forward slashes keep the relative path working on Windows as well as on Linux/macOS
# (the backslash form is only understood as a path on Windows).
airports = pd.read_csv('../data/airports.csv')
airports.describe()

Unnamed: 0,origin_airport_id
count,364.0
mean,12730.376374
std,1647.879998
min,10135.0
25%,11272.25
50%,12515.0
75%,14160.75
max,16869.0


 # Downloading `Airport` endpoint


In [5]:
# Query the `airport` endpoint once for every id listed in airports.csv.
data = []             # JSON payload of every successful request
failed_airports = []  # (airport id, HTTP status) of every request that did not return 200

for airportId in airports['origin_airport_id']:
    res = requests.get(f'{api}/airport/{airportId}', headers = {'Authorization': apikey})
    if res.status_code != 200:
        # Best effort: move on to the next airport, but keep a record of the failure
        # instead of dropping it silently.
        failed_airports.append((airportId, res.status_code))
        continue
    data.append(res.json())
    time.sleep(1)  # wait 1 s after each successful call (presumably to respect API rate limits)

print(f'{len(data)} airports downloaded; {len(failed_airports)} requests returned a non-200 status')

 Saving data to dataframe

In [6]:
# Build one row per downloaded airport payload and preview the first rows.
airport_df = pd.DataFrame.from_records(data=data)
airport_df.head(n=5)

Unnamed: 0,ORIGIN_AIRPORT_ID,DISPLAY_AIRPORT_NAME,ORIGIN_CITY_NAME,NAME
0,11638,Fresno Air Terminal,"Fresno, CA","FRESNO YOSEMITE INTERNATIONAL, CA US"
1,13342,General Mitchell Field,"Milwaukee, WI","MILWAUKEE MITCHELL AIRPORT, WI US"
2,13244,Memphis International,"Memphis, TN","MEMPHIS INTERNATIONAL AIRPORT, TN US"
3,15096,Syracuse Hancock International,"Syracuse, NY","SYRACUSE HANCOCK INTERNATIONAL AIRPORT, NY US"
4,10397,Atlanta Municipal,"Atlanta, GA",ATLANTA HARTSFIELD JACKSON INTERNATIONAL AIRPO...


 Saving dataframe to .csv file

In [7]:
# Persist the raw airport data. Forward slashes keep the relative path portable
# (the backslash form is only understood as a path on Windows).
airport_df.to_csv('../data/raw/airport_list.csv')

 # Downloading `Weather` endpoint

 The endpoint consists of data from January 2019 to April 2020, so in the next step a list of months (2019-01 through 2020-03) is created in order to iterate through them and retrieve the data one month at a time

In [8]:
# Monthly periods to request, from 2019-01 up to and including 2020-03.
# Building the PeriodIndex directly avoids the deprecated 'M' (month-end) alias of
# date_range and the lowercase 'm' period alias, and makes the last month explicit
# (both bounds of period_range are inclusive).
month_list = pd.period_range(start='2019-01', end='2020-03', freq='M')
month_list

PeriodIndex(['2019-01', '2019-02', '2019-03', '2019-04', '2019-05', '2019-06',
             '2019-07', '2019-08', '2019-09', '2019-10', '2019-11', '2019-12',
             '2020-01', '2020-02', '2020-03'],
            dtype='period[M]')

In [9]:
# Query the `airportWeather` endpoint once per month in month_list.
data = []           # records returned by the API, accumulated over all months
failed_months = []  # (month, HTTP status) of every request that did not return 200

for month in month_list:
    res = requests.get(f'{api}/airportWeather?date={month}', headers = {'Authorization': apikey})
    print(month)
    if res.status_code != 200:
        # Best effort: move on to the next month, but keep a record of the failure
        # instead of dropping it silently.
        failed_months.append((str(month), res.status_code))
        continue
    data.extend(res.json())  # the payload is iterated and every item becomes one record
    time.sleep(1)  # wait 1 s after each successful call (presumably to respect API rate limits)

print(f'{len(data)} weather records downloaded; {len(failed_months)} requests returned a non-200 status')

2019-01
2019-02
2019-03
2019-04
2019-05
2019-06
2019-07
2019-08
2019-09
2019-10
2019-11
2019-12
2020-01
2020-02
2020-03


 Saving data to dataframe

In [10]:
# Build one row per downloaded weather record and preview the first rows.
airport_weather_df = pd.DataFrame.from_records(data=data)
airport_weather_df.head(n=5)

Unnamed: 0,WT18,STATION,NAME,DATE,AWND,PRCP,SNOW,SNWD,TAVG,TMAX,...,PGTM,WT10,WESD,SN32,SX32,PSUN,TSUN,TOBS,WT07,WT11
0,,USW00013874,ATLANTA HARTSFIELD JACKSON INTERNATIONAL AIRPO...,2019-01-01,4.7,0.14,0.0,0.0,64.0,66.0,...,,,,,,,,,,
1,,USW00013874,ATLANTA HARTSFIELD JACKSON INTERNATIONAL AIRPO...,2019-01-02,4.92,0.57,0.0,0.0,56.0,59.0,...,,,,,,,,,,
2,,USW00013874,ATLANTA HARTSFIELD JACKSON INTERNATIONAL AIRPO...,2019-01-03,5.37,0.15,0.0,0.0,52.0,55.0,...,,,,,,,,,,
3,,USW00013874,ATLANTA HARTSFIELD JACKSON INTERNATIONAL AIRPO...,2019-01-04,12.08,1.44,0.0,0.0,56.0,66.0,...,,,,,,,,,,
4,,USW00013874,ATLANTA HARTSFIELD JACKSON INTERNATIONAL AIRPO...,2019-01-05,13.42,0.0,0.0,0.0,49.0,59.0,...,,,,,,,,,,


 Saving dataframe to .csv file

In [11]:
# Persist the raw weather data. Forward slashes keep the relative path portable
# (the backslash form is only understood as a path on Windows).
airport_weather_df.to_csv('../data/raw/airport_weather.csv')

 # Downloading `Aircraft`
 

In [12]:
# The `aircraft` endpoint takes no parameters, so a single request is made.
res = requests.get(f'{api}/aircraft', headers = {'Authorization': apikey})
# Stop here on an HTTP error status rather than parsing an error body into a DataFrame.
res.raise_for_status()
aircraft_df = pd.DataFrame.from_records(res.json())
aircraft_df.head()

Unnamed: 0,MANUFACTURE_YEAR,TAIL_NUM,NUMBER_OF_SEATS
0,1944,N54514,0.0
1,1945,N1651M,0.0
2,1953,N100CE,0.0
3,1953,N141FL,0.0
4,1953,N151FL,0.0


Saving dataframe to .csv file

In [13]:
# Persist the raw aircraft data. Forward slashes keep the relative path portable
# (the backslash form is only understood as a path on Windows).
aircraft_df.to_csv('../data/raw/aircraft.csv')

 # Downloading `Flight`

  The endpoint consists of data from January 2019 to April 2020, so the list of months created above is reused to iterate through them and retrieve the data for each airport one month at a time

In [14]:
# Query the `flight` endpoint once per (airport, month) pair.
data = []              # records returned by the API, accumulated over all airports and months
skipped_requests = []  # (airport id, month, HTTP status) of every request that did not return 200

for airport in airports['origin_airport_id']:
    print(airport)
    for month in month_list:
        print(month)
        res = requests.get(f'{api}/flight?airportId={airport}&date={month}', headers = {'Authorization': apikey})
        if res.status_code != 200:
            # Keep a record of the failure instead of dropping it silently, then give up on
            # this airport: none of its remaining months are requested.
            # NOTE(review): a transient error (not only "no data for this airport") also ends
            # the airport early - confirm this is intended.
            skipped_requests.append((airport, str(month), res.status_code))
            break
        data.extend(res.json())  # the payload is iterated and every item becomes one record
        time.sleep(0.2)  # short pause after each successful call

print(f'{len(data)} flight records downloaded; {len(skipped_requests)} requests returned a non-200 status')

10874
2019-01
11233
2019-01
13360
2019-01
15008
2019-01
11638
2019-01
14150
2019-01
15323
2019-01
14814
2019-01
12007
2019-01
11337
2019-01
13342
2019-01
15070
2019-01
13244
2019-01
12280
2019-01
15096
2019-01
11641
2019-01
13832
2019-01
10268
2019-01
10397
2019-01
2019-02
2019-03
2019-04
2019-05
2019-06
2019-07
2019-08
2019-09
2019-10
2019-11
2019-12
2020-01
2020-02
2020-03
15041
2019-01
10529
2019-01
12119
2019-01
11537
2019-01
11092
2019-01
10581
2019-01
13829
2019-01
15389
2019-01
10140
2019-01
12389
2019-01
11648
2019-01
15023
2019-01
11982
2019-01
10967
2019-01
11525
2019-01
10792
2019-01
14259
2019-01
11637
2019-01
10466
2019-01
10599
2019-01
10208
2019-01
15841
2019-01
14831
2019-01
2019-02
2019-03
2019-04
2019-05
2019-06
2019-07
2019-08
2019-09
2019-10
2019-11
2019-12
2020-01
2020-02
2020-03
12898
2019-01
13241
2019-01
13367
2019-01
11481
2019-01
14108
2019-01
13931
2019-01
13873
2019-01
10157
2019-01
10245
2019-01
11146
2019-01
13277
2019-01
11292
2019-01
2019-02
2019-03
2019

In [15]:
# Build one row per downloaded flight record and show summary statistics of the numeric columns.
flight_df = pd.DataFrame.from_records(data=data)
flight_df.describe()

Unnamed: 0,MONTH,DAY_OF_MONTH,DAY_OF_WEEK,OP_CARRIER_FL_NUM,ORIGIN_AIRPORT_ID,DEST_AIRPORT_ID,CRS_DEP_TIME,DEP_TIME,DEP_DELAY_NEW,CRS_ARR_TIME,...,CRS_ELAPSED_TIME,ACTUAL_ELAPSED_TIME,DISTANCE,DISTANCE_GROUP,YEAR,CARRIER_DELAY,WEATHER_DELAY,NAS_DELAY,SECURITY_DELAY,LATE_AIRCRAFT_DELAY
count,1386120.0,1386120.0,1386120.0,1386120.0,1386120.0,1386120.0,1386120.0,1347712.0,1347711.0,1386120.0,...,1386120.0,1344638.0,1386120.0,1386120.0,1386120.0,257318.0,257318.0,257318.0,257318.0,257318.0
mean,5.673192,15.70936,3.903999,1774.918,12627.2,12618.58,1334.447,1337.156,13.64892,1478.206,...,139.0266,133.5066,751.5743,3.4202,2019.195,18.330004,2.824019,20.68811,0.074515,25.693189
std,3.563204,8.747288,1.987461,1455.752,1484.031,1494.426,494.5813,510.2164,44.69326,529.6749,...,74.57335,74.71373,596.7415,2.388941,0.3962006,56.595923,26.57426,42.651874,3.527666,51.710956
min,1.0,1.0,1.0,1.0,10299.0,10299.0,1.0,1.0,0.0,1.0,...,32.0,25.0,100.0,1.0,2019.0,0.0,0.0,0.0,0.0,0.0
25%,2.0,8.0,2.0,628.0,11292.0,11292.0,915.0,917.0,0.0,1055.0,...,90.0,82.0,337.0,2.0,2019.0,0.0,0.0,0.0,0.0,0.0
50%,5.0,16.0,4.0,1494.0,12892.0,12892.0,1329.0,1331.0,0.0,1510.0,...,120.0,114.0,602.0,3.0,2019.0,0.0,0.0,6.0,0.0,0.0
75%,9.0,23.0,6.0,2380.0,13930.0,13930.0,1739.0,1749.0,6.0,1922.0,...,163.0,159.0,888.0,4.0,2019.0,16.0,0.0,24.0,0.0,31.0
max,12.0,31.0,7.0,7881.0,15304.0,15304.0,2359.0,2400.0,2579.0,2400.0,...,727.0,538.0,2586.0,11.0,2020.0,2560.0,1239.0,1567.0,1078.0,1438.0


Saving dataframe to .csv file

In [16]:
# Persist the raw flight data. Forward slashes keep the relative path portable
# (the backslash form is only understood as a path on Windows).
flight_df.to_csv('../data/raw/flight.csv')