In [1]:
from pathlib import Path
import glob
import requests
import pandas as pd
import arrow

In [2]:
# Setting up API Base URL + Credentials

base_url: str = "https://api.aviationstack.com/v1/flights"

api_key: str = "*************************"

In [3]:
today = arrow.now()                             # grabbing today's date
start_date = today.shift(days = -90)            # date when to start the 90 day lookback window
offsets = [0, 100, 200]                         # working around the flightstack API response pagination

In [4]:
for i in range(91):

    dte = start_date.shift(days = i).format('YYYY-MM-DD')

    for offset in offsets:

        query_params: dict = {
            "access_key": api_key,
            "flight_date": dte,
            "limit": 100,
            "offset": offset,
            "dep_iata": "YUL",
        }

        response = requests.get(base_url, query_params)

        if response.status_code == 200:

            # print(f"Pull Successful - Writing to file # {i+1}...")

            response_json = response.json()

            required_data = response_json.get('data', [])

            structured_data: list = []

            for flight in required_data:

                if isinstance(flight, dict):

                    if flight['flight'].get('codeshared') is None:

                        structured_data.append({
                            'Flight Status': flight['flight_status'],
                            'Departure Airport': flight['departure'].get('iata', ''),
                            'Departure Gate': flight['departure'].get('gate', ''),
                            'Arrival IATA Code': flight['arrival'].get('iata', ''),
                            'Scheduled Departure': flight['departure'].get('scheduled', ''),
                            'Actual Departure': flight['departure'].get('actual', ''),
                            'Delay (in Minutes)': flight['departure'].get('delay', 0),
                            'Airline Name': flight['airline'].get('name', ''),
                            'Flight Number': flight['flight'].get('iata', ''),
                        })
                
            departures_df: pd.DataFrame = pd.DataFrame(structured_data)

            departures_df.to_csv(f"./flight_data/Enterprise-II-Data/YUL_Departures_{dte}_{offset}.csv", index = False)

            print("File written, onto the next one....")


Pull Successful - Writing to file # 1...
File written, onto the next one....
Pull Successful - Writing to file # 1...
File written, onto the next one....
Pull Successful - Writing to file # 1...
File written, onto the next one....
Pull Successful - Writing to file # 2...
File written, onto the next one....
Pull Successful - Writing to file # 2...
File written, onto the next one....
Pull Successful - Writing to file # 2...
File written, onto the next one....
Pull Successful - Writing to file # 3...
File written, onto the next one....
Pull Successful - Writing to file # 3...
File written, onto the next one....
Pull Successful - Writing to file # 3...
File written, onto the next one....
Pull Successful - Writing to file # 4...
File written, onto the next one....
Pull Successful - Writing to file # 5...
File written, onto the next one....
Pull Successful - Writing to file # 5...
File written, onto the next one....
Pull Successful - Writing to file # 5...
File written, onto the next one....

In [5]:
full_departures_df = pd.DataFrame()             # initializing empty dataframe to concatenate all the daily dataframes

for csv_file in glob.glob('./flight_data/Enterprise-II-Data/*.csv'):

    try:

        file: pd.DataFrame = pd.read_csv(csv_file)

        if not file.empty:

            full_departures_df = pd.concat([full_departures_df, file], ignore_index = True)
    
    except pd.errors.EmptyDataError:

        print(f"Skipping empty file {csv_file}")

Skipping empty file ./flight_data/Enterprise-II-Data/YUL_Departures_2024-01-23_0.csv
Skipping empty file ./flight_data/Enterprise-II-Data/YUL_Departures_2024-01-28_100.csv
Skipping empty file ./flight_data/Enterprise-II-Data/YUL_Departures_2024-01-28_0.csv
Skipping empty file ./flight_data/Enterprise-II-Data/YUL_Departures_2024-01-30_100.csv
Skipping empty file ./flight_data/Enterprise-II-Data/YUL_Departures_2024-01-29_100.csv
Skipping empty file ./flight_data/Enterprise-II-Data/YUL_Departures_2024-01-23_200.csv
Skipping empty file ./flight_data/Enterprise-II-Data/YUL_Departures_2024-01-28_200.csv
Skipping empty file ./flight_data/Enterprise-II-Data/YUL_Departures_2024-01-29_0.csv
Skipping empty file ./flight_data/Enterprise-II-Data/YUL_Departures_2024-01-30_200.csv
Skipping empty file ./flight_data/Enterprise-II-Data/YUL_Departures_2024-01-23_100.csv
Skipping empty file ./flight_data/Enterprise-II-Data/YUL_Departures_2024-01-29_200.csv


In [6]:
full_departures_df.head(100)

Unnamed: 0,Flight Status,Departure Airport,Departure Gate,Arrival IATA Code,Scheduled Departure,Actual Departure,Delay (in Minutes),Airline Name,Flight Number
0,landed,YUL,A63,DXB,2024-04-08T10:20:00+00:00,2024-04-08T10:28:00+00:00,8.0,Emirates,EK244
1,landed,YUL,C82,EWR,2024-04-08T08:15:00+00:00,2024-04-08T08:43:00+00:00,28.0,Air Canada,AC8938
2,landed,YUL,C83,EWR,2024-04-08T13:10:00+00:00,2024-04-08T13:20:00+00:00,10.0,Air Canada,AC8940
3,landed,YUL,C84,EWR,2024-04-08T15:10:00+00:00,2024-04-08T15:32:00+00:00,22.0,Air Canada,AC8942
4,active,YUL,C84,EWR,2024-04-08T20:00:00+00:00,,122.0,Air Canada,AC8946
...,...,...,...,...,...,...,...,...,...
95,diverted,YUL,,BOS,2024-03-11T07:00:00+00:00,2024-03-11T06:53:00+00:00,,Airsprint,
96,landed,YUL,C86,BOS,2024-03-11T08:15:00+00:00,2024-03-11T09:21:00+00:00,66.0,Air Canada,AC748
97,scheduled,YUL,C75,BOS,2024-03-11T16:45:00+00:00,2024-03-11T18:32:00+00:00,108.0,Air Canada,AC752
98,landed,YUL,A50,BRU,2024-03-11T19:50:00+00:00,2024-03-11T20:09:00+00:00,19.0,Air Canada,AC832


In [8]:
full_departures_df.to_csv('./flight_data/Enterprise-II-Data/concatenated_data/Full_Departures_Data.csv', index = False)