# Necessary Imports

In [1]:
import pandas as pd
from datetime import datetime, date, timedelta
import requests
import pytz
from pytz import timezone
import marcus_keys

# Functions for API Call & Data Processing

In [2]:
def get_arrivals_data(iata_lst):
    api_key = marcus_keys.rapid_api_key_2
    tz = pytz.timezone("Europe/Berlin")
    times = [["00:00","11:59"],["12:00","23:59"]]
    now = datetime.now().astimezone(tz)
    today = datetime.now().astimezone(timezone("Europe/Berlin")).date()
    tomorrow = (today + timedelta(days=1))

    new_columns = {
        "number":"flight_number",
        "movement.airport.icao":"from_icao",
        "movement.airport.iata":"from_iata",
        "movement.revisedTime.local":"arrival_time",
        "airline.name":"airline",
        "movement.airport.name":"from_city",
        "movement.terminal":"arrival_terminal"
    }

    arr_lst_for_df = []

    for iata in iata_lst:
        for time in times:
            url = f"https://aerodatabox.p.rapidapi.com/flights/airports/iata/{iata}/{tomorrow}T{time[0]}/{tomorrow}T{time[1]}"

            querystring = {"direction":"Arrival"}

            headers = {
                "X-RapidAPI-Key": api_key,
                "X-RapidAPI-Host": "aerodatabox.p.rapidapi.com"
            }

        response = requests.get(url, headers=headers, params=querystring)
        print(response.status_code)

        arr_json = response.json()

        df = pd.json_normalize(arr_json["arrivals"])[["number", "movement.revisedTime.local", "movement.airport.icao", "movement.airport.iata", "movement.airport.name", "movement.terminal", "airline.name"]]
        data_df = df.rename(columns=new_columns)
        data_df["airport"] = iata
        data_df["data_collection_from"] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

        arr_lst_for_df.append(data_df)

    return pd.concat(arr_lst_for_df, ignore_index=True)

def process_arrivals_data(arrivals_data_raw_df):
    arrivals_data_raw_df.rename(columns={"airport":"city_id"}, inplace=True)
    arrivals_data_raw_df = arrivals_data_raw_df[["city_id", "flight_number", "arrival_time", "from_icao", "from_iata", "from_city", "arrival_terminal", "airline", "data_collection_from" ]]
    
    for i in range(0, len(arrivals_data_raw_df["city_id"])):
        if arrivals_data_raw_df.loc[i, "city_id"] == "BER":
            arrivals_data_raw_df.loc[i, "city_id"] = "Q64"
        elif arrivals_data_raw_df.loc[i, "city_id"] == "HAM":
            arrivals_data_raw_df.loc[i, "city_id"] = "Q1055"
        elif arrivals_data_raw_df.loc[i, "city_id"] == "STR":
            arrivals_data_raw_df.loc[i, "city_id"] = "Q1022"
        else:
            arrivals_data_raw_df.loc[i, "city_id"] = "Q1718"
            
    arrivals_data_raw_df["data_collection_from"] = pd.to_datetime(arrivals_data_raw_df["data_collection_from"])
    arrivals_data_raw_df["arrival_time"] = pd.to_datetime(arrivals_data_raw_df["arrival_time"])
    
    return arrivals_data_raw_df

In [3]:
iata_lst = ['BER', 'HAM', 'STR', 'DUS'] # ["LEJ", "DRS", "CGN"] did not work in single calls

In [4]:
arrivals_data_raw_df = get_arrivals_data(iata_lst)

200
200
200
200


In [5]:
arrivals_data_df = process_arrivals_data(arrivals_data_raw_df)

In [6]:
arrivals_data_df

Unnamed: 0,city_id,flight_number,arrival_time,from_icao,from_iata,from_city,arrival_terminal,airline,data_collection_from
0,Q64,A3 1586,2023-07-11 12:05:00+02:00,EDDM,MUC,Munich,1,Aegean,2023-07-10 17:34:07
1,Q64,AC 9049,2023-07-11 12:05:00+02:00,EDDM,MUC,Munich,1,Air Canada,2023-07-10 17:34:07
2,Q64,LH 1936,2023-07-11 12:05:00+02:00,EDDM,MUC,Munich,1,Lufthansa,2023-07-10 17:34:07
3,Q64,OU 5360,2023-07-11 12:05:00+02:00,EDDM,MUC,Munich,1,Croatia,2023-07-10 17:34:07
4,Q64,TP 7720,2023-07-11 12:05:00+02:00,EDDM,MUC,Munich,1,TAP Air Portugal,2023-07-10 17:34:07
...,...,...,...,...,...,...,...,...,...
609,Q1718,DE 1781,2023-07-11 22:45:00+02:00,LGPZ,PVK,Preveza/Lefkada,B,Condor,2023-07-10 17:34:08
610,Q1718,X3 2119,2023-07-11 22:55:00+02:00,GCTS,TFS,Tenerife Island,B,TUIfly,2023-07-10 17:34:08
611,Q1718,DE 1421,2023-07-11 22:35:00+02:00,GCRR,ACE,Lanzarote Island,B,Condor,2023-07-10 17:34:08
612,Q1718,X3 2839,2023-07-11 22:35:00+02:00,LPMA,FNC,Funchal,B,TUIfly,2023-07-10 17:34:08


# Local MySQL Connection

In [79]:
schema="p5_gans_database"
host="127.0.0.1"
user="root"
password=marcus_keys.my_sql_key
port=3306
con = f'mysql+pymysql://{user}:{password}@{host}:{port}/{schema}'

# AWS RDS MySQL Connection

In [15]:
schema="aws_p5_gans_database"
host="wbs-cs-p5-db.cjdcbdhnueky.eu-north-1.rds.amazonaws.com"
user="mkadmin"
password=marcus_keys.aws_rds_key
port=3306
con = f'mysql+pymysql://{user}:{password}@{host}:{port}/{schema}'

# Uploading the DataFrame into the database

In [16]:
arrivals_data_df.to_sql('arrivals_table', 
                        if_exists='append', 
                        con=con, 
                        index=False)

864