# Arrivals and Departures

### Import Packages

In [39]:
import pandas as pd
import requests
import sqlalchemy
import os
from dotenv import load_dotenv, find_dotenv
from functools import wraps
import datetime as dt
import json

### Load variables from .env file

In [2]:
# Locate the .env file, then load the variables it defines.
env_file = find_dotenv(filename='.env')
load_dotenv(env_file)

True

### Logging Wrapper

In [3]:
def log_step(func):
    """Decorate a pipeline step so every call prints its result shape and runtime.

    Args:
        func: The callable to wrap. It is invoked with the caller's
            positional and keyword arguments unchanged.

    Returns:
        A wrapper with ``func``'s metadata (via ``functools.wraps``) that
        returns whatever ``func`` returns.
    """
    @wraps(func)
    def wrapper(*args, **kwargs):
        tic = dt.datetime.now()
        result = func(*args, **kwargs)
        time_taken = str(dt.datetime.now() - tic)
        # Not every step returns a DataFrame; report ``shape=None`` instead of
        # raising AttributeError after the step has already done its work.
        shape = getattr(result, "shape", None)
        print(f"{func.__name__}:\n shape={shape} took {time_taken}s\n")
        return result

    return wrapper

## Get Airports from DB

In [5]:
# Connection settings for the airports database.
# Values come from the same DB_* environment variables that send_to_DB reads,
# so credentials live in the .env file instead of the notebook source.
# Non-secret settings keep their previous values as fallbacks; the password
# deliberately has no fallback and must be provided via DB_PASSWORD.
schema = os.environ.get("DB_SCHEMA", "gans")
host = os.environ.get("DB_HOST", "gans-aws.cs3d3b90junp.us-east-1.rds.amazonaws.com")
user = os.environ.get("DB_USER", "admin")
password = os.environ.get("DB_PASSWORD", "")
port = int(os.environ.get("DB_PORT", 3306))
con = f'mysql+pymysql://{user}:{password}@{host}:{port}/{schema}'


In [8]:
def get_airports_from_db(con):
    """Read the airports whose ``municipality_country`` is ``Berlin,DE``.

    Args:
        con: Connection (or connection string) handed to ``pd.read_sql``.

    Returns:
        DataFrame holding every column of the matching ``airports`` rows.
    """
    berlin_airports_query = '''
    SELECT * FROM airports
    WHERE municipality_country = "Berlin,DE"
    '''
    return pd.read_sql(berlin_airports_query, con=con)
# Query the airports once when this cell runs (needs a reachable database).
init_airports =  get_airports_from_db(con)


In [82]:
def get_flight_schedules(airports, cache_path="./response_list.json"):
    """Return the flight-schedule responses for ``airports``.

    NOTE: the live AeroDataBox request is currently disabled; the function
    replays a previously saved response list from ``cache_path`` instead.
    When the live request is re-enabled, read the RapidAPI key from the
    environment rather than embedding it in the source.

    Args:
        airports: DataFrame of airports. Only whether it has rows is used
            while the cached file stands in for the API.
        cache_path: JSON file containing the saved list of responses.

    Returns:
        The decoded JSON content of ``cache_path``, or an empty list when
        ``airports`` has no rows.

    Raises:
        OSError: If ``cache_path`` cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Nothing to look up: return an empty list (not None) so callers can
    # iterate over the result unconditionally.
    if airports.empty:
        return []

    try:
        # The context manager guarantees the file handle is closed.
        with open(cache_path, encoding="utf-8") as cache_file:
            return json.load(cache_file)
    except (OSError, json.JSONDecodeError):
        print("No data for:", cache_path)
        raise


In [94]:
def unpack_responses(responses_list):
    """Flatten API responses into one arrivals and one departures DataFrame.

    Args:
        responses_list: Iterable of response dicts. Each may hold an
            ``"arrivals"`` and a ``"departures"`` list of flight records.

    Returns:
        ``[arrivals, departures]``: two DataFrames whose nested fields are
        flattened into ``_``-separated column names. Each is an empty
        DataFrame when no records of that direction exist.
    """
    arrival_frames = []
    departure_frames = []
    for response in responses_list:
        # Arrivals come from "arrivals" and departures from "departures";
        # the two keys used to be swapped.  A missing or empty direction is
        # treated as "no flights" instead of raising KeyError.
        arrival_frames.append(
            pd.json_normalize(response.get("arrivals") or [], sep="_")
        )
        departure_frames.append(
            pd.json_normalize(response.get("departures") or [], sep="_")
        )

    # Concatenate once at the end (concat inside the loop is quadratic) and
    # skip empty frames, which pd.concat cannot handle as an empty list.
    arrival_frames = [frame for frame in arrival_frames if not frame.empty]
    departure_frames = [frame for frame in departure_frames if not frame.empty]
    arrivals = pd.concat(arrival_frames) if arrival_frames else pd.DataFrame()
    departures = pd.concat(departure_frames) if departure_frames else pd.DataFrame()
    return [arrivals, departures]
    


Unnamed: 0,number,callSign,status,codeshareStatus,isCargo,departure_airport_name,departure_quality,arrival_scheduledTimeLocal,arrival_actualTimeLocal,arrival_runwayTimeLocal,...,departure_gate,departure_runway,arrival_terminal,arrival_runway,location_pressureAltFt,location_gsKt,location_trackDeg,location_reportedAtUtc,location_lat,location_lon
0,EC 75MF,EJU15HE,Arrived,IsOperator,False,Unknown,[],2022-04-07 08:56+02:00,2022-04-07 19:20+02:00,2022-04-07 19:20+02:00,...,,,,,,,,,,
1,LH 192,DLH9PW,Arrived,IsOperator,False,Frankfurt-am-Main,"[Basic, Live]",2022-04-07 17:55+02:00,2022-04-07 19:04+02:00,2022-04-07 19:04+02:00,...,A11,25C,1.0,,,,,,,
2,BA 986,BAW986,Arrived,IsOperator,False,London,"[Basic, Live]",2022-04-07 18:30+02:00,2022-04-07 19:18+02:00,2022-04-07 19:18+02:00,...,,,1.0,,,,,,,
3,LH 194,DLH4JW,Arrived,IsOperator,False,Frankfurt-am-Main,"[Basic, Live]",2022-04-07 18:55+02:00,2022-04-07 19:43+02:00,2022-04-07 19:43+02:00,...,A19,,1.0,,,,,,,
4,4U 20,GWI020,Arrived,IsOperator,False,Cologne,[],2022-04-07 19:09+02:00,2022-04-07 19:09+02:00,2022-04-07 19:09+02:00,...,,,,,,,,,,


In [None]:
def clean_arrivals(df):
    """Return a copy of ``df`` with the flight columns given ``flight_`` names.

    Args:
        df: Flattened flights DataFrame. Columns that are absent are
            ignored by the rename.

    Returns:
        A new DataFrame; ``df`` itself is left unchanged.
    """
    # DataFrame.rename returns a new frame, so the result must be returned.
    # The keys follow the API's camelCase column names (callSign, isCargo).
    return df.rename(columns={
        "number": "flight_number",
        "callSign": "flight_call_sign",
        "status": "flight_status",
        "isCargo": "flight_is_cargo",
    })
    


## Data Cleaning Pipeline

### Init Pipeline

In [12]:
@log_step
def init_pipeline(df):
    """Return a copy of ``df`` to serve as the pipeline's starting frame."""
    working_copy = df.copy()
    return working_copy

### Rename Columns

In [26]:
@log_step
def rename_columns(df):
    """Return ``df`` with the ``id`` column renamed to ``city_id``."""
    column_map = {"id": "city_id"}
    return df.rename(columns=column_map)

### Drop Columns

In [7]:
@log_step
def drop_columns(df):
    """Return ``df`` without the ``city_state`` column."""
    unwanted = ["city_state"]
    return df.drop(columns=unwanted)

### Add Columns

In [25]:
@log_step
def add_columns(df):
    """Return ``df`` extended with ``municipality_country`` and ``created_at``.

    ``municipality_country`` joins ``city_name`` and ``city_country`` with a
    comma; ``created_at`` holds the current local timestamp on every row.
    """
    with_key = df.assign(
        municipality_country=df["city_name"] + "," + df["city_country"]
    )
    stamped = with_key.assign(created_at=dt.datetime.now())
    return stamped

### Adjust Datatypes

In [24]:

def adjust_datatypes(df):
    """Return ``df`` unchanged.

    Placeholder pipeline step: the cast of ``city_id`` to a string type is
    currently disabled, so no conversion takes place.
    """
    return df


### Send to DB

In [15]:
def send_to_DB(df, table_name, if_exists="replace", primary_key="municipality_country"):
    """Write ``df`` to the MySQL table ``table_name`` and set its primary key.

    Connection settings are read from the ``DB_USER``, ``DB_PASSWORD``,
    ``DB_HOST``, ``DB_PORT`` and ``DB_SCHEMA`` environment variables.

    Args:
        df: DataFrame to store; its index is not written.
        table_name: Target table.
        if_exists: Passed to ``DataFrame.to_sql`` (``"replace"``,
            ``"append"`` or ``"fail"``).
        primary_key: Column promoted to primary key after the table has been
            (re)created. Skipped when ``None``, when the column is not in
            ``df``, or when ``if_exists="append"`` (an existing table keeps
            whatever key it already has).

    Returns:
        ``df``, unchanged, so the call can sit in a pipeline.

    Raises:
        KeyError: If one of the ``DB_*`` environment variables is missing.
    """
    con = f'mysql+pymysql://{os.environ["DB_USER"]}:{os.environ["DB_PASSWORD"]}@{os.environ["DB_HOST"]}:{os.environ["DB_PORT"]}/{os.environ["DB_SCHEMA"]}'
    # One engine serves both the bulk write and the ALTER TABLE statement.
    engine = sqlalchemy.create_engine(con)

    add_key = primary_key is not None and primary_key in df.columns
    column_types = {'city_id': sqlalchemy.types.VARCHAR(length=30)}
    if add_key and pd.api.types.is_string_dtype(df[primary_key]):
        # pandas stores strings as TEXT, which MySQL rejects as a primary key
        # without a prefix length; 191 chars fits the 767-byte index limit
        # of older utf8mb4 tables.
        column_types.setdefault(primary_key, sqlalchemy.types.VARCHAR(length=191))
    # Only pass type overrides for columns this frame actually has.
    column_types = {
        column: sql_type
        for column, sql_type in column_types.items()
        if column in df.columns
    }

    try:
        df.to_sql(
            table_name,
            con=engine,
            if_exists=if_exists,
            index=False,
            dtype=column_types or None,
        )
        if add_key and if_exists != "append":
            # Double any backtick so the names stay valid quoted identifiers.
            quoted_table = table_name.replace("`", "``")
            quoted_key = primary_key.replace("`", "``")
            # text() is required for raw SQL on SQLAlchemy 2.x; begin()
            # commits the statement when the block exits.
            with engine.begin() as connection:
                connection.execute(
                    sqlalchemy.text(
                        f'ALTER TABLE `{quoted_table}` ADD PRIMARY KEY (`{quoted_key}`);'
                    )
                )
    finally:
        # Release pooled connections even when the write fails.
        engine.dispose()
    return df


## Lambda Handler

In [90]:

def lambda_handler(event=None, context=None):
    """Run the flight pipeline and return the cleaned arrivals.

    Args:
        event: Invocation payload supplied by AWS Lambda; currently unused.
        context: Runtime context supplied by AWS Lambda; currently unused.
            Both default to ``None`` so the handler can still be called
            without arguments, e.g. from a notebook cell.

    Returns:
        Whatever ``clean_arrivals`` returns for the unpacked arrivals.
    """
    airports = get_airports_from_db(con)
    responses_list = get_flight_schedules(airports)
    # NOTE: departures are unpacked but not yet cleaned, and nothing is
    # written to the database yet.
    arrivals, _departures = unpack_responses(responses_list)
    return clean_arrivals(arrivals)

    
    

In [95]:
# Run the handler once and keep its return value for inspection.
arrivals = lambda_handler()