In [1]:
import os

import pandas as pd
import requests
from sqlalchemy import create_engine, types

In [2]:
# SQLAlchemy engine shared by every cell that talks to the database.
# The connection URL embeds the database password, so it is read from the
# DATABASE_URL environment variable instead of being stored in the notebook.
# Expected form: postgresql://<user>:<password>@<host>:<port>/<database>
# A missing variable raises KeyError here, before any query is attempted.
engine = create_engine(os.environ["DATABASE_URL"])

In [3]:
# SQL column types keyed by column name. This mapping is handed to
# DataFrame.to_sql as its `dtype` argument when airport_info is written.
dtype = {
    # Identifiers
    "id": types.INT(),
    "iata": types.VARCHAR(4),
    "icao": types.VARCHAR(4),
    "name": types.VARCHAR(250),
    # Address
    "location": types.VARCHAR(100),
    "street_number": types.VARCHAR(20),
    "street": types.VARCHAR(200),
    "city": types.VARCHAR(150),
    "county": types.VARCHAR(150),
    "state": types.VARCHAR(100),
    "country_iso": types.VARCHAR(2),
    "country": types.VARCHAR(150),
    "postal_code": types.VARCHAR(100),
    # Contact
    "phone": types.VARCHAR(150),
    # Coordinates and offset
    "latitude": types.NUMERIC(),
    "longitude": types.NUMERIC(),
    # NOTE(review): "uct" presumably mirrors the field name in the API payload;
    # confirm against a real response before renaming it.
    "uct": types.NUMERIC(),
    "website": types.VARCHAR(255),
}

In [4]:
# Endpoint queried once per ICAO code.
URL = "https://airport-info.p.rapidapi.com/airport"

# Headers sent with every API request.
# The RapidAPI key is a secret, so it is read from the RAPIDAPI_KEY environment
# variable rather than being stored in the notebook. When the variable is unset
# an empty key is sent; the API is expected to reject such a request, which
# surfaces as an HTTP error at request time.
headers = {
    'x-rapidapi-host': "airport-info.p.rapidapi.com",
    'x-rapidapi-key': os.environ.get("RAPIDAPI_KEY", ""),
}

In [5]:
def make_request(api_path, string, timeout=30):
    """Call the API for one airport, using the ICAO code as the lookup key.

    Args:
        api_path: URL of the endpoint to call.
        string: ICAO code, sent as the ``icao`` query parameter.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a single stalled connection would block the run forever.

    Returns:
        The decoded JSON body when the response status is 200. ``None`` when
        the status is neither 200 nor an error status.

    Raises:
        requests.HTTPError: The server answered with a 4xx or 5xx status.
        requests.Timeout: The server did not answer within ``timeout`` seconds.
    """
    response = requests.get(
        url=api_path,
        headers=headers,
        params={"icao": string},
        timeout=timeout,
    )

    if response.status_code != requests.codes.ok:
        # Raises for 4xx/5xx; any other non-200 status falls through.
        response.raise_for_status()
        return None

    return response.json()

In [6]:
def get_vra_df():
    """Collect the distinct aerodrome ICAO codes referenced by ``data_engineer.vra``.

    Origin and destination codes are queried separately, stacked into a single
    DataFrame with one ``icao`` column, and de-duplicated.

    Returns:
        DataFrame with an ``icao`` column holding each code once.
    """
    origin_sql = ("""
            select distinct
                icao_aerodromo_origem as icao
            FROM
                data_engineer.vra
        """)
    destination_sql = ("""
            select distinct
                icao_aerodromo_destino as icao
            FROM
                data_engineer.vra
        """)

    # Both queries share one connection, origin first.
    with engine.connect() as conn:
        icao_frames = [
            pd.read_sql(sql, conn)
            for sql in (origin_sql, destination_sql)
        ]

    # A code may appear as both origin and destination, so de-duplicate
    # after stacking the two result sets.
    return pd.concat(icao_frames).drop_duplicates()

In [7]:
def execute():
    """Look up every ICAO code found in ``vra`` and store the airport details.

    Steps:
        1. Read the distinct ICAO codes via ``get_vra_df``.
        2. Call the API once per code via ``make_request``.
        3. Flatten the JSON responses into a DataFrame, discard rows without an
           ``icao`` value and drop the ``error.text`` column if present.
        4. Write the result to ``data_engineer.airport_info``, replacing the
           table if it already exists.

    Raises:
        ValueError: No response contained an airport record. The existing
            table is left untouched in that case.
    """
    # A null code would be sent as a request without the icao parameter
    # (requests omits None-valued params), so skip those up front.
    icao_codes = get_vra_df()["icao"].dropna()

    responses = [make_request(URL, icao) for icao in icao_codes]
    # make_request can return None for a non-200, non-error status.
    records = [record for record in responses if record is not None]

    # Nested JSON keys become dotted column names, e.g. error -> error.text.
    airports_raw = pd.json_normalize(records)

    # Guard before touching the database: with if_exists='replace' an empty
    # write would wipe the data of a previous successful run.
    if "icao" not in airports_raw.columns or airports_raw["icao"].isna().all():
        raise ValueError(
            "no airport records were returned by the API; "
            "data_engineer.airport_info was left untouched"
        )

    airports_clean = (
        airports_raw
        # Rows without an icao value are failed lookups.
        .dropna(subset=["icao"])
        # The error column only exists when at least one lookup failed.
        .drop(columns=["error.text"], errors="ignore")
    )

    # Open the connection only for the write so it is not held idle while the
    # API calls are running.
    with engine.connect() as conn:
        airports_clean.to_sql(
            name='airport_info',
            con=conn,
            schema='data_engineer',
            index=False,
            if_exists='replace',
            dtype=dtype
        )

In [8]:
# Run the pipeline: read the ICAO codes, query the API and write airport_info.
execute()