Preparação

In [None]:
import os
import warnings

import pandas as pd
import sqlparse
from sqlalchemy import create_engine, text

# Ignore FutureWarning-category warnings from here on.
warnings.simplefilter(action='ignore', category=FutureWarning)

# Connection settings.
# The database URI is taken from the AVIAO_DB_URI environment variable so that
# real credentials never have to be committed with the notebook; when the
# variable is unset, the local development database is used as before.
DB_URI = os.environ.get(
    "AVIAO_DB_URI",
    "postgresql://admin:admin@localhost:5432/db_aviao",
)
# Path to the DDL script, relative to the notebook's working directory.
ARQUIVO_DDL = '../Data Layer/gold/ddl.sql'
engine = create_engine(DB_URI)

In [None]:
# 1) DDL execution (cleanup and creation of the tables)
# ---------------------------------------------------------
with open(ARQUIVO_DDL, 'r', encoding='utf-8') as ddl_file:
    raw_ddl = ddl_file.read()

# Strip comments first to avoid the "empty query" error.
sql_ddl_clean = sqlparse.format(raw_ddl, strip_comments=True)

# Split the script into statements, trimming whitespace and discarding blanks.
commands = list(filter(None, map(str.strip, sqlparse.split(sql_ddl_clean))))

# engine.begin() wraps every statement in a single transaction.
with engine.begin() as conn:
    for ddl_statement in commands:
        conn.execute(text(ddl_statement))


Extração e transformação

In [None]:

# 2) Data extraction and transformations for the dimensions
# ---------------------------------------------------------
# Load the whole silver.aviao table into memory; every dimension is derived from it.
df_obt = pd.read_sql(sql="SELECT * FROM silver.aviao", con=engine)
print(len(df_obt), "registros lidos da Silver.aviao")


def prep_dim(df, mapping):
    """Build a dimension frame from the Silver data.

    Args:
        df: Source DataFrame containing at least the columns named by the
            keys of ``mapping``.
        mapping: Dict of ``{silver_column: dw_column}``.

    Returns:
        A new DataFrame holding only the mapped columns, with duplicate rows
        removed and the columns renamed to their DW names. The index labels
        of the surviving rows are kept as they were in ``df``.

    Raises:
        KeyError: If a key of ``mapping`` is not a column of ``df``.
    """
    source_columns = list(mapping)
    distinct_rows = df[source_columns].drop_duplicates()
    return distinct_rows.rename(columns=mapping)

# Column mappings (Silver name -> DW name), based on the data dictionary.
# Each call yields the distinct combinations of the listed Silver columns.

# Event and publication dates.
dim_time = prep_dim(df_obt, {
    "event_date": "evt_dat",
    "publication_date": "pub_dat",
})

# Injury severity, investigation type and report status.
dim_sev = prep_dim(df_obt, {
    "injury_severity": "inj_sev",
    "investigation_type": "inv_typ",
    "report_status": "rpt_sta",
})

# Weather condition.
dim_wth = prep_dim(df_obt, {
    "weather_condition": "wth_con",
})

# Broad phase of flight.
dim_flt = prep_dim(df_obt, {
    "broad_phase_of_flight": "brd_phs_off_flt",
})

# Country, coordinates, airport and location.
dim_ggp = prep_dim(df_obt, {
    "country": "ctr",
    "latitude": "lat",
    "longitude": "lon",
    "airport_code": "apt_cod",
    "airport_name": "apt_nam",
    "location": "loc",
})

# Aircraft attributes.
dim_arc = prep_dim(df_obt, {
    "aircraft_category": "arc_cat",
    "make": "mak",
    "model": "mod",
    "registration_number": "reg_num",
    "engine_type": "eng_typ",
    "number_of_engines": "num_off_eng",
    "amateur_built": "ama_blt",
    "aircraft_damage": "arc_dam",
})

# Flight purpose, schedule, carrier and FAR description.
dim_opt = prep_dim(df_obt, {
    "purpose_of_flight": "prp_off_flt",
    "schedule": "sch",
    "air_carrier": "air_car",
    "far_description": "far_dsc",
})



Carga

In [None]:
# Final sanity check: report the row counts of the Gold fact table and of the
# Silver source table.
# Both queries share one connection opened in a `with` block, so it is returned
# to the pool when the block exits instead of being left open.
with engine.connect() as conn:
    resultado = conn.execute(text("SELECT COUNT(*) FROM dw.fat_acc")).scalar()
    print(f"ETL FINALIZADO! Total de linhas na Gold: {resultado}")
    resultado = conn.execute(text("SELECT COUNT(*) FROM silver.aviao")).scalar()
    print(f"ETL FINALIZADO! Total de linhas na Silver: {resultado}")