## Consultando os arquivos na camada silver

## Criando diretório Silver

In [0]:
# Volume locations of the two Delta layers this notebook works with.
SILVER_PATH = "/Volumes/workspace/default/delta/silver"
BRONZE_PATH = "/Volumes/workspace/default/delta/bronze"

# Create the silver directory when it does not exist yet
dbutils.fs.mkdirs(SILVER_PATH)

In [0]:
# List the entries stored under the bronze "flight" folder.
bronze_flight_entries = dbutils.fs.ls(f"{BRONZE_PATH}/flight/")
display(bronze_flight_entries)

## Carregando as tabelas da camada bronze


In [0]:
# Load the bronze Delta tables.
# The locations are built from BRONZE_PATH so the bronze root is declared in a
# single place; they resolve to the same dbfs:/Volumes/... paths as before.
# NOTE: the variables carry a "_silver" suffix although they hold bronze data;
# the names are kept because later cells register and query views with them.
df_voos_silver = spark.read.format("delta").load(f"dbfs:{BRONZE_PATH}/flight/registro_voos")
df_codigos_silver = spark.read.format("delta").load(f"dbfs:{BRONZE_PATH}/flight/codigo_voos/")


## Explorando as tabelas

In [0]:
%py
df_voos_silver.createOrReplaceTempView("df_voos_silver")
df_codigos_silver.createOrReplaceTempView("df_codigos_silver")

In [0]:
%sql
-- Peek at the first 10 rows of the flights view
SELECT *
FROM df_voos_silver
LIMIT 10;

In [0]:
%sql
-- Peek at the first 10 rows of the airport-codes view
SELECT *
FROM df_codigos_silver
LIMIT 10;

In [0]:
# Show the column names of the flights DataFrame.
voos_column_names = df_voos_silver.columns
display(voos_column_names)


In [0]:
%sql
-- Preview (50 rows) of the flights enriched with airport attributes.
-- The airport-codes view is joined twice: alias dc matches the origin code,
-- alias do matches the destination code. LEFT JOINs keep flights whose codes
-- have no match, in which case the airport columns are NULL.
SELECT
    dv.date, dv.delay, dv.distance,
    dc.City city_origin, dc.State state_origin, dc.Country country_origin, dc.IATA cod_origin,
    do.City city_destination, do.State state_destination, do.Country country_destination, do.IATA cod_destination,
    -- Fix: a positive delay is a late flight ("Atrasado") and a negative delay
    -- an early one ("Adiantado"); the labels were previously swapped.
    CASE
        WHEN dv.delay > 0 THEN "Atrasado"
        WHEN dv.delay = 0 THEN "A tempo"
        WHEN dv.delay < 0 THEN "Adiantado"
        ELSE "ERRO"  -- reached when delay is NULL
    END AS status_voo,
    CASE
        WHEN do.Country <> dc.Country THEN "Internacional"
        WHEN do.Country = dc.Country THEN "Nacional"
        ELSE "ERRO"  -- reached when either country is NULL
    END AS nacionalidade_voo,
    -- NOTE(review): flights within the same state fall into ELSE and get NULL,
    -- not False -- confirm this is intended.
    CASE
        WHEN do.State <> dc.State AND do.Country = dc.Country THEN True
        WHEN do.State <> dc.State AND do.Country <> dc.Country THEN False
        ELSE null
    END AS interestadual
FROM df_voos_silver dv
LEFT JOIN df_codigos_silver dc ON dv.origin = dc.IATA
LEFT JOIN df_codigos_silver do ON dv.destination = do.IATA
LIMIT 50;


In [0]:
# Build the enriched flights DataFrame.
# The airport-codes view is joined twice (dc = origin, do = destination) with
# LEFT JOINs, so flights whose codes have no match are kept with NULL airport
# columns.
# Fix: status_voo labels were swapped -- a positive delay is a late flight
# ("Atrasado") and a negative delay an early one ("Adiantado").
# NOTE(review): interestadual is NULL (not False) for same-state flights --
# confirm this is intended.
df_query = spark.sql("""
SELECT
    dv.date, dv.delay, dv.distance,
    dc.City city_origin, dc.State state_origin, dc.Country country_origin, dc.IATA cod_origin,
    do.City city_destination, do.State state_destination, do.Country country_destination, do.IATA cod_destination,
    CASE
        WHEN dv.delay > 0 THEN "Atrasado"
        WHEN dv.delay = 0 THEN "A tempo"
        WHEN dv.delay < 0 THEN "Adiantado"
        ELSE "ERRO"
    END AS status_voo,
    CASE
        WHEN do.Country <> dc.Country THEN "Internacional"
        WHEN do.Country = dc.Country THEN "Nacional"
        ELSE "ERRO"
    END AS nacionalidade_voo,
    CASE
        WHEN do.State <> dc.State AND do.Country = dc.Country THEN True
        WHEN do.State <> dc.State AND do.Country <> dc.Country THEN False
        ELSE null
    END AS interestadual
FROM df_voos_silver dv
LEFT JOIN df_codigos_silver dc ON dv.origin = dc.IATA
LEFT JOIN df_codigos_silver do ON dv.destination = do.IATA
""")


In [0]:
# Persist the query result as a Delta table under the silver flight folder,
# overwriting whatever was written there before.
silver_writer = (
    df_query.write
    .format("delta")
    .mode("overwrite")
    .option("mergeSchema", "true")
)
silver_writer.save(f"{SILVER_PATH}/flight/dados_voo")