# Imports

In [0]:
import requests
from io import StringIO
import pandas as pd
from pyspark.sql import SparkSession


# Read raw data

In [0]:
def load_csvs_to_spark(files_dict, sep=";", encoding="ISO-8859-1", timeout=60):
    """
    Download CSVs from URLs and expose each one as a Spark DataFrame and temp view.

    Every file is fetched over HTTP, decoded, parsed with pandas and then
    converted to a Spark DataFrame, so each CSV is fully loaded into memory
    before it reaches Spark. The function relies on a ``spark`` session
    object being available as a global (as notebooks usually provide).

    Parameters:
    - files_dict: dictionary {table_name: csv_url}
    - sep: CSV delimiter
    - encoding: encoding used to decode the downloaded bytes
    - timeout: seconds to wait for the server before giving up on a download

    Returns:
    - dictionary {name: Spark DataFrame}, where name is the part of
      table_name before the first "." (so "foo.csv" becomes "foo")

    Raises:
    - requests.HTTPError: when a URL answers with an error status
    - requests.Timeout: when the server does not respond within `timeout`
    """
    dfs = {}

    for table_name, url in files_dict.items():
        # One canonical name for the temp view, the dict key and the log
        # message, so they can never disagree with each other.
        name = table_name.split(".")[0]

        print(f"Downloading and loading {table_name}...")

        # Download the CSV; without a timeout a stalled server would hang
        # the notebook indefinitely.
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()

        # Decode explicitly instead of trusting the HTTP headers for the charset
        csv_data = StringIO(response.content.decode(encoding))

        # Parse with pandas
        pdf = pd.read_csv(csv_data, sep=sep)

        # Convert to Spark DataFrame
        df = spark.createDataFrame(pdf)

        # Register a temporary Spark view so the data can be queried with SQL
        df.createOrReplaceTempView(name)

        # Save in dict
        dfs[name] = df

        print(f" Table '{name}' is ready in Spark\n")

    return dfs


In [0]:
# Source CSVs, keyed by the table name each one is loaded under.
files = dict(
    recommendation="https://dedalo.sti.fab.mil.br/dadosabertos/recomendacao.csv",
    significant_factor="https://dedalo.sti.fab.mil.br/dadosabertos/fator_contribuinte.csv",
    type_occurrence="https://dedalo.sti.fab.mil.br/dadosabertos/ocorrencia_tipo.csv",
    occurrence="https://dedalo.sti.fab.mil.br/dadosabertos/ocorrencia.csv",
    aircraft="https://dedalo.sti.fab.mil.br/dadosabertos/aeronave.csv",
)

# Download every CSV and get back one Spark DataFrame per table name.
dfs = load_csvs_to_spark(files)

In [0]:
# Inspect which table names were loaded into the dict of DataFrames.
dfs.keys()

In [0]:
# Bind each loaded Spark DataFrame to its own name for the cells below.
(
    df_recommendation,
    df_significant_factor,
    df_type_occurrence,
    df_occurrence,
    df_aircraft,
) = (
    dfs[key]
    for key in (
        "recommendation",
        "significant_factor",
        "type_occurrence",
        "occurrence",
        "aircraft",
    )
)

# Save delta tables in bronze layer


In [0]:
# Map each bronze-layer table name to the DataFrame it is written from.
dataframes = dict(
    gov_recommendation=df_recommendation,
    gov_significant_factor=df_significant_factor,
    gov_type_occurrence=df_type_occurrence,
    gov_occurrence=df_occurrence,
    gov_aircraft=df_aircraft,
)

# Write every DataFrame as a Delta table, replacing any existing contents.
for table_name, dataframe in dataframes.items():
    writer = dataframe.write.format("delta").mode("overwrite")
    writer.saveAsTable(
        f"anac_aeronautical_occurrences_in_brazilian_civil_aviation.bronze_layer.{table_name}"
    )

print("All tables saved in bronze layer.")