# Data Transformation of Table "Município de Residência do Declarante e Tipo de Formulário"

## Environment Configuration

In [0]:
from pyspark.sql.functions import udf
from pyspark.sql.types import StringType
from pyspark.sql.utils import AnalysisException

from src.utils.udfs import functions_for_df_structure_management as ffdsm

## Data Ingestion from Bronze Layer

In [0]:
# Load the bronze-layer table for "município de residência do declarante e
# tipo de formulário" into a Spark DataFrame.
df_municipality_form_type = spark.table(
    "brazilian_tax_big_numbers.bronze_layer.delta_municipio_de_residencia_do_declarante_e_tipo_de_formulario"
)

In [0]:
# Render the freshly loaded bronze data in the notebook for visual inspection.
display(df_municipality_form_type)

## Data Transformation

In [0]:
# Discard rows in which every column is null.
df_municipality_form_type = df_municipality_form_type.na.drop(how="all")

In [0]:
# Cast columns to float through the project helper.
# NOTE(review): the listed columns are presumably the ones left uncast
# ("Municipio" is later handled as text) — confirm against
# ffdsm.cast_columns_to_float.
df_casted_municipality_form_type = ffdsm.cast_columns_to_float(
    df_municipality_form_type,
    ["AnoCalendario", "Municipio", "TipoDeFormularioDeDeclaracao"],
)

In [0]:
# Rename columns using the table label through the project helper.
# NOTE(review): the listed columns appear to keep their original names
# ("Municipio" is still referenced by that name later in this notebook) —
# confirm against ffdsm.rename_columns_with_df_name.
df_casted_municipality_form_type = ffdsm.rename_columns_with_df_name(
    df_casted_municipality_form_type,
    "MunicipioDeResidenciaDoDeclaranteETipoDeFormulario",
    ["AnoCalendario", "Municipio", "TipoDeFormularioDeDeclaracao"],
)

In [0]:
# Show the Databricks data summary for the casted/renamed DataFrame so that
# nulls and value distributions can be inspected before filling.
dbutils.data.summarize(df_casted_municipality_form_type)

In [0]:
# Fill null values through the project helper.
# NOTE(review): whether the listed columns are the ones filled or the ones
# skipped is not visible here — confirm against ffdsm.fill_nulls.
df_filled_municipality_form_type = ffdsm.fill_nulls(
    df_casted_municipality_form_type,
    ["AnoCalendario", "Municipio", "TipoDeFormularioDeDeclaracao"],
)

In [0]:
# Count the nulls left after the fill step (via the project helper) and show
# the result so the fill can be verified visually.
df_nulls = ffdsm.count_nulls(df_filled_municipality_form_type)
display(df_nulls)

In [0]:
# Two-letter abbreviations (UF) of the 26 Brazilian states plus the Federal
# District. The order matters: lookups scan this list front to back.
brazilian_state_abbreviations = [
    "AC", "AL", "AP", "AM", "BA", "CE", "DF", "ES", "GO",
    "MA", "MT", "MS", "MG", "PA", "PB", "PR", "PE", "PI",
    "RJ", "RN", "RS", "RO", "RR", "SC", "SP", "SE", "TO",
]

In [0]:
def get_acronym_state_name(city):
    """Return the Brazilian state abbreviation found in a municipality label.

    The label is split into alphanumeric tokens, and the first entry of
    ``brazilian_state_abbreviations`` (in list order) that equals one of
    those tokens is returned. Matching whole tokens rather than raw
    substrings avoids false positives such as "AC" inside "MACAPÁ - AP" or
    "AL" inside "PALMAS - TO".

    Args:
        city: Municipality label (illustratively, something shaped like
            "São Paulo - SP"). Values that are not strings, including
            ``None``, are treated as carrying no state.

    Returns:
        The matching two-letter abbreviation, or "Não se Aplica" when no
        abbreviation appears as a standalone token.
    """
    not_applicable = "Não se Aplica"
    if not isinstance(city, str):
        # A null reaching the UDF would otherwise raise TypeError on ``in``.
        return not_applicable

    # Turn every non-alphanumeric character into a space so separators such
    # as "-", "/", "(" or "_" all delimit tokens. Matching stays
    # case-sensitive, as before.
    tokens = set("".join(ch if ch.isalnum() else " " for ch in city).split())
    return next(
        (acronym for acronym in brazilian_state_abbreviations if acronym in tokens),
        not_applicable,
    )

# Wrap the Python helper as a Spark UDF that returns a string. `udf` comes
# from pyspark.sql.functions (imported at the top of the notebook) instead of
# relying on a name injected by the notebook runtime.
get_acronym_state_name_udf = udf(get_acronym_state_name, StringType())

# Derive each row's state acronym from its "Municipio" value and store it in
# the new "SiglaEstado" column.
df_filled_municipality_form_type = df_filled_municipality_form_type.withColumn(
    "SiglaEstado",
    get_acronym_state_name_udf(df_filled_municipality_form_type["Municipio"]),
)

In [0]:
# Add "EstadoSigla", computed by the state-acronym UDF from "Municipio".
# NOTE(review): this applies the same UDF to the same input as the
# "SiglaEstado" column, so both columns hold identical values — confirm
# whether both names are really needed downstream.
df_filled_municipality_form_type = df_filled_municipality_form_type.withColumn(
    colName="EstadoSigla",
    col=get_acronym_state_name_udf(df_filled_municipality_form_type["Municipio"]),
)

In [0]:
# Render the transformed DataFrame (including the derived state-acronym
# columns) for a final visual check before it is persisted.
display(df_filled_municipality_form_type)

## Save as Delta in Silver Layer

In [0]:
# Make sure the silver-layer schema exists before writing to it; the
# IF NOT EXISTS clause makes re-runs harmless. The statement has no
# placeholders, so a plain string literal is used instead of an f-string.
spark.sql("CREATE SCHEMA IF NOT EXISTS brazilian_tax_big_numbers.silver_layer")

In [0]:
# `error` stays None when the write succeeds and holds the exception text
# when it fails.
error = None

try:
    # Replace the silver-layer table with the transformed DataFrame.
    (
        df_filled_municipality_form_type.write
        .mode("overwrite")
        .saveAsTable("brazilian_tax_big_numbers.silver_layer.delta_municipio_e_tipo_de_declaracao")
    )
except Exception as e:
    # NOTE(review): the failure is only recorded and printed, never re-raised,
    # so this cell completes even when the table was not written. Confirm that
    # a later step inspects `error`.
    error = str(e)
    print(error)