# Data Transformation and Aggregation by Identifier "Ano"

## Environment Configuration

In [0]:
from pyspark.sql.utils import AnalysisException
from src.utils.udfs import functions_for_df_structure_management as ffdsm

## Data Ingestion from Bronze Layer

In [0]:
# Bronze-layer source table for "Bens e Direitos" (assets and rights).
df_assets_and_rights = spark.read.table(
    "brazilian_tax_big_numbers.bronze_layer.delta_bens_e_direitos"
)

In [0]:
# Bronze-layer source table for "Dívidas e Ônus" (debts and liabilities).
df_debts_and_liabilities = spark.read.table(
    "brazilian_tax_big_numbers.bronze_layer.delta_dividas_e_onus"
)

In [0]:
# Bronze-layer source table for "Pagamentos e Doações" (payments and donations).
df_payments_and_donations = spark.read.table(
    "brazilian_tax_big_numbers.bronze_layer.delta_pagamentos_e_doacoes"
)

## Data Transformation

### "Bens e Direitos"

In [0]:
# Discard rows in which every column is null; partially filled rows are kept.
df_assets_and_rights = df_assets_and_rights.na.drop(how="all")

In [0]:
# Float-cast step delegated to the project helper.
# NOTE(review): whether the list names the columns to cast or the columns to
# leave untouched is decided inside `cast_columns_to_float` — confirm there.
df_casted_assets_and_rights = ffdsm.cast_columns_to_float(
    df_assets_and_rights,
    ["AnoCalendario"],
)

In [0]:
# Column-renaming step delegated to the project helper, tagged "BensEDireitos".
# NOTE(review): "AnoCalendario" is the join key used later in this notebook, so
# the list presumably names columns exempt from renaming — confirm in the helper.
df_casted_assets_and_rights = ffdsm.rename_columns_with_df_name(
    df_casted_assets_and_rights,
    "BensEDireitos",
    ["AnoCalendario"],
)

In [0]:
# Render the Databricks data-profile summary of the transformed
# "Bens e Direitos" DataFrame for visual inspection in the notebook.
dbutils.data.summarize(df_casted_assets_and_rights)

In [0]:
# Null report for "Bens e Direitos", produced by the project helper and shown
# in the notebook output.
# NOTE(review): presumably one null count per column — confirm in `count_nulls`.
df_nulls = ffdsm.count_nulls(df_casted_assets_and_rights)
display(df_nulls)

### "Dívidas e Ônus"

In [0]:
# Discard rows in which every column is null; partially filled rows are kept.
df_debts_and_liabilities = df_debts_and_liabilities.na.drop(how="all")

In [0]:
# Float-cast step delegated to the project helper.
# NOTE(review): whether the list names the columns to cast or the columns to
# leave untouched is decided inside `cast_columns_to_float` — confirm there.
df_casted_debts_and_liabilities = ffdsm.cast_columns_to_float(
    df_debts_and_liabilities,
    ["AnoCalendario"],
)

In [0]:
# Column-renaming step delegated to the project helper, tagged "DividasEOnus".
# NOTE(review): "AnoCalendario" is the join key used later in this notebook, so
# the list presumably names columns exempt from renaming — confirm in the helper.
df_casted_debts_and_liabilities = ffdsm.rename_columns_with_df_name(
    df_casted_debts_and_liabilities,
    "DividasEOnus",
    ["AnoCalendario"],
)

In [0]:
# Render the Databricks data-profile summary of the transformed
# "Dívidas e Ônus" DataFrame for visual inspection in the notebook.
dbutils.data.summarize(df_casted_debts_and_liabilities)

In [0]:
# Null report for "Dívidas e Ônus", produced by the project helper and shown
# in the notebook output. Rebinds `df_nulls` from the previous section.
# NOTE(review): presumably one null count per column — confirm in `count_nulls`.
df_nulls = ffdsm.count_nulls(df_casted_debts_and_liabilities)
display(df_nulls)

### "Pagamentos e Doações"

In [0]:
# Discard rows in which every column is null; partially filled rows are kept.
df_payments_and_donations = df_payments_and_donations.na.drop(how="all")

In [0]:
# Float-cast step delegated to the project helper.
# NOTE(review): whether the list names the columns to cast or the columns to
# leave untouched is decided inside `cast_columns_to_float` — confirm there.
df_casted_payments_and_donations = ffdsm.cast_columns_to_float(
    df_payments_and_donations,
    ["AnoCalendario"],
)

In [0]:
# Column-renaming step delegated to the project helper, tagged "PagamentosEDoacoes".
# NOTE(review): "AnoCalendario" is the join key used later in this notebook, so
# the list presumably names columns exempt from renaming — confirm in the helper.
df_casted_payments_and_donations = ffdsm.rename_columns_with_df_name(
    df_casted_payments_and_donations,
    "PagamentosEDoacoes",
    ["AnoCalendario"],
)

In [0]:
# Render the Databricks data-profile summary of the transformed
# "Pagamentos e Doações" DataFrame for visual inspection in the notebook.
dbutils.data.summarize(df_casted_payments_and_donations)

In [0]:
# Null report for "Pagamentos e Doações", produced by the project helper and
# shown in the notebook output. Rebinds `df_nulls` from the previous section.
# NOTE(review): presumably one null count per column — confirm in `count_nulls`.
df_nulls = ffdsm.count_nulls(df_casted_payments_and_donations)
display(df_nulls)

## Aggregation By "Ano"

In [0]:
# First join step: combine "Bens e Direitos" with "Dívidas e Ônus" on the
# shared "AnoCalendario" column. An inner join keeps only the key values
# present in both DataFrames.
# NOTE(review): if either side holds several rows per "AnoCalendario", the
# result multiplies them — confirm the key is unique per table.
df_joined_assets_and_rights_debts_and_liabilities = (
    df_casted_assets_and_rights
    .join(
        df_casted_debts_and_liabilities,
        on="AnoCalendario",
        how="inner",
    )
)

In [0]:
# Second join step: attach "Pagamentos e Doações" to the previous join result,
# again with an inner join on "AnoCalendario", then show the combined DataFrame.
df_joined_assets_and_rights_debts_and_liabilities_payments_and_donations = (
    df_joined_assets_and_rights_debts_and_liabilities
    .join(
        df_casted_payments_and_donations,
        on="AnoCalendario",
        how="inner",
    )
)
display(
    df_joined_assets_and_rights_debts_and_liabilities_payments_and_donations
)

## Save as Delta in Silver Layer

In [0]:
# Make sure the target schema exists before the silver table is written.
# The statement interpolates nothing, so a plain string literal is used
# instead of an f-string.
spark.sql("CREATE SCHEMA IF NOT EXISTS brazilian_tax_big_numbers.silver_layer")

In [0]:
# Persist the joined DataFrame as a table in the silver layer; "overwrite"
# mode replaces whatever the table previously contained.
#
# `error` stays None when the write succeeds and holds the exception message
# when it fails.
# NOTE(review): a failed write is only printed, not re-raised, so this cell
# still completes — confirm that whatever consumes `error` acts on it.
error = None

try:
    (
        df_joined_assets_and_rights_debts_and_liabilities_payments_and_donations.write
        .mode("overwrite")
        .saveAsTable("brazilian_tax_big_numbers.silver_layer.delta_bens_dividas_pagamentos")
    )
except Exception as e:
    # Best-effort capture: keep the message for later inspection and echo it.
    error = str(e)
    print(error)