##### Imports

In [0]:
from pyspark.sql import functions as F

##### Variables

In [0]:
# Storage and catalog configuration for the `gold_users` database.
storage_account_name  = "stacjprd001"
database_name = "gold_users"
container_name = "cont-dt-mst"

# Root URI of the ADLS container; every path below is built from it.
storage_root = f"abfss://{container_name}@{storage_account_name}.dfs.core.windows.net"

# Location registered for the database itself (see the CREATE DATABASE cell).
container_path = f"{storage_root}/gold/{database_name}"


# Names of the tables written into `database_name`.
table_country = "users_country"
# `table_cards` is the name the path below and the write cell actually read;
# previously only `table_cards_money_transfer` existed, so this cell raised
# NameError on a fresh kernel.
table_cards = "users_cards"
# Kept as an alias so anything still using the old name keeps working.
table_cards_money_transfer = table_cards
table_employment = "users_area_premium"

# Storage paths handed to the Delta writer for each table.
# NOTE(review): these live under `/silver/` while the database location above
# is under `/gold/` — looks like a copy-paste leftover; confirm before moving,
# since changing the path relocates the tables.
delta_table_country_path = f"{storage_root}/silver/{database_name}/{table_country}"
delta_table_cards_path = f"{storage_root}/silver/{database_name}/{table_cards}"
delta_table_employment_path = f"{storage_root}/silver/{database_name}/{table_employment}"

##### Create Database

In [0]:
# Register the database at `container_path`; IF NOT EXISTS makes re-runs safe.
create_database_stmt = f"""CREATE DATABASE IF NOT EXISTS {database_name} LOCATION '{container_path}'"""
spark.sql(create_database_stmt)

DataFrame[]

##### Get Data from Silver

In [0]:
# Single input of this notebook: every aggregate below is derived from this table.
silver_users_table = 'silver_users.users_clean'
df_table_users_clean = spark.table(silver_users_table)

##### Gold Tables

In [0]:
# User count per DT_REFE_CRGA / NM_PAIS / NM_ESTADO.
# count() on a column only counts rows where NM_USUA is not null.
df_groupby_country = (
    df_table_users_clean
    .groupBy('DT_REFE_CRGA', 'NM_PAIS', 'NM_ESTADO')
    .agg(F.count(F.col('NM_USUA')).alias('QTD_USUA'))
)

# User count per DT_REFE_CRGA / TP_CONTA / DS_FREQ_PGTO, restricted to rows
# whose DS_MTD_PGTO is exactly 'Money transfer'.
df_tp_conta_money_trasfer = (
    df_table_users_clean
    .where(F.col('DS_MTD_PGTO') == 'Money transfer')
    .groupBy('DT_REFE_CRGA', 'TP_CONTA', 'DS_FREQ_PGTO')
    .agg(F.count(F.col('NM_USUA')).alias('QTD_USUA'))
)

# Per DT_REFE_CRGA / DS_AREA: how many rows have TP_CONTA == 'Premium'
# (QTD_PREMIUM) next to the overall user count (QTD_TOTAL).
df_area_premium = (
    df_table_users_clean
    .groupBy('DT_REFE_CRGA', 'DS_AREA')
    .agg(
        # Summing a 1/0 flag counts the Premium rows; anything else adds 0.
        F.sum(
            F.when(F.col('TP_CONTA') == 'Premium', F.lit(1)).otherwise(F.lit(0))
        ).alias('QTD_PREMIUM'),
        F.count(F.col('NM_USUA')).alias('QTD_TOTAL'),
    )
)

##### Write Tables

In [0]:
def _write_gold_table(df, table_name, table_path):
    """Overwrite one Delta table in `database_name`, partitioned by DT_REFE_CRGA.

    Args:
        df: DataFrame to persist; it must contain a DT_REFE_CRGA column.
        table_name: Table name (without database prefix) to register.
        table_path: Storage path passed to the writer's "path" option.
    """
    (
        df.write
        .format("delta")
        .mode("overwrite")
        .partitionBy('DT_REFE_CRGA')
        .option("path", table_path)
        .saveAsTable(f"{database_name}.{table_name}")
    )


# Written one after another, in the same order as before; a failure on one
# table stops the cell before the following tables are touched.
_write_gold_table(df_groupby_country, table_country, delta_table_country_path)
_write_gold_table(df_tp_conta_money_trasfer, table_cards, delta_table_cards_path)
_write_gold_table(df_area_premium, table_employment, delta_table_employment_path)

##### Output

In [0]:
%sql
-- Preview of the Premium-per-area table written above.
SELECT *
FROM gold_users.users_area_premium

DT_REFE_CRGA,DS_AREA,QTD_PREMIUM,QTD_TOTAL
2025-02-13,Work under pressure,1,9
2025-02-13,Proactive,0,11
2025-02-13,Organisation,1,12
2025-02-13,Teamwork,0,12
2025-02-13,Leadership,1,6
2025-02-13,Technical savvy,1,11
2025-02-13,Communication,2,10
2025-02-13,Networking skills,0,8
2025-02-13,Fast learner,0,3
2025-02-13,Confidence,3,8


In [0]:
%sql
-- Preview of the money-transfer table written above.
SELECT *
FROM gold_users.users_cards

DT_REFE_CRGA,TP_CONTA,DS_FREQ_PGTO,QTD_USUA
2025-02-13,Standard,Full subscription,1
2025-02-13,Bronze,Full subscription,2
2025-02-13,Free Trial,Annual,1
2025-02-13,Business,Monthly,1
2025-02-13,Standard,Payment in advance,1
2025-02-13,Basic,Payment in advance,2
2025-02-13,Starter,Annual,1
2025-02-13,Free Trial,Full subscription,1
2025-02-13,Free Trial,Payment in advance,1
2025-02-13,Premium,Annual,1


In [0]:
%sql
-- Preview of the users-per-country/state table written above.
SELECT *
FROM gold_users.users_country

DT_REFE_CRGA,NM_PAIS,NM_ESTADO,QTD_USUA
2025-02-13,United States,Kansas,2
2025-02-13,United States,Arizona,1
2025-02-13,United States,Arkansas,2
2025-02-13,United States,North Carolina,5
2025-02-13,United States,Oregon,3
2025-02-13,United States,Colorado,1
2025-02-13,United States,Vermont,2
2025-02-13,United States,Texas,1
2025-02-13,United States,West Virginia,3
2025-02-13,United States,Tennessee,5
