##### Imports

In [0]:
from pyspark.sql import functions as F

##### Variables

In [0]:
# Azure storage account and container that hold the silver layer.
storage_account_name = "stacjprd001"
container_name = "cont-dt-mst"

# Database and table names of the silver users table.
database_name = "silver_users"
table_name = "users_clean"

# abfss:// URIs: the database directory under /silver, and the table
# directory directly beneath it.
container_path = (
    f"abfss://{container_name}@{storage_account_name}.dfs.core.windows.net"
    f"/silver/{database_name}"
)
delta_table_path = f"{container_path}/{table_name}"

##### Create Database

In [0]:
# Create the silver database only when it is missing, with its storage
# location set to container_path.
create_database_sql = f"""CREATE DATABASE IF NOT EXISTS {database_name} LOCATION '{container_path}'"""
spark.sql(create_database_sql)

DataFrame[]

##### Get Data from Bronze

In [0]:
# Read the bronze users table; it is the input of the cleaning step below.
df_table_users = spark.read.table('bronze_users.users')

##### Clean Data

In [0]:
def _nth_field(source: str, position: int):
    """Return the `position`-th item of a wrapped, ", "-separated string column.

    The first and last characters of `source` are removed (presumably the
    delimiters wrapping the serialized value; TODO confirm against the bronze
    schema), the remainder is split on ", ", and the item at the zero-based
    `position` is returned as a Column.
    """
    unwrapped = F.expr(f"substring({source}, 2, length({source}) - 2)")
    return F.split(unwrapped, ", ").getItem(position)


# Build the silver projection in a single select: rename the pass-through
# columns and extract the individual attributes from the packed columns.
# The raw employment / address / subscription columns are not carried over.
df_users_clean = df_table_users.select(
    F.col("dat_ref_carga").alias("DT_REFE_CRGA"),
    F.col("first_name").alias("NM_USUA"),
    F.col("gender").alias("NM_GENERO"),
    _nth_field("employment", 0).alias("DS_CARGO"),
    _nth_field("employment", 1).alias("DS_AREA"),
    _nth_field("address", 4).alias("NM_ESTADO"),
    _nth_field("address", 5).alias("NM_PAIS"),
    _nth_field("subscription", 0).alias("TP_CONTA"),
    _nth_field("subscription", 1).alias("ST_CONTA"),
    _nth_field("subscription", 2).alias("DS_MTD_PGTO"),
    _nth_field("subscription", 3).alias("DS_FREQ_PGTO"),
)

##### Write Table

In [0]:
# Persist the cleaned users as a Delta table at delta_table_path, replacing
# any previous contents and partitioning the files by DT_REFE_CRGA.
df_users_clean.write.saveAsTable(
    f"{database_name}.{table_name}",
    format="delta",
    mode="overwrite",
    partitionBy="DT_REFE_CRGA",
    path=delta_table_path,
)

##### Output

In [0]:
%sql
-- Show the contents of the silver users table.
SELECT *
FROM silver_users.users_clean

DT_REFE_CRGA,NM_USUA,NM_GENERO,DS_CARGO,DS_AREA,NM_ESTADO,NM_PAIS,TP_CONTA,ST_CONTA,DS_MTD_PGTO,DS_FREQ_PGTO
2025-02-13,Keeley,Agender,Consulting Manager,Teamwork,Indiana,United States,Bronze,Active,Bitcoins,Full subscription
2025-02-13,Kenton,Agender,Senior Education Representative,Problem solving,Nebraska,United States,Essential,Idle,Cash,Full subscription
2025-02-13,Maryanne,Polygender,Retail Liaison,Networking skills,Iowa,United States,Business,Blocked,Apple Pay,Monthly
2025-02-13,Colin,Male,Regional Government Supervisor,Self-motivated,Pennsylvania,United States,Essential,Idle,Google Pay,Payment in advance
2025-02-13,Guy,Polygender,Mining Specialist,Confidence,Utah,United States,Standard,Blocked,Credit card,Full subscription
2025-02-13,Delbert,Female,Future Liaison,Problem solving,Florida,United States,Diamond,Pending,Cash,Annual
2025-02-13,Sharmaine,Genderfluid,Chief Officer,Networking skills,Idaho,United States,Gold,Active,Cash,Payment in advance
2025-02-13,Allyn,Genderfluid,National Government Supervisor,Work under pressure,Maine,United States,Free Trial,Blocked,WeChat Pay,Annual
2025-02-13,Faviola,Bigender,Future Orchestrator,Technical savvy,Arkansas,United States,Standard,Active,WeChat Pay,Payment in advance
2025-02-13,Noe,Male,District Marketing Producer,Problem solving,Indiana,United States,Diamond,Active,Cheque,Payment in advance
