In [0]:
from pyspark.sql.functions import datediff, current_date

# Gold asset dimension: asset master rows enriched with registry attributes,
# written as a Delta table and registered in the metastore.
GOLD_DIM_ASSET_PATH = (
    "abfss://sedpcontainer@sedpstorageaccount.dfs.core.windows.net/Gold/gold_dim_asset_delta"
)

# Columns carried into the dimension, in output order.
DIM_ASSET_COLUMNS = [
    "AssetID",
    "AssetName",
    "Location",
    "InstalledDate",
    "CapacityKW",
    "Status",
    "LastMaintenanceDate",
    "registry_region",
    "Category_registry",
]

df_master = spark.table("silver_asset_master")
df_registry = spark.table("silver_asset_registry")

# Left join: every master row is kept, whether or not the registry has a match.
df_master_with_registry = df_master.join(
    df_registry,
    on=df_master["AssetID"] == df_registry["asset_id"],
    how="left",
)

# One row per AssetID.
# NOTE(review): nothing here orders the rows before de-duplicating, so when the
# registry has several matches for an asset the surviving row is not controlled.
df_asset_unique = (
    df_master_with_registry
    .select(*DIM_ASSET_COLUMNS)
    .dropDuplicates(["AssetID"])
)

# Day count is evaluated against current_date() at the time this cell runs.
df_dim_asset = df_asset_unique.withColumn(
    "days_since_maintenance",
    datediff(current_date(), "LastMaintenanceDate"),
)

(
    df_dim_asset.write
    .format("delta")
    .mode("overwrite")
    .save(GOLD_DIM_ASSET_PATH)
)

# Register the Delta location as a table; does nothing if the name already exists.
spark.sql(f"""
    CREATE TABLE IF NOT EXISTS gold_dim_asset
    USING DELTA
    LOCATION '{GOLD_DIM_ASSET_PATH}'
""")

DataFrame[]

In [0]:
# Gold technician dimension: the distinct, non-null technician names found in
# the maintenance logs.
GOLD_DIM_TECHNICIAN_PATH = (
    "abfss://sedpcontainer@sedpstorageaccount.dfs.core.windows.net/Gold/gold_dim_technician_delta"
)

technician_names = spark.table("silver_maintenance_logs").select("Technician")

# De-duplicate first, then discard the (at most one) remaining null entry.
df_technician = technician_names.distinct().where("Technician IS NOT NULL")

(
    df_technician.write
    .format("delta")
    .mode("overwrite")
    .save(GOLD_DIM_TECHNICIAN_PATH)
)

# Register the Delta location as a table; does nothing if the name already exists.
spark.sql(f"""
    CREATE TABLE IF NOT EXISTS gold_dim_technician
    USING DELTA
    LOCATION '{GOLD_DIM_TECHNICIAN_PATH}'
""")

DataFrame[]

In [0]:
# Gold category dimension: the distinct (category, manufacturer, model)
# combinations present in the asset registry.
GOLD_DIM_CATEGORY_PATH = (
    "abfss://sedpcontainer@sedpstorageaccount.dfs.core.windows.net/Gold/gold_dim_category_registry_delta"
)

df_registry = spark.table("silver_asset_registry")

# NOTE(review): rows containing nulls are kept here, unlike a null-filtered
# dimension would; confirm that is intended.
category_columns = ["Category_registry", "manufacturer", "model"]
df_dim_category = df_registry.select(*category_columns).distinct()

(
    df_dim_category.write
    .format("delta")
    .mode("overwrite")
    .save(GOLD_DIM_CATEGORY_PATH)
)

# Register the Delta location as a table; does nothing if the name already exists.
spark.sql(f"""
    CREATE TABLE IF NOT EXISTS gold_dim_category_registry
    USING DELTA
    LOCATION '{GOLD_DIM_CATEGORY_PATH}'
""")

DataFrame[]

In [0]:
# Gold maintenance-activity dimension: the distinct, fully populated
# (ActivityType, Issue, ActionTaken) combinations from the maintenance logs.
GOLD_DIM_ACTIVITY_TYPE_PATH = (
    "abfss://sedpcontainer@sedpstorageaccount.dfs.core.windows.net/Gold/gold_dim_maintenance_activity_type_delta"
)

df_maintenance = spark.table("silver_maintenance_logs")

activity_columns = ["ActivityType", "Issue", "ActionTaken"]
distinct_activities = df_maintenance.select(*activity_columns).distinct()

# Keep only combinations where all three attributes are present.
df_dim_activity_type = distinct_activities.where(
    "ActivityType IS NOT NULL AND Issue IS NOT NULL AND ActionTaken IS NOT NULL"
)

(
    df_dim_activity_type.write
    .format("delta")
    .mode("overwrite")
    .save(GOLD_DIM_ACTIVITY_TYPE_PATH)
)

# Register the Delta location as a table; does nothing if the name already exists.
spark.sql(f"""
    CREATE TABLE IF NOT EXISTS gold_dim_maintenance_activity_type
    USING DELTA
    LOCATION '{GOLD_DIM_ACTIVITY_TYPE_PATH}'
""")

DataFrame[]