# In [None]:
# 03_silver_transformation — renommage strict + nettoyage léger

from pyspark.sql import functions as F
from pyspark.sql import types as T
import os

# Point the session at the Hive metastore catalog, make sure the `silver`
# schema exists, then make it the current schema. The statements are issued
# in exactly this order.
for _setup_statement in (
    "USE CATALOG hive_metastore",
    "CREATE SCHEMA IF NOT EXISTS silver",
    "USE SCHEMA silver",
):
    spark.sql(_setup_statement)

# Storage credentials are read from the environment; a missing variable
# raises KeyError right here.
storage_account_name, storage_account_key = (
    os.environ["STORAGE_ACCOUNT_NAME"],
    os.environ["STORAGE_ACCOUNT_KEY"],
)

# Register the account key on the Spark session for this storage account.
_account_key_option = f"fs.azure.account.key.{storage_account_name}.dfs.core.windows.net"
spark.conf.set(_account_key_option, storage_account_key)

# ABFSS locations (container `silver`) used as the `path` of the two silver
# table writes further down.
silver_plv_dir = f"abfss://silver@{storage_account_name}.dfs.core.windows.net/plv_silver/"
silver_res_dir = f"abfss://silver@{storage_account_name}.dfs.core.windows.net/result_silver/"

# ----------------- MAPPINGS FOURNIS -----------------

# Column renames applied to the bronze PLV table before it is written to
# silver, listed as (bronze_name, silver_name) pairs. dict() preserves the
# pair order, which is the order the renames are applied in.
rename_plv = dict(
    [
        ("cddept", "code_dept"),
        ("cdreseau", "code_reseau"),
        ("inseecommuneprinc", "code_insee_commune"),
        ("nomcommuneprinc", "nom_commune"),
        ("cdreseauamont", "code_reseau_amont"),
        ("nomreseauamont", "nom_reseau_amont"),
        ("pourcentdebit", "pourcentage_debit"),
        ("referenceprel", "reference_prelevement"),
        ("dateprel", "date_prelevement"),
        ("heureprel", "heure_prelevement"),
        ("conclusionprel", "conclusion_prelevement"),
        ("ugelib", "uge_libelle"),
        ("distrlib", "distri_libelle"),
        ("moalib", "moa_libelle"),
        ("plvconformitebacterio", "conformite_bacteriologiq"),
        ("plvconformitechimique", "conformite_chimique"),
        ("plvconformitereferencebact", "conformite_reference_bacteriologiq"),
        ("plvconformitereferencechim", "conformite_reference_chimique"),
        # Underscore-prefixed bronze columns.
        ("_source_blob", "source"),
        ("_year", "annee"),
    ]
)

# Column renames applied to the bronze RESULT table before it is written to
# silver, listed as (bronze_name, silver_name) pairs. dict() preserves the
# pair order, which is the order the renames are applied in.
rename_res = dict(
    [
        ("cddept", "code_dept"),
        ("referenceprel", "reference_prelevement"),
        ("cdparametresiseeaux", "code_parametre_sise_eau"),
        ("cdparametre", "code_parametre"),
        ("libmajparametre", "libelle_maj_parametre"),
        ("libminparametre", "libelle_min_parametre"),
        ("libwebparametre", "libelle_web_parametre"),
        ("qualitparam", "qualite_parametre"),
        ("insituana", "insitu_analyse"),
        # NOTE(review): source is "rqana" but the target is spelled
        # "rg_analyse" (g, not q) — confirm this is the intended name.
        ("rqana", "rg_analyse"),
        ("cdunitereferencesiseeaux", "code_unite_reference_sise_eau"),
        ("cdunitereference", "code_unite_reference"),
        ("limitequal", "limite_qualite"),
        ("refqual", "reference_qualite"),
        # NOTE(review): "val" is expanded to "validite" here — confirm
        # against the source data dictionary.
        ("valtraduite", "validite_traduite"),
        ("casparam", "cas_parametre"),
        ("referenceanl", "reference_analyse"),
        ("dateanl", "date_analyse"),
        ("heureanl", "heure_analyse"),
        # Underscore-prefixed bronze columns.
        ("_source_blob", "source"),
        ("_year", "annee"),
    ]
)

def apply_renaming(df, mapping):
    """Rename the columns of ``df`` according to ``mapping``.

    Args:
        df: DataFrame-like object exposing ``columns`` and
            ``withColumnRenamed``.
        mapping: Dict of ``current name -> new name``; entries are applied
            in the dict's iteration order.

    Returns:
        The frame after all applicable renames. Keys that are not among the
        frame's columns are skipped; ``df`` itself is returned when no key
        matches.
    """
    renamed = df
    for current_name in mapping:
        # Membership is tested against the progressively renamed frame, so a
        # column produced by an earlier rename can be matched by a later key.
        if current_name not in renamed.columns:
            continue
        renamed = renamed.withColumnRenamed(current_name, mapping[current_name])
    return renamed

def normalize(df):
    """Trim string columns and cast ``annee`` to integer when it exists.

    Args:
        df: Spark DataFrame to clean.

    Returns:
        A DataFrame in which every column whose dtype is reported as
        ``"string"`` has been passed through ``F.trim`` and, if a column
        named ``annee`` is present, that column is cast to ``IntegerType``.
        ``df`` itself is returned when neither step applies.
    """
    # The list of string columns is taken once, from the incoming frame.
    string_columns = [name for name, dtype in df.dtypes if dtype == "string"]

    cleaned = df
    for name in string_columns:
        cleaned = cleaned.withColumn(name, F.trim(F.col(name)))

    if "annee" not in cleaned.columns:
        return cleaned
    # The cast runs after trimming, so a string `annee` is trimmed first.
    return cleaned.withColumn("annee", F.col("annee").cast(T.IntegerType()))

# ---------------- SILVER PLV ----------------

# Bronze -> silver for PLV: read the bronze table, rename its columns with
# `rename_plv`, normalize, then overwrite the silver Delta table whose data
# is stored at `silver_plv_dir`.
plv = normalize(
    apply_renaming(spark.table("hive_metastore.bronze.plv_bronze"), rename_plv)
)

plv.write.saveAsTable(
    "hive_metastore.silver.plv_silver",
    format="delta",
    mode="overwrite",
    # Extra keyword arguments are forwarded as writer options.
    overwriteSchema="true",
    path=silver_plv_dir,
)

# ---------------- SILVER RESULT ----------------

# Bronze -> silver for RESULT: read the bronze table, rename its columns
# with `rename_res`, normalize, then overwrite the silver Delta table whose
# data is stored at `silver_res_dir`.
res = normalize(
    apply_renaming(spark.table("hive_metastore.bronze.result_bronze"), rename_res)
)

res.write.saveAsTable(
    "hive_metastore.silver.result_silver",
    format="delta",
    mode="overwrite",
    # Extra keyword arguments are forwarded as writer options.
    overwriteSchema="true",
    path=silver_res_dir,
)
