## SILVER LAYER TRANSFORMATIONS

### CONNECT DATA LAKE BRONZE LAYER

In [0]:
# Authenticate to the ADLS Gen2 account with a Microsoft Entra service principal
# (OAuth 2.0 client-credentials flow). These settings are session-scoped.
#
# The client secret is read from a secret scope at run time instead of being
# pasted into the notebook, so it never lands in source control or in exported
# notebooks. Replace the <secret-scope> / <service-credential-key> placeholders
# with the scope and key that hold the service principal's secret.
# NOTE(review): `dbutils` is the Databricks notebook utility object; this assumes
# the notebook runs on Databricks — confirm for other Spark environments.
service_credential = dbutils.secrets.get(scope="<secret-scope>", key="<service-credential-key>")

spark.conf.set("fs.azure.account.auth.type.<storage-account>.dfs.core.windows.net", "OAuth")
spark.conf.set("fs.azure.account.oauth.provider.type.<storage-account>.dfs.core.windows.net", "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider")
spark.conf.set("fs.azure.account.oauth2.client.id.<storage-account>.dfs.core.windows.net", "<ms-entra-app-id>")
spark.conf.set("fs.azure.account.oauth2.client.secret.<storage-account>.dfs.core.windows.net", service_credential)
spark.conf.set("fs.azure.account.oauth2.client.endpoint.<storage-account>.dfs.core.windows.net", "https://login.microsoftonline.com/<tenant-id>/oauth2/token")

### IMPORT MODULES AND VARIABLES

In [0]:
import json

from pyspark.sql import SparkSession
from pyspark.sql.functions import col, to_date
from pyspark.sql.utils import AnalysisException

### MAKE TRANSFORMATIONS

In [0]:

spark = SparkSession.builder.appName("StockDataPipeline").getOrCreate()

# For every coin: flatten the bronze JSON payload into one row per day
# (name, date, close, volume) and persist it to the silver CSV directory.
# The first run writes the full dataset; later runs append only the dates
# that are not in the silver data yet.
for crypto in ['btc', 'eth', 'doge', 'xrp', 'usdt']:
    bronze_path = f"abfss://bronze@<storage-account>.dfs.core.windows.net/crypto/{crypto}.json"
    silver_path = f"abfss://silver@<storage-account>.dfs.core.windows.net/crypto/{crypto}.csv"

    # wholetext=True loads the entire file as a single row. Without it the text
    # reader yields one row per line, so taking the first row would only parse
    # correctly when the JSON document happens to be stored on a single line.
    crypto_json = spark.read.text(bronze_path, wholetext=True).first()[0]
    crypto_dict = json.loads(crypto_json)

    # Extract metadata
    currency_name = crypto_dict["Meta Data"]["3. Digital Currency Name"]

    # Extract and flatten time series data: one record per trading day.
    records = [
        {
            "name": currency_name,
            "date": day,
            "close": float(values["4. close"]),
            # Volume is parsed via float first, then truncated to an integer.
            "volume": int(float(values["5. volume"])),
        }
        for day, values in crypto_dict["Time Series (Digital Currency Daily)"].items()
    ]

    # Convert to a Spark DataFrame and turn the date strings into a DateType column.
    df = spark.createDataFrame(records).withColumn("date", to_date("date", "yyyy-MM-dd"))

    # Check if silver CSV already exists. Only the read is guarded: an
    # AnalysisException raised later (join / write) must surface as an error
    # rather than trigger an overwrite of the whole silver dataset.
    try:
        existing_df = spark.read.option("header", "true").csv(silver_path)
    except AnalysisException:
        existing_df = None

    # No readable silver data (path missing, or nothing with a "date" column):
    # write the full dataset.
    if existing_df is None or "date" not in existing_df.columns:
        print("📁 Silver file doesn't exist. Writing full dataset.")
        df.coalesce(1).write.mode("overwrite").option("header", "true").csv(silver_path)
        continue

    # Merge: keep only rows whose date is not in the silver data yet.
    existing_dates = existing_df.select(to_date("date").alias("date"))

    # A join on a column name puts the join key first in its output. Restore
    # df's own column order so appended CSV parts line up positionally with the
    # parts produced by the initial full write.
    new_df = df.join(existing_dates, on="date", how="left_anti").select(*df.columns)

    new_row_count = new_df.count()
    print(f"🆕 Found {new_row_count} new rows to append.")

    # Skip the write when there is nothing new, so repeated runs do not pile up
    # extra part files in the silver directory.
    if new_row_count > 0:
        new_df.coalesce(1).write.mode("append").option("header", "true").csv(silver_path)

📁 Silver file doesn't exist. Writing full dataset.
📁 Silver file doesn't exist. Writing full dataset.
📁 Silver file doesn't exist. Writing full dataset.
📁 Silver file doesn't exist. Writing full dataset.
📁 Silver file doesn't exist. Writing full dataset.
