In [0]:
# 02_silver_sector_transformation_with_logging.py

from pyspark.sql.functions import col, to_date
from datetime import datetime

# Silver-layer step: union the per-ticker Bronze Delta tables, clean the
# result, overwrite the `silver_sector_data` table, and append one run record
# to `pipeline_logs` regardless of whether the step succeeded.
# NOTE(review): the non-ASCII message strings below look mis-encoded
# (mojibake); they are kept exactly as stored — confirm the file encoding.
try:
    # Read every per-ticker Bronze table (tables are named `bronze_<ticker>`).
    tickers = ["aapl", "msft", "jpm", "bac"]
    bronze_dfs = [
        spark.read.format("delta").table(f"bronze_{t}") for t in tickers
    ]

    # Stack the per-ticker frames into one, matching columns by name.
    bronze_df = bronze_dfs[0]
    for df in bronze_dfs[1:]:
        bronze_df = bronze_df.unionByName(df)

    # Transformation and cleaning: normalise the date column name, convert it
    # with to_date, drop rows missing Close or Volume, and keep one row per
    # (date, Ticker) pair.
    silver_df = (
        bronze_df
        .withColumnRenamed("Date", "date")
        .withColumn("date", to_date(col("date")))
        .dropna(subset=["Close", "Volume"])
        .dropDuplicates(["date", "Ticker"])
    )

    # Persist to the Silver layer (Delta table), replacing previous contents.
    silver_df.write.format("delta").mode("overwrite").saveAsTable("silver_sector_data")

    # Count the rows of the table that was just written instead of calling
    # silver_df.count(): counting the DataFrame would re-execute the whole
    # read/union/clean plan a second time, and would report on the re-read
    # inputs rather than on what is actually stored.
    rows_written = spark.table("silver_sector_data").count()

    # Run record for the successful case.
    log_entry = [{
        "step": "transformation",
        "layer": "silver",
        "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "status": "success",
        "rows_processed": rows_written,
        "comment": "–ø–æ—î–¥–Ω–∞–Ω–Ω—è —Ç–∞ –æ—á–∏—Å—Ç–∫–∞ –¥–∞–Ω–Ω–∏—Ö –∑ –±—Ä–æ–Ω–∑–æ–≤–æ–≥–æ —à–∞—Ä—É"
    }]

except Exception as e:
    # On failure, record the error text with a zero row count. The exception
    # is intentionally not re-raised: the failure is reported through the log
    # table and the printed message.
    log_entry = [{
        "step": "transformation",
        "layer": "silver",
        "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "status": f"error: {e}",
        "rows_processed": 0,
        "comment": "–ø–æ–º–∏–ª–∫–∞ —Ç—Ä–∞–Ω—Å—Ñ–æ—Ä–º–∞—Ü—ñ—ó –¥–∞–Ω–∏—Ö –±—Ä–æ–Ω–∑"
    }]
    print(f"‚ùå –ø–æ–º–∏–ª–∫–∞ —Ç—Ä–∞–Ω—Å—Ñ–æ—Ä–º–∞—Ü—ñ—ó silver: {e}")

finally:
    # Append the run record (success or error) to the pipeline_logs table.
    log_df = spark.createDataFrame(log_entry)
    log_df.write.format("delta").mode("append").saveAsTable("pipeline_logs")
    print("\nüìÑ –õ–æ–≥ —Ç—Ä–∞–Ω—Å—Ñ–æ—Ä–º–∞—Ü—ñ—ó silver –æ–Ω–æ–≤–ª–µ–Ω.")

# Show a 10-row preview of the Silver table, but only when the run record
# held in `log_entry` reports success.
run_status = log_entry[0]["status"]
if run_status == "success":
    silver_preview = spark.table("silver_sector_data").limit(10)
    display(silver_preview)



üìÑ –õ–æ–≥ —Ç—Ä–∞–Ω—Å—Ñ–æ—Ä–º–∞—Ü—ñ—ó silver –æ–Ω–æ–≤–ª–µ–Ω.


date,Open,High,Low,Close,Volume,Dividends,Stock_Splits,Ticker,Sector
2024-08-05,198.1632471674405,212.506173331003,195.0876345333798,208.29586791992188,119548600,0.0,0.0,AAPL,Technology
2025-01-21,223.4609453061096,223.87993275191027,218.85206818023312,222.10421752929688,98070400,0.0,0.0,AAPL,Technology
2024-08-21,225.7265541171402,227.18143154067607,224.26170199000447,225.60696411132807,34765500,0.0,0.0,AAPL,Technology
2024-10-17,232.6123377292532,233.0308799404877,229.71254238803704,231.3368225097656,32993800,0.0,0.0,AAPL,Technology
2025-03-18,213.87952928252255,214.8682229774316,211.2130278733229,212.4114532470703,42432400,0.0,0.0,AAPL,Technology
2025-04-02,221.03016017035927,224.89508702876748,220.73055001137132,223.59678649902344,35905900,0.0,0.0,AAPL,Technology
2024-08-30,229.3836948285937,229.59295072736157,226.68318070354871,228.19786071777344,52990800,0.0,0.0,AAPL,Technology
2024-10-04,227.1017072618697,227.20136306464835,223.3449237414969,226.0055694580078,37245100,0.0,0.0,AAPL,Technology
2025-02-12,230.89721134547665,236.64967765066984,230.377888084301,236.55978393554688,45243300,0.0,0.0,AAPL,Technology
2024-06-04,193.7339558758535,194.41079839334415,192.1314497713321,193.4453125,47471400,0.0,0.0,AAPL,Technology
