In [0]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, sin, cos, lit
import math

# Reuse the Spark session that is already running (e.g. the one Databricks
# attaches to the notebook); all table reads and writes below go through it.
spark = SparkSession.getActiveSession()
if spark is None:
    # getActiveSession() returns None outside a running session; fail with a
    # clear message instead of an AttributeError on the first spark.sql call.
    raise RuntimeError("no active SparkSession; run this inside a Spark/Databricks session")

silver_schema = "model_workspace.silver"
gold_schema = "model_workspace.gold"

# Silver feature tables are recognised by their "_features" name suffix.
# The suffix is tested with endswith() rather than SQL LIKE '%_features':
# in LIKE the "_" is a single-character wildcard, so the pattern would also
# match names that merely end in "features".
# The names are read from the collected rows directly (no .rdd), which also
# works on compute where the RDD API is not available.
silver_tables = [
    row["tableName"]
    for row in spark.sql(f"SHOW TABLES IN {silver_schema}")
    .filter(col("tableName").endswith("_features"))
    .select("tableName")
    .collect()
]

# Columns read from each silver table: the features with good correlation,
# plus a few fields that are kept for logging.
# NOTE(review): "DayofTheYear" here and "DayOfTheYear" in final_columns differ
# in letter case; this presumably relies on Spark's case-insensitive column
# resolution (the default) - confirm against the silver schema.
selected_columns = [
    "Irradiance",
    "BodyTemperature",
    "RelativeHumidity",
    "Pressure",
    "SunZenith",
    "Month",
    "Day",
    "DayofTheYear",
    "Hour",
    "UnixTime",
    "DayLength",
]

# Columns, in output order, that are selected right before a gold table is
# written. "SinHour" and "CosHour" are derived from "Hour" during processing.
final_columns = [
    "Irradiance",
    "BodyTemperature",
    "RelativeHumidity",
    "Pressure",
    "SunZenith",
    "DayOfTheYear",
    "Hour",
    "SinHour",
    "CosHour",
    "UnixTime",
    "DayLength",
]


# Build one gold table per silver feature table. As in the silver step, a gold
# table that already exists is skipped, so a re-run only creates what is missing.
for table_name in silver_tables:
    silver_table = f"{silver_schema}.{table_name}"
    gold_table = f"{gold_schema}.{table_name.replace('_features', '_gold')}"

    # Use the public catalog API rather than the private JVM handle
    # (spark._jsparkSession), which is not present on every session type.
    if spark.catalog.tableExists(gold_table):
        print(f"skipping existing : {gold_table}")
        continue

    print(f"procesing: {silver_table} to {gold_table}")

    try:
        df = spark.read.table(silver_table).select(*selected_columns)

        # Cyclic encoding of the hour: map it onto a circle with a 24-step
        # period so that the end of one day sits next to the start of the next.
        # Assumes "Hour" is an hour-of-day value - TODO confirm.
        hour_angle = 2 * math.pi * col("Hour") / lit(24)
        df = df.withColumn("SinHour", sin(hour_angle)).withColumn("CosHour", cos(hour_angle))

        # "Hour" is intentionally NOT dropped: final_columns lists it, and
        # dropping it first made the select below fail for every table (the
        # error was then swallowed by the except, so nothing was ever saved).

        # Sort by time before saving: unsorted rows could cause problems during
        # training.
        # NOTE(review): a stored table does not necessarily return rows in
        # write order; consumers should presumably still sort by UnixTime.
        df = df.orderBy("UnixTime").select(*final_columns)

        df.write.format("delta").mode("overwrite").saveAsTable(gold_table)
        print(f" saved to gold schema:  {gold_table}")

    except Exception as e:
        # Best effort per table: report the failure and continue with the next.
        print(f"err {table_name}: {e}")
