In [ ]:
# Import necessary functions
from pyspark.sql.functions import col, count, avg, sum, desc

In [ ]:
# 1. Pipeline configuration
# Source (Silver) and destination (Gold) locations, each given as a
# catalog name plus a schema name.
silver_catalog, silver_schema = "main", "silver"
gold_catalog, gold_schema = "main", "gold"

# Fully qualified "<catalog>.<schema>.<table>" identifiers used by the
# cells below.
silver_table = ".".join([silver_catalog, silver_schema, "user_activity"])
gold_table_1_name = ".".join([gold_catalog, gold_schema, "events_by_country"])
gold_table_2_name = ".".join([gold_catalog, gold_schema, "user_session_stats"])

In [ ]:
# 2. Make sure the Gold schema exists before any table is written to it.
# "IF NOT EXISTS" keeps the statement safe to re-run.
create_gold_schema_sql = f"CREATE SCHEMA IF NOT EXISTS {gold_catalog}.{gold_schema}"
spark.sql(create_gold_schema_sql)

In [ ]:
# 3. Read the Silver source table into a DataFrame.
# `spark.table(name)` is the session-level shortcut for `spark.read.table(name)`.
silver_df = spark.table(silver_table)

In [ ]:
# 4. Gold Aggregate 1: number of events per (country, event_type) pair,
#    ordered so the largest counts come first.
events_per_country_and_type = silver_df.groupBy("country", "event_type").agg(
    count("*").alias("event_count")
)
# NOTE(review): whether this ordering is still observable after the table is
# written and read back is not established in this notebook -- confirm the
# sort is actually needed.
gold_events_by_country = events_per_country_and_type.orderBy(desc("event_count"))

In [ ]:
# 5. Gold Aggregate 2: per-user session statistics.
# One output row per distinct (user_id, first_name, last_name, country).
# `avg`, `sum` and `count` are the pyspark.sql.functions aggregates imported
# at the top of the notebook (so `sum` is NOT the Python builtin here).
session_metrics = [
    avg("session_duration_sec").alias("avg_session_sec"),
    sum("session_duration_sec").alias("total_session_sec"),
    count("*").alias("total_events"),
]
gold_user_session_stats = silver_df.groupBy(
    "user_id", "first_name", "last_name", "country"
).agg(*session_metrics)

In [ ]:
# 6. Write to Gold Delta Tables
# Each aggregate is saved as a Delta table with mode "overwrite", i.e. the
# existing contents of the target table are replaced on every run.
# The (DataFrame, table name) pairs are listed once so the writes and the
# status report below cannot drift apart.
gold_outputs = [
    (gold_events_by_country, gold_table_1_name),
    (gold_user_session_stats, gold_table_2_name),
]

for gold_df, gold_table_name in gold_outputs:
    gold_df.write.format("delta").mode("overwrite").saveAsTable(gold_table_name)

# Reached only if every write above succeeded (a failed write raises first).
print("Successfully created Gold tables:")
for _, gold_table_name in gold_outputs:
    print(f"- {gold_table_name}")