In [0]:
from pyspark.sql.functions import current_timestamp

# Bronze layer: land the raw CSV as a Delta table with a load timestamp.
# `bronze_path` is read again by later cells, so its name must stay stable.
bronze_path = "dbfs:/Volumes/workspace/ecommerce/bronze/events"

# Read the source CSV; the first line is treated as the header and column
# types are inferred from the data rather than declared up front.
raw_df = (
    spark.read
    .option("header", True)
    .option("inferSchema", True)
    .csv("/Volumes/workspace/ecommerce/ecommerce_data/2019-Oct.csv")
)

# Add an `ingestion_ts` column holding the current timestamp.
bronze_df = raw_df.withColumn("ingestion_ts", current_timestamp())

# Replace whatever is currently stored at `bronze_path` with this load.
(
    bronze_df.write
    .format("delta")
    .mode("overwrite")
    .save(bronze_path)
)

print(" Bronze layer created")


 Bronze layer created


In [0]:
# Silver layer (first pass): bronze rows with incomplete records removed.
silver_path = "dbfs:/Volumes/workspace/ecommerce/silver/events"

# Load the bronze Delta table written to `bronze_path`.
bronze_snapshot = spark.read.format("delta").load(bronze_path)

# Simple cleaning: discard a row if ANY of its columns is null.
# NOTE(review): this is an all-columns check, so a null in an optional
# column removes the whole row — confirm that is the intended rule.
silver_df = bronze_snapshot.na.drop(how="any")

# Replace whatever is currently stored at `silver_path`.
(
    silver_df.write
    .format("delta")
    .mode("overwrite")
    .save(silver_path)
)

print(" Silver layer created")


 Silver layer created


In [0]:
import pyspark.sql.functions as F

# Gold layer: one row per (event_date, event_type) with an event count and
# the rounded sum of `price`.
gold_path = "dbfs:/Volumes/workspace/ecommerce/gold/daily_sales"

# Re-read silver from storage so the aggregate is built from the persisted
# table rather than from an in-memory DataFrame.
silver_df = spark.read.format("delta").load(silver_path)

# Aggregate expressions, named up front to keep the pipeline below short.
# NOTE(review): `total_revenue` sums `price` for every event_type in the
# group, not only purchases — confirm consumers filter on event_type.
total_events_expr = F.count("*").alias("total_events")
total_revenue_expr = F.round(F.sum("price"), 2).alias("total_revenue")

# Derive the calendar date from `event_time`, then aggregate per day and type.
silver_with_date = silver_df.withColumn("event_date", F.to_date("event_time"))
gold_df = (
    silver_with_date
    .groupBy("event_date", "event_type")
    .agg(total_events_expr, total_revenue_expr)
)

# Replace whatever is currently stored at `gold_path`.
(
    gold_df.write
    .format("delta")
    .mode("overwrite")
    .save(gold_path)
)

print(" Gold layer created")


 Gold layer created


In [0]:
import pyspark.sql.functions as F

# PATHS
# Declared in this cell so it can run without the earlier cells' variables.
bronze_path = "dbfs:/Volumes/workspace/ecommerce/bronze/events"
silver_path = "dbfs:/Volumes/workspace/ecommerce/silver/events"

# READ BRONZE
bronze = spark.read.format("delta").load(bronze_path)

# CREATE SILVER
# A row counts as a duplicate only if it repeats the same event: same
# session, same timestamp, same event type and same product. Deduplicating on
# (user_session, event_time) alone would collapse distinct events that share
# a session and timestamp (e.g. two different products, or a view and a cart
# action), and which of those rows survives is not deterministic.
silver_dedup_keys = ["user_session", "event_time", "event_type", "product_id"]

silver = (
    bronze
    # Keep only rows with a positive price below 10000; rows with a null
    # price fail both comparisons and are dropped as well.
    .filter(F.col("price") > 0)
    .filter(F.col("price") < 10000)
    .dropDuplicates(silver_dedup_keys)
    # Calendar date of the event, derived from `event_time`.
    .withColumn("event_date", F.to_date("event_time"))
    # Price bucket: < 10 -> "budget", 10 to < 50 -> "mid", otherwise "premium".
    .withColumn(
        "price_tier",
        F.when(F.col("price") < 10, "budget")
         .when(F.col("price") < 50, "mid")
         .otherwise("premium")
    )
)

# WRITE SILVER
# `mergeSchema` lets the overwrite add the new `event_date` and `price_tier`
# columns when a table without them already exists at `silver_path`.
silver.write.format("delta").mode("overwrite").option("mergeSchema", "true").save(silver_path)

print("Silver layer created successfully")


Silver layer created successfully


In [0]:
# Preview the first 10 rows of the silver DataFrame as a text table.
silver.show(n=10)


+-------------------+----------+----------+-------------------+--------------------+---------+------+---------+--------------------+--------------------+----------+----------+
|         event_time|event_type|product_id|        category_id|       category_code|    brand| price|  user_id|        user_session|        ingestion_ts|event_date|price_tier|
+-------------------+----------+----------+-------------------+--------------------+---------+------+---------+--------------------+--------------------+----------+----------+
|2019-10-13 06:26:02|      view|   1005187|2053013555631882655|electronics.smart...|  samsung|771.94|519614794|e17ae736-f34b-4c5...|2026-01-18 17:34:...|2019-10-13|   premium|
|2019-10-13 06:26:10|      view|   1003310|2053013555631882655|electronics.smart...|    apple|696.13|526082794|62dfe73f-eafc-439...|2026-01-18 17:34:...|2019-10-13|   premium|
|2019-10-13 06:27:03|      view|  30901084|2053013554004492609|computers.compone...|  enermax|137.31|514362183|bd846c7d-