In [1]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import when

# Session shared by every cell below; getOrCreate() reuses an already-active
# session when there is one instead of starting a second.
spark = (
    SparkSession.builder
    .appName("CustomerOrdersJobSQL")
    .getOrCreate()
)

In [2]:
# Dev-mode locations for local testing. These assignments are active as
# written (nothing in this cell is commented out).
customers_path = "/opt/spark-apps/input/customers.csv"
orders_path = "/opt/spark-apps/input/orders.json"
output_path_csv = "/tmp/orders_enriched_csv"
output_path_parquet = "/tmp/orders_enriched_parquet"

# Customers: CSV whose first line is the header. No schema is supplied or
# inferred, so every column (including customer_id and age) is read as a string.
df_customers = spark.read.csv(customers_path, header=True)

# Orders: JSON source; column types come from Spark's JSON schema inference.
df_orders = spark.read.format("json").load(orders_path)

In [3]:
# Expose both DataFrames to Spark SQL under the table names the queries use.
# createOrReplaceTempView makes this cell safe to re-run.
for _view_name, _frame in (("customers", df_customers), ("orders", df_orders)):
    _frame.createOrReplaceTempView(_view_name)

In [4]:
# Show customers whose country is the USA.
#
# The comparison normalises case and surrounding whitespace, so values such as
# 'usa' or ' USA ' match as well as the exact literal 'USA'. Rows with a NULL
# country are still excluded.
#
# NOTE(review): the recorded run of the exact-match version (country = 'USA')
# returned no rows. Confirm how country is encoded in customers.csv -- a value
# like 'US' or 'United States' would still not match this filter.
spark.sql("""
    SELECT *
    FROM customers
    WHERE upper(trim(country)) = 'USA'
""").show()

+-----------+----+-----+---+-------+
|customer_id|name|email|age|country|
+-----------+----+-----+---+-------+
+-----------+----+-----+---+-------+



In [5]:
# Preview of orders joined to their customer, with a value tier per order:
#   amount >= 200 -> 'High Value', amount >= 100 -> 'Medium Value',
#   anything else -> 'Low Value'.
# A NULL amount satisfies neither comparison, so it falls through to ELSE and
# is labelled 'Low Value'. The join is an inner join: orders without a matching
# customer row are dropped.
# The same query text is repeated in the cell that persists the result; keep
# the two in sync when editing.
order_preview_sql = """
    SELECT o.order_id, c.name, o.amount,
           CASE 
               WHEN o.amount >= 200 THEN 'High Value'
               WHEN o.amount >= 100 THEN 'Medium Value'
               ELSE 'Low Value'
           END AS order_type
    FROM orders o
    JOIN customers c
      ON o.customer_id = c.customer_id
"""
spark.sql(order_preview_sql).show()

+--------+----------+------+------------+
|order_id|      name|amount|  order_type|
+--------+----------+------+------------+
|    5001|  John Doe| 250.5|  High Value|
|    5002|Jane Smith| 145.0|Medium Value|
|    5003|Rita Mehra|389.99|  High Value|
|    5004| Wei Zhang| 89.99|   Low Value|
+--------+----------+------+------------+



In [6]:
# Enriched orders: each order joined (inner join on customer_id) to its
# customer's name, plus an order_type tier derived from amount
# (>= 200 'High Value', >= 100 'Medium Value', otherwise 'Low Value';
# a NULL amount therefore lands in 'Low Value').
df_enriched_op = spark.sql("""
    SELECT o.order_id, c.name, o.amount,
           CASE 
               WHEN o.amount >= 200 THEN 'High Value'
               WHEN o.amount >= 100 THEN 'Medium Value'
               ELSE 'Low Value'
           END AS order_type
    FROM orders o
    JOIN customers c
      ON o.customer_id = c.customer_id
""")

# Persist the result twice, replacing whatever a previous run left behind:
# once as CSV with a header row, once as Parquet. Each write is a separate
# Spark action on the same DataFrame.
df_enriched_op.write.csv(output_path_csv, mode="overwrite", header=True)
df_enriched_op.write.parquet(output_path_parquet, mode="overwrite")