In [0]:
# Load the sales CSV with an explicit schema rather than inferSchema.
# Schema inference needs an extra pass over the file and lets column types
# follow whatever the data happens to contain; declaring the types keeps
# them stable from run to run. The types below are the ones inference
# reported for this file (three integer columns and one string column).
df = (spark.read.format("csv")
      .option("header", "true")  # first row holds column names, not data
      .schema("order_id INT, product STRING, quantity INT, price INT")
      .load("/Volumes/workspace/default/sales/sales.csv.txt"))

df.show(5)        # preview at most 5 rows
df.printSchema()  # print column names, types and nullability


+--------+----------+--------+-----+
|order_id|   product|quantity|price|
+--------+----------+--------+-----+
|     101|    Laptop|       2|75000|
|     102|    Mobile|       5|15000|
|     103|Headphones|      10| 2500|
|     104|    Tablet|       3|30000|
+--------+----------+--------+-----+

root
 |-- order_id: integer (nullable = true)
 |-- product: string (nullable = true)
 |-- quantity: integer (nullable = true)
 |-- price: integer (nullable = true)



In [0]:
# Save df as the Delta table workspace.default.sales_delta.
# "overwrite" mode replaces whatever data the table already holds.
(
    df.write
    .format("delta")
    .mode("overwrite")
    .saveAsTable("workspace.default.sales_delta")
)

In [0]:
%sql
-- Show every row of the sales table.
-- Columns are listed explicitly so the result shape does not change
-- silently if columns are later added to the table, and ORDER BY makes
-- the row order deterministic.
SELECT
    order_id,
    product,
    quantity,
    price
FROM workspace.default.sales_delta
ORDER BY order_id;

order_id,product,quantity,price
101,Laptop,2,75000
102,Mobile,5,15000
103,Headphones,10,2500
104,Tablet,3,30000


In [0]:
%sql
-- Number of rows in the sales table, reported as total_orders.
SELECT
    COUNT(*) AS total_orders
FROM workspace.default.sales_delta;

total_orders
4


In [0]:
%sql
-- Total revenue: sum of quantity * price over every row.
-- Both operands are widened to BIGINT before multiplying: an INT * INT
-- product is itself an INT, so a single line total above 2,147,483,647
-- would overflow before SUM ever gets to widen it.
-- COALESCE makes an empty table report 0 instead of NULL.
SELECT
    COALESCE(
        SUM(CAST(quantity AS BIGINT) * CAST(price AS BIGINT)),
        0
    ) AS total_revenue
FROM workspace.default.sales_delta;

total_revenue
340000


In [0]:
%sql
-- Average price per product.
-- GROUP BY alone guarantees no row order, so the result is sorted
-- explicitly (cheapest first, product name as tie-breaker) to keep the
-- output stable between runs.
SELECT
    product,
    AVG(CAST(price AS INT)) AS avg_price
FROM workspace.default.sales_delta
GROUP BY product
ORDER BY avg_price, product;

product,avg_price
Headphones,2500.0
Mobile,15000.0
Tablet,30000.0
Laptop,75000.0
