In [1]:
from pyspark.sql import *
from pyspark.sql.functions import *
from pyspark.sql.window import *

In [2]:
# Obtain (or reuse) the Spark session that every later cell goes through.
spark = (
    SparkSession.builder
    .appName("pyspark_sql")
    .getOrCreate()
)

# Sample product catalogue as plain tuples, in the order:
# (pid, product, category, price, quantity, seller).
_catalog_records = [
    (1, "laptop", "electronics", 55000, 5, "techworld"),
    (2, "smartphone", "electronics", 20000, 20, "mobihub"),
    (3, "jeans", "clothing", 1500, 50, "fashionstore"),
    (4, "tshirt", "clothing", 800, 100, "fashionstore"),
    (5, "bookshelf", "furniture", 7000, 10, "homedecor"),
    (6, "sofa", "furniture", 25000, 3, "homedecor"),
    (7, "novel", "books", 500, 200, "bookworld"),
    (8, "textbook", "books", 1200, 150, "bookworld"),
    (9, "headphones", "electronics", 3000, 40, "soundmax"),
    (10, "jacket", "clothing", 3000, 30, "fashionstore"),
    (11, "dining table", "furniture", 40000, 2, "homedecor"),
    (12, "encyclopedia", "books", 2500, 75, "bookworld"),
]

# Wrap each record in a Row using the same keyword order as the tuple layout,
# so the resulting list is what spark.createDataFrame(data) consumes.
data = [
    Row(pid=pid, product=product, category=category, price=price, quantity=quantity, seller=seller)
    for pid, product, category, price, quantity, seller in _catalog_records
]

In [3]:
# Build the base DataFrame from the Row list; column names and types are
# taken from the Row fields and their Python values.
df = spark.createDataFrame(data)

# Print the rows (up to the default of 20) without truncating long cell values.
df.show(n=20, truncate=False)

+---+------------+-----------+-----+--------+------------+
|pid|product     |category   |price|quantity|seller      |
+---+------------+-----------+-----+--------+------------+
|1  |laptop      |electronics|55000|5       |techworld   |
|2  |smartphone  |electronics|20000|20      |mobihub     |
|3  |jeans       |clothing   |1500 |50      |fashionstore|
|4  |tshirt      |clothing   |800  |100     |fashionstore|
|5  |bookshelf   |furniture  |7000 |10      |homedecor   |
|6  |sofa        |furniture  |25000|3       |homedecor   |
|7  |novel       |books      |500  |200     |bookworld   |
|8  |textbook    |books      |1200 |150     |bookworld   |
|9  |headphones  |electronics|3000 |40      |soundmax    |
|10 |jacket      |clothing   |3000 |30      |fashionstore|
|11 |dining table|furniture  |40000|2       |homedecor   |
|12 |encyclopedia|books      |2500 |75      |bookworld   |
+---+------------+-----------+-----+--------+------------+



In [7]:
# Expose the DataFrame to SQL under two names.
# Global temp view: queried with the `global_temp.` prefix
# (i.e. global_temp.products_global).
df.createOrReplaceGlobalTempView("products_global")
# Regular temp view: queried by its bare name, products_local.
df.createOrReplaceTempView("products_local")

In [9]:
# Read every row back through the products_local temp view.
all_products_df = spark.sql("select * from products_local")
all_products_df.show()

+---+------------+-----------+-----+--------+------------+
|pid|     product|   category|price|quantity|      seller|
+---+------------+-----------+-----+--------+------------+
|  1|      laptop|electronics|55000|       5|   techworld|
|  2|  smartphone|electronics|20000|      20|     mobihub|
|  3|       jeans|   clothing| 1500|      50|fashionstore|
|  4|      tshirt|   clothing|  800|     100|fashionstore|
|  5|   bookshelf|  furniture| 7000|      10|   homedecor|
|  6|        sofa|  furniture|25000|       3|   homedecor|
|  7|       novel|      books|  500|     200|   bookworld|
|  8|    textbook|      books| 1200|     150|   bookworld|
|  9|  headphones|electronics| 3000|      40|    soundmax|
| 10|      jacket|   clothing| 3000|      30|fashionstore|
| 11|dining table|  furniture|40000|       2|   homedecor|
| 12|encyclopedia|      books| 2500|      75|   bookworld|
+---+------------+-----------+-----+--------+------------+



In [11]:
# Keep only the products whose category is 'electronics'.
electronics_df = spark.sql("select * from products_local where category='electronics'")
electronics_df.show()

+---+----------+-----------+-----+--------+---------+
|pid|   product|   category|price|quantity|   seller|
+---+----------+-----------+-----+--------+---------+
|  1|    laptop|electronics|55000|       5|techworld|
|  2|smartphone|electronics|20000|      20|  mobihub|
|  9|headphones|electronics| 3000|      40| soundmax|
+---+----------+-----------+-----+--------+---------+



In [13]:
# Products priced strictly above 10000.
expensive_df = spark.sql("select * from products_local where price>10000")
expensive_df.show()

+---+------------+-----------+-----+--------+---------+
|pid|     product|   category|price|quantity|   seller|
+---+------------+-----------+-----+--------+---------+
|  1|      laptop|electronics|55000|       5|techworld|
|  2|  smartphone|electronics|20000|      20|  mobihub|
|  6|        sofa|  furniture|25000|       3|homedecor|
| 11|dining table|  furniture|40000|       2|homedecor|
+---+------------+-----------+-----+--------+---------+



In [15]:
# Most expensive products first.
# `pid` is a secondary sort key: two products share price 3000 (headphones
# and jacket), and sorting on price alone leaves their relative order
# unspecified, so the displayed table could differ between runs.
spark.sql("select * from products_local order by price desc, pid").show()

+---+------------+-----------+-----+--------+------------+
|pid|     product|   category|price|quantity|      seller|
+---+------------+-----------+-----+--------+------------+
|  1|      laptop|electronics|55000|       5|   techworld|
| 11|dining table|  furniture|40000|       2|   homedecor|
|  6|        sofa|  furniture|25000|       3|   homedecor|
|  2|  smartphone|electronics|20000|      20|     mobihub|
|  5|   bookshelf|  furniture| 7000|      10|   homedecor|
|  9|  headphones|electronics| 3000|      40|    soundmax|
| 10|      jacket|   clothing| 3000|      30|fashionstore|
| 12|encyclopedia|      books| 2500|      75|   bookworld|
|  3|       jeans|   clothing| 1500|      50|fashionstore|
|  8|    textbook|      books| 1200|     150|   bookworld|
|  4|      tshirt|   clothing|  800|     100|fashionstore|
|  7|       novel|      books|  500|     200|   bookworld|
+---+------------+-----------+-----+--------+------------+



In [20]:
# Number of products in each category.
category_counts_df = spark.sql(
    "select category, count(*) as product_count from products_local group by category"
)
category_counts_df.show()

+-----------+-------------+
|   category|product_count|
+-----------+-------------+
|  furniture|            3|
|electronics|            3|
|   clothing|            3|
|      books|            3|
+-----------+-------------+



In [26]:
# Mean price of the products in each category.
category_avg_price_df = spark.sql(
    "select category, avg(price) as avg_price from products_local group by category"
)
category_avg_price_df.show()

+-----------+------------------+
|   category|         avg_price|
+-----------+------------------+
|  furniture|           24000.0|
|electronics|           26000.0|
|   clothing|1766.6666666666667|
|      books|            1400.0|
+-----------+------------------+



In [30]:
# How many products each seller lists.
seller_counts_df = spark.sql(
    "select seller, count(*) as total_products from products_local group by seller"
)
seller_counts_df.show()

+------------+--------------+
|      seller|total_products|
+------------+--------------+
|   homedecor|             3|
|     mobihub|             1|
|   techworld|             1|
|fashionstore|             3|
|   bookworld|             3|
|    soundmax|             1|
+------------+--------------+



In [33]:
# Query the global temp view: products with quantity strictly greater than 50.
high_stock_df = spark.sql("select * from global_temp.products_global where quantity > 50")
high_stock_df.show()

+---+------------+--------+-----+--------+------------+
|pid|     product|category|price|quantity|      seller|
+---+------------+--------+-----+--------+------------+
|  4|      tshirt|clothing|  800|     100|fashionstore|
|  7|       novel|   books|  500|     200|   bookworld|
|  8|    textbook|   books| 1200|     150|   bookworld|
| 12|encyclopedia|   books| 2500|      75|   bookworld|
+---+------------+--------+-----+--------+------------+



In [40]:
# Per-category total of price * quantity, reported as total_revenue.
category_revenue_df = spark.sql(
    "select category, sum(price * quantity) as total_revenue from global_temp.products_global group by category"
)
category_revenue_df.show()

+-----------+-------------+
|   category|total_revenue|
+-----------+-------------+
|  furniture|       225000|
|electronics|       795000|
|   clothing|       245000|
|      books|       467500|
+-----------+-------------+



In [52]:
# Label each product with a price tier:
#   price > 20000            -> 'premium'
#   5000 <= price <= 20000   -> 'midrange' (BETWEEN includes both bounds)
#   anything else            -> 'budget'
price_tier_query = """select pid, product, category, price,case
   when price > 20000 then 'premium'
   when price between 5000 and 20000 then 'midrange'
     else 'budget' end as price_category from global_temp.products_global"""
price_tier_df = spark.sql(price_tier_query)
price_tier_df.show()

+---+------------+-----------+-----+--------------+
|pid|     product|   category|price|price_category|
+---+------------+-----------+-----+--------------+
|  1|      laptop|electronics|55000|       premium|
|  2|  smartphone|electronics|20000|      midrange|
|  3|       jeans|   clothing| 1500|        budget|
|  4|      tshirt|   clothing|  800|        budget|
|  5|   bookshelf|  furniture| 7000|      midrange|
|  6|        sofa|  furniture|25000|       premium|
|  7|       novel|      books|  500|        budget|
|  8|    textbook|      books| 1200|        budget|
|  9|  headphones|electronics| 3000|        budget|
| 10|      jacket|   clothing| 3000|        budget|
| 11|dining table|  furniture|40000|       premium|
| 12|encyclopedia|      books| 2500|        budget|
+---+------------+-----------+-----+--------------+



In [58]:
# Mean product price per seller, highest average first.
seller_avg_price_df = spark.sql(
    "select seller, avg(price) as avg_price from global_temp.products_global group by seller order by avg_price desc"
)
seller_avg_price_df.show()

+------------+------------------+
|      seller|         avg_price|
+------------+------------------+
|   techworld|           55000.0|
|   homedecor|           24000.0|
|     mobihub|           20000.0|
|    soundmax|            3000.0|
|fashionstore|1766.6666666666667|
|   bookworld|            1400.0|
+------------+------------------+



In [68]:
# Correlated subquery: keep a product only when its price exceeds the
# average price computed over the rows of its own category.
above_avg_query = """select * from global_temp.products_global p  where price > (select avg(price) from global_temp.products_global where category = p.category)"""
above_avg_df = spark.sql(above_avg_query)
above_avg_df.show()

+---+------------+-----------+-----+--------+------------+
|pid|     product|   category|price|quantity|      seller|
+---+------------+-----------+-----+--------+------------+
|  6|        sofa|  furniture|25000|       3|   homedecor|
|  1|      laptop|electronics|55000|       5|   techworld|
| 12|encyclopedia|      books| 2500|      75|   bookworld|
| 11|dining table|  furniture|40000|       2|   homedecor|
| 10|      jacket|   clothing| 3000|      30|fashionstore|
+---+------------+-----------+-----+--------+------------+



In [71]:
# Products in the 'books' category, read from the global temp view.
books_df = spark.sql("select * from global_temp.products_global where category='books'")
books_df.show()

+---+------------+--------+-----+--------+---------+
|pid|     product|category|price|quantity|   seller|
+---+------------+--------+-----+--------+---------+
|  7|       novel|   books|  500|     200|bookworld|
|  8|    textbook|   books| 1200|     150|bookworld|
| 12|encyclopedia|   books| 2500|      75|bookworld|
+---+------------+--------+-----+--------+---------+



In [81]:
# Window over each category, ordered from the highest price downwards.
window_spec = Window.partitionBy("category").orderBy(df["price"].desc())

# rank() numbers the rows inside each category window, starting at 1 for the
# highest price; rows with equal prices would share a rank.
ranked_df = df.withColumn("price_rank", rank().over(window_spec))
ranked_df.show()

+---+------------+-----------+-----+--------+------------+----------+
|pid|     product|   category|price|quantity|      seller|price_rank|
+---+------------+-----------+-----+--------+------------+----------+
| 12|encyclopedia|      books| 2500|      75|   bookworld|         1|
|  8|    textbook|      books| 1200|     150|   bookworld|         2|
|  7|       novel|      books|  500|     200|   bookworld|         3|
| 10|      jacket|   clothing| 3000|      30|fashionstore|         1|
|  3|       jeans|   clothing| 1500|      50|fashionstore|         2|
|  4|      tshirt|   clothing|  800|     100|fashionstore|         3|
|  1|      laptop|electronics|55000|       5|   techworld|         1|
|  2|  smartphone|electronics|20000|      20|     mobihub|         2|
|  9|  headphones|electronics| 3000|      40|    soundmax|         3|
| 11|dining table|  furniture|40000|       2|   homedecor|         1|
|  6|        sofa|  furniture|25000|       3|   homedecor|         2|
|  5|   bookshelf|  

In [93]:
# Define (or redefine) the premium_products temp view: rows of the global
# view whose price is strictly above 20000.
create_premium_view = """create or replace temp view premium_products as select * from global_temp.products_global where price>20000"""
spark.sql(create_premium_view)

# Read the new view back to confirm its contents.
premium_df = spark.sql("select * from premium_products")
premium_df.show()

+---+------------+-----------+-----+--------+---------+
|pid|     product|   category|price|quantity|   seller|
+---+------------+-----------+-----+--------+---------+
|  1|      laptop|electronics|55000|       5|techworld|
|  6|        sofa|  furniture|25000|       3|homedecor|
| 11|dining table|  furniture|40000|       2|homedecor|
+---+------------+-----------+-----+--------+---------+



In [91]:
# Define (or redefine) the bulk_products temp view: rows of the global view
# with quantity of at least 50 (the bound is inclusive).
create_bulk_view = """create or replace temp view bulk_products as select * from global_temp.products_global where quantity>=50"""
spark.sql(create_bulk_view)

# Read the new view back to confirm its contents.
bulk_df = spark.sql("select * from bulk_products")
bulk_df.show()

+---+------------+--------+-----+--------+------------+
|pid|     product|category|price|quantity|      seller|
+---+------------+--------+-----+--------+------------+
|  3|       jeans|clothing| 1500|      50|fashionstore|
|  4|      tshirt|clothing|  800|     100|fashionstore|
|  7|       novel|   books|  500|     200|   bookworld|
|  8|    textbook|   books| 1200|     150|   bookworld|
| 12|encyclopedia|   books| 2500|      75|   bookworld|
+---+------------+--------+-----+--------+------------+

