In [0]:
emp_data = [(1,'manish',50000,'IT','m'),
(2,'vikash',60000,'sales','m'),
(3,'raushan',70000,'marketing','m'),
(4,'mukesh',80000,'IT','m'),
(5,'priti',90000,'sales','f'),
(6,'nikita',45000,'marketing','f'),
(7,'ragini',55000,'marketing','f'),
(8,'rashi',100000,'IT','f'),
(9,'aditya',65000,'IT','m'),
(10,'rahul',50000,'marketing','m'),
(11,'rakhi',50000,'IT','f'),
(12,'akhilesh',90000,'sales','m')]

schema = ["id","name","salary","dept","gender"]

emp = spark.createDataFrame(data = emp_data, schema = schema)

emp.show()

+---+--------+------+---------+------+
| id|    name|salary|     dept|gender|
+---+--------+------+---------+------+
|  1|  manish| 50000|       IT|     m|
|  2|  vikash| 60000|    sales|     m|
|  3| raushan| 70000|marketing|     m|
|  4|  mukesh| 80000|       IT|     m|
|  5|   priti| 90000|    sales|     f|
|  6|  nikita| 45000|marketing|     f|
|  7|  ragini| 55000|marketing|     f|
|  8|   rashi|100000|       IT|     f|
|  9|  aditya| 65000|       IT|     m|
| 10|   rahul| 50000|marketing|     m|
| 11|   rakhi| 50000|       IT|     f|
| 12|akhilesh| 90000|    sales|     m|
+---+--------+------+---------+------+



In [0]:
from pyspark.sql.window import Window
from pyspark.sql.functions import*

window = Window.partitionBy("dept").orderBy("salary")

emp.withColumn("row_number",row_number().over(window))\
      .withColumn("rank",rank().over(window))\
      .withColumn("dense_rank",dense_rank().over(window))\
      .show()
      

+---+--------+------+---------+------+----------+----+----------+
| id|    name|salary|     dept|gender|row_number|rank|dense_rank|
+---+--------+------+---------+------+----------+----+----------+
|  1|  manish| 50000|       IT|     m|         1|   1|         1|
| 11|   rakhi| 50000|       IT|     f|         2|   1|         1|
|  9|  aditya| 65000|       IT|     m|         3|   3|         2|
|  4|  mukesh| 80000|       IT|     m|         4|   4|         3|
|  8|   rashi|100000|       IT|     f|         5|   5|         4|
|  6|  nikita| 45000|marketing|     f|         1|   1|         1|
| 10|   rahul| 50000|marketing|     m|         2|   2|         2|
|  7|  ragini| 55000|marketing|     f|         3|   3|         3|
|  3| raushan| 70000|marketing|     m|         4|   4|         4|
|  2|  vikash| 60000|    sales|     m|         1|   1|         1|
|  5|   priti| 90000|    sales|     f|         2|   2|         2|
| 12|akhilesh| 90000|    sales|     m|         3|   2|         2|
+---+-----

In [0]:

window = Window.partitionBy("dept","gender").orderBy("salary")

emp.withColumn("row_number",row_number().over(window))\
      .withColumn("rank",rank().over(window))\
      .withColumn("dense_rank",dense_rank().over(window))\
      .show()

+---+--------+------+---------+------+----------+----+----------+
| id|    name|salary|     dept|gender|row_number|rank|dense_rank|
+---+--------+------+---------+------+----------+----+----------+
| 11|   rakhi| 50000|       IT|     f|         1|   1|         1|
|  8|   rashi|100000|       IT|     f|         2|   2|         2|
|  1|  manish| 50000|       IT|     m|         1|   1|         1|
|  9|  aditya| 65000|       IT|     m|         2|   2|         2|
|  4|  mukesh| 80000|       IT|     m|         3|   3|         3|
|  6|  nikita| 45000|marketing|     f|         1|   1|         1|
|  7|  ragini| 55000|marketing|     f|         2|   2|         2|
| 10|   rahul| 50000|marketing|     m|         1|   1|         1|
|  3| raushan| 70000|marketing|     m|         2|   2|         2|
|  5|   priti| 90000|    sales|     f|         1|   1|         1|
|  2|  vikash| 60000|    sales|     m|         1|   1|         1|
| 12|akhilesh| 90000|    sales|     m|         2|   2|         2|
+---+-----

#Find employees with first and second highest salary

In [0]:
window = Window.partitionBy("dept").orderBy(desc("salary"))

emp.withColumn("row_number",row_number().over(window))\
      .withColumn("rank",rank().over(window))\
      .withColumn("dense_rank",dense_rank().over(window))\
      .filter(col("dense_rank")<=2)\
      .select("name","salary","dept","dense_rank")\
      .show()

+--------+------+---------+----------+
|    name|salary|     dept|dense_rank|
+--------+------+---------+----------+
|   rashi|100000|       IT|         1|
|  mukesh| 80000|       IT|         2|
| raushan| 70000|marketing|         1|
|  ragini| 55000|marketing|         2|
|   priti| 90000|    sales|         1|
|akhilesh| 90000|    sales|         1|
|  vikash| 60000|    sales|         2|
+--------+------+---------+----------+



#Lead and Lag

In [0]:

product_data = [
(1,"iphone","01-01-2023",1500000),
(2,"samsung","01-01-2023",1100000),
(3,"oneplus","01-01-2023",1100000),
(1,"iphone","01-02-2023",1300000),
(2,"samsung","01-02-2023",1120000),
(3,"oneplus","01-02-2023",1120000),
(1,"iphone","01-03-2023",1600000),
(2,"samsung","01-03-2023",1080000),
(3,"oneplus","01-03-2023",1160000),
(1,"iphone","01-04-2023",1700000),
(2,"samsung","01-04-2023",1800000),
(3,"oneplus","01-04-2023",1170000),
(1,"iphone","01-05-2023",1200000),
(2,"samsung","01-05-2023",980000),
(3,"oneplus","01-05-2023",1175000),
(1,"iphone","01-06-2023",1100000),
(2,"samsung","01-06-2023",1100000),
(3,"oneplus","01-06-2023",1200000)
]

sales_schema = ['product_id','product_name','sales_date',"sales"]

sales = spark.createDataFrame(data = product_data, schema = sales_schema)

sales.show()

+----------+------------+----------+-------+
|product_id|product_name|sales_date|  sales|
+----------+------------+----------+-------+
|         1|      iphone|01-01-2023|1500000|
|         2|     samsung|01-01-2023|1100000|
|         3|     oneplus|01-01-2023|1100000|
|         1|      iphone|01-02-2023|1300000|
|         2|     samsung|01-02-2023|1120000|
|         3|     oneplus|01-02-2023|1120000|
|         1|      iphone|01-03-2023|1600000|
|         2|     samsung|01-03-2023|1080000|
|         3|     oneplus|01-03-2023|1160000|
|         1|      iphone|01-04-2023|1700000|
|         2|     samsung|01-04-2023|1800000|
|         3|     oneplus|01-04-2023|1170000|
|         1|      iphone|01-05-2023|1200000|
|         2|     samsung|01-05-2023| 980000|
|         3|     oneplus|01-05-2023|1175000|
|         1|      iphone|01-06-2023|1100000|
|         2|     samsung|01-06-2023|1100000|
|         3|     oneplus|01-06-2023|1200000|
+----------+------------+----------+-------+



#what is the %age of sales each month based on last 6 months

In [0]:
from pyspark.sql.window import Window
from pyspark.sql.functions import*

window = Window.partitionBy("product_id")

sales.withColumn("total_sale",sum(sales).over(window)).show()

Unexpected exception formatting exception. Falling back to standard exception


Traceback (most recent call last):
  File "/databricks/python/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 3378, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<command-3209656271287342>", line 6, in <module>
    sales.withColumn("total_sale",sum(sales).over(window)).show()
  File "/databricks/spark/python/pyspark/sql/utils.py", line 164, in wrapped
    return f(*args, **kwargs)
  File "/databricks/spark/python/pyspark/sql/functions.py", line 602, in sum
    return _invoke_function_over_columns("sum", col)
  File "/databricks/spark/python/pyspark/sql/functions.py", line 98, in _invoke_function_over_columns
    return _invoke_function(name, *(_to_java_column(col) for col in cols))
  File "/databricks/spark/python/pyspark/sql/functions.py", line 98, in <genexpr>
    return _invoke_function(name, *(_to_java_column(col) for col in cols))
  File "/databricks/spark/python/pyspark/sql/column.py", line 66, in _to_java_column
    raise TypeError



In [0]:
window = Window.partitionBy("product_id").orderBy("sales_date")

df = sales.withColumn("previous_month_sales",lag(col("sales"),1).over(window))

df.show()

+----------+------------+----------+-------+--------------------+
|product_id|product_name|sales_date|  sales|previous_month_sales|
+----------+------------+----------+-------+--------------------+
|         1|      iphone|01-01-2023|1500000|                null|
|         1|      iphone|01-02-2023|1300000|             1500000|
|         1|      iphone|01-03-2023|1600000|             1300000|
|         1|      iphone|01-04-2023|1700000|             1600000|
|         1|      iphone|01-05-2023|1200000|             1700000|
|         1|      iphone|01-06-2023|1100000|             1200000|
|         2|     samsung|01-01-2023|1100000|                null|
|         2|     samsung|01-02-2023|1120000|             1100000|
|         2|     samsung|01-03-2023|1080000|             1120000|
|         2|     samsung|01-04-2023|1800000|             1080000|
|         2|     samsung|01-05-2023| 980000|             1800000|
|         2|     samsung|01-06-2023|1100000|              980000|
|         

In [0]:
df.withColumn("profit",
              round((col("sales") - col("previous_month_sales"))
                    *100/col("sales"),2))\
                .show()

+----------+------------+----------+-------+--------------------+------+
|product_id|product_name|sales_date|  sales|previous_month_sales|profit|
+----------+------------+----------+-------+--------------------+------+
|         1|      iphone|01-01-2023|1500000|                null|  null|
|         1|      iphone|01-02-2023|1300000|             1500000|-15.38|
|         1|      iphone|01-03-2023|1600000|             1300000| 18.75|
|         1|      iphone|01-04-2023|1700000|             1600000|  5.88|
|         1|      iphone|01-05-2023|1200000|             1700000|-41.67|
|         1|      iphone|01-06-2023|1100000|             1200000| -9.09|
|         2|     samsung|01-01-2023|1100000|                null|  null|
|         2|     samsung|01-02-2023|1120000|             1100000|  1.79|
|         2|     samsung|01-03-2023|1080000|             1120000|  -3.7|
|         2|     samsung|01-04-2023|1800000|             1080000|  40.0|
|         2|     samsung|01-05-2023| 980000|       

In [0]:
sales.createOrReplaceTempView("sales")

In [0]:
spark.sql("""
          
          with cte as (select *,
          lag(sales,1) over(partition by product_id order by sales_date) as pre_month_sale
          from sales)

          select *,round((( sales - pre_month_sale)*100)/sales,2) as profit_loss_gain
          from cte;
    
          """).show()

+----------+------------+----------+-------+--------------+----------------+
|product_id|product_name|sales_date|  sales|pre_month_sale|profit_loss_gain|
+----------+------------+----------+-------+--------------+----------------+
|         1|      iphone|01-01-2023|1500000|          null|            null|
|         1|      iphone|01-02-2023|1300000|       1500000|          -15.38|
|         1|      iphone|01-03-2023|1600000|       1300000|           18.75|
|         1|      iphone|01-04-2023|1700000|       1600000|            5.88|
|         1|      iphone|01-05-2023|1200000|       1700000|          -41.67|
|         1|      iphone|01-06-2023|1100000|       1200000|           -9.09|
|         2|     samsung|01-01-2023|1100000|          null|            null|
|         2|     samsung|01-02-2023|1120000|       1100000|            1.79|
|         2|     samsung|01-03-2023|1080000|       1120000|            -3.7|
|         2|     samsung|01-04-2023|1800000|       1080000|            40.0|

#Range and Row between

Find the difference in sales of each product from their first month sales to the lastes sales

In [0]:
product_data = [
(2,"samsung","01-01-1995",11000),
(1,"iphone","01-02-2023",1300000),
(2,"samsung","01-02-2023",1120000),
(3,"oneplus","01-02-2023",1120000),
(1,"iphone","01-03-2023",1600000),
(2,"samsung","01-03-2023",1080000),
(3,"oneplus","01-03-2023",1160000),
(1,"iphone","01-01-2006",15000),
(1,"iphone","01-04-2023",1700000),
(2,"samsung","01-04-2023",1800000),
(3,"oneplus","01-04-2023",1170000),
(1,"iphone","01-05-2023",1200000),
(2,"samsung","01-05-2023",980000),
(3,"oneplus","01-05-2023",1175000),
(1,"iphone","01-06-2023",1100000),
(3,"oneplus","01-01-2010",23000),
(2,"samsung","01-06-2023",1100000),
(3,"oneplus","01-06-2023",1200000)
]

product_schema=["product_id","product_name","sales_date","sales"]

product= spark.createDataFrame(data=product_data,schema=product_schema)

product.show()

+----------+------------+----------+-------+
|product_id|product_name|sales_date|  sales|
+----------+------------+----------+-------+
|         2|     samsung|01-01-1995|  11000|
|         1|      iphone|01-02-2023|1300000|
|         2|     samsung|01-02-2023|1120000|
|         3|     oneplus|01-02-2023|1120000|
|         1|      iphone|01-03-2023|1600000|
|         2|     samsung|01-03-2023|1080000|
|         3|     oneplus|01-03-2023|1160000|
|         1|      iphone|01-01-2006|  15000|
|         1|      iphone|01-04-2023|1700000|
|         2|     samsung|01-04-2023|1800000|
|         3|     oneplus|01-04-2023|1170000|
|         1|      iphone|01-05-2023|1200000|
|         2|     samsung|01-05-2023| 980000|
|         3|     oneplus|01-05-2023|1175000|
|         1|      iphone|01-06-2023|1100000|
|         3|     oneplus|01-01-2010|  23000|
|         2|     samsung|01-06-2023|1100000|
|         3|     oneplus|01-06-2023|1200000|
+----------+------------+----------+-------+



In [0]:
window = Window.partitionBy("product_id").orderBy("sales_date")

product.withColumn("first_month_sale",first("sales").over(window))\
     .withColumn("last_month_sales",last("sales").over(window)).show()

+----------+------------+----------+-------+----------------+----------------+
|product_id|product_name|sales_date|  sales|first_month_sale|last_month_sales|
+----------+------------+----------+-------+----------------+----------------+
|         1|      iphone|01-01-2006|  15000|           15000|           15000|
|         1|      iphone|01-02-2023|1300000|           15000|         1300000|
|         1|      iphone|01-03-2023|1600000|           15000|         1600000|
|         1|      iphone|01-04-2023|1700000|           15000|         1700000|
|         1|      iphone|01-05-2023|1200000|           15000|         1200000|
|         1|      iphone|01-06-2023|1100000|           15000|         1100000|
|         2|     samsung|01-01-1995|  11000|           11000|           11000|
|         2|     samsung|01-02-2023|1120000|           11000|         1120000|
|         2|     samsung|01-03-2023|1080000|           11000|         1080000|
|         2|     samsung|01-04-2023|1800000|        

Here we can see the last value is not cming as per requiremnet since default is unbounded precending and current row

In [0]:
#We can also solve this by groupby and then find min and max

window = Window.partitionBy("product_id").orderBy("sales_date")\
               .rowsBetween(Window.unboundedPreceding,Window.unboundedFollowing)

product.withColumn("first_month_sale",first("sales").over(window))\
     .withColumn("last_month_sales",last("sales").over(window)).show()

+----------+------------+----------+-------+----------------+----------------+
|product_id|product_name|sales_date|  sales|first_month_sale|last_month_sales|
+----------+------------+----------+-------+----------------+----------------+
|         1|      iphone|01-01-2006|  15000|           15000|         1100000|
|         1|      iphone|01-02-2023|1300000|           15000|         1100000|
|         1|      iphone|01-03-2023|1600000|           15000|         1100000|
|         1|      iphone|01-04-2023|1700000|           15000|         1100000|
|         1|      iphone|01-05-2023|1200000|           15000|         1100000|
|         1|      iphone|01-06-2023|1100000|           15000|         1100000|
|         2|     samsung|01-01-1995|  11000|           11000|         1100000|
|         2|     samsung|01-02-2023|1120000|           11000|         1100000|
|         2|     samsung|01-03-2023|1080000|           11000|         1100000|
|         2|     samsung|01-04-2023|1800000|        

Find all the employees who have not completed 8 hours in office

In [0]:
emp_data = [(1,"manish","11-07-2023","10:20"),
        (1,"manish","11-07-2023","11:20"),
        (2,"rajesh","11-07-2023","11:20"),
        (1,"manish","11-07-2023","11:50"),
        (2,"rajesh","11-07-2023","13:20"),
        (1,"manish","11-07-2023","19:20"),
        (2,"rajesh","11-07-2023","17:20"),
        (1,"manish","12-07-2023","10:32"),
        (1,"manish","12-07-2023","12:20"),
        (3,"vikash","12-07-2023","09:12"),
        (1,"manish","12-07-2023","16:23"),
        (3,"vikash","12-07-2023","18:08")]

emp_schema = ["id", "name", "date", "time"]
emp_df = spark.createDataFrame(data=emp_data, schema=emp_schema)

emp_df.show()


+---+------+----------+-----+
| id|  name|      date| time|
+---+------+----------+-----+
|  1|manish|11-07-2023|10:20|
|  1|manish|11-07-2023|11:20|
|  2|rajesh|11-07-2023|11:20|
|  1|manish|11-07-2023|11:50|
|  2|rajesh|11-07-2023|13:20|
|  1|manish|11-07-2023|19:20|
|  2|rajesh|11-07-2023|17:20|
|  1|manish|12-07-2023|10:32|
|  1|manish|12-07-2023|12:20|
|  3|vikash|12-07-2023|09:12|
|  1|manish|12-07-2023|16:23|
|  3|vikash|12-07-2023|18:08|
+---+------+----------+-----+



In [0]:
window = Window.partitionBy("id","date").orderBy("date")\
               .rowsBetween(Window.unboundedPreceding,Window.unboundedFollowing)

emp_df.withColumn("login",first("time").over(window))\
     .withColumn("logout",last("time").over(window))\
     .withColumn("login_time",to_timestamp("login","yyyy-MM-dd HH:mm:ss"))\
     .withColumn("logout_time",to_timestamp("logout","yyyy-MM-dd HH:mm:ss"))\
     .withColumn("time_in_office", col("logout_time")-col("login_time")).show()

+---+------+----------+-----+-----+------+----------+-----------+--------------+
| id|  name|      date| time|login|logout|login_time|logout_time|time_in_office|
+---+------+----------+-----+-----+------+----------+-----------+--------------+
|  1|manish|11-07-2023|10:20|10:20| 19:20|      null|       null|          null|
|  1|manish|11-07-2023|11:20|10:20| 19:20|      null|       null|          null|
|  1|manish|11-07-2023|11:50|10:20| 19:20|      null|       null|          null|
|  1|manish|11-07-2023|19:20|10:20| 19:20|      null|       null|          null|
|  1|manish|12-07-2023|10:32|10:32| 16:23|      null|       null|          null|
|  1|manish|12-07-2023|12:20|10:32| 16:23|      null|       null|          null|
|  1|manish|12-07-2023|16:23|10:32| 16:23|      null|       null|          null|
|  2|rajesh|11-07-2023|11:20|11:20| 17:20|      null|       null|          null|
|  2|rajesh|11-07-2023|13:20|11:20| 17:20|      null|       null|          null|
|  2|rajesh|11-07-2023|17:20

In [0]:
emp_df.printSchema()

root
 |-- id: long (nullable = true)
 |-- name: string (nullable = true)
 |-- date: string (nullable = true)
 |-- time: string (nullable = true)



In [0]:
emp_df = emp_df.withColumn("timestamp", to_timestamp(concat(col("date"), lit(" "), col("time")), "dd-MM-yyyy HH:mm"))

In [0]:
emp_df.printSchema()

root
 |-- id: long (nullable = true)
 |-- name: string (nullable = true)
 |-- date: string (nullable = true)
 |-- time: string (nullable = true)
 |-- timestamp: timestamp (nullable = true)



In [0]:
emp_df.show()

+---+------+----------+-----+-------------------+
| id|  name|      date| time|          timestamp|
+---+------+----------+-----+-------------------+
|  1|manish|11-07-2023|10:20|2023-07-11 10:20:00|
|  1|manish|11-07-2023|11:20|2023-07-11 11:20:00|
|  2|rajesh|11-07-2023|11:20|2023-07-11 11:20:00|
|  1|manish|11-07-2023|11:50|2023-07-11 11:50:00|
|  2|rajesh|11-07-2023|13:20|2023-07-11 13:20:00|
|  1|manish|11-07-2023|19:20|2023-07-11 19:20:00|
|  2|rajesh|11-07-2023|17:20|2023-07-11 17:20:00|
|  1|manish|12-07-2023|10:32|2023-07-12 10:32:00|
|  1|manish|12-07-2023|12:20|2023-07-12 12:20:00|
|  3|vikash|12-07-2023|09:12|2023-07-12 09:12:00|
|  1|manish|12-07-2023|16:23|2023-07-12 16:23:00|
|  3|vikash|12-07-2023|18:08|2023-07-12 18:08:00|
+---+------+----------+-----+-------------------+



In [0]:
window = Window.partitionBy("id","date").orderBy("date")\
               .rowsBetween(Window.unboundedPreceding,Window.unboundedFollowing)

emp_timestamp = emp_df.withColumn("login",first("timestamp").over(window))\
     .withColumn("logout",last("timestamp").over(window))\
     .withColumn("time_in_office", unix_timestamp(col("logout")) - unix_timestamp(col("login")))

emp_timestamp.show()

+---+------+----------+-----+-------------------+-------------------+-------------------+--------------+
| id|  name|      date| time|          timestamp|              login|             logout|time_in_office|
+---+------+----------+-----+-------------------+-------------------+-------------------+--------------+
|  1|manish|11-07-2023|10:20|2023-07-11 10:20:00|2023-07-11 10:20:00|2023-07-11 19:20:00|         32400|
|  1|manish|11-07-2023|11:20|2023-07-11 11:20:00|2023-07-11 10:20:00|2023-07-11 19:20:00|         32400|
|  1|manish|11-07-2023|11:50|2023-07-11 11:50:00|2023-07-11 10:20:00|2023-07-11 19:20:00|         32400|
|  1|manish|11-07-2023|19:20|2023-07-11 19:20:00|2023-07-11 10:20:00|2023-07-11 19:20:00|         32400|
|  1|manish|12-07-2023|10:32|2023-07-12 10:32:00|2023-07-12 10:32:00|2023-07-12 16:23:00|         21060|
|  1|manish|12-07-2023|12:20|2023-07-12 12:20:00|2023-07-12 10:32:00|2023-07-12 16:23:00|         21060|
|  1|manish|12-07-2023|16:23|2023-07-12 16:23:00|2023-0

In [0]:
emp_timestamp.filter(col("time_in_office") > 28800).select("name").distinct().show()

+------+
|  name|
+------+
|vikash|
|manish|
+------+



In [0]:
product.show()

        

+----------+------------+----------+-------+
|product_id|product_name|sales_date|  sales|
+----------+------------+----------+-------+
|         2|     samsung|01-01-1995|  11000|
|         1|      iphone|01-02-2023|1300000|
|         2|     samsung|01-02-2023|1120000|
|         3|     oneplus|01-02-2023|1120000|
|         1|      iphone|01-03-2023|1600000|
|         2|     samsung|01-03-2023|1080000|
|         3|     oneplus|01-03-2023|1160000|
|         1|      iphone|01-01-2006|  15000|
|         1|      iphone|01-04-2023|1700000|
|         2|     samsung|01-04-2023|1800000|
|         3|     oneplus|01-04-2023|1170000|
|         1|      iphone|01-05-2023|1200000|
|         2|     samsung|01-05-2023| 980000|
|         3|     oneplus|01-05-2023|1175000|
|         1|      iphone|01-06-2023|1100000|
|         3|     oneplus|01-01-2010|  23000|
|         2|     samsung|01-06-2023|1100000|
|         3|     oneplus|01-06-2023|1200000|
+----------+------------+----------+-------+



#find the sales of each product based on 3 months

In [0]:
window = Window.partitionBy("product_id").orderBy("sales_date").rowsBetween(-2,0)
window1 = Window.partitionBy("product_id").orderBy("sales_date")

p_df = product.withColumn("last_3month_sale",avg(col("sales")).over(window))\
                .withColumn("rn",row_number().over(window1))\
                .filter(col("rn") > 2).show()
                

+----------+------------+----------+-------+------------------+---+
|product_id|product_name|sales_date|  sales|  last_3month_sale| rn|
+----------+------------+----------+-------+------------------+---+
|         1|      iphone|01-03-2023|1600000| 971666.6666666666|  3|
|         1|      iphone|01-04-2023|1700000|1533333.3333333333|  4|
|         1|      iphone|01-05-2023|1200000|         1500000.0|  5|
|         1|      iphone|01-06-2023|1100000|1333333.3333333333|  6|
|         2|     samsung|01-03-2023|1080000|          737000.0|  3|
|         2|     samsung|01-04-2023|1800000|1333333.3333333333|  4|
|         2|     samsung|01-05-2023| 980000|1286666.6666666667|  5|
|         2|     samsung|01-06-2023|1100000|1293333.3333333333|  6|
|         3|     oneplus|01-03-2023|1160000| 767666.6666666666|  3|
|         3|     oneplus|01-04-2023|1170000|         1150000.0|  4|
|         3|     oneplus|01-05-2023|1175000|1168333.3333333333|  5|
|         3|     oneplus|01-06-2023|1200000|1181