In [37]:
# NOTE(review): findspark.init() is presumably what makes the pyspark
# imports in the following cells resolvable (it is run before all of
# them) — confirm against the findspark documentation.
import findspark
findspark.init()

In [38]:
from pyspark.sql.session import SparkSession

In [39]:
# Obtain the notebook's SparkSession: local master, named after the
# exercise, reusing an existing session if one is already available.
spark = (
    SparkSession.builder
    .master("local[*]")
    .appName('Nth highestSalary')
    .getOrCreate()
)

In [40]:
# Column names, in the same order as the fields of each record below.
columns = ["emp_name", "dep", "salary"]

# Sample employee records: (emp_name, dep, salary).
# Note that the ("Ram", "Sales", 3000) record appears twice.
data = (
    ("Ram", "Sales", 3000),
    ("Meena", "Sales", 4600),
    ("Robin", "Sales", 4100),
    ("Kunal", "Finance", 3000),
    ("Ram", "Sales", 3000),
    ("Srishti", "Management", 3300),
    ("Jeny", "Finance", 3900),
    ("Hitesh", "Marketing", 3000),
    ("Kailash", "Marketing", 2000),
    ("Sharad", "Sales", 4100),
)

# Build the employee DataFrame from the records and column names.
df3 = spark.createDataFrame(data=data, schema=columns)

# Inspect the inferred schema, then the rows themselves.
df3.printSchema()
df3.show()

root
 |-- emp_name: string (nullable = true)
 |-- dep: string (nullable = true)
 |-- salary: long (nullable = true)

+--------+----------+------+
|emp_name|       dep|salary|
+--------+----------+------+
|     Ram|     Sales|  3000|
|   Meena|     Sales|  4600|
|   Robin|     Sales|  4100|
|   Kunal|   Finance|  3000|
|     Ram|     Sales|  3000|
| Srishti|Management|  3300|
|    Jeny|   Finance|  3900|
|  Hitesh| Marketing|  3000|
| Kailash| Marketing|  2000|
|  Sharad|     Sales|  4100|
+--------+----------+------+



In [42]:
from pyspark.sql.window import Window

In [43]:
from pyspark.sql.functions import col, dense_rank, desc

In [46]:
# Window over each department ("dep"), ordered from the highest salary
# down; used below to rank salaries within a department.
windowSpec = Window.partitionBy("dep").orderBy(desc("salary"))

In [48]:
# Add a per-department salary rank. With dense_rank(), equal salaries
# share a rank and the next distinct salary gets the next integer
# (e.g. 1, 2, 2, 3), so "rank n" means the n-th highest distinct salary.
df_salary_rank = df3.withColumn(
    "salary_rank",
    dense_rank().over(windowSpec),
)

In [49]:
# Print the ranked rows as a text table to check the salary_rank column.
df_salary_rank.show()

+--------+----------+------+-----------+
|emp_name|       dep|salary|salary_rank|
+--------+----------+------+-----------+
|   Meena|     Sales|  4600|          1|
|   Robin|     Sales|  4100|          2|
|  Sharad|     Sales|  4100|          2|
|     Ram|     Sales|  3000|          3|
|     Ram|     Sales|  3000|          3|
| Srishti|Management|  3300|          1|
|    Jeny|   Finance|  3900|          1|
|   Kunal|   Finance|  3000|          2|
|  Hitesh| Marketing|  3000|          1|
| Kailash| Marketing|  2000|          2|
+--------+----------+------+-----------+



In [54]:
# Ask which rank to look up. int() raises ValueError on non-numeric input.
n = int(input("Enter the value of n: "))

# dense_rank() numbers ranks from 1, so a value below 1 can never match.
# Fail loudly instead of printing an empty table that could be mistaken
# for "no employee has this rank".
if n < 1:
    raise ValueError(f"n must be a positive integer, got {n}")

print("n is: ", n)

# Keep the rows whose per-department salary rank equals n, i.e. everyone
# earning the n-th highest distinct salary in their department.
# A Column comparison is used instead of interpolating n into a SQL string.
df_salary_rank.filter(col("salary_rank") == n).show()

Enter the value of n: 3
n is:  3
+--------+-----+------+-----------+
|emp_name|  dep|salary|salary_rank|
+--------+-----+------+-----------+
|     Ram|Sales|  3000|          3|
|     Ram|Sales|  3000|          3|
+--------+-----+------+-----------+

