In [1]:
from pyspark.sql import SparkSession,Row

In [2]:
# Build the SparkSession used by every later cell, or reuse an existing one
# (getOrCreate), with app name "test" and master "local[*]".
spark = (
    SparkSession.builder
    .master("local[*]")
    .appName("test")
    .getOrCreate()
)

In [3]:
# Row factory holding the employee column names; calling header(...) with
# positional values produces one record with these fields.
header = Row(
    "ID",
    "Name",
    "gender",
    "Salary",
    "Department",
)

In [4]:
# Build the employee DataFrame directly from the list of Row records with
# spark.createDataFrame, rather than going through
# sparkContext.parallelize(...).toDF(). Column names come from the `header`
# Row factory; the records themselves are unchanged.
# NOTE(review): ID 15 is used for four different employees (Jitendra,
# Rajkumar, Satish, Himmat) - this looks like a copy/paste slip. The values
# are kept as-is so the recorded outputs in this notebook still match;
# confirm the intended IDs.
employee_df = spark.createDataFrame([
    header(1, "Deva", "Male", 5000, "Sales"),
    header(2, "Jugnu", "Female", 6000, "HR"),
    header(3, "Kavita", "Female", 7500, "IT"),
    header(4, "Vikram", "Male", 6500, "Marketing"),
    header(5, "Shabana", "Female", 5500, "Finance"),
    header(6, "Shantilal", "Male", 8000, "Sales"),
    header(7, "Vinod", "Male", 7200, "HR"),
    header(8, "Vimla", "Female", 6600, "IT"),
    header(9, "Jasmin", "Female", 5400, "Marketing"),
    header(10, "Lovely", "Female", 6300, "Finance"),
    header(11, "Mohan", "Male", 5700, "Sales"),
    header(12, "Purvish", "Male", 7000, "HR"),
    header(13, "Jinat", "Female", 7100, "IT"),
    header(14, "Eva", "Female", 6800, "Marketing"),
    header(15, "Jitendra", "Male", 5000, "Finance"),
    header(15, "Rajkumar", "Male", 4500, "Finance"),
    header(15, "Satish", "Male", 4500, "Finance"),
    header(15, "Himmat", "Male", 3500, "Finance"),
])

In [5]:
# Create a window partitioned by gender to rank salaries.
# Rows with the same salary are assigned the same rank.
from pyspark.sql.window import Window 
from pyspark.sql.functions import desc_nulls_last, rank 

In [7]:
# Window spec: one partition per "gender" value, rows ordered by "Salary"
# descending (desc_nulls_last).
genderPartitionedSpec = (
    Window
    .partitionBy("gender")
    .orderBy(desc_nulls_last("Salary"))
)

In [8]:
# Add a "rank" column computed with rank() over the gender window and print it.
(
    employee_df
    .withColumn("rank", rank().over(genderPartitionedSpec))
    .show()
)

+---+---------+------+------+----------+----+
| ID|     Name|gender|Salary|Department|rank|
+---+---------+------+------+----------+----+
|  3|   Kavita|Female|  7500|        IT|   1|
| 13|    Jinat|Female|  7100|        IT|   2|
| 14|      Eva|Female|  6800| Marketing|   3|
|  8|    Vimla|Female|  6600|        IT|   4|
| 10|   Lovely|Female|  6300|   Finance|   5|
|  2|    Jugnu|Female|  6000|        HR|   6|
|  5|  Shabana|Female|  5500|   Finance|   7|
|  9|   Jasmin|Female|  5400| Marketing|   8|
|  6|Shantilal|  Male|  8000|     Sales|   1|
|  7|    Vinod|  Male|  7200|        HR|   2|
| 12|  Purvish|  Male|  7000|        HR|   3|
|  4|   Vikram|  Male|  6500| Marketing|   4|
| 11|    Mohan|  Male|  5700|     Sales|   5|
|  1|     Deva|  Male|  5000|     Sales|   6|
| 15| Jitendra|  Male|  5000|   Finance|   6|
| 15| Rajkumar|  Male|  4500|   Finance|   8|
| 15|   Satish|  Male|  4500|   Finance|   8|
| 15|   Himmat|  Male|  3500|   Finance|  10|
+---+---------+------+------+----------+----+

In [9]:
# Window spec for ranking salaries within each department: partitioned by
# "Department", ordered by "Salary" descending (desc_nulls_last).
departmentPartitionedSpec = (
    Window
    .partitionBy("Department")
    .orderBy(desc_nulls_last("Salary"))
)

In [10]:
# Add a "rank" column computed with rank() over the department window and print it.
(
    employee_df
    .withColumn("rank", rank().over(departmentPartitionedSpec))
    .show()
)

+---+---------+------+------+----------+----+
| ID|     Name|gender|Salary|Department|rank|
+---+---------+------+------+----------+----+
|  6|Shantilal|  Male|  8000|     Sales|   1|
| 11|    Mohan|  Male|  5700|     Sales|   2|
|  1|     Deva|  Male|  5000|     Sales|   3|
|  7|    Vinod|  Male|  7200|        HR|   1|
| 12|  Purvish|  Male|  7000|        HR|   2|
|  2|    Jugnu|Female|  6000|        HR|   3|
| 10|   Lovely|Female|  6300|   Finance|   1|
|  5|  Shabana|Female|  5500|   Finance|   2|
| 15| Jitendra|  Male|  5000|   Finance|   3|
| 15| Rajkumar|  Male|  4500|   Finance|   4|
| 15|   Satish|  Male|  4500|   Finance|   4|
| 15|   Himmat|  Male|  3500|   Finance|   6|
| 14|      Eva|Female|  6800| Marketing|   1|
|  4|   Vikram|  Male|  6500| Marketing|   2|
|  9|   Jasmin|Female|  5400| Marketing|   3|
|  3|   Kavita|Female|  7500|        IT|   1|
| 13|    Jinat|Female|  7100|        IT|   2|
|  8|    Vimla|Female|  6600|        IT|   3|
+---+---------+------+------+----------+----+

In [11]:
# Window spec for ranking salaries within each (Department, gender) pair:
# partitioned by both columns, ordered by "Salary" descending (desc_nulls_last).
departmentGenderPartitionedSpec = (
    Window
    .partitionBy("Department", "gender")
    .orderBy(desc_nulls_last("Salary"))
)

In [12]:
# Add a "rank" column computed with rank() over the department+gender window
# and print it.
(
    employee_df
    .withColumn("rank", rank().over(departmentGenderPartitionedSpec))
    .show()
)

+---+---------+------+------+----------+----+
| ID|     Name|gender|Salary|Department|rank|
+---+---------+------+------+----------+----+
|  3|   Kavita|Female|  7500|        IT|   1|
| 13|    Jinat|Female|  7100|        IT|   2|
|  8|    Vimla|Female|  6600|        IT|   3|
|  7|    Vinod|  Male|  7200|        HR|   1|
| 12|  Purvish|  Male|  7000|        HR|   2|
|  6|Shantilal|  Male|  8000|     Sales|   1|
| 11|    Mohan|  Male|  5700|     Sales|   2|
|  1|     Deva|  Male|  5000|     Sales|   3|
| 14|      Eva|Female|  6800| Marketing|   1|
|  9|   Jasmin|Female|  5400| Marketing|   2|
| 10|   Lovely|Female|  6300|   Finance|   1|
|  5|  Shabana|Female|  5500|   Finance|   2|
|  4|   Vikram|  Male|  6500| Marketing|   1|
| 15| Jitendra|  Male|  5000|   Finance|   1|
| 15| Rajkumar|  Male|  4500|   Finance|   2|
| 15|   Satish|  Male|  4500|   Finance|   2|
| 15|   Himmat|  Male|  3500|   Finance|   4|
|  2|    Jugnu|Female|  6000|        HR|   1|
+---+---------+------+------+----------+----+

In [13]:
# Window spec used for the percent-rank example: partitioned by "gender",
# ordered by "Salary" descending (desc_nulls_last). Rows with the same salary
# are assigned the same rank.
genderPartitionedSpec1 = (
    Window
    .partitionBy("gender")
    .orderBy(desc_nulls_last("Salary"))
)

In [15]:
from pyspark.sql.functions import percent_rank 

In [16]:
# Add a "percentRank" column computed with percent_rank() over the gender
# window and print it.
(
    employee_df
    .withColumn("percentRank", percent_rank().over(genderPartitionedSpec1))
    .show()
)

+---+---------+------+------+----------+-------------------+
| ID|     Name|gender|Salary|Department|        percentRank|
+---+---------+------+------+----------+-------------------+
|  3|   Kavita|Female|  7500|        IT|                0.0|
| 13|    Jinat|Female|  7100|        IT|0.14285714285714285|
| 14|      Eva|Female|  6800| Marketing| 0.2857142857142857|
|  8|    Vimla|Female|  6600|        IT|0.42857142857142855|
| 10|   Lovely|Female|  6300|   Finance| 0.5714285714285714|
|  2|    Jugnu|Female|  6000|        HR| 0.7142857142857143|
|  5|  Shabana|Female|  5500|   Finance| 0.8571428571428571|
|  9|   Jasmin|Female|  5400| Marketing|                1.0|
|  6|Shantilal|  Male|  8000|     Sales|                0.0|
|  7|    Vinod|  Male|  7200|        HR| 0.1111111111111111|
| 12|  Purvish|  Male|  7000|        HR| 0.2222222222222222|
|  4|   Vikram|  Male|  6500| Marketing| 0.3333333333333333|
| 11|    Mohan|  Male|  5700|     Sales| 0.4444444444444444|
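|  1|     Deva|  Male|  5000|     Sales| 0.5555555555555556|
| 15| Jitendra|  Male|  5000|   Finance| 0.5555555555555556|
| 15| Rajkumar|  Male|  4500|   Finance| 0.7777777777777778|
| 15|   Satish|  Male|  4500|   Finance| 0.7777777777777778|
| 15|   Himmat|  Male|  3500|   Finance|                1.0|
+---+---------+------+------+----------+-------------------+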

In [17]:
# Window spec used for the dense_rank example: partitioned by "gender",
# ordered by "Salary" descending (desc_nulls_last). dense_rank yields
# consecutive ranks, with no gaps after ties.
genderPartitionedSpec2 = (
    Window
    .partitionBy("gender")
    .orderBy(desc_nulls_last("Salary"))
)

In [18]:
from pyspark.sql.functions import dense_rank 

In [19]:
# Add a "denseRank" column computed with dense_rank() over the gender window
# and print it.
(
    employee_df
    .withColumn("denseRank", dense_rank().over(genderPartitionedSpec2))
    .show()
)

+---+---------+------+------+----------+---------+
| ID|     Name|gender|Salary|Department|denseRank|
+---+---------+------+------+----------+---------+
|  3|   Kavita|Female|  7500|        IT|        1|
| 13|    Jinat|Female|  7100|        IT|        2|
| 14|      Eva|Female|  6800| Marketing|        3|
|  8|    Vimla|Female|  6600|        IT|        4|
| 10|   Lovely|Female|  6300|   Finance|        5|
|  2|    Jugnu|Female|  6000|        HR|        6|
|  5|  Shabana|Female|  5500|   Finance|        7|
|  9|   Jasmin|Female|  5400| Marketing|        8|
|  6|Shantilal|  Male|  8000|     Sales|        1|
|  7|    Vinod|  Male|  7200|        HR|        2|
| 12|  Purvish|  Male|  7000|        HR|        3|
|  4|   Vikram|  Male|  6500| Marketing|        4|
| 11|    Mohan|  Male|  5700|     Sales|        5|
|  1|     Deva|  Male|  5000|     Sales|        6|
| 15| Jitendra|  Male|  5000|   Finance|        6|
| 15| Rajkumar|  Male|  4500|   Finance|        7|
| 15|   Satish|  Male|  4500|  