In [3]:
from pyspark.sql import SparkSession, functions as F, Window as W
from pyspark.sql.types import StructType, StructField, IntegerType, StringType

In [4]:
# Reuse the active Spark session if one exists; otherwise start a new one
# under this notebook's application name.
spark = (
    SparkSession.builder
    .appName("DailyCodingProblem-24-08-2025")
    .getOrCreate()
)

# 📝 Problem 1: PySpark – Rank Employees by Salary within Departments

### **Problem Statement**

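You have a PySpark DataFrame containing employee salaries across departments. Write a PySpark program to **rank employees within each department based on salary in descending order**. Employees with the same salary share a rank, and no rank numbers are skipped after a tie (dense ranking).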

### **Sample Input** (`employee_salaries`)

| emp\_id | dept | salary |
| ------- | ---- | ------ |
| 1       | HR   | 60000  |
| 2       | HR   | 75000  |
| 3       | HR   | 50000  |
| 4       | IT   | 90000  |
| 5       | IT   | 85000  |

### **Expected Output**

| emp\_id | dept | salary | rank |
| ------- | ---- | ------ | ---- |
| 2       | HR   | 75000  | 1    |
| 1       | HR   | 60000  | 2    |
| 3       | HR   | 50000  | 3    |
| 4       | IT   | 90000  | 1    |
| 5       | IT   | 85000  | 2    |

---

In [5]:
# Explicit schema for the employee table; every column is declared nullable.
schema = StructType([
    StructField(name, dtype, True)
    for name, dtype in (
        ("emp_id", IntegerType()),
        ("dept", StringType()),
        ("salary", IntegerType()),
    )
])

In [6]:
# Sample rows from the problem statement, written column-wise and zipped
# into (emp_id, dept, salary) tuples.
data = list(zip(
    (1, 2, 3, 4, 5),                      # emp_id
    ("HR", "HR", "HR", "IT", "IT"),       # dept
    (60000, 75000, 50000, 90000, 85000),  # salary
))

In [8]:
# Build the employee DataFrame from the sample rows using the explicit schema.
df = spark.createDataFrame(data, schema=schema)

In [14]:
# Window over each department, highest salary first.
w = (
    W.partitionBy("dept")
    .orderBy(F.col("salary").desc())
)

In [15]:
# Attach each employee's salary rank within their department. dense_rank()
# gives tied salaries the same rank and leaves no gaps after a tie.
df = df.withColumn(
    'rank',
    F.dense_rank().over(w)
)

# Row order of a DataFrame is not guaranteed unless it is sorted explicitly,
# so order by department and rank (emp_id breaks ties) to make the printed
# table deterministic and match the expected output.
df.orderBy(F.col("dept"), F.col("rank"), F.col("emp_id")).show()

+------+----+------+----+
|emp_id|dept|salary|rank|
+------+----+------+----+
|     2|  HR| 75000|   1|
|     1|  HR| 60000|   2|
|     3|  HR| 50000|   3|
|     4|  IT| 90000|   1|
|     5|  IT| 85000|   2|
+------+----+------+----+



In [None]:
# Sort before collecting so the pandas preview has a stable row order.
# toPandas() pulls every row to the driver, which is fine for this small sample.
df.orderBy(F.col("dept"), F.col("rank"), F.col("emp_id")).toPandas()