In [0]:
from pyspark.sql import SparkSession
from pyspark.sql.window import Window
from pyspark.sql import functions as F

In [0]:
#spark session
spark = SparkSession.builder.appName("DataFrameOperations").getOrCreate()

In [0]:
file_path = "/FileStore/tables/Employees-3.csv"
df = spark.read.csv(file_path, header=True, inferSchema=True)

In [0]:
# Group by "category" and calculate the average of "value"
grouped_df = df.groupBy("gender").agg(F.avg("salary").alias("avg_value"))

# Create a window specification for the window function
window_spec = Window.partitionBy("gender").orderBy("salary")

# Calculate the row number within each category
df_with_row_number = df.withColumn("row_number", F.row_number().over(window_spec))

# Filter rows where the row number is less than or equal to 3
filtered_df = df_with_row_number.filter(F.col("row_number") <= 3)

In [0]:
filtered_df.createOrReplaceTempView("temp_view")

In [0]:
# Filter using SQL
spark.sql("SELECT * FROM temp_view WHERE salary > 500000").show()

# Group by using SQL
spark.sql("SELECT emp_name, AVG(salary) as avg_value FROM temp_view GROUP BY country, emp_name").show()

# Window function using SQL
spark.sql("SELECT *, ROW_NUMBER() OVER (PARTITION BY country ORDER BY salary) as row_number FROM temp_view").show()


+------+--------+------+---------+------+----------+
|emp_id|emp_name|gender|  country|salary|row_number|
+------+--------+------+---------+------+----------+
|  1003|   Ayush|  Male|    India|600000|         2|
|  1005|     Sam|  Male|Australia|700000|         3|
+------+--------+------+---------+------+----------+

+--------+---------+
|emp_name|avg_value|
+--------+---------+
|     Sam| 700000.0|
|  Pranay| 500000.0|
|  Ritika| 450000.0|
|   Ayush| 600000.0|
|Priyanka| 400000.0|
+--------+---------+

+------+--------+-------+---------+------+----------+----------+
|emp_id|emp_name| gender|  country|salary|row_number|row_number|
+------+--------+-------+---------+------+----------+----------+
|  1005|     Sam|   Male|Australia|700000|         3|         1|
|  1004|  Ritika|Female |  England|450000|         2|         1|
|  1001|  Pranay|   Male|    India|500000|         1|         1|
|  1003|   Ayush|   Male|    India|600000|         2|         2|
|  1002|Priyanka|Female |      USA|4