In [9]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, when

In [10]:
# Obtain the SparkSession for this notebook (an existing one is reused if present,
# per getOrCreate), registered under the application name below.
spark = (
    SparkSession.builder
    .appName("PySpark Transformations Example")
    .getOrCreate()
)


In [11]:
# Field names, in the same order as the values inside each row tuple.
columns = ["id", "name", "age"]

# Sample rows, assembled column-by-column; zip() stitches the parallel
# sequences back into (id, name, age) tuples.
data = list(
    zip(
        (1, 2, 3, 4),
        ("Alice", "Bob", "Cathy", "David"),
        (34, 45, 29, 31),
    )
)

In [12]:
# Build the DataFrame from the in-memory rows, passing `columns` as the schema.
df = spark.createDataFrame(data, schema=columns)

print("Original DataFrame:")
df.show()

# Keep only the rows whose age is strictly greater than 30.
is_over_30 = col("age") > 30
filtered_df = df.filter(is_over_30)

# Derive a label per remaining row: "Senior" when age > 40, otherwise "Junior".
age_group = when(col("age") > 40, "Senior").otherwise("Junior")
transformed_df = filtered_df.withColumn("age_group", age_group)

print("Transformed DataFrame:")
transformed_df.show()


Original DataFrame:
+---+-----+---+
| id| name|age|
+---+-----+---+
|  1|Alice| 34|
|  2|  Bob| 45|
|  3|Cathy| 29|
|  4|David| 31|
+---+-----+---+

Transformed DataFrame:
+---+-----+---+---------+
| id| name|age|age_group|
+---+-----+---+---------+
|  1|Alice| 34|   Junior|
|  2|  Bob| 45|   Senior|
|  4|David| 31|   Junior|
+---+-----+---+---------+



In [13]:

# Shut down the `spark` session now that the example has finished; cells that
# use `spark` cannot be re-run after this without re-creating the session first.
spark.stop()