In [0]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import col

# Step 1: Obtain the SparkSession for this example
session_builder = SparkSession.builder.appName("ColumnFunctionsExample")
spark = session_builder.getOrCreate()

# Step 2: Build a small sample DataFrame.
# "age" is stored as a string and a few cells are None, so the cast and
# null-check expressions below have something to act on.
data = [
    ("Priya", "25", "India", 4000),
    ("Aryan", "30", None, 5000),
    ("Kavya", "28", "USA", 6000),
    ("Ram", None, "UK", 4500),
]
df = spark.createDataFrame(data, schema=["name", "age", "country", "salary"])

# Step 3: Describe each output column as a Column expression, then project
projected_columns = [
    # same values as "name", exposed under a new column name
    col("name").alias("full_name"),
    # string -> int; a null age stays null
    col("age").cast("int").alias("age_int"),
    # True where a country value is present
    col("country").isNotNull().alias("has_country"),
    # substring test: does the name contain "ya"?
    col("name").contains("ya").alias("has_ya"),
    # prefix test: does the name start with "P"?
    col("name").startswith("P").alias("starts_with_P"),
    # arithmetic on a numeric column
    (col("salary") + 1000).alias("updated_salary"),
]
df_transformed = df.select(projected_columns)

# Step 4: Print rows and schema, original DataFrame first, then the projection
for frame in (df, df_transformed):
    frame.show()
    frame.printSchema()


+-----+----+-------+------+
| name| age|country|salary|
+-----+----+-------+------+
|Priya|  25|  India|  4000|
|Aryan|  30|   NULL|  5000|
|Kavya|  28|    USA|  6000|
|  Ram|NULL|     UK|  4500|
+-----+----+-------+------+

root
 |-- name: string (nullable = true)
 |-- age: string (nullable = true)
 |-- country: string (nullable = true)
 |-- salary: long (nullable = true)

+---------+-------+-----------+------+-------------+--------------+
|full_name|age_int|has_country|has_ya|starts_with_P|updated_salary|
+---------+-------+-----------+------+-------------+--------------+
|    Priya|     25|       true|  true|         true|          5000|
|    Aryan|     30|      false|  true|        false|          6000|
|    Kavya|     28|       true|  true|        false|          7000|
|      Ram|   NULL|       true| false|        false|          5500|
+---------+-------+-----------+------+-------------+--------------+

root
 |-- full_name: string (nullable = true)
 |-- age_int: integer (nullable 