In [0]:
from pyspark.sql.functions import col

data = [("Ram",3000), ("Sham", 4000), ("Balram", 5000)]
columns = ["name", "salary"]
df = spark.createDataFrame(data, columns)

df = df.withColumn("bonus", col("salary") * 0.10)

df.show()


+------+------+-----+
|  name|salary|bonus|
+------+------+-----+
|   Ram|  3000|300.0|
|  Sham|  4000|400.0|
|Balram|  5000|500.0|
+------+------+-----+



In [0]:
df = df.withColumn("salary", col("salary") * 80) # modifying col 
df.show()


+------+------+-----+
|  name|salary|bonus|
+------+------+-----+
|   Ram|240000|300.0|
|  Sham|320000|400.0|
|Balram|400000|500.0|
+------+------+-----+



In [0]:
from pyspark.sql.types import *

df = df.withColumn("salary", col("salary").cast(DoubleType()))
df.printSchema()


root
 |-- name: string (nullable = true)
 |-- salary: double (nullable = true)
 |-- bonus: double (nullable = true)



In [0]:
df = df.withColumn("details", concat(col("name"), lit(" earns ₹"), col("salary")))
df.show(truncate=False)  

+------+--------+-----+----------------------+
|name  |salary  |bonus|details               |
+------+--------+-----+----------------------+
|Ram   |240000.0|300.0|Ram earns ₹240000.0   |
|Sham  |320000.0|400.0|Sham earns ₹320000.0  |
|Balram|400000.0|500.0|Balram earns ₹400000.0|
+------+--------+-----+----------------------+



In [0]:
from pyspark.sql.functions import *

df = df.withColumn("category",
                   when(col("salary") > 4000, "High")
                   .otherwise("Low"))
df.show()

+------+--------+-----+--------------------+--------+
|  name|  salary|bonus|             details|category|
+------+--------+-----+--------------------+--------+
|   Ram|240000.0|300.0| Ram earns ₹240000.0|    High|
|  Sham|320000.0|400.0|Sham earns ₹320000.0|    High|
|Balram|400000.0|500.0|Balram earns ₹400...|    High|
+------+--------+-----+--------------------+--------+



In [0]:
df.select(col("name").alias("employee_name")).show()


+-------------+
|employee_name|
+-------------+
|          Ram|
|         Sham|
|       Balram|
+-------------+



In [0]:
df.where(col("salary") > 24000.0)


Out[22]: DataFrame[name: string, salary: double, bonus: double, details: string, category: string]