In [0]:
# Imports are grouped first so the cell's dependencies are visible at a glance.
from pyspark.sql import SparkSession
from pyspark.sql.functions import expr

# Obtain the Spark session used by every example below; getOrCreate() hands
# back an already-running session when there is one instead of starting another.
spark = (
    SparkSession.builder
    .appName('SparkByExamples.com')
    .getOrCreate()
)
# Concatenate columns: build a "Name" column by joining col1 and col2 with a
# comma, using the SQL `||` operator inside expr().
data = [("James", "Bond"), ("Scott", "Varsa")]
df = spark.createDataFrame(data, schema=["col1", "col2"])
df.withColumn("Name", expr(" col1 ||','|| col2")).show()

# Using CASE WHEN sql expression: overwrite "gender" with a readable label
# ('M' -> 'Male', 'F' -> 'Female', anything else -> 'unknown').
data = [("James", "M"), ("Michael", "F"), ("Jen", "")]
columns = ["name", "gender"]
df = spark.createDataFrame(data, columns)
df2 = df.withColumn(
    "gender",
    expr(
        # Adjacent literals are joined by Python into a single SQL string.
        "CASE WHEN gender = 'M' THEN 'Male' "
        "WHEN gender = 'F' THEN 'Female' ELSE 'unknown' END"
    ),
)
df2.show()

# Add months from a value of another column: each row's `date` is shifted by
# the number of months stored in that row's `increment`.
# NOTE(review): `date` holds plain strings and `increment` plain Python ints,
# so add_months presumably relies on Spark's implicit casts -- confirm this
# holds for the Spark version / ANSI setting in use.
data = [("2019-01-23", 1), ("2019-06-24", 2), ("2019-09-20", 3)]
df = spark.createDataFrame(data, schema=["date", "increment"])
df.select(
    "date",
    "increment",
    expr("add_months(date,increment)").alias("inc_date"),
).show()

# Providing alias using 'as': the same computation as above, but the output
# column is named inside the SQL string rather than with .alias().
df.select(
    "date",
    "increment",
    expr("add_months(date,increment) as inc_date"),
).show()

# Add: arithmetic inside expr() -- a constant 5 is added to `increment` and
# the result is exposed as `new_increment`.
df.select(
    "date",
    "increment",
    expr("increment + 5 as new_increment"),
).show()

# Cast: convert `increment` to a string column named `str_increment` and
# print the schema (not the rows) to show the resulting column types.
df.select(
    "increment",
    expr("cast(increment as string) as str_increment"),
).printSchema()
# Use expr() to filter the rows: keep only the rows whose col1 equals col2.
data = [(100, 2), (200, 3000), (500, 500)]
df = spark.createDataFrame(data, schema=["col1", "col2"])
df.filter(expr("col1 == col2")).show()

+-----+-----+-----------+
| col1| col2|       Name|
+-----+-----+-----------+
|James| Bond| James,Bond|
|Scott|Varsa|Scott,Varsa|
+-----+-----+-----------+

+-------+-------+
|   name| gender|
+-------+-------+
|  James|   Male|
|Michael| Female|
|    Jen|unknown|
+-------+-------+

+----------+---------+----------+
|      date|increment|  inc_date|
+----------+---------+----------+
|2019-01-23|        1|2019-02-23|
|2019-06-24|        2|2019-08-24|
|2019-09-20|        3|2019-12-20|
+----------+---------+----------+

+----------+---------+----------+
|      date|increment|  inc_date|
+----------+---------+----------+
|2019-01-23|        1|2019-02-23|
|2019-06-24|        2|2019-08-24|
|2019-09-20|        3|2019-12-20|
+----------+---------+----------+

+----------+---------+-------------+
|      date|increment|new_increment|
+----------+---------+-------------+
|2019-01-23|        1|            6|
|2019-06-24|        2|            7|
|2019-09-20|        3|            8|
+----------+----

In [0]:
#Concatenate columns: This section creates a DataFrame df with two columns, 'col1' and 'col2', and adds a new column 'Name' by joining 'col1' and 'col2' with a comma, using the SQL || operator inside the expr function. The resulting DataFrame is displayed using df.withColumn("Name",expr(" col1 ||','|| col2")).show().

#Using CASE WHEN SQL expression: This section creates a DataFrame df with columns 'name' and 'gender'. It uses the expr function and SQL CASE WHEN expression to transform the 'gender' column values. The resulting DataFrame is displayed using df2.show().

#Add months from a value of another column: This section creates a DataFrame df with columns 'date' and 'increment'. It selects both columns together with a new column 'inc_date', computed with the expr function by applying add_months to the 'date' column with the number of months taken from the 'increment' column. The resulting DataFrame is displayed using df.select(...).show().

#Providing alias using 'as': This section demonstrates the usage of 'as' to provide an alias for the newly added column. It shows an alternative way to write the previous expression with the alias using expr function and select.

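#Add: This section selects a new column 'new_increment', computed by adding a constant value (5) to the 'increment' column using the expr function. The resulting DataFrame is displayed using df.select(...).show().

#Cast: This section uses the expr function to cast the 'increment' column to a string under the alias 'str_increment', and prints the resulting schema with printSchema() instead of showing the rows.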

#Using expr() to filter rows: This section filters the rows in the DataFrame based on the condition specified in expr. It selects the rows where 'col1' is equal to 'col2' using df.filter(expr("col1 == col2")). The resulting DataFrame is displayed using show().

#These examples showcase the flexibility of PySpark's expr function for performing column-based computations, transformations, and filtering operations within DataFrames.