In [0]:
"""
The PySpark SQL function lit() adds a new column to a DataFrame by assigning a literal (constant) value, and it returns a Column. typedLit(), its counterpart for collection types, exists only in the Scala API and is not part of pyspark.sql.functions.
"""


In [0]:

import pyspark
from pyspark.sql import SparkSession

# Create the Spark session, or reuse one that is already running.
spark = (
    SparkSession.builder
    .appName('SparkByExamples.com')
    .getOrCreate()
)

# Sample rows: (employee id as text, salary as an integer).
data = [
    ("111", 50000),
    ("222", 60000),
    ("333", 40000),
]
columns = ["EmpId", "Salary"]

# Only column names are supplied; the column types are inferred from the rows.
df = spark.createDataFrame(data, schema=columns)
df.show(truncate=False)


+-----+------+
|EmpId|Salary|
+-----+------+
|111  |50000 |
|222  |60000 |
|333  |40000 |
+-----+------+



In [0]:
"""
The PySpark lit() function adds a constant (literal) value as a new column of a DataFrame.
"""


from pyspark.sql.functions import col,lit

# Keep both existing columns and append "lit_value1", which carries the
# string literal "1" on every row.
df2 = df.select(
    col("EmpId"),
    col("Salary"),
    lit("1").alias("lit_value1"),
)
df2.show(truncate=False)


+-----+------+----------+
|EmpId|Salary|lit_value1|
+-----+------+----------+
|111  |50000 |1         |
|222  |60000 |1         |
|333  |40000 |1         |
+-----+------+----------+



In [0]:
"""  lit() function with withColumn """


from pyspark.sql.functions import when, lit, col

# Note on a two-sided range test: Python's `&` binds more tightly than
# `>=` / `<=`, so each comparison needs its own parentheses, e.g.
#   when((col("Salary") >= 40000) & (col("Salary") <= 50000), lit("100"))
# Without them the expression is parsed as a chained comparison and fails.
#
# The single-sided test below is what this cell actually runs. All three
# sample salaries are >= 40000, so every row receives "100" and the
# otherwise() value "200" is never used.
df3 = df2.withColumn(
    "lit_value2",
    when(col("Salary") >= 40000, lit("100")).otherwise(lit("200")),
)

df3.show(truncate=False)


+-----+------+----------+----------+
|EmpId|Salary|lit_value1|lit_value2|
+-----+------+----------+----------+
|111  |50000 |1         |100       |
|222  |60000 |1         |100       |
|333  |40000 |1         |100       |
+-----+------+----------+----------+



In [0]:

import pyspark
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, lit, when

# Complete example: build the sample DataFrame, then add two literal columns.
spark = (
    SparkSession.builder
    .appName('SparkByExamples.com')
    .getOrCreate()
)

# Sample rows: (employee id as text, salary as an integer).
data = [
    ("111", 50000),
    ("222", 60000),
    ("333", 40000),
]
columns = ["EmpId", "Salary"]
df = spark.createDataFrame(data, schema=columns)

df.printSchema()
df.show(truncate=False)

# Step 1: append "lit_value1", the string literal "1" on every row.
df2 = df.select(
    col("EmpId"),
    col("Salary"),
    lit("1").alias("lit_value1"),
)
df2.show(truncate=False)

# Step 2: append "lit_value2": "100" where Salary >= 40000, otherwise "200".
df3 = df2.withColumn(
    "lit_value2",
    when(col("Salary") >= 40000, lit("100")).otherwise(lit("200")),
)
df3.show(truncate=False)


root
 |-- EmpId: string (nullable = true)
 |-- Salary: long (nullable = true)

+-----+------+
|EmpId|Salary|
+-----+------+
|111  |50000 |
|222  |60000 |
|333  |40000 |
+-----+------+

+-----+------+----------+
|EmpId|Salary|lit_value1|
+-----+------+----------+
|111  |50000 |1         |
|222  |60000 |1         |
|333  |40000 |1         |
+-----+------+----------+

+-----+------+----------+----------+
|EmpId|Salary|lit_value1|lit_value2|
+-----+------+----------+----------+
|111  |50000 |1         |100       |
|222  |60000 |1         |100       |
|333  |40000 |1         |100       |
+-----+------+----------+----------+

