# If-Then-Else and Switch Patterns

In [3]:
from pyspark.sql import SparkSession
from pyspark.sql import functions as F

# Reuse the active SparkSession if one exists; otherwise start a new one.
spark = SparkSession.builder.getOrCreate()
# Bare expression: as the last statement of a notebook cell, its value is
# displayed as the cell output.
spark

## Using the `when` / `otherwise` functions on a DataFrame

In [7]:
# Three-row demo DataFrame: an integer id and a string category per row.
# Column names are supplied through `schema`; the column types are inferred
# from the Python values.
pdf = spark.createDataFrame(
    data=[(1, "c1"), (2, "c2"), (3, "c3")],
    schema=("id", "category"),
)
pdf.show()

+---+--------+
| id|category|
+---+--------+
|  1|      c1|
|  2|      c2|
|  3|      c3|
+---+--------+



In [23]:
# if / elif / else as a column expression: the when() branches are checked in
# order, and otherwise() supplies the value for rows that match none of them.
pdf.withColumn(
    "category_description",
    F.when(F.col("category") == "c1", "this is category 1")
    .when(F.col("category") == "c2", "this is category 2")
    .otherwise("unknown category"),
).show()

+---+--------+--------------------+
| id|category|category_description|
+---+--------+--------------------+
|  1|      c1|  this is category 1|
|  2|      c2|  this is category 2|
|  3|      c3|    unknown category|
+---+--------+--------------------+



In [22]:
# Same when() chain, but the fallback is a column instead of a literal: rows
# that match neither condition keep their original `category` value.
pdf.withColumn(
    "category_description",
    F.when(F.col("category") == "c1", "this is category 1")
    .when(F.col("category") == "c2", "this is category 2")
    .otherwise(F.col("category")),
).show()

+---+--------+--------------------+
| id|category|category_description|
+---+--------+--------------------+
|  1|      c1|  this is category 1|
|  2|      c2|  this is category 2|
|  3|      c3|                  c3|
+---+--------+--------------------+



## Using CASE WHEN ... THEN ... ELSE ... END in PySpark SQL

In [21]:
# Register the DataFrame as a temporary view so the SQL below can refer to it
# by the table name `items`.
pdf.createOrReplaceTempView("items")
# CASE WHEN ... THEN ... ELSE ... END is the SQL spelling of a chained
# when(...).when(...).otherwise(...) column expression.
sql = """
SELECT 
    id,
    category,
    CASE 
        WHEN category = 'c1' THEN 'this is category 1' 
        WHEN category = 'c2' THEN 'this is category 2' 
        ELSE 'unknown category' 
        END as category_description
FROM items
""".strip()  # drop the newlines just inside the triple quotes
spark.sql(sql).show()

+---+--------+--------------------+
| id|category|category_description|
+---+--------+--------------------+
|  1|      c1|  this is category 1|
|  2|      c2|  this is category 2|
|  3|      c3|    unknown category|
+---+--------+--------------------+



## Multiple conditions using the & and | operators

In [18]:
# Combine column conditions with | (OR) or & (AND). Each comparison needs its
# own parentheses because | and & bind more tightly than == in Python.
pdf.withColumn(
    "category_description",
    F.when(
        (F.col("category") == "c1") | (F.col("category") == "c2"),
        "known category",
    ).otherwise("unknown category"),
).show()

+---+--------+--------------------+
| id|category|category_description|
+---+--------+--------------------+
|  1|      c1|      known category|
|  2|      c2|      known category|
|  3|      c3|    unknown category|
+---+--------+--------------------+

