## 01-pyspark-sql-case-when.py

In [0]:
# 01-pyspark-sql-case-when.py
from pyspark.sql import SparkSession

# getOrCreate() reuses an existing session when one is already active.
spark = SparkSession.builder.appName("PySparkExamples").getOrCreate()

# Sample rows as (name, gender, salary). The set deliberately includes a
# None gender, an empty-string gender and a None salary.
data = [
    ("Jai", "M", 60000),
    ("Mohan", "M", 70000),
    ("Rohit", None, 400000),
    ("Maria", "F", 500000),
    ("Jaya", "", None),
]
columns = ["name", "gender", "salary"]

df1 = spark.createDataFrame(data, schema=columns)
df1.show()

+-----+------+------+
| name|gender|salary|
+-----+------+------+
|  Jai|     M| 60000|
|Mohan|     M| 70000|
|Rohit|  null|400000|
|Maria|     F|500000|
| Jaya|      |  null|
+-----+------+------+



In [0]:
# Using when / otherwise
from pyspark.sql.functions import col, when

# Branches are evaluated top to bottom. A row that matches none of them
# (here: the empty-string gender) receives the otherwise() value.
df2 = df1.withColumn(
    "new_gender",
    when(df1["gender"] == "M", "Male")
    .when(df1["gender"] == "F", "Female")
    .when(df1["gender"].isNull(), "Other")
    .otherwise("Unknown"),
)
df2.show()

+-----+------+------+----------+
| name|gender|salary|new_gender|
+-----+------+------+----------+
|  Jai|     M| 60000|      Male|
|Mohan|     M| 70000|      Male|
|Rohit|  null|400000|     Other|
|Maria|     F|500000|    Female|
| Jaya|      |  null|   Unknown|
+-----+------+------+----------+



In [0]:
# Same mapping, built inside select() instead of withColumn(). Values that
# match no branch fall through to the original gender value rather than a
# fixed label.
df3 = df1.select(
    col("*"),
    when(df1["gender"] == "M", "Male")
    .when(df1["gender"] == "F", "Female")
    .when(df1["gender"].isNull(), "Other")
    .otherwise(df1["gender"])
    .alias("new_gender"),
)
df3.show()

+-----+------+------+----------+
| name|gender|salary|new_gender|
+-----+------+------+----------+
|  Jai|     M| 60000|      Male|
|Mohan|     M| 70000|      Male|
|Rohit|  null|400000|     Other|
|Maria|     F|500000|    Female|
| Jaya|      |  null|          |
+-----+------+------+----------+



In [0]:
# Using SQL CASE WHEN
from pyspark.sql.functions import expr

# Start from df1, like the other examples, instead of overwriting the
# "new_gender" column that df2 already carries; the resulting columns are
# the same.
# Adjacent string literals are joined by Python. Each fragment ends with a
# space so that no SQL keyword runs into the preceding token (the original
# concatenation produced "'Other'ELSE").
df3 = df1.withColumn(
    "new_gender",
    expr(
        "CASE WHEN gender = 'M' THEN 'Male' "
        "WHEN gender = 'F' THEN 'Female' "
        "WHEN gender IS NULL THEN 'Other' "
        "ELSE gender END"
    ),
)
df3.show()

+-----+------+------+----------+
| name|gender|salary|new_gender|
+-----+------+------+----------+
|  Jai|     M| 60000|      Male|
|Mohan|     M| 70000|      Male|
|Rohit|  null|400000|     Other|
|Maria|     F|500000|    Female|
| Jaya|      |  null|          |
+-----+------+------+----------+



In [0]:
# SQL CASE WHEN expression used inside select(), aliased to "new_gender".
# Each string fragment ends with a space so that no SQL keyword runs into
# the preceding token (the original concatenation produced "'Other'ELSE").
df4 = df1.select(
    col("*"),
    expr(
        "CASE WHEN gender = 'M' THEN 'Male' "
        "WHEN gender = 'F' THEN 'Female' "
        "WHEN gender IS NULL THEN 'Other' "
        "ELSE gender END"
    ).alias("new_gender"),
)
df4.show()

+-----+------+------+----------+
| name|gender|salary|new_gender|
+-----+------+------+----------+
|  Jai|     M| 60000|      Male|
|Mohan|     M| 70000|      Male|
|Rohit|  null|400000|     Other|
|Maria|     F|500000|    Female|
| Jaya|      |  null|          |
+-----+------+------+----------+



In [0]:
# Register df1 as a temporary view so it can be queried with plain SQL.
df1.createOrReplaceTempView("EMP")

# Each string fragment ends with a space so that no SQL keyword runs into
# the preceding token (the original concatenation produced "'Other'ELSE").
spark.sql(
    "select name, CASE WHEN gender = 'M' THEN 'Male' "
    "WHEN gender = 'F' THEN 'Female' "
    "WHEN gender IS NULL THEN 'Other' "
    "ELSE gender END as new_gender from EMP"
).show()

+-----+----------+
| name|new_gender|
+-----+----------+
|  Jai|      Male|
|Mohan|      Male|
|Rohit|     Other|
|Maria|    Female|
| Jaya|          |
+-----+----------+

