In [0]:
# Input source: file format and its dbfs:/ path
file_type = "csv"
file_location = "dbfs:/FileStore/tables/Person_Person.csv"

# Reader options used when the input is CSV (kept as strings)
delimiter = ","
first_row_is_header = "true"
infer_schema = "true"

In [0]:
# Load the source file into a DataFrame. The sep / header / inferSchema
# options only matter for CSV input; other file types ignore them.
# NOTE(review): the recorded output shows Demographics values beginning with
# a literal '"' - the quote/escape handling may need review; confirm against
# the raw file.
df = (
    spark.read.format(file_type)
    .option("sep", delimiter)
    .option("header", first_row_is_header)
    .option("inferSchema", infer_schema)
    .load(file_location)
)

# Preview the first five rows.
df.show(5)

+----------------+----------+---------+-----+---------+----------+----------+------+--------------+---------------------+--------------------+--------------------+--------------------+
|BusinessEntityID|PersonType|NameStyle|Title|FirstName|MiddleName|  LastName|Suffix|EmailPromotion|AdditionalContactInfo|        Demographics|             rowguid|        ModifiedDate|
+----------------+----------+---------+-----+---------+----------+----------+------+--------------+---------------------+--------------------+--------------------+--------------------+
|               1|        EM|        0| NULL|      Ken|         J|   Sánchez|  NULL|             0|                 NULL|"<IndividualSurve...|92c4279f-1207-48a...|2009-01-07 00:00:...|
|               2|        EM|        0| NULL|    Terri|       Lee|     Duffy|  NULL|             1|                 NULL|"<IndividualSurve...|d8763459-8aa8-47c...|2008-01-24 00:00:...|
|               3|        EM|        0| NULL|  Roberto|      NULL|Tamburell

In [0]:
# Method 1
# Derive a "gender" column from Title with a chained when() / otherwise().
# Branches are evaluated in order; the first matching one supplies the value.

from pyspark.sql.functions import when, expr
df2 = df.withColumn(
    "gender",
    when(df["Title"] == "Mr.", "Male")
    .when(df["Title"] == "Ms.", "Female")
    .when(df["Title"] == "NULL", "Null")   # the literal text "NULL"
    .when(df["Title"].isNull(), "Null")    # a genuinely missing Title
    .otherwise("Unknown"),
)
                                

In [0]:
# Method 2
# Using SQL CASE WHEN with expr()
# expr() parses a SQL expression string, so a SQL-style CASE WHEN can be used
# inside withColumn() or select() as an alternative to when()/otherwise().
#
# The labels mirror Method 1:
#   - a missing Title needs an explicit IS NULL test, because comparing NULL
#     with '=' never evaluates to true and the row would fall through to ELSE;
#   - the fallback label is 'Unknown' (previously misspelled as 'unknow').

df3 = df.withColumn(
    "gender",
    expr(
        "CASE "
        "WHEN Title = 'Mr.' THEN 'Male' "
        "WHEN Title = 'Ms.' THEN 'Female' "
        "WHEN Title = 'NULL' OR Title IS NULL THEN 'Null' "
        "ELSE 'Unknown' "
        "END"
    ),
)