In [1]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import col

In [2]:
# Start (or reuse) a local Spark session for the examples in this notebook.
spark = (
    SparkSession.builder
    .master("local[1]")
    .appName("SparkByExamples.com")
    .getOrCreate()
)

# Sample rows as (name, state, gender); None values show up as null below.
columns = ["name", "state", "gender"]
data = [
    ("James", None, "M"),
    ("Anna", "NY", "F"),
    ("Julia", None, None),
]

df = spark.createDataFrame(data, schema=columns)
df.show()

+-----+-----+------+
| name|state|gender|
+-----+-----+------+
|James| null|     M|
| Anna|   NY|     F|
|Julia| null|  null|
+-----+-----+------+



## Filter Rows with NULL Values in DataFrame

In [3]:
print('Filter Rows with NULL Values in DataFrame')

# Three equivalent ways to say "state is NULL": a SQL expression string,
# attribute-style column access, and the col() helper.
state_is_null_forms = (
    'state IS NULL',
    df.state.isNull(),
    col('state').isNull(),
)
for predicate in state_is_null_forms:
    df.filter(predicate).show()

Filter Rows with NULL Values in DataFrame
+-----+-----+------+
| name|state|gender|
+-----+-----+------+
|James| null|     M|
|Julia| null|  null|
+-----+-----+------+

+-----+-----+------+
| name|state|gender|
+-----+-----+------+
|James| null|     M|
|Julia| null|  null|
+-----+-----+------+

+-----+-----+------+
| name|state|gender|
+-----+-----+------+
|James| null|     M|
|Julia| null|  null|
+-----+-----+------+



## Filter Rows with NULL on multiple columns

In [4]:
print('Filter Rows with NULL on multiple columns')

# SQL-expression form: both NULL checks joined with AND.
both_null_sql = 'state IS NULL AND gender IS NULL'
df.filter(both_null_sql).show()

# Column-API form: the two isNull() checks combined with the & operator.
both_null_cols = df.state.isNull() & df.gender.isNull()
df.filter(both_null_cols).show()

Filter Rows with NULL on multiple columns
+-----+-----+------+
| name|state|gender|
+-----+-----+------+
|Julia| null|  null|
+-----+-----+------+

+-----+-----+------+
| name|state|gender|
+-----+-----+------+
|Julia| null|  null|
+-----+-----+------+



## Filter Rows with IS NOT NULL or isNotNull

In [5]:
print('Filter Rows with IS NOT NULL or isNotNull')

# Equivalent predicates that keep only rows whose state is not NULL.
state_present_forms = [
    'state IS NOT NULL',
    'NOT state IS NULL',
    df.state.isNotNull(),
    col('state').isNotNull(),
]
for predicate in state_present_forms:
    df.filter(predicate).show()

# Same rows via the NA helper: drop every row that has NULL in "state".
df.na.drop(subset=["state"]).show()

Filter Rows with IS NOT NULL or isNotNull
+----+-----+------+
|name|state|gender|
+----+-----+------+
|Anna|   NY|     F|
+----+-----+------+

+----+-----+------+
|name|state|gender|
+----+-----+------+
|Anna|   NY|     F|
+----+-----+------+

+----+-----+------+
|name|state|gender|
+----+-----+------+
|Anna|   NY|     F|
+----+-----+------+

+----+-----+------+
|name|state|gender|
+----+-----+------+
|Anna|   NY|     F|
+----+-----+------+

+----+-----+------+
|name|state|gender|
+----+-----+------+
|Anna|   NY|     F|
+----+-----+------+



## SQL Filter Rows with NULL Values

In [6]:
print('SQL Filter Rows with NULL Values')

# Register the DataFrame as a temp view so it can be queried with SQL.
df.createOrReplaceTempView("DATA")

# NULL in one column, NULL in both columns, then NOT NULL.
null_queries = (
    "SELECT * FROM DATA where STATE IS NULL",
    "SELECT * FROM DATA where STATE IS NULL AND GENDER IS NULL",
    "SELECT * FROM DATA where STATE IS NOT NULL",
)
for query in null_queries:
    spark.sql(query).show()

SQL Filter Rows with NULL Values
+-----+-----+------+
| name|state|gender|
+-----+-----+------+
|James| null|     M|
|Julia| null|  null|
+-----+-----+------+

+-----+-----+------+
| name|state|gender|
+-----+-----+------+
|Julia| null|  null|
+-----+-----+------+

+----+-----+------+
|name|state|gender|
+----+-----+------+
|Anna|   NY|     F|
+----+-----+------+

