In [1]:
import findspark

# Keep this call ahead of the pyspark import below: findspark's setup is what
# is expected to make the pyspark package importable in this kernel.
findspark.init()

from pyspark.sql import SparkSession

# Single session handle used by every later cell in the notebook.
spark = (
    SparkSession
    .builder
    .getOrCreate()
)

In [3]:
# Three sample rows. Row 1 has no Dept and row 3 has no Gender, so the frame
# contains one null in each of those two string columns.
data = [
    (1, "Srikanth", "Male", 10000, None),
    (2, "Akshainie", "Female", 20000, "IT"),
    (3, "Manvith", None, 50000, "HR"),
]

# Column names only; no column types are supplied here.
schema = ["ID", "Name", "Gender", "Salary", "Dept"]

df = spark.createDataFrame(data, schema)

# Print the rows first, then the resulting column types.
df.show()

df.printSchema()

+---+---------+------+------+----+
| ID|     Name|Gender|Salary|Dept|
+---+---------+------+------+----+
|  1| Srikanth|  Male| 10000|null|
|  2|Akshainie|Female| 20000|  IT|
|  3|  Manvith|  null| 50000|  HR|
+---+---------+------+------+----+

root
 |-- ID: long (nullable = true)
 |-- Name: string (nullable = true)
 |-- Gender: string (nullable = true)
 |-- Salary: long (nullable = true)
 |-- Dept: string (nullable = true)



In [4]:
# Print the built-in documentation for DataFrame.fillna (its `value` and
# `subset` parameters) before trying it out in the cells that follow.
help(df.fillna)

Help on method fillna in module pyspark.sql.dataframe:

fillna(value, subset=None) method of pyspark.sql.dataframe.DataFrame instance
    Replace null values, alias for ``na.fill()``.
    :func:`DataFrame.fillna` and :func:`DataFrameNaFunctions.fill` are aliases of each other.
    
    .. versionadded:: 1.3.1
    
    Parameters
    ----------
    value : int, float, string, bool or dict
        Value to replace null values with.
        If the value is a dict, then `subset` is ignored and `value` must be a mapping
        from column name (string) to replacement value. The replacement value must be
        an int, float, boolean, or string.
    subset : str, tuple or list, optional
        optional list of column names to consider.
        Columns specified in subset that do not have matching data type are ignored.
        For example, if `value` is a string, and subset contains a non-string column,
        then the non-string column is simply ignored.
    
    Examples
    --------
 

In [5]:
# No subset given: both null cells (Gender and Dept) receive the replacement.
df.fillna(value='Unknown').show()

+---+---------+-------+------+-------+
| ID|     Name| Gender|Salary|   Dept|
+---+---------+-------+------+-------+
|  1| Srikanth|   Male| 10000|Unknown|
|  2|Akshainie| Female| 20000|     IT|
|  3|  Manvith|Unknown| 50000|     HR|
+---+---------+-------+------+-------+



In [6]:
# Restrict the fill to Gender; the null in Dept is left as it is.
df.fillna(value='Unknown', subset=['Gender']).show()

+---+---------+-------+------+----+
| ID|     Name| Gender|Salary|Dept|
+---+---------+-------+------+----+
|  1| Srikanth|   Male| 10000|null|
|  2|Akshainie| Female| 20000|  IT|
|  3|  Manvith|Unknown| 50000|  HR|
+---+---------+-------+------+----+



In [7]:
# Name both string columns explicitly; each of their nulls is replaced.
df.fillna(value='Unknown', subset=['Gender', 'Dept']).show()

+---+---------+-------+------+-------+
| ID|     Name| Gender|Salary|   Dept|
+---+---------+-------+------+-------+
|  1| Srikanth|   Male| 10000|Unknown|
|  2|Akshainie| Female| 20000|     IT|
|  3|  Manvith|Unknown| 50000|     HR|
+---+---------+-------+------+-------+



In [9]:
# Same fill through the `na` accessor; fillna is documented as an alias of na.fill.
df.na.fill(value='Unknown').show()

+---+---------+-------+------+-------+
| ID|     Name| Gender|Salary|   Dept|
+---+---------+-------+------+-------+
|  1| Srikanth|   Male| 10000|Unknown|
|  2|Akshainie| Female| 20000|     IT|
|  3|  Manvith|Unknown| 50000|     HR|
+---+---------+-------+------+-------+



In [10]:
df.na.fill('Unknow',['Gender']).show()

+---+---------+------+------+----+
| ID|     Name|Gender|Salary|Dept|
+---+---------+------+------+----+
|  1| Srikanth|  Male| 10000|null|
|  2|Akshainie|Female| 20000|  IT|
|  3|  Manvith|Unknow| 50000|  HR|
+---+---------+------+------+----+

