In [None]:
import pandas as pd
import numpy as np

# pandas has 4 functions for checking null values: isnull, isna, notnull and notna

In [None]:
pd.isnull(np.nan) # isnull is an alias of isna; the two are interchangeable

In [None]:
# None is treated as a missing value, just like NaN.
pd.isnull(None)

In [None]:
# isna performs the same missing-value check as isnull.
pd.isna(np.nan)

In [None]:
# None is reported as missing by isna as well.
pd.isna(None)

Checking for Not Null Values

In [None]:
# notnull is the boolean inverse of isnull: None is missing, so this is False.
pd.notnull(None)

In [None]:
# notna is the inverse of isna: NaN is missing, so the result is False.
pd.notna(np.nan)

False

In [None]:
# An ordinary number is not missing, so the result is True.
pd.notnull(3)

True

In [None]:
# On a Series the check runs element-wise and returns a boolean Series
# that is True only where the value is NaN.
pd.isnull(pd.Series([11,21,np.nan,33]))

0    False
1    False
2     True
3    False
dtype: bool

In [None]:
# Element-wise check on a Series: True wherever a value is present,
# False at the NaN position.
pd.notnull(pd.Series([11,21,np.nan,33]))

0     True
1     True
2    False
3     True
dtype: bool

Counting the Null Values

In [None]:
# Sample Series used by the following cells: four numbers and three NaNs,
# one of which sits at the very end.
n = pd.Series(data=[21, np.nan, 98, np.nan, 89, 99, np.nan])

In [None]:
# Boolean mask of n: True at the NaN positions (1, 3 and 6).
pd.isnull(n)

0    False
1     True
2    False
3     True
4    False
5    False
6     True
dtype: bool

In [None]:
# Boolean mask of n: True at the positions that hold a value.
pd.notnull(n)

0     True
1    False
2     True
3    False
4     True
5     True
6    False
dtype: bool

In [None]:
pd.isnull(n).sum() # Count null values: every True in the mask adds 1 to the sum

3

In [None]:
# Count the non-null values by summing the notnull mask.
pd.notnull(n).sum()

4

In [None]:
n[pd.notnull(n)] # Boolean indexing: selects only the non-null values (n itself is not modified)

0    21.0
2    98.0
4    89.0
5    99.0
dtype: float64

In [None]:
n[pd.isnull(n)] # Boolean indexing: selects only the null entries

1   NaN
3   NaN
6   NaN
dtype: float64

In [None]:
n.isnull()  # The same check is also available as a Series method

0    False
1     True
2    False
3     True
4    False
5    False
6     True
dtype: bool

In [None]:
# Method form of pd.notnull(n).
n.notnull()

0     True
1    False
2     True
3    False
4     True
5     True
6    False
dtype: bool

Dropping Null Values

In [None]:
# Returns a new Series without the NaN entries; the original index labels
# are kept and n itself is left unchanged.
n.dropna()

0    21.0
2    98.0
4    89.0
5    99.0
dtype: float64

Everything we have done above works the same way for DataFrames.

# Handling Null Values in DataFrames

In [None]:
# Demo frame with missing values scattered over columns A-C; Column D is complete.
# The data is written row by row so the literal mirrors the displayed table.
df = pd.DataFrame(
    data=[
        [1, 2, np.nan, 5],
        [np.nan, 8, 9, 8],
        [30, 31, 32, 34],
        [np.nan, np.nan, 100, 110],
    ],
    columns=['Column A', 'Column B', 'Column C', 'Column D'],
)

In [None]:
# Display the frame to see where the missing values are.
df

Unnamed: 0,Column A,Column B,Column C,Column D
0,1.0,2.0,,5
1,,8.0,9.0,8
2,30.0,31.0,32.0,34
3,,,100.0,110


In [None]:
# The "Non-Null Count" column reports how many values are present in each column.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 4 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   Column A  2 non-null      float64
 1   Column B  3 non-null      float64
 2   Column C  3 non-null      float64
 3   Column D  4 non-null      int64  
dtypes: float64(3), int64(1)
memory usage: 256.0 bytes


In [None]:
df.isnull() # Element-wise null mask; pd.isnull(df) gives the same result

Unnamed: 0,Column A,Column B,Column C,Column D
0,False,False,True,False
1,True,False,False,False
2,False,False,False,False
3,True,True,False,False


In [None]:
# sum() adds up each column of the mask, giving the number of nulls per column.
df.isnull().sum()

Column A    2
Column B    1
Column C    1
Column D    0
dtype: int64

In [None]:
# Default behaviour: drop every row that contains at least one null value.
# Only row 2 is complete, so it is the only one left.
df.dropna()

Unnamed: 0,Column A,Column B,Column C,Column D
2,30.0,31.0,32.0,34


In [None]:
df.dropna(axis=1) # Drops every column that contains at least one null value (only Column D survives)

Unnamed: 0,Column D
0,5
1,8
2,34
3,110


In [None]:
df.dropna(how='all') # Drops a row only if all of its values are null; no such row here, so nothing is removed

Unnamed: 0,Column A,Column B,Column C,Column D
0,1.0,2.0,,5
1,,8.0,9.0,8
2,30.0,31.0,32.0,34
3,,,100.0,110


In [None]:
df.dropna(how='any') # Drops a row if any of its values is null (same as the default dropna())

Unnamed: 0,Column A,Column B,Column C,Column D
2,30.0,31.0,32.0,34


In [None]:
df.dropna(thresh=3) # Keeps only rows with at least 3 non-null values; row 3 has just 2, so it is dropped

Unnamed: 0,Column A,Column B,Column C,Column D
0,1.0,2.0,,5
1,,8.0,9.0,8
2,30.0,31.0,32.0,34


# Filling Missing Values

In [None]:
# Recap of the Series: the NaNs are still there because none of the
# earlier cells modified n in place.
n

0    21.0
1     NaN
2    98.0
3     NaN
4    89.0
5    99.0
6     NaN
dtype: float64

In [None]:
# Replace every NaN with a constant value.
n.fillna(0)

0    21.0
1     0.0
2    98.0
3     0.0
4    89.0
5    99.0
6     0.0
dtype: float64

In [None]:
# Replace every NaN with the mean of the values that are present
# (mean() skips NaN, so this is (21 + 98 + 89 + 99) / 4 = 76.75).
n.fillna(n.mean())

0    21.00
1    76.75
2    98.00
3    76.75
4    89.00
5    99.00
6    76.75
dtype: float64

In [None]:
# Forward fill: each NaN takes the last valid value before it.
# Series.ffill() replaces fillna(method='ffill'), whose `method` keyword is
# deprecated since pandas 2.1.
n.ffill()

0    21.0
1    21.0
2    98.0
3    98.0
4    89.0
5    99.0
6    99.0
dtype: float64

In [None]:
# Backward fill: each NaN takes the next valid value after it. A trailing NaN
# has nothing after it to copy from, so it stays NaN.
# Series.bfill() replaces fillna(method='bfill'), whose `method` keyword is
# deprecated since pandas 2.1.
n.bfill()

0    21.0
1    98.0
2    98.0
3    89.0
4    89.0
5    99.0
6     NaN
dtype: float64

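Note: forward fill cannot fill NaN values at the start of the data, and backward fill cannot fill NaN values at the end, because there is no neighbouring value to copy from. Those positions stay NaN (see index 6 in the backward-fill output above), so check for leading or trailing nulls before relying on these methods.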

In [None]:
# Forward fill down each column (vertical is the default direction).
# A NaN in the first row has no value above it and therefore stays NaN.
# DataFrame.ffill() replaces fillna(method='ffill'), whose `method` keyword is
# deprecated since pandas 2.1.
df.ffill()

Unnamed: 0,Column A,Column B,Column C,Column D
0,1.0,2.0,,5
1,1.0,8.0,9.0,8
2,30.0,31.0,32.0,34
3,30.0,31.0,100.0,110


In [None]:
# Forward fill across each row (horizontally, left to right).
# A NaN in the first column has no value to its left and therefore stays NaN.
# DataFrame.ffill(axis=1) replaces fillna(method='ffill', axis=1), whose
# `method` keyword is deprecated since pandas 2.1.
df.ffill(axis=1)

Unnamed: 0,Column A,Column B,Column C,Column D
0,1.0,2.0,2.0,5.0
1,,8.0,9.0,8.0
2,30.0,31.0,32.0,34.0
3,,,100.0,110.0
