[jupyter notebook](pandas-dataframe-null.ipynb)

# Find, remove or replace null values in a dataframe

In [1]:
import pandas as pd
import numpy as np

# Sample frame with missing values in every column:
# None in the string column 'A', NaN in the numeric columns 'B' and 'C'.
df = pd.DataFrame(
    {
        'A': ['a', 'a', 'b', 'a', None, 'b', 'a', 'b'],
        'B': [1, 4, np.nan, 5, 5, 6, 3, 1],
        'C': [9, np.nan, 3, 5, 1, np.nan, 7, 2],
    }
)
display(df)

Unnamed: 0,A,B,C
0,a,1.0,9.0
1,a,4.0,
2,b,,3.0
3,a,5.0,5.0
4,,5.0,1.0
5,b,6.0,
6,a,3.0,7.0
7,b,1.0,2.0


### pandas.DataFrame.isnull

In [2]:
# Boolean mask with the same shape as df: True wherever a value is missing
display(df.isnull())

Unnamed: 0,A,B,C
0,False,False,False
1,False,False,True
2,False,True,False
3,False,False,False
4,True,False,False
5,False,False,True
6,False,False,False
7,False,False,False


In [3]:
# True if at least one cell of df is null.
# Note: the builtin any() applied to a DataFrame iterates over its column
# labels, not its values, so the boolean mask itself is reduced instead.
print( df.isnull().values.any() )

True


In [4]:
# True if column 'A' contains at least one null value
print(df['A'].isnull().any())

True


In [5]:
# True only if every cell of df is null.
# Note: the builtin all() applied to a DataFrame iterates over its column
# labels (non-empty strings, hence always truthy), so the boolean mask
# itself is reduced instead.
print( df.isnull().values.all() )

False


In [6]:
# Keep only the rows in which column 'A' holds a value
display(df.loc[df['A'].notnull()])

Unnamed: 0,A,B,C
0,a,1.0,9.0
1,a,4.0,
2,b,,3.0
3,a,5.0,5.0
5,b,6.0,
6,a,3.0,7.0
7,b,1.0,2.0


### pandas.isnull

In [7]:
# pd.isnull also works on a single scalar: here the entry of 'A' at position 4 (None)
print(pd.isnull(df['A'].iloc[4]))

True


### pandas.DataFrame.dropna

In [8]:
# Drop every row that has a null in at least one column
display(df.dropna(axis=0, how='any'))

Unnamed: 0,A,B,C
0,a,1.0,9.0
3,a,5.0,5.0
6,a,3.0,7.0
7,b,1.0,2.0


### pandas.DataFrame.fillna

In [9]:
# Replace every null with the same constant (this also applies to the string column 'A')

display(df.fillna(value=0))

Unnamed: 0,A,B,C
0,a,1.0,9.0
1,a,4.0,0.0
2,b,0.0,3.0
3,a,5.0,5.0
4,0,5.0,1.0
5,b,6.0,0.0
6,a,3.0,7.0
7,b,1.0,2.0


In [10]:
# Forward fill: propagate the last non-null value down each column.
# DataFrame.ffill() is used instead of fillna( method = 'ffill' ), whose
# `method` argument is deprecated in recent pandas releases.

display( df.ffill() )

Unnamed: 0,A,B,C
0,a,1.0,9.0
1,a,4.0,9.0
2,b,4.0,3.0
3,a,5.0,5.0
4,a,5.0,1.0
5,b,6.0,1.0
6,a,3.0,7.0
7,b,1.0,2.0


In [11]:
# Fill each column with its own strategy.
# .ffill() / .bfill() are used instead of fillna( method = ... ), whose
# `method` argument is deprecated in recent pandas releases.

df1 = df.copy()
df1[ 'A' ] = df1[ 'A' ].fillna( 'c' )   # constant replacement value
df1[ 'B' ] = df1[ 'B' ].ffill()         # previous non-null value
df1[ 'C' ] = df1[ 'C' ].bfill()         # next non-null value
display( df1 )

Unnamed: 0,A,B,C
0,a,1.0,9.0
1,a,4.0,3.0
2,b,4.0,3.0
3,a,5.0,5.0
4,c,5.0,1.0
5,b,6.0,7.0
6,a,3.0,7.0
7,b,1.0,2.0


### pandas.DataFrame.interpolate

In [12]:
# Fill the null values of column 'C' by linear interpolation;
# columns 'A' and 'B' are left as they are

df1 = df.assign(C=df['C'].interpolate(method='linear'))
display(df1)

Unnamed: 0,A,B,C
0,a,1.0,9.0
1,a,4.0,6.0
2,b,,3.0
3,a,5.0,5.0
4,,5.0,1.0
5,b,6.0,4.0
6,a,3.0,7.0
7,b,1.0,2.0


In [13]:
# Fill null values by interpolating against the numerical values of the index

# Rows with a null 'A' are removed first, which leaves a gap in the index
# (label 4 is missing). The explicit .copy() makes df1 an independent frame,
# so the column assignment below does not write into a slice of df
# (avoids SettingWithCopyWarning).
df1 = df.loc[ df['A'].notnull() ].copy()
df1[ 'C' ] = df1[ 'C' ].interpolate( method = 'index' )
display( df1 )

Unnamed: 0,A,B,C
0,a,1.0,9.0
1,a,4.0,6.0
2,b,,3.0
3,a,5.0,5.0
5,b,6.0,6.333333
6,a,3.0,7.0
7,b,1.0,2.0
