In [1]:
import numpy as np
from pandas import Series, DataFrame
import pandas as pd

In [2]:
#Small string Series with one missing entry (np.nan) at position 2
data = pd.Series(['one', 'two', np.nan, 'four'])

In [3]:
#Display the Series; the missing entry is shown as NaN
data

0     one
1     two
2     NaN
3    four
dtype: object

In [4]:
#Boolean mask: True where the entry is missing (NaN), False otherwise
data.isnull()

0    False
1    False
2     True
3    False
dtype: bool

In [5]:
#Dropping null values: the result keeps the original index labels (0, 1, 3)
data.dropna()

0     one
1     two
3    four
dtype: object

In [6]:
#Sample DataFrame for the dropna examples: row 0 is complete, rows 1 and 2
#are each missing one value, and row 3 is entirely NaN
dframe = pd.DataFrame([
    [1, 2, 3],
    [np.nan, 5, 6],
    [7, np.nan, 9],
    [np.nan] * 3,
])

dframe

Unnamed: 0,0,1,2
0,1.0,2.0,3.0
1,,5.0,6.0
2,7.0,,9.0
3,,,


In [7]:
#dropna() with default arguments keeps only the rows that contain no NaN at all
clean_dframe = dframe.dropna()

In [8]:
#Display the cleaned frame (only the fully populated row 0 remains)
clean_dframe

Unnamed: 0,0,1,2
0,1.0,2.0,3.0


Any row that contains at least one null value is dropped when `dropna()` is called on a DataFrame with its default arguments.

In [9]:
#how='all' drops a row only if every value in that row is NaN
dframe.dropna(how='all')

Unnamed: 0,0,1,2
0,1.0,2.0,3.0
1,,5.0,6.0
2,7.0,,9.0


In [10]:
#axis=1 drops every column that contains a null value.
#Row 3 is all NaN, so every column here has a NaN and only the index remains.
dframe.dropna(axis=1)

0
1
2
3


In [11]:
#Thresholding missing data (e.g. keep only rows with at least 3 data points).
#npn is a short alias for np.nan; rows 0-3 hold 3, 3, 2 and 1 non-null values.
npn = np.nan
dframe2 = pd.DataFrame([
    [1, 2, 3, npn],
    [2, npn, 5, 6],
    [npn, 7, npn, 9],
    [1, npn, npn, npn],
])

In [12]:
#Display the frame used for the thresh examples
dframe2

Unnamed: 0,0,1,2,3
0,1.0,2.0,3.0,
1,2.0,,5.0,6.0
2,,7.0,,9.0
3,1.0,,,


In [13]:
#thresh=2 keeps only rows with at least 2 non-NaN values
#(row 3 has a single data point, so it is dropped)
dframe2.dropna(thresh=2)

Unnamed: 0,0,1,2,3
0,1.0,2.0,3.0,
1,2.0,,5.0,6.0
2,,7.0,,9.0


In [14]:
#thresh=3 keeps only rows with at least 3 non-NaN values (rows 0 and 1)
dframe2.dropna(thresh=3)

Unnamed: 0,0,1,2,3
0,1.0,2.0,3.0,
1,2.0,,5.0,6.0


In [15]:
#The dropna calls above returned new frames; dframe2 itself is unchanged
dframe2

Unnamed: 0,0,1,2,3
0,1.0,2.0,3.0,
1,2.0,,5.0,6.0
2,,7.0,,9.0
3,1.0,,,


In [16]:
#Fill null values with a constant instead of dropping them:
#every NaN in the displayed result is replaced by 1
dframe2.fillna(1)

Unnamed: 0,0,1,2,3
0,1.0,2.0,3.0,1.0
1,2.0,1.0,5.0,6.0
2,1.0,7.0,1.0,9.0
3,1.0,1.0,1.0,1.0


In [17]:
#fillna returned a new frame; dframe2 itself still contains its NaN values
dframe2

Unnamed: 0,0,1,2,3
0,1.0,2.0,3.0,
1,2.0,,5.0,6.0
2,,7.0,,9.0
3,1.0,,,


In [18]:
#Fill NaN values with a different value per COLUMN:
#the dict maps column label -> fill value (column 0 -> 0, column 1 -> 1, ...)
dframe2.fillna({0:0,1:1,2:2,3:3})

Unnamed: 0,0,1,2,3
0,1.0,2.0,3.0,3.0
1,2.0,1.0,5.0,6.0
2,0.0,7.0,2.0,9.0
3,1.0,1.0,2.0,3.0


In [19]:
#Option 1: rebind the name to the filled copy that fillna returns
#dframe2 = dframe2.fillna(0)
#Option 2: modify the existing object IN PLACE (the cell displays no result)
dframe2.fillna(0,inplace=True)

In [20]:
#After the in-place fill, dframe2 itself now holds 0 wherever it had NaN
dframe2

Unnamed: 0,0,1,2,3
0,1.0,2.0,3.0,0.0
1,2.0,0.0,5.0,6.0
2,0.0,7.0,0.0,9.0
3,1.0,0.0,0.0,0.0
