## Missing Value

In [5]:
import numpy as np
from pandas import Series, DataFrame
import pandas as pd

In [6]:
# Create a Series of strings in which the entry at index 2 is missing (NaN)
data = pd.Series(['one', 'four', np.nan, 'two'])

In [7]:
# Display the Series; the missing entry is shown as NaN
data

0     one
1    four
2     NaN
3     two
dtype: object

In [8]:
# Boolean mask that is True wherever a value is missing
data.isnull()

0    False
1    False
2     True
3    False
dtype: bool

In [9]:
# Drop the missing entry; the remaining values keep their original index labels
data.dropna()

0     one
1    four
3     two
dtype: object

In [14]:
# Missing values in a DataFrame: start from a small 4x3 frame in which
# some rows are partly NaN and the last row is entirely NaN.
df = pd.DataFrame(
    [
        [1, 2, 3],
        [np.nan, 5, 6],
        [7, np.nan, 9],
        [np.nan, np.nan, np.nan],
    ]
)
df

Unnamed: 0,0,1,2
0,1.0,2.0,3.0
1,,5.0,6.0
2,7.0,,9.0
3,,,


In [15]:
# By default (how='any') dropna removes every row that contains at least one null value.
# With how='all', only rows in which every value is null are removed.
df.dropna(how = 'all')

Unnamed: 0,0,1,2
0,1.0,2.0,3.0
1,,5.0,6.0
2,7.0,,9.0


In [17]:
# axis=1 drops columns instead of rows; every column of df contains at least
# one NaN, so no columns remain and only the index is left
df.dropna(axis =1)

0
1
2
3


In [19]:
# Next: use a threshold to keep only rows that have at least n non-null values.
# Build a new DataFrame called df2 whose rows have different numbers of NaNs.
df2 = pd.DataFrame(
    [
        [1, 2, 3, np.nan],
        [2, np.nan, 5, 6],
        [np.nan, 7, np.nan, 8],
        [9, np.nan, np.nan, np.nan],
    ]
)
df2

Unnamed: 0,0,1,2,3
0,1.0,2.0,3.0,
1,2.0,,5.0,6.0
2,,7.0,,8.0
3,9.0,,,


In [20]:
# thresh counts non-null values: keep only rows with at least 2 non-null values
# (row 3 has a single non-null value, so it is dropped)
df2.dropna(thresh=2)

Unnamed: 0,0,1,2,3
0,1.0,2.0,3.0,
1,2.0,,5.0,6.0
2,,7.0,,8.0


In [21]:
# With thresh=3 a row needs at least 3 non-null values to be kept
df2.dropna(thresh=3)

Unnamed: 0,0,1,2,3
0,1.0,2.0,3.0,
1,2.0,,5.0,6.0


In [22]:
# Display df2 again: it still contains all four rows
df2

Unnamed: 0,0,1,2,3
0,1.0,2.0,3.0,
1,2.0,,5.0,6.0
2,,7.0,,8.0
3,9.0,,,


In [24]:
# Replace every null value with 1
df2.fillna(1)

Unnamed: 0,0,1,2,3
0,1.0,2.0,3.0,1.0
1,2.0,1.0,5.0,6.0
2,1.0,7.0,1.0,8.0
3,9.0,1.0,1.0,1.0


In [25]:
# Fill nulls with a different value per column by passing a {column label: fill value} dictionary
df2.fillna({0:0, 1:1, 2:2, 3:3})

Unnamed: 0,0,1,2,3
0,1.0,2.0,3.0,3.0
1,2.0,1.0,5.0,6.0
2,0.0,7.0,2.0,8.0
3,9.0,1.0,2.0,3.0


In [26]:
# Display df2 again: its null values are still present
df2

Unnamed: 0,0,1,2,3
0,1.0,2.0,3.0,
1,2.0,,5.0,6.0
2,,7.0,,8.0
3,9.0,,,


#### Notice: Although we dropped and filled missing values above, those operations returned new objects and did not permanently change our original dataframe. To keep the changes, either assign the result back to a variable or pass the `inplace=True` parameter.

In [29]:
# Option 1: assign the result back to the same name, like the following
# df2 = df2.fillna(0)

# Option 2: pass inplace=True so that df2 itself is modified
df2.fillna(0, inplace = True)
df2

Unnamed: 0,0,1,2,3
0,1.0,2.0,3.0,0.0
1,2.0,0.0,5.0,6.0
2,0.0,7.0,0.0,8.0
3,9.0,0.0,0.0,0.0
