# Working with Missing Data in Pandas

**Checking Missing Values in Pandas**

Pandas provides two important functions to help in detecting whether a value is NaN:

* Using isnull()

* Using notnull()

In [6]:
#using isnull()
import pandas as pd
import numpy as np

# Three score columns; np.nan marks the entries that are missing.
dic = {
    'First Score': [100, 90, np.nan, 99],
    'Second Score': [80, np.nan, 90, 85],
    'Third Score': [82, 79, 88, 85],
}
df = pd.DataFrame(data=dic)

# isnull() gives a boolean frame of the same shape: True wherever a value is missing.
missing_val = df.isnull()
missing_val

Unnamed: 0,First Score,Second Score,Third Score
0,False,False,False
1,False,True,False
2,True,False,False
3,False,False,False


In [5]:
# notnull() is the complement of isnull(): True marks a value that is present.

dic = {
    'First Score': [100, 90, np.nan, 99],
    'Second Score': [80, np.nan, 90, 85],
    'Third Score': [82, 79, 88, 85],
}
df = pd.DataFrame(data=dic)

non_miss_val = df.notnull()
non_miss_val

Unnamed: 0,First Score,Second Score,Third Score
0,True,True,True
1,True,False,True
2,False,True,True
3,True,True,True


# Filling missing values in Pandas

* Using fillna()

* Using ffill()

* Using bfill()

* Using replace()

In [9]:
#using fillna() to fill missing values with Zero

import pandas as pd
import numpy as np

dic={'First Score':[100,90,np.nan,99],
     'Second Score':[80,np.nan,90,85],
     'Third Score':[82,79,88,85]}
# Build the frame from `dic` in this cell so it does not rely on a `df`
# left over from an earlier cell.
df=pd.DataFrame(dic)
# fillna(0) returns a new frame with every NaN replaced by 0; `df` itself is unchanged.
df.fillna(0)

Unnamed: 0,First Score,Second Score,Third Score
0,100.0,80.0,82
1,90.0,0.0,79
2,0.0,90.0,88
3,99.0,85.0,85


In [11]:
# Forward fill: each NaN takes the last non-missing value above it in the same column.
df.ffill(axis=0)

Unnamed: 0,First Score,Second Score,Third Score
0,100.0,80.0,82
1,90.0,80.0,79
2,90.0,90.0,88
3,99.0,85.0,85


In [12]:
# Backward fill: each NaN takes the next non-missing value below it in the same column.
df.bfill(axis=0)

Unnamed: 0,First Score,Second Score,Third Score
0,100.0,80.0,82
1,90.0,90.0,79
2,99.0,90.0,88
3,99.0,85.0,85


In [18]:
# Fill NaN values in one specific column only

dic={'Name':['Neema','Hari','Sushmita'],
     'Gender':['F',np.nan,'F']}
df=pd.DataFrame(dic)
# A dict passed to fillna() maps column name -> fill value, so only 'Gender' is filled.
# The returned frame is rebound to `df` rather than mutating with inplace=True.
df=df.fillna({'Gender':'No Gender'})
df

Unnamed: 0,Name,Gender
0,Neema,F
1,Hari,No Gender
2,Sushmita,F


In [25]:
# Using replace() to swap every NaN for a placeholder string

dic={'Name':['Neema','Hari',np.nan],
     'Gender':['F',np.nan,'F']}
df=pd.DataFrame(dic)
# replace() matches NaN in every column; the returned frame is rebound to `df`
# rather than mutating with inplace=True.
df=df.replace(to_replace=np.nan,value='Replaced')
df

Unnamed: 0,Name,Gender
0,Neema,F
1,Hari,Replaced
2,Replaced,F


# Dropping missing values in Pandas

In [31]:
# Using dropna() to remove rows that contain missing values
dic={'Name':['Neema','Hari',np.nan,np.nan],
     'Gender':['F',np.nan,'F',np.nan],
     'Age':[21,25,22,26]}
df=pd.DataFrame(dic)
# With default arguments dropna() drops every row holding at least one NaN;
# the returned frame is rebound to `df` rather than mutating with inplace=True.
df=df.dropna()
df

Unnamed: 0,Name,Gender,Age
0,Neema,F,21


In [32]:
# Using dropna() to remove columns that contain missing values
dic={'Name':['Neema','Hari',np.nan,np.nan],
     'Gender':['F',np.nan,'F',np.nan],
     'Age':[21,25,22,26]}
df=pd.DataFrame(dic)
# axis=1 makes dropna() work on columns: any column with at least one NaN is removed.
# The returned frame is rebound to `df` rather than mutating with inplace=True.
df=df.dropna(axis=1)
df

Unnamed: 0,Age
0,21
1,25
2,22
3,26
