**Working with Missing Values**

In [1]:
# import the libraries
import pandas as pd
import numpy as np

# 5x3 frame of standard-normal draws; the row labels deliberately skip 'b', 'd' and 'g'.
df = pd.DataFrame(
    data=np.random.randn(5, 3),
    index=list('acefh'),
    columns=['col1', 'col2', 'col3'],
)

In [2]:
# Re-label the rows with the full a-h sequence. Labels that had no row in the
# original frame ('b', 'd', 'g') come back as rows filled with NaN.
df = df.reindex(index=list('abcdefgh'))
df

Unnamed: 0,col1,col2,col3
a,0.460156,-1.081714,-2.747526
b,,,
c,0.352825,-0.841815,-0.096485
d,,,
e,0.473043,0.101894,0.219936
f,-0.805117,0.135705,-0.163069
g,,,
h,-0.922018,-0.019957,-0.474301


**Check for Missing Values**

In [3]:
# Boolean mask over col1: True wherever the value is missing (isnull is an alias of isna).
df['col1'].isna()

a    False
b     True
c    False
d     True
e    False
f    False
g     True
h    False
Name: col1, dtype: bool

**Cleaning / Filling Missing Data**

**1. Replace "NaN" with "0":**

In [4]:
# Small 3x3 frame of standard-normal draws, rows labelled 'a', 'c', 'e'.
df = pd.DataFrame(
    data=np.random.randn(3, 3),
    index=list('ace'),
    columns=['one', 'two', 'three'],
)

In [5]:
# Keep only the labels a, b, c: 'b' has no source row so it becomes an all-NaN row,
# and the existing 'e' row is dropped because it is not in the new label list.
df = df.reindex(index=list('abc'))
df

Unnamed: 0,one,two,three
a,-0.125501,0.542916,0.244537
b,,,
c,1.950167,-0.532046,-0.521901


In [6]:
# Announce the result, then display a copy of the frame with every NaN replaced by 0.
# The result is not assigned, so df keeps its NaN row for the next example.
print("NaN replaced with '0':")
df.fillna(value=0)

NaN replaced with '0':


Unnamed: 0,one,two,three
a,-0.125501,0.542916,0.244537
b,0.0,0.0,0.0
c,1.950167,-0.532046,-0.521901


**2. Replace "NaN" with Mean:**

In [9]:
# Fill each column's NaN entries with that column's mean (computed over the non-missing values).
df.fillna(value=df.mean(axis=0))

Unnamed: 0,one,two,three
a,-0.125501,0.542916,0.244537
b,0.912333,0.005435,-0.138682
c,1.950167,-0.532046,-0.521901


**3. Drop Missing Values:**

In [10]:
# Rebuild the 5x3 random frame and immediately stretch its index to a-h,
# which inserts all-NaN rows at 'b', 'd' and 'g'.
df = (
    pd.DataFrame(
        data=np.random.randn(5, 3),
        index=list('acefh'),
        columns=['col1', 'col2', 'col3'],
    )
    .reindex(index=list('abcdefgh'))
)
df

Unnamed: 0,col1,col2,col3
a,-0.616931,1.058241,1.799449
b,,,
c,0.1554,0.203395,0.23087
d,,,
e,-0.718872,0.399152,-0.822284
f,0.937365,0.28256,-1.11996
g,,,
h,1.179591,0.452233,0.475778


In [11]:
# Display a copy of the frame without any row that contains a missing value
# (these are dropna's defaults, spelled out). df itself is left unchanged.
df.dropna(axis=0, how='any')

Unnamed: 0,col1,col2,col3
a,-0.616931,1.058241,1.799449
c,0.1554,0.203395,0.23087
e,-0.718872,0.399152,-0.822284
f,0.937365,0.28256,-1.11996
h,1.179591,0.452233,0.475778
