### Handling Missing Data in Pandas  
**Author:** Taskeen Hussain  

Pandas provides powerful tools to detect, handle, and impute missing data in datasets. Managing missing values effectively ensures cleaner and more reliable data for analysis.

In [2]:
# import the pandas library
import pandas as pd
import numpy as np

# Start from five fully populated rows of random values, then reindex onto
# eight labels so that the added rows 'b', 'd' and 'g' come back as all-NaN.
df = pd.DataFrame(
    np.random.randn(5, 3),
    index=list('acefh'),
    columns=['one', 'two', 'three'],
).reindex(list('abcdefgh'))

print(df)

        one       two     three
a  0.485127 -0.186803 -1.017166
b       NaN       NaN       NaN
c  0.127270  0.445707  0.741581
d       NaN       NaN       NaN
e -1.005846  1.580626 -1.275264
f -0.920170  0.309405  1.804177
g       NaN       NaN       NaN
h -0.440892 -0.706993 -0.128412


In [3]:
# ## Check for Missing Values


import pandas as pd
import numpy as np
 
# Random 5x3 frame, reindexed so that labels 'b', 'd' and 'g' hold only NaN.
df = pd.DataFrame(
    np.random.randn(5, 3),
    index=['a', 'c', 'e', 'f', 'h'],
    columns=['one', 'two', 'three'],
).reindex(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h'])

# isnull() yields a boolean Series: True wherever column 'one' is missing.
print(df['one'].isnull())




a    False
b     True
c    False
d     True
e    False
f    False
g     True
h    False
Name: one, dtype: bool


In [4]:
# Same setup as before: five populated rows plus three all-NaN rows
# ('b', 'd', 'g') introduced by the reindex.
df = pd.DataFrame(
    np.random.randn(5, 3),
    index=list('acefh'),
    columns=['one', 'two', 'three'],
).reindex(list('abcdefgh'))

# notnull() is the complement of isnull(): True wherever a value is present.
print(df['one'].notnull())


a     True
b    False
c     True
d    False
e     True
f     True
g    False
h     True
Name: one, dtype: bool


In [5]:
# ## Calculations with Missing Data

import pandas as pd
import numpy as np

# Five random rows plus three all-NaN rows ('b', 'd', 'g') added by reindex.
df = pd.DataFrame(
    np.random.randn(5, 3),
    index=['a', 'c', 'e', 'f', 'h'],
    columns=['one', 'two', 'three'],
).reindex(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h'])

# sum() skips missing values, so the result is a number rather than NaN.
print(df['one'].sum())


1.7323413724031005


In [6]:
# A frame built from labels only holds nothing but NaN; summing such a
# column gives 0.
df = pd.DataFrame(
    columns=['one', 'two'],
    index=[0, 1, 2, 3, 4, 5],
)
print(df['one'].sum())

0


In [7]:
# ## Replace NaN with a Scalar Value

import pandas as pd
import numpy as np

# Three random rows labelled 'a', 'c', 'e'; reindexing to 'a', 'b', 'c'
# leaves 'b' as an all-NaN row in the middle.
df = pd.DataFrame(
    np.random.randn(3, 3),
    index=list('ace'),
    columns=['one', 'two', 'three'],
).reindex(list('abc'))

# Show the frame before and after replacing every NaN with the scalar 0.
print(df)
print("NaN replaced with '0':")
print(df.fillna(0))


        one       two     three
a  1.318009  0.687034  0.592998
b       NaN       NaN       NaN
c  1.838192  0.321317  0.429000
NaN replaced with '0':
        one       two     three
a  1.318009  0.687034  0.592998
b  0.000000  0.000000  0.000000
c  1.838192  0.321317  0.429000


In [9]:
# ## Fill NA Forward and Backward

# Five random rows plus three all-NaN rows ('b', 'd', 'g') added by reindex.
df = pd.DataFrame(
    np.random.randn(5, 3),
    index=['a', 'c', 'e', 'f', 'h'],
    columns=['one', 'two', 'three'],
).reindex(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h'])

# Forward fill: every missing row takes the values of the closest row above it.
# DataFrame.ffill() is used instead of fillna(method='pad') because the
# `method` keyword of fillna is deprecated and emits a FutureWarning.
df_ffilled = df.ffill()
print(df_ffilled)


        one       two     three
a  1.640716  0.759067 -2.210660
b  1.640716  0.759067 -2.210660
c  1.627572  1.880448 -0.189955
d  1.627572  1.880448 -0.189955
e  0.420227  0.173211  0.080387
f  1.627205 -2.547934 -1.169289
g  1.627205 -2.547934 -1.169289
h  1.855382  1.042929  1.001885


  print (df.fillna(method='pad'))


In [10]:
# Five random rows plus three all-NaN rows ('b', 'd', 'g') added by reindex.
df = pd.DataFrame(
    np.random.randn(5, 3),
    index=['a', 'c', 'e', 'f', 'h'],
    columns=['one', 'two', 'three'],
).reindex(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h'])

# Backward fill: every missing row takes the values of the closest row below it.
# DataFrame.bfill() is used instead of fillna(method='backfill') because the
# `method` keyword of fillna is deprecated and emits a FutureWarning.
df_bfilled = df.bfill()
print(df_bfilled)

        one       two     three
a -0.754309  0.531128 -0.404934
b  1.039203 -0.026742  0.394118
c  1.039203 -0.026742  0.394118
d -1.099509  0.342296 -0.162433
e -1.099509  0.342296 -0.162433
f  1.144312  0.725377 -1.243728
g  0.100177 -0.264645 -1.197288
h  0.100177 -0.264645 -1.197288


  print (df.fillna(method='backfill'))


In [11]:
# ## Drop Missing Values
import pandas as pd
import numpy as np

# Five random rows plus three all-NaN rows ('b', 'd', 'g') added by reindex.
df = pd.DataFrame(
    np.random.randn(5, 3),
    index=list('acefh'),
    columns=['one', 'two', 'three'],
).reindex(list('abcdefgh'))

# dropna() works row-wise by default: any row containing NaN is removed,
# leaving only the five originally populated rows.
print(df.dropna())


        one       two     three
a  1.743495  0.351057 -1.013221
c -0.482599 -0.763688  1.100917
e  0.581027  0.640558  0.503216
f  0.828338 -1.177771 -0.350012
h -1.509471  0.363585 -0.342226


In [12]:
# Five random rows plus three all-NaN rows ('b', 'd', 'g') added by reindex.
df = pd.DataFrame(
    np.random.randn(5, 3),
    index=['a', 'c', 'e', 'f', 'h'],
    columns=['one', 'two', 'three'],
).reindex(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h'])

# With axis=1 the check is column-wise. Every column has NaN in rows
# 'b', 'd' and 'g', so all columns are dropped and the frame is empty.
print(df.dropna(axis=1))

Empty DataFrame
Columns: []
Index: [a, b, c, d, e, f, g, h]


In [13]:
df = pd.DataFrame({
    'one': [10, 20, 30, 40, 50, 2000],
    'two': [1000, 0, 30, 40, 50, 60],
})

# A dict passed to replace() maps old value -> new value across the whole
# frame: 1000 becomes 10 and 2000 becomes 60.
print(df.replace({1000: 10, 2000: 60}))

   one  two
0   10   10
1   20    0
2   30   30
3   40   40
4   50   50
5   60   60


In [14]:
# Two integer columns, each containing one outsized value (2000 and 1000).
df = pd.DataFrame(
    {
        'one': [10, 20, 30, 40, 50, 2000],
        'two': [1000, 0, 30, 40, 50, 60],
    }
)
# replace() returns a new frame with 1000 -> 10 and 2000 -> 60; df itself
# is left unchanged.
print(df.replace({1000: 10, 2000: 60}))


   one  two
0   10   10
1   20    0
2   30   30
3   40   40
4   50   50
5   60   60
