In [1]:
### Create a DataFrame and Reindex
import pandas as pd
import numpy as np

# Build a 5x3 frame of standard-normal draws on a sparse letter index.
observed_labels = list('acefh')
df = pd.DataFrame(
    data=np.random.randn(len(observed_labels), 3),
    index=observed_labels,
    columns=['one', 'two', 'three'],
)

# Conform the frame to the full a..h index; labels that were not present
# before ('b', 'd', 'g') come back as all-NaN rows.
full_labels = list('abcdefgh')
df = df.reindex(index=full_labels)

print(df)

        one       two     three
a  0.165155  0.932365 -0.042613
b       NaN       NaN       NaN
c  0.036699  0.492256  2.497011
d       NaN       NaN       NaN
e -1.962122 -0.051650  1.399697
f -0.830838  0.157540  0.278874
g       NaN       NaN       NaN
h  1.105890 -1.128467  1.486829


In [2]:
### Check for Missing Values
### Example 1: Check for Null Values
import pandas as pd
import numpy as np

# Random 5x3 frame, then widened to the a..h index so that rows
# 'b', 'd' and 'g' consist entirely of NaN.
column_names = ['one', 'two', 'three']
df = pd.DataFrame(np.random.randn(5, 3),
                  index=list('acefh'),
                  columns=column_names)
df = df.reindex(list('abcdefgh'))

# Boolean mask over column 'one': True exactly where the value is missing.
missing_mask = df['one'].isnull()
print(missing_mask)


a    False
b     True
c    False
d     True
e    False
f    False
g     True
h    False
Name: one, dtype: bool


In [3]:
### Example 2: Check for Non-Null Values
import pandas as pd
import numpy as np

# Same setup as the null check: five populated rows, then a reindex
# that introduces all-NaN rows at 'b', 'd' and 'g'.
populated_rows = ['a', 'c', 'e', 'f', 'h']
all_rows = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']

values = np.random.randn(5, 3)
df = pd.DataFrame(values, index=populated_rows, columns=['one', 'two', 'three'])
df = df.reindex(all_rows)

# Boolean mask over column 'one': True exactly where a value is present.
present_mask = df['one'].notnull()
print(present_mask)

a     True
b    False
c     True
d    False
e     True
f     True
g    False
h     True
Name: one, dtype: bool


In [4]:
### Calculations with Missing Data
### Example 1: Sum of a Column
import pandas as pd
import numpy as np

# Five random rows, reindexed so that 'b', 'd' and 'g' are all-NaN.
df = pd.DataFrame(
    np.random.randn(5, 3),
    index=list('acefh'),
    columns=['one', 'two', 'three'],
).reindex(list('abcdefgh'))

# The NaN entries in 'one' are skipped by sum(), so the total covers
# only the five populated rows.
column_total = df['one'].sum()
print(column_total)

1.6874779755452707


In [5]:
### Example 2: Sum with Missing Values
import pandas as pd
import numpy as np

# A frame created from labels only: every cell is missing.
row_labels = [0, 1, 2, 3, 4, 5]
df = pd.DataFrame(index=row_labels, columns=['one', 'two'])

# With nothing but missing values to add up, the sum comes out as 0.
all_missing_total = df['one'].sum()
print(all_missing_total)

0


In [6]:
### Replace NaN with a Scalar Value
import pandas as pd
import numpy as np

# 3x3 random frame on rows a/c/e; reindexing to a/b/c drops 'e' and
# introduces an all-NaN row 'b'.
df = pd.DataFrame(
    np.random.randn(3, 3),
    index=list('ace'),
    columns=['one', 'two', 'three'],
)
df = df.reindex(list('abc'))

print(df)

# fillna returns a new frame; df itself still contains the NaN row.
zero_filled = df.fillna(0)
print("NaN replaced with '0':")
print(zero_filled)


        one       two     three
a -0.368186 -1.511181  0.967688
b       NaN       NaN       NaN
c -1.282333  1.027555 -0.585463
NaN replaced with '0':
        one       two     three
a -0.368186 -1.511181  0.967688
b  0.000000  0.000000  0.000000
c -1.282333  1.027555 -0.585463


In [7]:
### Fill NA Forward and Backward
### Example 1: Forward Fill
import pandas as pd
import numpy as np

df = pd.DataFrame(np.random.randn(5, 3),
                  index=['a', 'c', 'e', 'f', 'h'],
                  columns=['one', 'two', 'three'])

# Reindexing onto a..h adds all-NaN rows at 'b', 'd' and 'g'.
df = df.reindex(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h'])

# Forward fill: every missing row takes the values of the closest row above it.
# DataFrame.ffill() is used instead of fillna(method='pad'), which is
# deprecated and emits a FutureWarning on recent pandas releases.
df_ffilled = df.ffill()
print(df_ffilled)

        one       two     three
a -0.303803 -0.624866  1.029964
b -0.303803 -0.624866  1.029964
c  0.645432  0.294801 -0.292823
d  0.645432  0.294801 -0.292823
e -0.482310 -0.188421 -0.721041
f -0.504310  1.633316  0.024969
g -0.504310  1.633316  0.024969
h  0.188459 -0.313155 -1.906799


  print(df.fillna(method='pad'))


In [8]:
### Example 2: Backward Fill
import pandas as pd
import numpy as np

df = pd.DataFrame(np.random.randn(5, 3),
                  index=['a', 'c', 'e', 'f', 'h'],
                  columns=['one', 'two', 'three'])

# Reindexing onto a..h adds all-NaN rows at 'b', 'd' and 'g'.
df = df.reindex(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h'])

# Backward fill: every missing row takes the values of the closest row below it.
# DataFrame.bfill() is used instead of fillna(method='backfill'), which is
# deprecated and emits a FutureWarning on recent pandas releases.
df_bfilled = df.bfill()
print(df_bfilled)

        one       two     three
a -0.079356  1.273583  1.461806
b -2.178723 -0.561080 -1.343031
c -2.178723 -0.561080 -1.343031
d  0.070053  0.120466  1.445779
e  0.070053  0.120466  1.445779
f  1.423508 -0.707165  0.421545
g  1.268212 -0.300893  2.220439
h  1.268212 -0.300893  2.220439


  print(df.fillna(method='backfill'))


In [9]:
### Drop Missing Values
### Example 1: Drop Rows with Missing Values
import pandas as pd
import numpy as np

# Five populated rows; the reindex below adds all-NaN rows 'b', 'd', 'g'.
kept_labels = ['a', 'c', 'e', 'f', 'h']
df = pd.DataFrame(np.random.randn(5, 3),
                  index=kept_labels,
                  columns=['one', 'two', 'three'])
df = df.reindex(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h'])

# dropna() with its default axis removes every row that contains a NaN,
# leaving only the five originally populated rows.
rows_without_nan = df.dropna()
print(rows_without_nan)

        one       two     three
a -0.421312 -0.297195  0.315855
c -0.993886  0.325089 -1.442511
e  0.874641  0.353433 -0.035514
f  1.037967  0.442367  1.717675
h -1.523926 -0.096478  0.321625


In [11]:
### Example 2: Drop Columns with Missing Values
import pandas as pd
import numpy as np

# Five populated rows; the reindex below adds all-NaN rows 'b', 'd', 'g'.
column_names = ['one', 'two', 'three']
df = pd.DataFrame(np.random.randn(5, 3),
                  index=list('acefh'),
                  columns=column_names)
df = df.reindex(list('abcdefgh'))

# Every column has NaN in rows 'b', 'd' and 'g', so dropping columns that
# contain a NaN removes all of them and leaves an empty frame.
columns_without_nan = df.dropna(axis=1)
print(columns_without_nan)

Empty DataFrame
Columns: []
Index: [a, b, c, d, e, f, g, h]


In [12]:
### Replace Missing (or Generic) Values
### Example 1: Replace Specific Values
import pandas as pd
import numpy as np

# Two integer columns, each holding one out-of-range placeholder value
# (2000 in 'one', 1000 in 'two').
column_data = {
    'one': [10, 20, 30, 40, 50, 2000],
    'two': [1000, 0, 30, 40, 50, 60],
}
df = pd.DataFrame(column_data)

# Dict form of replace: each key is a value to look for anywhere in the
# frame, each mapped value is what it becomes.
substitutions = {1000: 10, 2000: 60}
print(df.replace(substitutions))

   one  two
0   10   10
1   20    0
2   30   30
3   40   40
4   50   50
5   60   60


In [13]:
### Example 2: Replace Specific Values (Alternative)
import pandas as pd
import numpy as np

df = pd.DataFrame({'one': [10, 20, 30, 40, 50, 2000],
                   'two': [1000, 0, 30, 40, 50, 60]})

# Alternative to the dict form df.replace({1000: 10, 2000: 60}):
# pass two parallel lists, where the i-th value of the first list is
# replaced by the i-th value of the second. The result is the same frame.
replaced = df.replace([1000, 2000], [10, 60])
print(replaced)

   one  two
0   10   10
1   20    0
2   30   30
3   40   40
4   50   50
5   60   60
