In [1]:
import pandas as pd
import numpy as np

In [2]:
# Top-level missing-value test applied to a scalar: NaN is reported as missing.
pd.isnull(np.nan)

True

In [3]:
# Python's None is reported as missing as well, not only NaN.
pd.isna(None)

True

In [5]:
# pd.isna gives the same answer for NaN as the pd.isnull call shown earlier.
pd.isna(np.nan)

True

In [12]:
# notnull is the opposite test: it is False for a missing value such as NaN.
pd.notnull(np.nan)

False

In [7]:
# Eight-element numeric Series whose entries at positions 4 and 5 are missing.
series1 = pd.Series(
    data=[1, 2, 3, 4, np.nan, np.nan, 5, 6],
)

In [9]:
# Boolean-mask selection: `~` inverts the isnull mask, so only the entries
# that are NOT missing are kept (original index labels are preserved).
series1[~pd.isnull(series1)]

0    1.0
1    2.0
2    3.0
3    4.0
6    5.0
7    6.0
dtype: float64

In [16]:
# 3x3 demo frame with string row labels; every row and every column
# contains exactly one missing cell.
df1 = pd.DataFrame(
    data={
        'Col A': [1, 2, np.nan],
        'Col B': [4, np.nan, 5],
        'Col C': [np.nan, 6, 7],
    },
    index=['igl', 'zod', 'op'],
)

In [17]:
# Display the frame; the blank cells in the rendered table are the NaN entries.
df1

Unnamed: 0,Col A,Col B,Col C
igl,1.0,4.0,
zod,2.0,,6.0
op,,5.0,7.0


In [18]:
# Take row 'igl' as a Series (indexed by column name) and keep only its
# non-missing entries, using a mask built from that same row.
df1.loc['igl'][pd.notnull(df1.loc['igl'])]

Col A    1.0
Col B    4.0
Name: igl, dtype: float64

In [19]:
# Same idea for a column: keep the non-missing entries of 'Col C',
# with the mask produced by the top-level pd.notnull function.
df1['Col C'][pd.notnull(df1['Col C'])]

zod    6.0
op     7.0
Name: Col C, dtype: float64

In [20]:
# Equivalent to the previous cell, but the mask comes from the Series
# method .notnull() instead of the top-level pd.notnull() function.
df1['Col C'][df1['Col C'].notnull()]

zod    6.0
op     7.0
Name: Col C, dtype: float64

In [21]:
# count() tallies only the non-missing entries: 5 of the 6 values here.
pd.Series([1, 1, 2, 3, 5, np.nan]).count()

5

In [22]:
# sum() skips the NaN rather than propagating it, so the total is 1+1+2+3+5.
pd.Series([1, 1, 2, 3, 5, np.nan]).sum()

12.0

In [23]:
# mean() also ignores the NaN: the divisor is the 5 valid values, not 6.
pd.Series([1, 1, 2, 3, 5, np.nan]).mean()

2.4

In [24]:
# Applied to a Series, isnull works element-wise and returns a boolean Series
# that is True exactly where a value is missing.
pd.isnull(pd.Series([1, 2, 3, np.nan, np.nan]))

0    False
1    False
2    False
3     True
4     True
dtype: bool

In [25]:
# Summing the boolean mask counts its True entries, i.e. the number of
# missing values.
pd.isnull(pd.Series([1, 2, 3, np.nan, np.nan])).sum()

2

In [26]:
# The same trick with notnull counts the values that are present.
pd.notnull(pd.Series([1, 2, 3, np.nan, np.nan])).sum()

3

In [27]:
# Six-element numeric Series with two adjacent missing values in the middle;
# the bare name on the last line displays it.
s = pd.Series(
    data=[1, 2, np.nan, np.nan, 5, 6],
)
s

0    1.0
1    2.0
2    NaN
3    NaN
4    5.0
5    6.0
dtype: float64

The `dropna` method returns a new Series with the NaN values excluded.

In [28]:
# Drop the missing entries; the surviving values keep their original index
# labels (0, 1, 4, 5), so the index is no longer contiguous.
s.dropna()

0    1.0
1    2.0
4    5.0
5    6.0
dtype: float64

In [29]:
# 4x4 demo frame on the default integer index. Columns A-C each contain
# missing cells; Column D is complete.
df2 = pd.DataFrame(
    data={
        'Column A': [1, np.nan, 30, np.nan],
        'Column B': [2, 8, 31, np.nan],
        'Column C': [np.nan, 9, 32, 100],
        'Column D': [5, 8, 34, 110],
    },
)

In [30]:
# Element-wise missing-value mask: a boolean frame with the same shape and
# labels as df2, True wherever a cell is NaN.
df2.isnull()

Unnamed: 0,Column A,Column B,Column C,Column D
0,False,False,True,False
1,True,False,False,False
2,False,False,False,False
3,True,True,False,False


In [31]:
# Indexing a frame with a boolean frame keeps the shape: cells where the mask
# is False are shown as NaN. Those cells were NaN already, so the result
# looks the same as df2 -- unlike a Series, nothing is filtered out.
df2[df2.notnull()]

Unnamed: 0,Column A,Column B,Column C,Column D
0,1.0,2.0,,5
1,,8.0,9.0,8
2,30.0,31.0,32.0,34
3,,,100.0,110


In [32]:
# Returns a new DataFrame containing only the rows in which every value is
# valid (non-null); here only row 2 qualifies.
df2.dropna()

Unnamed: 0,Column A,Column B,Column C,Column D
2,30.0,31.0,32.0,34


In [33]:
df2.dropna(how='any') # how='any' is the default: a row is dropped if any of its values is missing

Unnamed: 0,Column A,Column B,Column C,Column D
2,30.0,31.0,32.0,34


In [36]:
# thresh=3 keeps only the rows that have at least 3 valid (non-null) values;
# row 3 has just two, so it is dropped.
df2.dropna(thresh=3)

Unnamed: 0,Column A,Column B,Column C,Column D
0,1.0,2.0,,5
1,,8.0,9.0,8
2,30.0,31.0,32.0,34


In [38]:
# Replace every missing value with 0 in the returned frame
# (df2 itself is not modified).
df2.fillna(0)

Unnamed: 0,Column A,Column B,Column C,Column D
0,1.0,2.0,0.0,5
1,0.0,8.0,9.0,8
2,30.0,31.0,32.0,34
3,0.0,0.0,100.0,110


In [40]:
# df2.mean() yields one mean per column (NaNs ignored); passing it to fillna
# fills each column's gaps with that column's own mean.
df2.fillna(df2.mean())

Unnamed: 0,Column A,Column B,Column C,Column D
0,1.0,2.0,47.0,5
1,15.5,8.0,9.0,8
2,30.0,31.0,32.0,34
3,15.5,13.666667,100.0,110


In [51]:
# Display df2 again: it still contains its NaNs, because the fillna calls
# shown earlier returned new frames instead of changing df2.
df2

Unnamed: 0,Column A,Column B,Column C,Column D
0,1.0,2.0,,5
1,,8.0,9.0,8
2,30.0,31.0,32.0,34
3,,,100.0,110


In [56]:
# Per-column fill values supplied as a {column: value} mapping: constants for
# columns A-C and the column mean for D (D has no gaps, so it is unchanged).
df2.fillna(
    value={
        'Column A': 0,
        'Column B': 1,
        'Column C': 2,
        'Column D': df2['Column D'].mean(),
    },
)

Unnamed: 0,Column A,Column B,Column C,Column D
0,1.0,2.0,2.0,5
1,0.0,8.0,9.0,8
2,30.0,31.0,32.0,34
3,0.0,1.0,100.0,110


In [50]:
# Forward-fill along each row (axis=1): a missing cell takes the value of the
# nearest populated cell to its left; a gap at the start of a row stays NaN.
# DataFrame.ffill is used instead of fillna(method='ffill'), whose `method`
# argument is deprecated. The stray leading backtick that made this cell a
# syntax error has been removed.
df2.ffill(axis=1)

Unnamed: 0,Column A,Column B,Column C,Column D
0,1.0,2.0,2.0,5.0
1,,8.0,9.0,8.0
2,30.0,31.0,32.0,34.0
3,,,100.0,110.0


In [41]:
# Float Series with two consecutive missing values between 3 and 5,
# used below to demonstrate forward- and backward-filling.
s = pd.Series(
    data=[1, 2, 3, np.nan, np.nan, 5],
    dtype='float64',
)

In [42]:
# Display the Series; the two NaNs sit between 3.0 and 5.0.
s

0    1.0
1    2.0
2    3.0
3    NaN
4    NaN
5    5.0
dtype: float64

In [43]:
# Forward-fill: each NaN takes the last valid value that precedes it
# (both gaps become 3.0). Series.ffill() is used instead of
# fillna(method='ffill'), whose `method` argument is deprecated.
s.ffill()

0    1.0
1    2.0
2    3.0
3    3.0
4    3.0
5    5.0
dtype: float64

In [45]:
# Backward-fill: each NaN takes the next valid value that follows it
# (both gaps become 5.0). Series.bfill() is used instead of
# fillna(method='bfill'), whose `method` argument is deprecated.
s.bfill()

0    1.0
1    2.0
2    3.0
3    5.0
4    5.0
5    5.0
dtype: float64