In [1]:
import numpy as np
import pandas as pd

In [5]:
# Small demo frame with missing values: columns A-C each contain NaN,
# while Column D is complete (and therefore keeps an integer dtype).
df = pd.DataFrame(
    data={
        'Column A': [1, np.nan, 30, np.nan],
        'Column B': [2, 8, 31, np.nan],
        'Column C': [np.nan, 9, 32, 100],
        'Column D': [5, 8, 34, 110],
    },
)

In [6]:
# Display the frame; NaN marks the missing entries.
df

Unnamed: 0,Column A,Column B,Column C,Column D
0,1.0,2.0,,5
1,,8.0,9.0,8
2,30.0,31.0,32.0,34
3,,,100.0,110


In [7]:
df.info()              # summary of the frame: index range, non-null count and dtype per column, memory usage

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 4 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   Column A  2 non-null      float64
 1   Column B  3 non-null      float64
 2   Column C  3 non-null      float64
 3   Column D  4 non-null      int64  
dtypes: float64(3), int64(1)
memory usage: 256.0 bytes


In [8]:
# Count the missing values in each column (isna() flags them, sum() adds up the True flags).
df.isna().sum()

Column A    2
Column B    1
Column C    1
Column D    0
dtype: int64

In [9]:
df.dropna()  # by default, dropna() drops every row that contains at least one missing value

Unnamed: 0,Column A,Column B,Column C,Column D
2,30.0,31.0,32.0,34


In [10]:
df.dropna(axis=1) # axis=1 drops columns (instead of rows) that contain any missing value

Unnamed: 0,Column D
0,5
1,8
2,34
3,110


In [11]:
df.dropna(how='any')    # how='any' drops a row as soon as any one of its values is missing

Unnamed: 0,Column A,Column B,Column C,Column D
2,30.0,31.0,32.0,34


In [24]:
df.dropna(thresh=3) # keep only the rows that have at least 3 non-null values

Unnamed: 0,Column A,Column B,Column C,Column D
0,1.0,2.0,,5
1,,8.0,9.0,8
2,30.0,31.0,32.0,34


In [22]:
df.dropna(thresh=4, axis=1) # keep only the columns that have at least 4 non-null values

Unnamed: 0,Column D
0,5
1,8
2,34
3,110


In [27]:
df.dropna(thresh=4, axis='columns')   # axis='columns' is equivalent to axis=1: keep only columns with at least 4 non-null values

Unnamed: 0,Column D
0,5
1,8
2,34
3,110


Filling Null Values

In [28]:
# Second demo frame; its middle row is missing in every column.
df2 = pd.DataFrame(
    data={
        'Column A': [1, np.nan, 30],
        'Column B': [2, np.nan, 31],
        'Column C': [np.nan, np.nan, 100],
    },
)

df2

Unnamed: 0,Column A,Column B,Column C
0,1.0,2.0,
1,,,
2,30.0,31.0,100.0


In [32]:
# Demo Series with two consecutive missing values (positions 3 and 4).
s = pd.Series(
    data=[1.0, 2.0, 3.0, np.nan, np.nan, 4.0],
)

In [33]:
# Display the Series; positions 3 and 4 hold NaN.
s

0    1.0
1    2.0
2    3.0
3    NaN
4    NaN
5    4.0
dtype: float64

In [34]:
s.fillna(0)    # replace every missing value with 0

0    1.0
1    2.0
2    3.0
3    0.0
4    0.0
5    4.0
dtype: float64

In [35]:
s.fillna(s.mean())   # missing values can also be filled with a statistic such as the mean (mean() skips NaN)

0    1.0
1    2.0
2    3.0
3    2.5
4    2.5
5    4.0
dtype: float64

In [38]:
# Forward fill: each NaN takes the last valid value that precedes it.
# Series.ffill() replaces fillna(method='ffill'); the `method` keyword of
# fillna is deprecated since pandas 2.1.
s.ffill()

0    1.0
1    2.0
2    3.0
3    3.0
4    3.0
5    4.0
dtype: float64

In [39]:
# Backward fill (the reverse of forward fill): each NaN takes the next valid
# value that follows it. Series.bfill() replaces fillna(method='bfill'); the
# `method` keyword of fillna is deprecated since pandas 2.1.
s.bfill()

0    1.0
1    2.0
2    3.0
3    4.0
4    4.0
5    4.0
dtype: float64

In [41]:
# Forward fill cannot repair a leading NaN: the first element has no earlier
# value to copy, so it stays NaN. Uses Series.ffill() because the `method`
# keyword of fillna is deprecated since pandas 2.1.
pd.Series([np.nan, 3, np.nan, 9]).ffill()

0    NaN
1    3.0
2    3.0
3    9.0
dtype: float64

In [42]:
# Backward fill cannot repair a trailing NaN: the last element has no later
# value to copy, so it stays NaN. Uses Series.bfill() because the `method`
# keyword of fillna is deprecated since pandas 2.1.
pd.Series([1, np.nan, 3, np.nan]).bfill()

0    1.0
1    3.0
2    3.0
3    NaN
dtype: float64

Filling Null Values in a DataFrame

In [43]:
# Show the original frame again: the dropna() calls above returned new frames and did not modify it.
df

Unnamed: 0,Column A,Column B,Column C,Column D
0,1.0,2.0,,5
1,,8.0,9.0,8
2,30.0,31.0,32.0,34
3,,,100.0,110


In [44]:
# Fill each column with its own replacement: a constant for A and B, the
# column mean for C. Column D is not listed, so it is left as it is.
df.fillna(
    value={
        'Column A': 0,
        'Column B': 99,
        'Column C': df['Column C'].mean(),
    }
)

Unnamed: 0,Column A,Column B,Column C,Column D
0,1.0,2.0,47.0,5
1,0.0,8.0,9.0,8
2,30.0,31.0,32.0,34
3,0.0,99.0,100.0,110


In [45]:
# axis=0 fills down each column: a NaN takes the value from the row above it.
# A NaN in the first row has nothing above it and therefore stays NaN.
# DataFrame.ffill() replaces fillna(method='ffill'); the `method` keyword of
# fillna is deprecated since pandas 2.1.
df.ffill(axis=0)

Unnamed: 0,Column A,Column B,Column C,Column D
0,1.0,2.0,,5
1,1.0,8.0,9.0,8
2,30.0,31.0,32.0,34
3,30.0,31.0,100.0,110


In [48]:
# axis=1 fills across each row: a NaN takes the value from the next valid
# column to its right. DataFrame.bfill() replaces fillna(method='bfill'); the
# `method` keyword of fillna is deprecated since pandas 2.1.
df.bfill(axis=1)

Unnamed: 0,Column A,Column B,Column C,Column D
0,1.0,2.0,5.0,5.0
1,8.0,8.0,9.0,8.0
2,30.0,31.0,32.0,34.0
3,100.0,100.0,100.0,110.0


Checking Whether There Are Null Values

In [56]:
# Number of values left once the missing ones are dropped.
s.dropna().count()

4

In [53]:
# The Series contains missing values exactly when dropping NaN makes it shorter.
missing_value = len(s) != len(s.dropna())
missing_value

True

In [54]:
len(s)   # total length of the Series, missing values included

6

In [59]:
s.count()   # number of non-null values only

4

In [60]:
# count() only counts non-null entries, so a mismatch with len() means the
# Series contains at least one missing value.
missing_value = len(s) != s.count()
missing_value

True

In [61]:
pd.Series([True, False, False]).any()   # any() is True when at least one value in the Series is True

True

In [62]:
pd.Series([True, False, False]).all()  # all() is True only when every value in the Series is True

False

In [63]:
pd.Series([True, True, True]).all()  # every value is True, so all() returns True

True