In [10]:
import pandas as pd
import numpy as np

#### ***Generating Random DataFrame***

In [11]:
# Build a small demo frame of uniform random floats with a fixed number of
# NaNs injected into every column.
# A seeded generator makes Restart & Run All reproduce the same values and the
# same NaN positions. NOTE: outputs saved from an earlier, unseeded run will
# differ until the notebook is re-executed.
SEED = 42
N_ROWS, N_COLS = 10, 3
NULLS_PER_COLUMN = 3

rng = np.random.default_rng(SEED)
data = rng.random((N_ROWS, N_COLS))  # floats in [0, 1)
for col in range(N_COLS):
    # replace=False picks distinct rows, so each column gets exactly
    # NULLS_PER_COLUMN missing values.
    null_indices = rng.choice(N_ROWS, NULLS_PER_COLUMN, replace=False)
    data[null_indices, col] = np.nan
df = pd.DataFrame(data, columns=[f'Column_{i + 1}' for i in range(N_COLS)])

In [12]:
# Show the whole frame: with only 10 rows it is small enough to see exactly
# where the NaNs were injected.
df

Unnamed: 0,Column_1,Column_2,Column_3
0,0.148614,0.985308,
1,0.608841,,0.393251
2,0.454252,0.212653,0.176867
3,0.717629,,0.950183
4,0.398804,0.126133,0.599553
5,0.776876,0.347494,
6,,0.857731,0.429517
7,0.762975,0.346343,
8,,0.838406,0.474715
9,,,0.372445


***Checking with pd.isna()***

In [13]:
# On a single column, pd.isna returns a boolean Series that is True where the
# value is missing.
pd.isna(df['Column_1'])

0    False
1    False
2    False
3    False
4    False
5    False
6     True
7    False
8     True
9     True
Name: Column_1, dtype: bool

In [14]:
# On the whole frame, pd.isna returns a boolean DataFrame of the same shape
# (True marks a missing cell).
pd.isna(df)

Unnamed: 0,Column_1,Column_2,Column_3
0,False,False,True
1,False,True,False
2,False,False,False
3,False,True,False
4,False,False,False
5,False,False,True
6,True,False,False
7,False,False,True
8,True,False,False
9,True,True,False


***Checking with pd.notna()***

In [15]:
# pd.notna is the inverse of pd.isna: True where the column holds a value.
pd.notna(df['Column_1'])

0     True
1     True
2     True
3     True
4     True
5     True
6    False
7     True
8    False
9    False
Name: Column_1, dtype: bool

In [16]:
# Frame-wide version: a same-shaped boolean DataFrame, True marks a present value.
pd.notna(df)

Unnamed: 0,Column_1,Column_2,Column_3
0,True,True,False
1,True,False,True
2,True,True,True
3,True,False,True
4,True,True,True
5,True,True,False
6,False,True,True
7,True,True,False
8,False,True,True
9,False,False,True


***Checking with isnull().sum()***

In [17]:
# isnull() yields a boolean mask; summing it counts the True entries, i.e. the
# number of missing values per column.
df.isnull().sum()

Column_1    3
Column_2    3
Column_3    3
dtype: int64

***Checking with notnull().sum()***

In [18]:
# Complement of the cell above: the number of non-missing values per column.
df.notnull().sum()

Column_1    7
Column_2    7
Column_3    7
dtype: int64

***Checking with info()***

In [19]:
# info() prints a per-column summary (non-null count and dtype) plus the
# frame's memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 3 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   Column_1  7 non-null      float64
 1   Column_2  7 non-null      float64
 2   Column_3  7 non-null      float64
dtypes: float64(3)
memory usage: 372.0 bytes


#### ***Handling NaN Values***

In [22]:
# Take an independent (deep) copy so the original frame stays available for
# the later sections, then preview its first five rows.
df1 = df.copy(deep=True)
df1.head(n=5)

Unnamed: 0,Column_1,Column_2,Column_3
0,0.148614,0.985308,
1,0.608841,,0.393251
2,0.454252,0.212653,0.176867
3,0.717629,,0.950183
4,0.398804,0.126133,0.599553


In [24]:
# Fill each column's NaNs with its own constant. The filled frame is returned
# for display only; it is not assigned back to df1.
df1.fillna(value={'Column_1': 0, 'Column_2': 1, 'Column_3': 2})

Unnamed: 0,Column_1,Column_2,Column_3
0,0.148614,0.985308,2.0
1,0.608841,1.0,0.393251
2,0.454252,0.212653,0.176867
3,0.717629,1.0,0.950183
4,0.398804,0.126133,0.599553
5,0.776876,0.347494,2.0
6,0.0,0.857731,0.429517
7,0.762975,0.346343,2.0
8,0.0,0.838406,0.474715
9,0.0,1.0,0.372445


In [26]:
# how='any' drops every row that contains at least one NaN. The result is a
# new frame; df itself is left unchanged.
df.dropna(how='any')

Unnamed: 0,Column_1,Column_2,Column_3
2,0.454252,0.212653,0.176867
4,0.398804,0.126133,0.599553


In [27]:
# how='all' drops only rows in which every value is NaN. No such row exists
# here, so all rows are kept.
df.dropna(how='all')

Unnamed: 0,Column_1,Column_2,Column_3
0,0.148614,0.985308,
1,0.608841,,0.393251
2,0.454252,0.212653,0.176867
3,0.717629,,0.950183
4,0.398804,0.126133,0.599553
5,0.776876,0.347494,
6,,0.857731,0.429517
7,0.762975,0.346343,
8,,0.838406,0.474715
9,,,0.372445


#### ***Filling NaN (missing) values with central tendency***

In [28]:
# Re-display the original frame as the starting point for imputation; the
# earlier fillna/dropna calls returned new objects and did not modify it.
df

Unnamed: 0,Column_1,Column_2,Column_3
0,0.148614,0.985308,
1,0.608841,,0.393251
2,0.454252,0.212653,0.176867
3,0.717629,,0.950183
4,0.398804,0.126133,0.599553
5,0.776876,0.347494,
6,,0.857731,0.429517
7,0.762975,0.346343,
8,,0.838406,0.474715
9,,,0.372445


In [29]:
# Per-column mean, computed over the non-missing values only (NaNs are skipped).
df.mean()

Column_1    0.552570
Column_2    0.530581
Column_3    0.485219
dtype: float64

In [30]:
# Mean imputation: each column's NaNs are replaced by that column's mean
# (the Series returned by df.mean() is matched to columns by label).
df.fillna(df.mean())

Unnamed: 0,Column_1,Column_2,Column_3
0,0.148614,0.985308,0.485219
1,0.608841,0.530581,0.393251
2,0.454252,0.212653,0.176867
3,0.717629,0.530581,0.950183
4,0.398804,0.126133,0.599553
5,0.776876,0.347494,0.485219
6,0.55257,0.857731,0.429517
7,0.762975,0.346343,0.485219
8,0.55257,0.838406,0.474715
9,0.55257,0.530581,0.372445


In [31]:
# Median imputation: the median is less sensitive to outliers than the mean,
# so prefer it when a column contains extreme values.
df.fillna(df.median())

Unnamed: 0,Column_1,Column_2,Column_3
0,0.148614,0.985308,0.429517
1,0.608841,0.347494,0.393251
2,0.454252,0.212653,0.176867
3,0.717629,0.347494,0.950183
4,0.398804,0.126133,0.599553
5,0.776876,0.347494,0.429517
6,0.608841,0.857731,0.429517
7,0.762975,0.346343,0.429517
8,0.608841,0.838406,0.474715
9,0.608841,0.347494,0.372445
