In [21]:
import numpy as np
import pandas as pd

In [29]:
data = [[23,'Female'],[35,'Male'],[26,'Male'],[40,'Female'],[np.nan,'Male'],[29,'Female'],[45,np.nan],[39,'Male']]
df = pd.DataFrame(data, columns = ['Age','Gender'])
df

Unnamed: 0,Age,Gender
0,23.0,Female
1,35.0,Male
2,26.0,Male
3,40.0,Female
4,,Male
5,29.0,Female
6,45.0,
7,39.0,Male


In [32]:
df['Gender'] = df.Gender.map({'Female' : 0, 'Male' : 1})
df

Unnamed: 0,Age,Gender
0,23.0,0.0
1,35.0,1.0
2,26.0,1.0
3,40.0,0.0
4,,1.0
5,29.0,0.0
6,45.0,
7,39.0,1.0


In [5]:
df.isnull().sum()

Age       1
Gender    1
dtype: int64

# Dropping the Missing Values

In [6]:
df.dropna()

Unnamed: 0,Age,Gender
0,23.0,0.0
1,35.0,1.0
2,26.0,1.0
3,40.0,0.0
5,29.0,0.0
7,39.0,1.0


# Missing Value(NaN) Imputation

# Random Value

In [3]:
df.fillna(value = 0)

Unnamed: 0,Age,Gender
0,23.0,Female
1,35.0,Male
2,26.0,Male
3,40.0,Female
4,0.0,Male
5,29.0,Female
6,45.0,0
7,39.0,Male


# Forward Fill

In [15]:
df.fillna(method = 'ffill')

Unnamed: 0,Age,Gender
0,23.0,Female
1,35.0,Male
2,26.0,Male
3,40.0,Female
4,40.0,Male
5,29.0,Female
6,45.0,Female
7,39.0,Male


# Backward Fill

In [17]:
df.fillna(method = 'bfill')

Unnamed: 0,Age,Gender
0,23.0,Female
1,35.0,Male
2,26.0,Male
3,40.0,Female
4,29.0,Male
5,29.0,Female
6,45.0,Male
7,39.0,Male


# Mean and Median

In [30]:
df.Age.mean()

33.857142857142854

In [24]:
df['Age'] = df['Age'].fillna(df['Age'].mean())
df

Unnamed: 0,Age,Gender
0,23.0,0.0
1,35.0,1.0
2,26.0,1.0
3,40.0,0.0
4,33.857143,1.0
5,29.0,0.0
6,45.0,
7,39.0,1.0


In [31]:
df.Age.median()

35.0

In [10]:
df['Age'] = df['Age'].fillna(df['Age'].median())
df

Unnamed: 0,Age,Gender
0,23.0,Female
1,35.0,Male
2,26.0,Male
3,40.0,Female
4,35.0,Male
5,29.0,Female
6,45.0,
7,39.0,Male


# Mode - Most Frequent

In [33]:
df.Gender.mode()

0    1.0
dtype: float64

In [13]:
df = df.apply(lambda x:x.fillna(x.value_counts().index[0]))
df

Unnamed: 0,Age,Gender
0,23.0,0.0
1,35.0,1.0
2,26.0,1.0
3,40.0,0.0
4,39.0,1.0
5,29.0,0.0
6,45.0,1.0
7,39.0,1.0


# KNN Imputation

In [18]:
from sklearn.impute import KNNImputer

In [25]:
imputer = KNNImputer(n_neighbors = 2)
df_filled = imputer.fit_transform(df[['Age','Gender']])
New_df = pd.DataFrame(df_filled, columns = ['Age', 'Gender'])

In [28]:
New_df['Gender'] = New_df['Gender'].astype(int)
New_df

Unnamed: 0,Age,Gender
0,23.0,0
1,35.0,1
2,26.0,1
3,40.0,0
4,33.857143,1
5,29.0,0
6,45.0,0
7,39.0,1
