In [None]:
import pandas as pd
import numpy as np

In [None]:
# Toy frame with deliberately missing values, built column by column:
# A-D are numeric (C is entirely NaN), E is a string label column, and
# row 2 is missing in every column.
df = pd.DataFrame(
    {
        "A": [np.nan, 3, np.nan, np.nan],
        "B": [2, 4, np.nan, 3],
        "C": [np.nan, np.nan, np.nan, np.nan],
        "D": [0, 1, np.nan, 4],
        "E": ["Doctor", "Doctor", np.nan, "Nurse"],
    },
    columns=list("ABCDE"),
)

In [None]:
# Preview the first rows of the raw frame before any cleaning.
df.head()

Unnamed: 0,A,B,C,D,E
0,,2.0,,0.0,Doctor
1,3.0,4.0,,1.0,Doctor
2,,,,,
3,,3.0,,4.0,Nurse


In [None]:
# Summarize dtypes and non-null counts per column to see where values are missing.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   A       1 non-null      float64
 1   B       3 non-null      float64
 2   C       0 non-null      float64
 3   D       3 non-null      float64
 4   E       3 non-null      object 
dtypes: float64(4), object(1)
memory usage: 288.0+ bytes


In [None]:
# Drop only the columns in which every value is NaN; the raw `df` is left intact.
df_clean = df.dropna(axis="columns", how="all")

In [None]:
# Show the frame after the all-NaN columns were dropped.
df_clean

Unnamed: 0,A,B,D,E
0,,2.0,0.0,Doctor
1,3.0,4.0,1.0,Doctor
2,,,,
3,,3.0,4.0,Nurse


# Mean Imputation

In [None]:
# Labels of every column whose dtype is not `object`; these are the columns
# that get mean/median imputation below.
non_categorical_columns = df_clean.select_dtypes(exclude="object").columns

In [None]:
# Show which column labels were selected as non-object columns.
non_categorical_columns

Index(['A', 'B', 'D'], dtype='object')

In [None]:
# Independent working copy for pandas-based mean imputation, so df_clean stays reusable.
df_mean = df_clean.copy(deep=True)

In [None]:
# Show the working copy before mean imputation.
df_mean

Unnamed: 0,A,B,D,E
0,,2.0,0.0,Doctor
1,3.0,4.0,1.0,Doctor
2,,,,
3,,3.0,4.0,Nurse


In [None]:
# Column-wise mean of the non-object columns; the result is a Series indexed
# by column label, used as per-column fill values.
means = df_mean.loc[:, non_categorical_columns].mean(axis=0)

In [None]:
# Show the per-column means that will be used as fill values.
means

A    3.000000
B    3.000000
D    1.666667
dtype: float64

In [None]:
# Fill each NaN with the mean of its column. `means` only has entries for
# the non-object columns, so any other column is left untouched.
# Reassigning instead of using inplace=True avoids mutating the frame
# in place across cells and keeps the call chainable.
df_mean = df_mean.fillna(means)

In [None]:
# Show the frame after filling with the column means.
df_mean

Unnamed: 0,A,B,D,E
0,3.0,2.0,0.0,Doctor
1,3.0,4.0,1.0,Doctor
2,3.0,3.0,1.666667,
3,3.0,3.0,4.0,Nurse


In [None]:
from sklearn.impute import SimpleImputer

In [None]:
# scikit-learn imputer that replaces NaN with the per-column mean learned in fit().
simp_imputer = SimpleImputer(missing_values=np.nan, strategy="mean")

In [None]:
# Independent working copy for the SimpleImputer-based mean imputation.
df_mean_imp = df_clean.copy(deep=True)

In [None]:
# Show the working copy before the mean imputer is applied.
df_mean_imp

Unnamed: 0,A,B,D,E
0,,2.0,0.0,Doctor
1,3.0,4.0,1.0,Doctor
2,,,,
3,,3.0,4.0,Nurse


In [None]:
# Learn the per-column means from the non-object columns only.
simp_imputer.fit(X=df_mean_imp[non_categorical_columns])

In [None]:
# Write the imputed values back into the same non-object columns.
df_mean_imp[non_categorical_columns] = simp_imputer.transform(
    df_mean_imp[non_categorical_columns]
)

In [None]:
# Show the frame after the mean imputer filled the selected columns.
df_mean_imp

Unnamed: 0,A,B,D,E
0,3.0,2.0,0.0,Doctor
1,3.0,4.0,1.0,Doctor
2,3.0,3.0,1.666667,
3,3.0,3.0,4.0,Nurse


# Median Imputer

In [None]:
# Independent working copy for pandas-based median imputation.
df_median = df_clean.copy(deep=True)

In [None]:
# Show the working copy before median imputation.
df_median

Unnamed: 0,A,B,D,E
0,,2.0,0.0,Doctor
1,3.0,4.0,1.0,Doctor
2,,,,
3,,3.0,4.0,Nurse


In [None]:
# Column-wise median of the non-object columns, used as per-column fill values.
medians = df_median.loc[:, non_categorical_columns].median(axis=0)

In [None]:
# Fill each NaN with the median of its column. `medians` only covers the
# non-object columns, so any other column is left untouched.
# Reassigning instead of using inplace=True avoids mutating the frame
# in place across cells and keeps the call chainable.
df_median = df_median.fillna(medians)

In [None]:
# Show the frame after filling with the column medians.
df_median

Unnamed: 0,A,B,D,E
0,3.0,2.0,0.0,Doctor
1,3.0,4.0,1.0,Doctor
2,3.0,3.0,1.0,
3,3.0,3.0,4.0,Nurse


In [None]:
# Independent working copy for the SimpleImputer-based median imputation.
df_median_imp = df_clean.copy(deep=True)

In [None]:
# scikit-learn imputer that replaces NaN with the per-column median learned in fit().
median_imputer = SimpleImputer(missing_values=np.nan, strategy="median")

In [None]:
# Learn the per-column medians from the non-object columns only.
median_imputer.fit(X=df_median_imp[non_categorical_columns])

In [None]:
# Transform df_median_imp itself, the frame the imputer was fitted on, so
# its NaNs are replaced with the learned medians. Passing an already
# mean-imputed frame here would leave nothing for the median imputer to
# fill and would just copy the mean-imputed values across.
df_median_imp[non_categorical_columns] = median_imputer.transform(
    df_median_imp[non_categorical_columns]
)

In [None]:
# Show the frame after the median imputer's output was assigned to the selected columns.
df_median_imp

Unnamed: 0,A,B,D,E
0,3.0,2.0,0.0,Doctor
1,3.0,4.0,1.0,Doctor
2,3.0,3.0,1.666667,
3,3.0,3.0,4.0,Nurse


# Mode Imputer

In [None]:
# Independent working copy for pandas-based mode imputation.
df_mode = df_clean.copy(deep=True)

In [None]:
# Show the working copy before mode imputation.
df_mode

Unnamed: 0,A,B,D,E
0,,2.0,0.0,Doctor
1,3.0,4.0,1.0,Doctor
2,,,,
3,,3.0,4.0,Nurse


In [None]:
# Labels of the object-dtype columns; these receive mode imputation.
categorical_column = df_mode.select_dtypes(include="object").columns

In [None]:
# Show which column labels were selected as object-dtype columns.
categorical_column

Index(['E'], dtype='object')

In [None]:
# mode() returns one row per tied mode; .iloc[0] takes the first row, a
# Series keyed by column label, so fillna only touches the object-dtype
# columns. Reassigning instead of using inplace=True avoids mutating the
# frame in place across cells.
df_mode = df_mode.fillna(df_mode[categorical_column].mode().iloc[0])

In [None]:
# Show the frame after the object-dtype columns were filled with their mode.
df_mode

Unnamed: 0,A,B,D,E
0,,2.0,0.0,Doctor
1,3.0,4.0,1.0,Doctor
2,,,,Doctor
3,,3.0,4.0,Nurse


In [None]:
# Independent working copy for the SimpleImputer-based mode imputation.
df_mode_imp = df_clean.copy(deep=True)

In [None]:
# scikit-learn imputer that replaces NaN with each column's most frequent value.
imputer = SimpleImputer(missing_values=np.nan, strategy="most_frequent")

In [None]:
# Learn the most frequent value of each object-dtype column.
imputer.fit(X=df_mode_imp[categorical_column])

In [None]:
# Write the imputed values back into the same object-dtype columns.
df_mode_imp[categorical_column] = imputer.transform(
    df_mode_imp[categorical_column]
)

In [None]:
# Show the frame after the most-frequent imputer filled the object-dtype columns.
df_mode_imp

Unnamed: 0,A,B,D,E
0,,2.0,0.0,Doctor
1,3.0,4.0,1.0,Doctor
2,,,,Doctor
3,,3.0,4.0,Nurse


## Forward and Backward Fill

In [None]:
# Work on a copy of the raw frame (all-NaN column C included) for the fill demos.
df_fill = df.copy(deep=True)

In [None]:
# Show the working copy before any forward/backward filling.
df_fill

Unnamed: 0,A,B,C,D,E
0,,2.0,,0.0,Doctor
1,3.0,4.0,,1.0,Doctor
2,,,,,
3,,3.0,,4.0,Nurse


In [None]:
# Forward fill: carry the last valid value down each column. NaNs that
# precede the first valid value in a column stay NaN.
# DataFrame.ffill() replaces the deprecated fillna(method='ffill').
df_fill.ffill()

Unnamed: 0,A,B,C,D,E
0,,2.0,,0.0,Doctor
1,3.0,4.0,,1.0,Doctor
2,3.0,4.0,,1.0,Doctor
3,3.0,3.0,,4.0,Nurse


In [None]:
# Backward fill: pull the next valid value up each column. NaNs that
# follow the last valid value in a column stay NaN.
# DataFrame.bfill() replaces the deprecated fillna(method='bfill').
df_fill.bfill()

Unnamed: 0,A,B,C,D,E
0,3.0,2.0,,0.0,Doctor
1,3.0,4.0,,1.0,Doctor
2,,3.0,,4.0,Nurse
3,,3.0,,4.0,Nurse


In [None]:
# Forward fill first, then backward fill whatever is still missing at the
# top of a column. A column with no valid values at all stays NaN.
# ffill()/bfill() replace the deprecated fillna(method=...) spelling.
df_fill.ffill().bfill()

Unnamed: 0,A,B,C,D,E
0,3.0,2.0,,0.0,Doctor
1,3.0,4.0,,1.0,Doctor
2,3.0,4.0,,1.0,Doctor
3,3.0,3.0,,4.0,Nurse
