### Encoding and Handling Missing Values

In [1]:
import pandas as pd

In [19]:
# Small Titanic-style toy frame: two categorical columns to encode and one
# numeric column (Age) whose last row is missing, so there is something to impute.
df = pd.DataFrame(
    dict(
        Fare=[56, 34, 89, 90, 23, 45],
        Embarked=['S', 'Q', 'T', 'S', 'Q', 'S'],
        Gender=['male', 'female', 'female', 'male', 'female', 'female'],
        Age=[23, 34, 36, 25, 43, None],  # None becomes NaN in the float column
    )
)

In [20]:
# Show the full frame (only 6 rows); note the missing Age in the last row.
df

Unnamed: 0,Fare,Embarked,Gender,Age
0,56,S,male,23.0
1,34,Q,female,34.0
2,89,T,female,36.0
3,90,S,male,25.0
4,23,Q,female,43.0
5,45,S,female,


In [21]:
from sklearn.preprocessing import OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.compose import make_column_transformer

In [22]:
# Encoder for the categorical columns: one indicator column per category.
ohe = OneHotEncoder()
# Imputer for numeric gaps; 'mean' is SimpleImputer's default strategy,
# spelled out here so the fill rule is visible to the reader.
si = SimpleImputer(strategy='mean')

In [23]:
# Route each column group to its own transformer:
#   - Embarked / Gender -> one-hot encoding
#   - Age               -> missing-value imputation
# Any column not listed (here: Fare) is passed through unchanged.
ct = make_column_transformer(
    (ohe, ['Embarked', 'Gender']),
    (si, ['Age']),
    remainder='passthrough',
)

In [24]:
# Fit every transformer and apply it in one step. The resulting array has
# 7 columns, in transformer order: Embarked indicators (Q, S, T),
# Gender indicators (female, male), imputed Age, then the passthrough Fare.
ct.fit_transform(df)

array([[ 0. ,  1. ,  0. ,  0. ,  1. , 23. , 56. ],
       [ 1. ,  0. ,  0. ,  1. ,  0. , 34. , 34. ],
       [ 0. ,  0. ,  1. ,  1. ,  0. , 36. , 89. ],
       [ 0. ,  1. ,  0. ,  0. ,  1. , 25. , 90. ],
       [ 1. ,  0. ,  0. ,  1. ,  0. , 43. , 23. ],
       [ 0. ,  1. ,  0. ,  1. ,  0. , 32.2, 45. ]])

# Simple Imputer with Indicator

In [27]:
# add_indicator=True appends a 0/1 column marking which rows were originally
# missing, so a downstream model can tell imputed values from observed ones.
# 'mean' is the default strategy, written out for clarity.
imputer = SimpleImputer(strategy='mean', add_indicator=True)
imputer.fit_transform(df[['Age']])

array([[23. ,  0. ],
       [34. ,  0. ],
       [36. ,  0. ],
       [25. ,  0. ],
       [43. ,  0. ],
       [32.2,  1. ]])

# KNN Imputer

In [28]:
# Re-display the (unmodified) frame as a reference for the imputers below.
df

Unnamed: 0,Fare,Embarked,Gender,Age
0,56,S,male,23.0
1,34,Q,female,34.0
2,89,T,female,36.0
3,90,S,male,25.0
4,23,Q,female,43.0
5,45,S,female,


In [29]:
from sklearn.impute import KNNImputer

In [30]:
# Fill the missing Age with the average Age of the 2 most similar rows.
# Similarity is measured on the numeric columns supplied here (Fare, Age);
# they are used unscaled in this toy example.
knn_imp = KNNImputer(n_neighbors=2)
knn_imp.fit_transform(df[['Fare', 'Age']])

array([[56. , 23. ],
       [34. , 34. ],
       [89. , 36. ],
       [90. , 25. ],
       [23. , 43. ],
       [45. , 28.5]])

# Iterative Imputer

In [31]:
# Side-effect import: IterativeImputer is still experimental, and scikit-learn
# only allows importing it from sklearn.impute after this enabling module has
# been imported. The name itself is intentionally unused.
from sklearn.experimental import enable_iterative_imputer  # noqa: F401

In [32]:
from sklearn.impute import IterativeImputer

In [33]:
# Model-based imputation: each column with gaps is predicted from the other
# supplied columns using a regressor (scikit-learn's default estimator),
# so the missing Age is estimated from Fare.
iter_imp = IterativeImputer()
iter_imp.fit_transform(df[['Fare', 'Age']])

array([[56.        , 23.        ],
       [34.        , 34.        ],
       [89.        , 36.        ],
       [90.        , 25.        ],
       [23.        , 43.        ],
       [45.        , 32.74122657]])