In [2]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.linear_model import SGDClassifier

## SimpleImputer

In [3]:
from sklearn.impute import SimpleImputer

In [4]:
# Toy feature matrix: four samples, two features. The first feature of the
# last sample is missing (NaN) and is what the imputers below fill in.
X = np.array([[10, 3], [0, 4], [5, 3], [np.nan, 3]])

In [5]:
# Mean imputation: NaN entries are replaced by the mean of their column.
imputer = SimpleImputer(strategy='mean', missing_values=np.nan)
# Learn the column means from X and return the filled-in copy.
imputer.fit_transform(X)

array([[10.,  3.],
       [ 0.,  4.],
       [ 5.,  3.],
       [ 5.,  3.]])

In [6]:
# Held-out samples; every feature of the last row is missing.
X_test = np.array([
    [12, 5],
    [40, 2],
    [5, 5],
    [np.nan, np.nan],
])
# Only transform here: the imputer must reuse the column means learned from X.
# Calling fit_transform would refit it on the test data and fill the gaps with
# test-set statistics (19 and 4) instead of the training ones (5 and 3.25).
imputer.transform(X_test)

array([[12.  ,  5.  ],
       [40.  ,  2.  ],
       [ 5.  ,  5.  ],
       [ 5.  ,  3.25]])

## KNNImputer

In [7]:
from sklearn.impute import KNNImputer , MissingIndicator

In [8]:
# Nearest-neighbour imputation: each missing value is taken from the single
# closest sample (n_neighbors=1).
imputer = KNNImputer(
    n_neighbors=1,
)
imputer.fit_transform(X)

array([[10.,  3.],
       [ 0.,  4.],
       [ 5.,  3.],
       [10.,  3.]])

## MissingIndicator

In [9]:
# Flag which entries of X_test are missing (True) instead of filling them in.
indicator = MissingIndicator()
indicator.fit_transform(X_test)

array([[False, False],
       [False, False],
       [False, False],
       [ True,  True]])

In [10]:
from sklearn.pipeline import make_union

In [11]:
# Side-by-side features: the constant-imputed values (NaN -> -99) followed by
# the missing-value indicator, so the information "was missing" is kept.
pipeline = make_union(
    SimpleImputer(strategy='constant', fill_value=-99),
    MissingIndicator(),
)

pipeline.fit_transform(X)

array([[ 10.,   3.,   0.],
       [  0.,   4.,   0.],
       [  5.,   3.,   0.],
       [-99.,   3.,   1.]])

## Application: tuning KNNImputer on the Titanic dataset

In [15]:
from sklearn.pipeline import make_union
from sklearn.model_selection import GridSearchCV, train_test_split

In [16]:
import seaborn as sns

In [19]:
# Titanic passengers: predict survival from ticket class and age.
titanic = sns.load_dataset('titanic')
y = titanic['survived']
X = titanic[['pclass', 'age']]

# Hold out 20% of the rows for testing; the fixed seed keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=5,
)

In [18]:
from sklearn.pipeline import make_pipeline

In [20]:
# Impute missing values with KNN, then classify with a linear model trained by
# stochastic gradient descent.
# SGDClassifier shuffles the training data with a random generator; fixing
# random_state makes the fitted model, and therefore the grid-search result,
# repeatable from one run to the next.
model = make_pipeline(KNNImputer(), SGDClassifier(random_state=5))

In [21]:
# Search space for the grid search: the KNNImputer step's neighbour count,
# addressed as <step name>__<parameter>, tried at 1 through 4.
params = {'knnimputer__n_neighbors': list(range(1, 5))}

In [22]:
# Exhaustive search over `params`, scoring each candidate with 4-fold
# cross-validation.
grid = GridSearchCV(estimator=model, param_grid=params, cv=4)

In [23]:
# Run the search on the training split only; the test split stays untouched.
grid.fit(X_train, y_train)

In [24]:
# Show the n_neighbors value the grid search selected for the KNNImputer step.
grid.best_params_

{'knnimputer__n_neighbors': 4}