# Imputers

In [1]:
import numpy as np
from sklearn.impute import KNNImputer
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer
from sklearn.linear_model import BayesianRidge

### Dataset with missing values

* The dataset is a 4×3 matrix with 2 missing values (`np.nan`): one in row 0, column 2 and one in row 2, column 0

In [2]:
# Toy 4x3 dataset; np.nan marks the two entries that will be imputed.
X = [
    [1, 2, np.nan],  # row 0: third feature is missing
    [3, 4, 3],
    [np.nan, 6, 5],  # row 2: first feature is missing
    [8, 8, 7],
]
X

[[1, 2, nan], [3, 4, 3], [nan, 6, 5], [8, 8, 7]]

### KNN Imputer

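* Uses the k-Nearest Neighbours algorithm: each missing value is replaced by the mean of that feature over the `n_neighbors` nearest rows
* First missing value (row 0, column 2) is imputed as 4
* Second missing value (row 2, column 0) is imputed as 5.5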

In [3]:
# k-nearest-neighbours imputation with k = 2, fitted and applied to the
# same data in a single step; the cell displays the completed matrix.
imputer = KNNImputer(
    n_neighbors=2,
)
imputer.fit_transform(X)

array([[1. , 2. , 4. ],
       [3. , 4. , 3. ],
       [5.5, 6. , 5. ],
       [8. , 8. , 7. ]])

### Iterative Imputer

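* Models each feature that has missing values as a function of the other features, using a Bayesian Ridge regressor as the estimator, to predict the missing values
* First missing value (row 0, column 2) is imputed as ≈1.24
* Second missing value (row 2, column 0) is imputed as ≈5.5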

In [5]:
# Start again from the raw dataset, with both np.nan entries in place.
X = [
    [1, 2, np.nan],
    [3, 4, 3],
    [np.nan, 6, 5],
    [8, 8, 7],
]

# Iterative imputation with BayesianRidge as the per-feature estimator.
imputer = IterativeImputer(
    estimator=BayesianRidge(),
)
imputed = imputer.fit_transform(X)
imputed

array([[1.        , 2.        , 1.24390155],
       [3.        , 4.        , 3.        ],
       [5.50000483, 6.        , 5.        ],
       [8.        , 8.        , 7.        ]])