### Imputación por medias no condicionadas

In [2]:
import numpy as np
from sklearn.impute import SimpleImputer

In [3]:
# Mean imputer: cells equal to NaN are treated as missing and get replaced
# by a per-column mean learned at fit time.
imp = SimpleImputer(missing_values=np.nan, strategy="mean")

In [4]:
# Fit on three training rows. Judging by the transform output shown later
# (4.0 and 3.67), the column means are taken over the non-NaN entries only.
imp.fit(
    [
        [1, 2],
        [np.nan, 3],
        [7, 6],
    ]
)

In [5]:
# Rows to impute: one NaN in each column, plus one complete row.
x = [
    [np.nan, 2],
    [6, np.nan],
    [7, 8],
]

In [6]:
# Replace every NaN in `x` with the mean stored for its column during fit;
# the resulting array is the cell's displayed output.
imp.transform(x)

array([[4.        , 2.        ],
       [6.        , 3.66666667],
       [7.        , 8.        ]])

In [7]:
import scipy.sparse as sp

In [8]:
# Sparse (CSC) training matrix. The -1 entry stands in for a missing value:
# the imputer built for this example is configured with missing_values=-1.
X = sp.csc_matrix(
    [
        [1, 2],
        [0, -1],
        [8, 4],
    ]
)

In [10]:
# Same mean strategy as before, but here the sentinel for a missing cell
# is the value -1 rather than NaN.
imp_sparse = SimpleImputer(missing_values=-1, strategy="mean")

In [11]:
# Learn the column means from the sparse matrix X. The transform output shown
# later fills with 3.0 in both columns, which is consistent with the 0 being
# averaged in as a real value and the -1 being left out.
imp_sparse.fit(X)

In [12]:
# Sparse (CSC) matrix to impute: one -1 (missing) cell in each column.
X_test = sp.csc_matrix(
    [
        [-1, 2],
        [6, -1],
        [7, 6],
    ]
)

In [14]:
# Impute the -1 cells of X_test, then convert the result to a dense array
# so it is displayed as a regular matrix.
imp_sparse.transform(X_test).toarray()

array([[3., 2.],
       [6., 3.],
       [7., 6.]])

In [15]:
import pandas as pd

In [19]:
# Small two-column frame of categorical data with one NaN (missing) cell
# in each column.
df = pd.DataFrame(
    data=[
        ["a", "b"],
        [np.nan, "y"],
        ["a", np.nan],
        ["b", "y"],
    ],
    dtype="category",
)

In [17]:
# Rebinds `imp` to a new imputer that fills each column with its most
# frequent value; this example applies it to non-numeric (categorical) data.
imp = SimpleImputer(strategy="most_frequent")

In [21]:
# Learn the fill value of each column of `df` (per the transform output shown
# later: 'a' for the first column and 'y' for the second).
imp.fit(df)

In [22]:
# Fill the missing cells of `df`; the displayed result is a NumPy array with
# dtype=object rather than a DataFrame.
imp.transform(df)

array([['a', 'b'],
       ['a', 'y'],
       ['a', 'y'],
       ['b', 'y']], dtype=object)

### Imputación multivariante (iterativa)

In [25]:
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer

In [26]:
# Rebinds `imp` to an iterative (multivariate) imputer. max_iter caps the
# number of imputation rounds and random_state pins the seed so that the
# numbers shown in the output are reproducible.
imp = IterativeImputer(max_iter=10, random_state=0)

In [27]:
# Training rows: wherever both values are observed, the second column is
# exactly twice the first; the last two rows each have one NaN.
imp.fit(
    [
        [1, 2],
        [3, 6],
        [4, 8],
        [np.nan, 3],
        [7, np.nan],
    ]
)

In [28]:
# Rows to impute (rebinds X_test to a plain nested list). The displayed
# result fills each NaN with a value close to the 2x relation between the
# two columns seen in the training rows.
X_test = [
    [np.nan, 2],
    [6, np.nan],
    [np.nan, 6],
]
imp.transform(X_test)

array([[ 1.00007297,  2.        ],
       [ 6.        , 12.00002754],
       [ 2.99996145,  6.        ]])

### Imputación por vecinos cercanos

In [30]:
from sklearn.impute import KNNImputer

In [31]:
# Four samples with three features each; two cells are missing (NaN).
X = [
    [1, 2, np.nan],
    [3, 4, 3],
    [np.nan, 6, 5],
    [8, 8, 7],
]

In [32]:
# Rebinds `imp` to a nearest-neighbours imputer: each missing cell is filled
# from the 2 closest samples, all neighbours weighted equally ("uniform").
imp = KNNImputer(n_neighbors=2, weights="uniform")

In [33]:
# Fit the KNN imputer on X, the same data that is transformed in the next cell.
imp.fit(X)

In [34]:
# Fill the NaN cells of X. In the displayed result, 4.0 is the mean of 3 and 5
# and 5.5 is the mean of 3 and 8, i.e. the values of that feature in two
# other rows.
imp.transform(X)

array([[1. , 2. , 4. ],
       [3. , 4. , 3. ],
       [5.5, 6. , 5. ],
       [8. , 8. , 7. ]])