In [1]:
import numpy as np
from source.missing_imputation import MeanValueImputation

# Synthetic test problem for MeanValueImputation: Gaussian targets and
# features, with a random subset of the targets replaced by NaN.
n_samples, n_features = 200, 20  # single source of truth for the data shape

imputer = MeanValueImputation()
# Fixed seed so that Restart Kernel -> Run All reproduces the same data.
rng = np.random.default_rng(seed=0)

y = rng.normal(size=n_samples)
X = rng.normal(size=(n_samples, n_features))
# Number of missing targets is itself random (3% of the samples on average).
num_missing = rng.binomial(n_samples, 0.03)
# Distinct positions in y that are blanked out.
mask = rng.choice(n_samples, num_missing, replace=False)
y[mask] = np.nan

y_imputed = imputer.impute_missing(X, y)
# Only the second returned value is used by the checks in this cell.
_, cov = imputer.compute_covariance(X, y, 1.0)

def make_imputer(X, y):
    """Build the reference matrix for mean-value imputation.

    The returned array has shape ``(n, n_observed)``, where ``n = len(y)`` and
    ``n_observed`` is the number of non-NaN entries of ``y``. Rows belonging to
    observed entries form an identity matrix; rows belonging to NaN entries
    hold the constant ``1 / n_observed``, so multiplying the matrix by the
    observed values reproduces them and fills every gap with their mean.

    ``X`` is accepted but not used by this variant.
    """
    is_missing = np.isnan(y)
    total = len(y)
    observed = total - np.count_nonzero(is_missing)

    weights = np.zeros((total, observed))
    # Observed entries map to themselves.
    weights[~is_missing, :] = np.eye(observed)
    # Every missing entry becomes the plain average of the observed ones.
    weights[is_missing] = 1 / observed
    return weights

# Compare the library results with the hand-built reference matrix:
# applying L to the observed targets should give the imputed vector, and
# L @ L.T should equal the covariance returned by the imputer.
L = make_imputer(X, y)
print(np.allclose(np.matmul(L, y[np.logical_not(np.isnan(y))]), y_imputed))
print(np.allclose(np.matmul(L, L.T), cov))

True
True


In [113]:
import numpy as np
from source.missing_imputation import EuclideanImputation

# Synthetic test problem for EuclideanImputation: Gaussian targets and
# features, with a random subset of the targets replaced by NaN.
n_samples, n_features = 200, 20  # single source of truth for the data shape

imputer = EuclideanImputation()
# Fixed seed so that Restart Kernel -> Run All reproduces the same data.
rng = np.random.default_rng(seed=0)

y = rng.normal(size=n_samples)
X = rng.normal(size=(n_samples, n_features))
# Number of missing targets is itself random (3% of the samples on average).
num_missing = rng.binomial(n_samples, 0.03)
# Distinct positions in y that are blanked out.
mask = rng.choice(n_samples, num_missing, replace=False)
y[mask] = np.nan

y_imputed = imputer.impute_missing(X, y)
# Only the second returned value is used by the checks in this cell.
_, cov = imputer.compute_covariance(X, y, 1.0)

def make_imputer(X, y):
    """Build the reference matrix for nearest-neighbour (Euclidean) imputation.

    The returned array has shape ``(n, n_observed)``, where ``n = len(y)`` and
    ``n_observed`` is the number of non-NaN entries of ``y``. Rows of observed
    entries form an identity matrix. The row of each missing entry has a single
    1.0 in the column of the observed sample whose feature row in ``X`` is
    closest in Euclidean distance (first one wins on ties, per ``np.argmin``).
    """
    is_missing = np.isnan(y)
    total = len(y)
    observed = total - np.count_nonzero(is_missing)

    weights = np.zeros((total, observed))
    weights[~is_missing, :] = np.eye(observed)

    # Feature rows of the observed samples; column j of `weights` is row j here.
    X_observed = X[~is_missing, :]
    for row in np.flatnonzero(is_missing):
        # L2 distance from the missing sample to every observed sample.
        distances = np.sqrt(np.sum((X_observed - X[row]) ** 2, axis=1))
        weights[row, np.argmin(distances)] = 1.0
    return weights

# Compare the library results with the hand-built reference matrix:
# applying L to the observed targets must give the imputed vector, and
# L @ L.T must equal the covariance returned by the imputer. On failure the
# second assert reports the total absolute difference.
L = make_imputer(X, y)
assert np.allclose(np.matmul(L, y[np.logical_not(np.isnan(y))]), y_imputed), 'wao'
assert np.allclose(np.matmul(L, L.T), cov), np.abs(np.matmul(L, L.T) - cov).sum()

In [120]:
import numpy as np
from source.missing_imputation import ManhattanImputation

# Synthetic test problem for ManhattanImputation: Gaussian targets and
# features, with a random subset of the targets replaced by NaN.
n_samples, n_features = 200, 20  # single source of truth for the data shape

imputer = ManhattanImputation()
# Fixed seed so that Restart Kernel -> Run All reproduces the same data.
rng = np.random.default_rng(seed=0)

y = rng.normal(size=n_samples)
X = rng.normal(size=(n_samples, n_features))
# Number of missing targets is itself random (3% of the samples on average).
num_missing = rng.binomial(n_samples, 0.03)
# Distinct positions in y that are blanked out.
mask = rng.choice(n_samples, num_missing, replace=False)
y[mask] = np.nan

y_imputed = imputer.impute_missing(X, y)
# Only the second returned value is used by the checks in this cell.
_, cov = imputer.compute_covariance(X, y, 1.0)

def make_imputer(X, y):
    """Build the reference matrix for nearest-neighbour (Manhattan) imputation.

    The returned array has shape ``(n, n_observed)``, where ``n = len(y)`` and
    ``n_observed`` is the number of non-NaN entries of ``y``. Rows of observed
    entries form an identity matrix. The row of each missing entry has a single
    1.0 in the column of the observed sample whose feature row in ``X`` is
    closest in Manhattan (L1) distance (first one wins on ties, per
    ``np.argmin``).
    """
    is_missing = np.isnan(y)
    total = len(y)
    observed = total - np.count_nonzero(is_missing)

    weights = np.zeros((total, observed))
    weights[~is_missing, :] = np.eye(observed)

    # Feature rows of the observed samples; column j of `weights` is row j here.
    X_observed = X[~is_missing]
    for row in np.flatnonzero(is_missing):
        # L1 distance: sum of absolute coordinate differences.
        distances = np.sum(np.abs(X_observed - X[row]), axis=1)
        weights[row, np.argmin(distances)] = 1.0
    return weights

# Compare the library results with the hand-built reference matrix:
# applying L to the observed targets must give the imputed vector, and
# L @ L.T must equal the covariance returned by the imputer. On failure the
# second assert reports the total absolute difference.
L = make_imputer(X, y)
assert np.allclose(np.matmul(L, y[np.logical_not(np.isnan(y))]), y_imputed), 'wao'
assert np.allclose(np.matmul(L, L.T), cov), np.abs(np.matmul(L, L.T) - cov).sum()

In [33]:
import numpy as np
from source.missing_imputation import ChebyshevImputation

# Synthetic test problem for ChebyshevImputation: Gaussian targets and
# features, with a random subset of the targets replaced by NaN.
n_samples, n_features = 200, 20  # single source of truth for the data shape

imputer = ChebyshevImputation()
# Fixed seed so that Restart Kernel -> Run All reproduces the same data.
rng = np.random.default_rng(seed=0)

y = rng.normal(size=n_samples)
X = rng.normal(size=(n_samples, n_features))
# Number of missing targets is itself random (3% of the samples on average).
num_missing = rng.binomial(n_samples, 0.03)
# Distinct positions in y that are blanked out.
mask = rng.choice(n_samples, num_missing, replace=False)
y[mask] = np.nan

y_imputed = imputer.impute_missing(X, y)
# Only the second returned value is used by the checks in this cell.
_, cov = imputer.compute_covariance(X, y, 1.0)

def make_imputer(X, y):
    """Build the reference matrix for nearest-neighbour (Chebyshev) imputation.

    The returned array has shape ``(n, n_observed)``, where ``n = len(y)`` and
    ``n_observed`` is the number of non-NaN entries of ``y``. Rows of observed
    entries form an identity matrix. The row of each missing entry has a single
    1.0 in the column of the observed sample whose feature row in ``X`` is
    closest in Chebyshev (L-infinity) distance (first one wins on ties, per
    ``np.argmin``).
    """
    is_missing = np.isnan(y)
    total = len(y)
    observed = total - np.count_nonzero(is_missing)

    weights = np.zeros((total, observed))
    weights[~is_missing, :] = np.eye(observed)

    # Feature rows of the observed samples; column j of `weights` is row j here.
    X_observed = X[~is_missing]
    for row in np.flatnonzero(is_missing):
        # L-infinity distance: largest absolute coordinate difference.
        distances = np.max(np.abs(X_observed - X[row]), axis=1)
        weights[row, np.argmin(distances)] = 1.0
    return weights

# Compare the library results with the hand-built reference matrix:
# applying L to the observed targets must give the imputed vector, and
# L @ L.T must equal the covariance returned by the imputer. On failure the
# second assert reports the total absolute difference.
# NOTE(review): the output recorded for this cell is `AssertionError: 2.0`,
# i.e. the covariance check failed on that run. TODO: find out whether the
# reference matrix or `compute_covariance` is at fault.
L = make_imputer(X, y)
assert np.allclose(np.matmul(L, y[np.logical_not(np.isnan(y))]), y_imputed), 'wao'
assert np.allclose(np.matmul(L, L.T), cov), np.abs(np.matmul(L, L.T) - cov).sum()

AssertionError: 2.0

In [9]:
import numpy as np
from source.missing_imputation import DefiniteRegressionImputation

# Synthetic test problem for DefiniteRegressionImputation: Gaussian targets
# and features, with a random subset of the targets replaced by NaN.
n_samples, n_features = 200, 20  # single source of truth for the data shape

imputer = DefiniteRegressionImputation()
# Fixed seed so that Restart Kernel -> Run All reproduces the same data.
rng = np.random.default_rng(seed=0)

y = rng.normal(size=n_samples)
X = rng.normal(size=(n_samples, n_features))
# Number of missing targets is itself random (3% of the samples on average).
num_missing = rng.binomial(n_samples, 0.03)
# Distinct positions in y that are blanked out.
mask = rng.choice(n_samples, num_missing, replace=False)
y[mask] = np.nan

y_imputed = imputer.impute_missing(X, y)
# Only the second returned value is used by the checks in this cell.
_, cov = imputer.compute_covariance(X, y, 1.0)

def make_imputer(X, y):
    """Build the reference matrix for least-squares regression imputation.

    The returned array has shape ``(n, n_observed)``, where ``n = len(y)`` and
    ``n_observed`` is the number of non-NaN entries of ``y``. Rows of observed
    entries form an identity matrix. Rows of missing entries are
    ``x_i @ (X_o.T @ X_o)^{-1} @ X_o.T``, with ``X_o`` the feature rows of the
    observed samples, so multiplying the matrix by the observed targets gives
    the ordinary-least-squares prediction (no intercept) for each gap.

    Raises:
        numpy.linalg.LinAlgError: if ``X_o.T @ X_o`` is singular.
    """
    nan_mask = np.isnan(y)
    n = len(y)
    num_observed = n - np.count_nonzero(nan_mask)

    imputer = np.zeros((n, num_observed))
    imputer[~nan_mask, :] = np.eye(num_observed)

    X_observed = X[~nan_mask, :]
    # Solve the normal equations (X_o.T X_o) B = X_o.T directly instead of
    # forming the explicit inverse: same result, better numerical behaviour.
    beta_hat_front = np.linalg.solve(X_observed.T @ X_observed, X_observed.T)
    # All missing rows at once: each row is x_i @ B.
    imputer[nan_mask, :] = X[nan_mask, :] @ beta_hat_front
    return imputer

# Compare the library results with the hand-built reference matrix:
# applying L to the observed targets must give the imputed vector, and
# L @ L.T must equal the covariance returned by the imputer. On failure the
# second assert reports the total absolute difference.
L = make_imputer(X, y)
assert np.allclose(np.matmul(L, y[np.logical_not(np.isnan(y))]), y_imputed), 'wao'
assert np.allclose(np.matmul(L, L.T), cov), np.abs(np.matmul(L, L.T) - cov).sum()