![Task.png](attachment:Task.png)

In [1]:
import numpy as np

In [2]:
import math

1. Решение задачи для вложенных списков Python без использования NumPy

In [3]:
def replace_nan_to_means(X):
    """Return a copy of a 2-D nested list with NaNs replaced by column means.

    Each NaN is replaced by the arithmetic mean of the non-NaN values found in
    the same column. A column that holds nothing but NaNs is filled with 0.
    The input is not modified.

    Args:
        X: List of rows (lists of numbers). The number of columns is taken
            from the first row; cells beyond that width are dropped and a
            shorter row raises IndexError.

    Returns:
        A new list of rows with the same height as ``X``. Non-NaN cells are
        passed through unchanged. An empty ``X`` yields ``[]``.
    """
    if not X:
        # No rows means there is no first row to measure the width from.
        return []
    width = len(X[0])

    fill_values = []
    for col in range(width):
        present = [row[col] for row in X if not math.isnan(row[col])]
        # Divide only when something was counted: an all-NaN column would
        # otherwise evaluate 0 / 0 and raise ZeroDivisionError.
        fill_values.append(sum(present) / len(present) if present else 0)

    return [
        [fill_values[col] if math.isnan(row[col]) else row[col] for col in range(width)]
        for row in X
    ]


%timeit replace_nan_to_means([[1, 0, 1], [math.nan, 2, 5], [2, 0, math.nan], [3, 1, 3]])


34.2 µs ± 1.73 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


2. Гибридное решение задачи с использованием цикла for и возможностей NumPy

In [4]:
def replace_nan_to_means(X):
    """Return a copy of ``X`` in which NaNs are replaced column by column.

    A NaN becomes the NaN-ignoring mean of its column; a column made up
    entirely of NaNs is filled with 0. ``X`` itself is left untouched.
    Assumes ``X`` is a 2-D floating-point NumPy array (TODO confirm with
    callers).
    """
    result = X.copy()
    # Iterating over the transpose yields one column per step; the writes
    # below go through that column object into ``result``.
    for column in result.T:
        missing = np.isnan(column)
        fill_value = 0 if np.all(missing) else np.nanmean(column)
        # A plain ``column = 0`` would only rebind the loop variable and leave
        # ``result`` unchanged, hence the indexed assignment.
        column[missing] = fill_value
    return result


%timeit replace_nan_to_means(np.array([[1, 0, 1], [np.nan, 2, 5], [2, 0, np.nan], [3, 1, 3]]))


271 µs ± 4.03 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


3. Первоначальное решение с использованием функции numpy.apply_along_axis()

In [5]:
def f(Z):
    """Fill the NaNs of one 1-D column in place and return that column.

    NaNs are replaced by the NaN-ignoring mean of ``Z``; if every entry is
    NaN the whole column is set to 0.

    Args:
        Z: 1-D NumPy array; it is modified in place.

    Returns:
        The same array object ``Z``, always with its original length.
    """
    isnan_column = np.isnan(Z)
    if np.all(isnan_column):
        # Write through the array rather than rebinding ``Z`` to a scalar:
        # np.apply_along_axis sizes its output from the first result, so a
        # scalar returned for the first column breaks the whole call.
        Z[:] = 0
    else:
        Z[isnan_column] = np.nanmean(Z)
    return Z


def replace_nan_to_means(X):
    """Apply ``f`` to every axis-0 slice of a copy of ``X`` and return the result.

    Working on a copy keeps the in-place writes performed by ``f`` away from
    the caller's array.
    """
    columns_axis = 0
    return np.apply_along_axis(f, columns_axis, X.copy())


%timeit replace_nan_to_means(np.array([[1, 0, 1], [np.nan, 2, 5], [2, 0, np.nan], [3, 1, 3]]))


391 µs ± 2.87 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


4. Решение с использованием функции numpy.apply_along_axis() и конструкции try - except

In [6]:
import warnings
# Promote every warning to an exception. This changes the process-wide warning
# filters, so it stays in effect for all code executed after this point, not
# only for the definitions that follow.
warnings.filterwarnings('error')


def f(Z):
    """Fill the NaNs of one 1-D column in place, using try/except.

    ``np.nanmean`` emits a RuntimeWarning when the slice has no non-NaN
    values. That warning is escalated to an exception locally and used as the
    "all-NaN" signal, in which case the column is set to 0. Any RuntimeWarning
    raised while computing the mean is treated as that signal.

    Args:
        Z: 1-D NumPy array; it is modified in place.

    Returns:
        The same array object ``Z``, always with its original length.
    """
    # Scope the escalation to this call so the function does not depend on,
    # or alter, the global warning filters.
    with warnings.catch_warnings():
        warnings.simplefilter('error', RuntimeWarning)
        try:
            column_mean = np.nanmean(Z)
        except RuntimeWarning:
            # Zero the column in place; returning a bare scalar here would
            # give np.apply_along_axis a wrongly shaped first result.
            Z[:] = 0
            return Z
    Z[np.isnan(Z)] = column_mean
    return Z


def replace_nan_to_means(X):
    """Return the result of running ``f`` along axis 0 of a copy of ``X``.

    ``X`` is copied first so that whatever ``f`` writes into its argument
    does not reach the caller's array.
    """
    working_copy = X.copy()
    filled = np.apply_along_axis(f, 0, working_copy)
    return filled


%timeit replace_nan_to_means(np.array([[1, 0, 1], [np.nan, 2, 5], [2, 0, np.nan], [3, 1, 3]]))


367 µs ± 7.05 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


5. Полностью векторизованное решение

In [7]:
def replace_nan_to_means(X):
    """Vectorized replacement of NaNs by the mean of their column.

    Each NaN is replaced by the mean of the non-NaN values along axis 0; a
    column with no non-NaN values is filled with 0. The means are computed
    from explicit sums and counts so that an all-NaN column raises no
    warning, which matters when warnings are configured to raise.

    Args:
        X: NumPy array, not modified. Assumed to be 2-D with a floating-point
            dtype (TODO confirm with callers).

    Returns:
        A new array with the same shape and dtype as ``X``.
    """
    isnan_array = np.isnan(X)
    Y = X.copy()
    # Zero the NaNs temporarily so they do not poison the column sums; these
    # cells are overwritten with the real fill values below.
    Y[isnan_array] = 0
    column_sums = Y.sum(axis=0)
    valid_counts = (~isnan_array).sum(axis=0)
    # An all-NaN column has sum 0 and count 0. Bumping a zero divisor to 1
    # yields the required fill value 0 without evaluating 0 / 0.
    column_means = column_sums / (valid_counts + (valid_counts == 0))
    # Broadcast the per-column means to the full shape, then pick the cells
    # that were NaN in the input.
    Y[isnan_array] = (column_means + np.zeros(X.shape))[isnan_array]
    return Y


%timeit replace_nan_to_means(np.array([[1, 0, 1], [np.nan, 2, 5], [2, 0, np.nan], [3, 1, 3]]))


121 µs ± 2.98 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
