## Read CSV file

In [1]:
import pandas as pd
import numpy as np
import argparse
import os

In [2]:
def read_matrix(file_path: str) -> np.ndarray:
    """
    Reads a headerless CSV file into a 2D NumPy array of floats.

    Every cell is parsed as a float; cells containing the text 'nan' become
    np.nan in the returned array.

    Args:
        file_path (str): The path to the input CSV file.

    Returns:
        np.ndarray: A 2D NumPy array representing the matrix, with 'nan' converted to np.nan.

    Raises:
        FileNotFoundError: If the specified file does not exist.
        ValueError: If the CSV cannot be read or parsed into a numeric matrix.
            The underlying error is attached as ``__cause__``.
    """
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"Input file not found: {file_path}")

    try:
        # header=None: the first line is data, not column names.
        # na_values=['nan']: treat the literal text 'nan' as a missing value.
        # dtype=float: reject any cell that is not numeric.
        df = pd.read_csv(file_path, header=None, na_values=['nan'], dtype=float)
    except Exception as e:
        # Deliberately broad: callers rely on every read/parse failure being
        # reported as ValueError. Chaining with `from e` keeps the original
        # pandas/OS error available for debugging.
        raise ValueError(f"Error reading or parsing CSV file {file_path}: {e}") from e

    return df.to_numpy()

In [6]:
# Load the example CSV into a NumPy matrix and display it.
data_matrix = read_matrix("../example_data/input_test_data.csv")
print(data_matrix)

[[37.454012 95.071431 73.199394 59.865848       nan]
 [15.599452  5.808361 86.617615 60.111501 70.807258]
 [ 2.058449 96.990985       nan 21.233911 18.182497]
 [      nan 30.424224 52.475643 43.194502 29.122914]
 [61.185289 13.949386 29.214465       nan 45.606998]]


# Interpolation

In [8]:
# Unpack the matrix dimensions and report them, one per line.
rows, cols = np.shape(data_matrix)
print(f"row:{rows}\ncols:{cols}")

row:5
cols:5


In [9]:
def _get_non_diagonal_neighbors(matrix: np.ndarray, r: int, c: int) -> list:
    """
    Collects the non-diagonal (up, down, left, right) neighbors of a cell
    in a 2D matrix.

    Args:
        matrix (np.ndarray): The 2D NumPy array.
        r (int): The row index of the current cell.
        c (int): The column index of the current cell.

    Returns:
        list: A list of values of the non-diagonal neighbors.
    """
    neighbors = []
    rows, cols = matrix.shape

    # Check up
    if r > 0:
        neighbors.append(matrix[r - 1, c])
    # Check down
    if r < rows - 1:
        neighbors.append(matrix[r + 1, c])
    # Check left
    if c > 0:
        neighbors.append(matrix[r, c - 1])
    # Check right
    if c < cols - 1:
        neighbors.append(matrix[r, c + 1])

    return neighbors

In [10]:
# Inspect the four neighbors of the NaN cell at row 2, column 2.
_get_non_diagonal_neighbors(matrix=data_matrix, r=2, c=2)

[np.float64(86.617615),
 np.float64(52.475643),
 np.float64(96.990985),
 np.float64(21.233911)]

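# Interpolator
Potential problems to handle:
1. The whole matrix is NaN: there is nothing to average, so every cell falls back to 0.0.
2. Some of a NaN cell's neighbors are also NaN: those neighbors are skipped and only the valid ones are averaged; if none are valid, the cell falls back to the mean of all non-missing values in the matrix.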

In [11]:
def interpolate_matrix(matrix: np.ndarray) -> np.ndarray:
    """
    Interpolates missing values (np.nan) in a 2D NumPy matrix.

    Each missing value is replaced by the average of its valid non-diagonal
    (up, down, left, right) neighbors. Neighbors are always read from the
    input values, never from cells filled earlier, so the result does not
    depend on traversal order. If a missing value has no valid non-diagonal
    neighbors, it is replaced by the mean of all non-missing values in the
    entire matrix. If the entire matrix is missing, all values are set to 0.0.

    Args:
        matrix (np.ndarray): The input 2D NumPy array, possibly containing np.nan.
            It is not modified.

    Returns:
        np.ndarray: A new 2D NumPy array with all missing values interpolated.
            A matrix without missing values (including an empty one) is
            returned as an unchanged copy.

    Raises:
        ValueError: If ``matrix`` is not two-dimensional.
    """
    # Work on a copy to avoid modifying the original matrix.
    interpolated = np.copy(matrix)
    if interpolated.ndim != 2:
        raise ValueError(
            f"Expected a 2D matrix, got an array with {interpolated.ndim} dimension(s)."
        )

    missing = np.isnan(interpolated)
    if not missing.any():
        return interpolated

    # Fallback value for NaNs with no valid neighbor. np.nanmean is only
    # called when at least one real value exists, because on an all-NaN
    # matrix it emits a RuntimeWarning ("Mean of empty slice").
    global_mean = np.nanmean(interpolated) if not missing.all() else 0.0

    # Surround the matrix with a one-cell NaN border so that out-of-bounds
    # neighbors of edge cells simply count as missing.
    padded = np.pad(interpolated, 1, mode="constant", constant_values=np.nan)
    neighbors = np.stack(
        [
            padded[:-2, 1:-1],  # up
            padded[2:, 1:-1],   # down
            padded[1:-1, :-2],  # left
            padded[1:-1, 2:],   # right
        ]
    )

    valid = ~np.isnan(neighbors)
    valid_counts = valid.sum(axis=0)
    # Zero out NaN neighbors so they do not contribute to the sums.
    neighbor_sums = np.where(valid, neighbors, 0).sum(axis=0)

    # Start from the global-mean fallback and overwrite wherever at least
    # one valid neighbor exists (this also avoids any division by zero).
    fill_values = np.full(interpolated.shape, global_mean, dtype=interpolated.dtype)
    has_valid_neighbor = valid_counts > 0
    fill_values[has_valid_neighbor] = (
        neighbor_sums[has_valid_neighbor] / valid_counts[has_valid_neighbor]
    )

    interpolated[missing] = fill_values[missing]
    return interpolated

In [12]:
# Fill every NaN in the example matrix and show the result.
interpolate_matrix(matrix=data_matrix)

array([[37.454012 , 95.071431 , 73.199394 , 59.865848 , 65.336553 ],
       [15.599452 ,  5.808361 , 86.617615 , 60.111501 , 70.807258 ],
       [ 2.058449 , 96.990985 , 64.3295385, 21.233911 , 18.182497 ],
       [31.222654 , 30.424224 , 52.475643 , 43.194502 , 29.122914 ],
       [61.185289 , 13.949386 , 29.214465 , 39.338655 , 45.606998 ]])