In [22]:
import warnings

import numpy as np
import pandas as pd

In [23]:
def minmax(x, y):
    """Return the lower and upper bounds of the range ``x`` plus/minus ``y``.

    Args:
        x: Centre of the range.
        y: Offset subtracted from and added to ``x``.

    Returns:
        dict: ``{'min': x - y, 'max': x + y}``.
    """
    return {'min': x - y, 'max': x + y}

In [24]:
def arreglo_variable_opt_py(f):
    """Impute missing values (NaN) of a numeric series with random integers.

    Every missing entry is replaced by an integer drawn uniformly from the
    inclusive range ``[round(mean) - round(std), round(mean) + round(std)]``,
    where mean and std are computed over the non-missing entries. Observed
    values are never modified. The draws come from NumPy's global random
    state (``np.random.randint``).

    Args:
        f: A pandas Series. Any other input is converted with
            ``pd.Series(f, dtype=float)``.

    Returns:
        - The input itself when it cannot be converted to a float Series.
        - The (possibly converted) Series itself when it has no missing
          values, or when no finite mean can be computed (for example when
          every entry is missing); in the latter case a ``RuntimeWarning``
          is emitted and the missing values are left in place.
        - Otherwise a new Series with the missing entries filled in.
    """
    if not isinstance(f, pd.Series):
        try:
            f = pd.Series(f, dtype=float)
        except (TypeError, ValueError):
            # Best effort: input that cannot be read as floats is handed back as-is.
            return f

    nan_mask = f.isna()
    if not nan_mask.any():
        return f

    mean_val = f.mean(skipna=True)
    if pd.isna(mean_val) or not np.isfinite(mean_val):
        # Without a finite mean there is no range to draw from, and round()
        # would raise on NaN/inf, so the series is returned unchanged.
        warnings.warn(
            "arreglo_variable_opt_py: no finite mean available; "
            "missing values were left unchanged.",
            RuntimeWarning,
            stacklevel=2,
        )
        return f

    std_val = f.std(skipna=True)
    if pd.isna(std_val) or not np.isfinite(std_val):
        # The sample std is NaN with a single observed value; use a zero
        # spread so that every gap is filled with the rounded mean.
        std_val = 0.0

    mn_sd = minmax(x=round(mean_val), y=round(std_val))

    num_nan = int(nan_mask.sum())
    # randint excludes its upper bound, hence the + 1 to make 'max' reachable.
    aleatorios = np.random.randint(mn_sd['min'], mn_sd['max'] + 1, size=num_nan)

    f_imputado = f.copy()
    f_imputado.loc[nan_mask] = aleatorios

    # Clamp only the imputed entries: first raise anything below 1 to 1, then
    # cap at the upper bound. The cap can only trigger after the first clamp,
    # i.e. when the upper bound itself is below 1.
    f_imputado.loc[nan_mask & (f_imputado < 1)] = 1
    f_imputado.loc[nan_mask & (f_imputado > mn_sd['max'])] = mn_sd['max']

    return f_imputado

In [25]:
def imputar_numericas_df(df):
    """Impute the missing values of every numeric column of a dataframe.

    The input is copied first, so the caller's dataframe is left untouched.
    Columns whose dtype is not numeric are carried over as they are.

    Args:
        df (pd.DataFrame): The dataframe to process.

    Returns:
        pd.DataFrame: A new dataframe in which each numeric column has been
        passed through ``arreglo_variable_opt_py``.
    """
    resultado = df.copy()

    # Decide up front which columns qualify; assigning one column below does
    # not alter the dtype of the others.
    columnas_numericas = [
        nombre
        for nombre in resultado.columns
        if pd.api.types.is_numeric_dtype(resultado[nombre])
    ]

    for nombre in columnas_numericas:
        resultado[nombre] = arreglo_variable_opt_py(resultado[nombre])

    return resultado

In [26]:
# Read the comma-separated dataset from `data.csv` under the home directory (`~`).
data = pd.read_csv('~/data.csv', sep=',')
# Bind the loaded table to `df`, the name the following cells work with.
# NOTE(review): read_csv already returns a DataFrame, so this wrap looks redundant — confirm before removing.
df = pd.DataFrame(data)

In [27]:
# Build a copy of `df` in which the numeric columns have had their missing values imputed.
df_imputado = imputar_numericas_df(df)

In [28]:
# Preview the first rows of the imputed dataframe.
df_imputado.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,diabetic_type
0,6.0,148.0,72.0,35.0,163.0,33.6,0.627,50,diabetic
1,1.0,85.0,66.0,29.0,218.0,26.6,0.351,31,normal
2,8.0,183.0,64.0,36.0,66.0,23.3,0.672,32,diabetic
3,1.0,89.0,66.0,23.0,94.0,28.1,0.167,21,normal
4,7.0,137.0,40.0,35.0,168.0,43.1,2.288,33,diabetic
