# Vergleich von Imputation Methoden

An dieser Stelle sollen verschiede Methoden zum interpolieren von fehlenden Werten betrachtet und verglichen werden.

### Vorbereitung

In [1]:
import pandas as pd
import numpy as np
import math
import time

from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer, KNNImputer

In [2]:
results = []

In [3]:
def reset_base():
    base= pd.read_csv('additional_data/base.csv') 
    base.set_index(['Country Name', 'Indicator Name'], inplace=True)
    base = base.sort_index(level=['Country Name', 'Indicator Name'])
    return base
base = reset_base()

def get_cords(frac, rnd_state):
    n = int(base.isna().sum().sum()*frac)
    print(f'Testdaten mit {frac*100}% fehlenden Werten (absolut: {n})')
    #random state to ensure reproducibility
    rnds = np.random.RandomState(rnd_state)

    #coordinates for data entries to be removed randomly
    #5000 entries are selected
    cords = pd.DataFrame([[rnds.randint(0, len(base), size=n*4)[i], 
                  rnds.randint(0, len(base.columns), size=n*4)[i]]
                  for i in range(n*4)])

    #all coordinates pointing to NaN entries are removed and
    #first 1000 remaining entries are selected
    cords['value'] = [base.iloc[cords[0][i], cords[1][i]] for i in cords.index]
    cords = cords.dropna()[:n].reset_index(drop=True)
    
    return cords

def reset_train(cords):
    train = reset_base()
    for i in cords.index:
        train.iloc[cords[0][i], cords[1][i]] = None
    return train

def evaluate(df, t, cords):
    
    
    #scaling original data and imputed data
    #necessary ?????????????????????????????????????
    train = reset_train(cords)
    scaler = StandardScaler().fit(train) #fitting on train?
    norm_base = pd.DataFrame(scaler.transform(base))
    df = pd.DataFrame(scaler.transform(df))

    #getting imputed values for simulated NaNs and true value 
    res =pd.DataFrame({'y_true': [norm_base.iloc[cords[0][i], cords[1][i]] for i in cords.index],
                       'y_pred': [df.iloc[cords[0][i], cords[1][i]] for i in cords.index]
                      })
    res = res.dropna()

   
    #calculate evaluation metrics
    r2 = r2_score(res['y_true'], res['y_pred'])
    rmse = math.sqrt(mean_squared_error(res['y_true'], res['y_pred']))
    still_missing = df.isna().sum().sum()
    
    print(f'Mit dieser Methode bleiben {still_missing} NaNs bestehen.')
    print('')
    print(f'{len(res)} Werte wurden für die Metriken verwendet.')
    print(f'r2: {r2}, rmse: {rmse}')
    
    return [r2, rmse, still_missing, t]



In [4]:
def impute_backfill(df):
    df = df.fillna(method='bfill', limit=3)
    return df

def impute_overall_means(df):
    #fill NaNs with overall mean of that indicator
    values = pd.DataFrame(df.stack()).groupby('Indicator Name')[0].mean()
    df = pd.DataFrame(df.stack(dropna=False))
    
    df[0] = df[0].fillna(df.groupby('Indicator Name')[0].transform('mean'))
    df = df.unstack()
    df.columns = df.columns.droplevel(0)
    df = df.sort_index(level=['Country Name', 'Indicator Name'])
        
    return df

def impute_yearly_means(df):
    #fill NaNs with overall mean of that indicator
    
    for i in df.columns:
        df[i] = df[i].fillna(df.groupby('Indicator Name')[i].transform('mean'))
            
    return df

def impute_yearly_means_per_region(df):
    country_data = pd.read_csv('../Data/WDICountry.csv')
    country_data = country_data.loc[:,['Table Name', 'Region']]
    df = pd.merge(df.reset_index(), country_data, how='left', left_on='Country Name', right_on='Table Name').drop('Table Name', axis=1)
    df = df.set_index(['Country Name', 'Indicator Name', 'Region'])

    for i in df.columns:
        df[i] = df[i].fillna(df.groupby(['Indicator Name', 'Region'])[i].transform('mean'))

    df = df.reset_index().set_index(['Country Name', 'Indicator Name']).drop('Region', axis=1)
    return df

def interpolate3(df):
    df = df.interpolate(limit=3)
    return df

def interpolate_all(df):
    df = df.interpolate()
    return df

def iterative_imputer_1(df):
    col = df.columns
    idx = df.index
    
    iter_imp = IterativeImputer(random_state=999)
    df= iter_imp.fit_transform(df)
    df= pd.DataFrame(df, columns=col, index=idx)
    return df

def iterative_imputer_2(df):
    df = df.unstack().T
    col = df.columns
    idx = df.index

    iter_imp = IterativeImputer(random_state=999)
    df= iter_imp.fit_transform(df)

    df = pd.DataFrame(df, columns=col, index=idx)
    df = df.unstack().T
    df = df.sort_index(level=['Country Name', 'Indicator Name'])
    
    return df

def iterative_imputer_3(df):

    df = df.reset_index()
    df = df.set_index(['Indicator Name', 'Country Name'])
    df = df.unstack().T

    col = df.columns
    idx = df.index

    iter_imp = IterativeImputer(random_state=999, verbose=True)
    df= iter_imp.fit_transform(df)

    df = pd.DataFrame(df, columns=col, index=idx)
    df = df.unstack().T
    df = df.reset_index()
    df = df.set_index(['Country Name', 'Indicator Name'])
    df = df.sort_index(level=['Country Name', 'Indicator Name'])
    
    return df

def mice_imputer(df):
    n_imputations =  12
    dfs = []
    col = df.columns
    idx = df.index
    
    for i in range(n_imputations): 
        print(f'Imputation round {i}')
        iter_imp = IterativeImputer(random_state=i, sample_posterior=True, verbose=2)
        df_temp = iter_imp.fit_transform(df)
        dfs.append(df_temp)
    
    df = np.mean(np.array(dfs), axis=0)
    df = pd.DataFrame(df, columns=col, index=idx)
    return df

def knn_imputer1(df):
    col = df.columns
    idx = df.index
    
    knn_imp = KNNImputer(n_neighbors=2)
    df= knn_imp.fit_transform(df)
    df = pd.DataFrame(df, columns=col, index=idx)
    return df

def knn_imputer2(df, n=4):
    
    df = df.reset_index()
    df = df.set_index(['Indicator Name', 'Country Name'])
    df = df.unstack().T

    col = df.columns
    idx = df.index

    knn_imp = KNNImputer(n_neighbors=n)
    df= knn_imp.fit_transform(df)
    df = pd.DataFrame(df, columns=col, index=idx)

    df = df.unstack().T
    df = df.reset_index()
    df = df.set_index(['Country Name', 'Indicator Name'])
    df = df.sort_index(level=['Country Name', 'Indicator Name'])
    
    return df

In [5]:


functions =  {'Backfill':impute_backfill, 'Overall Mean':impute_overall_means, 
              'Yearly Mean':impute_yearly_means, 'Yearly Mean per Region':impute_yearly_means_per_region, 
              'Interpolate 3':interpolate3, 'Interpolate all':interpolate_all, 
              'ICE 1':iterative_imputer_1, 'ICE 2':iterative_imputer_2, 
              'ICE3':iterative_imputer_3, 'MICE':mice_imputer, 
              'KNN 1':knn_imputer1, 'KNN 2':knn_imputer2
             }



In [6]:
def run_all(frac):
    
    cords_list =[]
    for i in np.arange(1,6):
        cords_list.append(get_cords(frac, i))
    
    results = {}
    detailed_results = {}    
    
    for name, func in functions.items():
        func_runs = []
        print(name)
        
        for cords in cords_list:
            
            t0 = time.time()
            df= func(reset_train(cords)) 
            t1 = time.time()

            t = t1-t0

            func_runs.append(evaluate(df, t, cords))
        
        print(func_runs)
        detailed_results[name] = func_runs
        results[name] = np.mean(np.array(func_runs), axis=0)
        
    return results, detailed_results
r1, r1_detail = run_all(0.05)

Testdaten mit 5.0% fehlenden Werten (absolut: 8515)
Testdaten mit 5.0% fehlenden Werten (absolut: 8515)
Testdaten mit 5.0% fehlenden Werten (absolut: 8515)
Testdaten mit 5.0% fehlenden Werten (absolut: 8515)
Testdaten mit 5.0% fehlenden Werten (absolut: 8515)
Backfill
Mit dieser Methode bleiben 32924 NaNs bestehen.

8195 Werte wurden für die Metriken verwendet.
r2: -0.2630386067986965, rmse: 3.1264580984032304
Mit dieser Methode bleiben 32825 NaNs bestehen.

8263 Werte wurden für die Metriken verwendet.
r2: -2.77533240911633, rmse: 1.1896157293266358
Mit dieser Methode bleiben 32824 NaNs bestehen.

8250 Werte wurden für die Metriken verwendet.
r2: -1.688280205196218, rmse: 1.0226797653748734
Mit dieser Methode bleiben 32841 NaNs bestehen.

8249 Werte wurden für die Metriken verwendet.
r2: -0.6672076075877178, rmse: 2.085863590967128
Mit dieser Methode bleiben 32828 NaNs bestehen.

8240 Werte wurden für die Metriken verwendet.
r2: -0.07720522781720884, rmse: 3.533882957257935
[[-0.26303



Mit dieser Methode bleiben 0 NaNs bestehen.

8515 Werte wurden für die Metriken verwendet.
r2: 0.9942158015767555, rmse: 0.20756410920051788




Mit dieser Methode bleiben 0 NaNs bestehen.

8515 Werte wurden für die Metriken verwendet.
r2: 0.7827329900210515, rmse: 0.28204132134606086




Mit dieser Methode bleiben 0 NaNs bestehen.

8515 Werte wurden für die Metriken verwendet.
r2: 0.9882365310034197, rmse: 0.06661977296323156




Mit dieser Methode bleiben 0 NaNs bestehen.

8515 Werte wurden für die Metriken verwendet.
r2: 0.9877198131046442, rmse: 0.17619933464945803




Mit dieser Methode bleiben 0 NaNs bestehen.

8515 Werte wurden für die Metriken verwendet.
r2: 0.9908166504615118, rmse: 0.3209799494672369
[[0.9942158015767555, 0.20756410920051788, 0, 10.540084838867188], [0.7827329900210515, 0.28204132134606086, 0, 10.650822162628174], [0.9882365310034197, 0.06661977296323156, 0, 10.39804458618164], [0.9877198131046442, 0.17619933464945803, 0, 10.379166841506958], [0.9908166504615118, 0.3209799494672369, 0, 10.227316617965698]]
ICE 2
Mit dieser Methode bleiben 0 NaNs bestehen.

8515 Werte wurden für die Metriken verwendet.
r2: 0.8970001206792948, rmse: 0.8758889078254123
Mit dieser Methode bleiben 0 NaNs bestehen.

8515 Werte wurden für die Metriken verwendet.
r2: 0.9664793452415512, rmse: 0.11078273770133765
Mit dieser Methode bleiben 0 NaNs bestehen.

8515 Werte wurden für die Metriken verwendet.
r2: 0.8501371275692521, rmse: 0.23778391237999355
Mit dieser Methode bleiben 0 NaNs bestehen.

8515 Werte wurden für die Metriken verwendet.
r2: 0.965367



Mit dieser Methode bleiben 0 NaNs bestehen.

8515 Werte wurden für die Metriken verwendet.
r2: 0.6608870903592907, rmse: 1.5892888267905176
[IterativeImputer] Completing matrix with shape (4898, 165)
[IterativeImputer] Change: 3.042906089520646e+16, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 2954233767627873.0, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 2643613149961181.0, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 721728026903842.5, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 419223926970822.3, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 262976338864440.5, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 87106028046302.67, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 3554771316448614.0, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 1582538600202182.0, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 764600851



Mit dieser Methode bleiben 0 NaNs bestehen.

8515 Werte wurden für die Metriken verwendet.
r2: 0.8401332984275719, rmse: 0.24193285700689618
[IterativeImputer] Completing matrix with shape (4898, 165)
[IterativeImputer] Change: 3.0843604371072092e+16, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 1974599435726865.5, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 2803462445055742.0, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 1560980079810333.5, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 997587456483245.8, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 431609673082643.9, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 117967056857141.11, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 451332183592829.25, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 226829829940928.2, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 141200



Mit dieser Methode bleiben 0 NaNs bestehen.

8515 Werte wurden für die Metriken verwendet.
r2: 0.44034797517710433, rmse: 0.4595094866419774
[IterativeImputer] Completing matrix with shape (4898, 165)
[IterativeImputer] Change: 4.074677531495463e+16, scaled tolerance: 24412569472460.902 
[IterativeImputer] Change: 1723613661294772.5, scaled tolerance: 24412569472460.902 
[IterativeImputer] Change: 2108405491430387.5, scaled tolerance: 24412569472460.902 
[IterativeImputer] Change: 1015271335528370.8, scaled tolerance: 24412569472460.902 
[IterativeImputer] Change: 374535689251175.2, scaled tolerance: 24412569472460.902 
[IterativeImputer] Change: 219558117741746.5, scaled tolerance: 24412569472460.902 
[IterativeImputer] Change: 166121633083397.12, scaled tolerance: 24412569472460.902 
[IterativeImputer] Change: 149020762532495.94, scaled tolerance: 24412569472460.902 
[IterativeImputer] Change: 134442347329520.14, scaled tolerance: 24412569472460.902 
[IterativeImputer] Change: 432421



Mit dieser Methode bleiben 0 NaNs bestehen.

8515 Werte wurden für die Metriken verwendet.
r2: 0.7923310971040671, rmse: 0.7245819483282079
[IterativeImputer] Completing matrix with shape (4898, 165)
[IterativeImputer] Change: 2.9978401663540936e+16, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 3003568625165439.5, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 2708235606532678.5, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 1504758453866385.0, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 711438870770673.6, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 287321582420306.75, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 3690713592073067.0, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 1445641165776650.2, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 560895517007246.0, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 323296



Mit dieser Methode bleiben 0 NaNs bestehen.

8515 Werte wurden für die Metriken verwendet.
r2: 0.7486558176825678, rmse: 1.679236113665864
[[0.6608870903592907, 1.5892888267905176, 0, 73.32935333251953], [0.8401332984275719, 0.24193285700689618, 0, 72.47668313980103], [0.44034797517710433, 0.4595094866419774, 0, 71.95914244651794], [0.7923310971040671, 0.7245819483282079, 0, 89.02884793281555], [0.7486558176825678, 1.679236113665864, 0, 73.56477570533752]]
MICE
Imputation round 0
[IterativeImputer] Completing matrix with shape (26070, 31)
[IterativeImputer] Ending imputation round 1/10, elapsed time 14.20
[IterativeImputer] Ending imputation round 2/10, elapsed time 28.84
[IterativeImputer] Ending imputation round 3/10, elapsed time 43.19
[IterativeImputer] Ending imputation round 4/10, elapsed time 57.57
[IterativeImputer] Ending imputation round 5/10, elapsed time 72.02
[IterativeImputer] Ending imputation round 6/10, elapsed time 86.45
[IterativeImputer] Ending imputation round 7/10

[IterativeImputer] Ending imputation round 2/10, elapsed time 28.84
[IterativeImputer] Ending imputation round 3/10, elapsed time 43.19
[IterativeImputer] Ending imputation round 4/10, elapsed time 57.35
[IterativeImputer] Ending imputation round 5/10, elapsed time 71.61
[IterativeImputer] Ending imputation round 6/10, elapsed time 85.92
[IterativeImputer] Ending imputation round 7/10, elapsed time 100.28
[IterativeImputer] Ending imputation round 8/10, elapsed time 114.46
[IterativeImputer] Ending imputation round 9/10, elapsed time 128.77
[IterativeImputer] Ending imputation round 10/10, elapsed time 143.37
Imputation round 11
[IterativeImputer] Completing matrix with shape (26070, 31)
[IterativeImputer] Ending imputation round 1/10, elapsed time 14.20
[IterativeImputer] Ending imputation round 2/10, elapsed time 28.53
[IterativeImputer] Ending imputation round 3/10, elapsed time 42.68
[IterativeImputer] Ending imputation round 4/10, elapsed time 56.94
[IterativeImputer] Ending imput

[IterativeImputer] Ending imputation round 9/10, elapsed time 130.42
[IterativeImputer] Ending imputation round 10/10, elapsed time 144.89
Imputation round 9
[IterativeImputer] Completing matrix with shape (26070, 31)
[IterativeImputer] Ending imputation round 1/10, elapsed time 14.29
[IterativeImputer] Ending imputation round 2/10, elapsed time 28.63
[IterativeImputer] Ending imputation round 3/10, elapsed time 42.78
[IterativeImputer] Ending imputation round 4/10, elapsed time 56.98
[IterativeImputer] Ending imputation round 5/10, elapsed time 71.30
[IterativeImputer] Ending imputation round 6/10, elapsed time 85.39
[IterativeImputer] Ending imputation round 7/10, elapsed time 99.64
[IterativeImputer] Ending imputation round 8/10, elapsed time 113.94
[IterativeImputer] Ending imputation round 9/10, elapsed time 128.41
[IterativeImputer] Ending imputation round 10/10, elapsed time 142.63
Imputation round 10
[IterativeImputer] Completing matrix with shape (26070, 31)
[IterativeImputer]

[IterativeImputer] Ending imputation round 4/10, elapsed time 57.31
[IterativeImputer] Ending imputation round 5/10, elapsed time 71.68
[IterativeImputer] Ending imputation round 6/10, elapsed time 85.99
[IterativeImputer] Ending imputation round 7/10, elapsed time 100.36
[IterativeImputer] Ending imputation round 8/10, elapsed time 114.80
[IterativeImputer] Ending imputation round 9/10, elapsed time 129.22
[IterativeImputer] Ending imputation round 10/10, elapsed time 143.47
Imputation round 8
[IterativeImputer] Completing matrix with shape (26070, 31)
[IterativeImputer] Ending imputation round 1/10, elapsed time 14.26
[IterativeImputer] Ending imputation round 2/10, elapsed time 28.51
[IterativeImputer] Ending imputation round 3/10, elapsed time 43.04
[IterativeImputer] Ending imputation round 4/10, elapsed time 57.26
[IterativeImputer] Ending imputation round 5/10, elapsed time 71.49
[IterativeImputer] Ending imputation round 6/10, elapsed time 85.74
[IterativeImputer] Ending imputa

[IterativeImputer] Ending imputation round 1/10, elapsed time 14.18
[IterativeImputer] Ending imputation round 2/10, elapsed time 28.40
[IterativeImputer] Ending imputation round 3/10, elapsed time 42.66
[IterativeImputer] Ending imputation round 4/10, elapsed time 57.08
[IterativeImputer] Ending imputation round 5/10, elapsed time 71.25
[IterativeImputer] Ending imputation round 6/10, elapsed time 85.29
[IterativeImputer] Ending imputation round 7/10, elapsed time 99.67
[IterativeImputer] Ending imputation round 8/10, elapsed time 114.20
[IterativeImputer] Ending imputation round 9/10, elapsed time 128.51
[IterativeImputer] Ending imputation round 10/10, elapsed time 142.77
Imputation round 7
[IterativeImputer] Completing matrix with shape (26070, 31)
[IterativeImputer] Ending imputation round 1/10, elapsed time 14.23
[IterativeImputer] Ending imputation round 2/10, elapsed time 28.42
[IterativeImputer] Ending imputation round 3/10, elapsed time 42.58
[IterativeImputer] Ending imputat

[IterativeImputer] Ending imputation round 8/10, elapsed time 114.94
[IterativeImputer] Ending imputation round 9/10, elapsed time 129.37
[IterativeImputer] Ending imputation round 10/10, elapsed time 143.65
Imputation round 5
[IterativeImputer] Completing matrix with shape (26070, 31)
[IterativeImputer] Ending imputation round 1/10, elapsed time 14.43
[IterativeImputer] Ending imputation round 2/10, elapsed time 28.86
[IterativeImputer] Ending imputation round 3/10, elapsed time 43.21
[IterativeImputer] Ending imputation round 4/10, elapsed time 57.51
[IterativeImputer] Ending imputation round 5/10, elapsed time 71.93
[IterativeImputer] Ending imputation round 6/10, elapsed time 86.46
[IterativeImputer] Ending imputation round 7/10, elapsed time 100.72
[IterativeImputer] Ending imputation round 8/10, elapsed time 114.83
[IterativeImputer] Ending imputation round 9/10, elapsed time 129.20
[IterativeImputer] Ending imputation round 10/10, elapsed time 143.70
Imputation round 6
[Iterativ

In [7]:
r1

{'Backfill': array([-1.09421281e+00,  2.19170003e+00,  3.28484000e+04,  8.34356499e-01]),
 'Overall Mean': array([-0.25154093,  1.80593563,  0.        ,  1.15138745]),
 'Yearly Mean': array([9.96435636e-03, 1.76896922e+00, 5.22980000e+04, 9.21581650e-01]),
 'Yearly Mean per Region': array([1.63351398e-02, 1.75702625e+00, 5.72740000e+04, 1.05105948e+00]),
 'Interpolate 3': array([-5.57856653e-01,  1.99920261e+00,  3.28790000e+04,  9.16369581e-01]),
 'Interpolate all': array([-0.55785265,  1.96691879, 58.        ,  0.88165617]),
 'ICE 1': array([ 0.94874436,  0.2106809 ,  0.        , 10.43908701]),
 'ICE 2': array([ 0.92646911,  0.44874143,  0.        , 20.39443097]),
 'ICE3': array([ 0.69647106,  0.93890985,  0.        , 76.07176051]),
 'MICE': array([9.51523484e-01, 2.36147040e-01, 0.00000000e+00, 1.72219538e+03]),
 'KNN 1': array([  0.27282434,   1.45838447,   0.        , 123.8936542 ]),
 'KNN 2': array([ 0.31693826,  1.45232173,  0.        , 20.72366467])}

In [None]:
r2 , r2_detail =  run_all(0.075)

Testdaten mit 7.5% fehlenden Werten (absolut: 12773)
Testdaten mit 7.5% fehlenden Werten (absolut: 12773)
Testdaten mit 7.5% fehlenden Werten (absolut: 12773)
Testdaten mit 7.5% fehlenden Werten (absolut: 12773)
Testdaten mit 7.5% fehlenden Werten (absolut: 12773)
Backfill
Mit dieser Methode bleiben 33252 NaNs bestehen.

12384 Werte wurden für die Metriken verwendet.
r2: -0.7622360204251646, rmse: 1.9101575370444133
Mit dieser Methode bleiben 33280 NaNs bestehen.

12322 Werte wurden für die Metriken verwendet.
r2: -0.13326755926364164, rmse: 1.5732663625326984
Mit dieser Methode bleiben 33292 NaNs bestehen.

12334 Werte wurden für die Metriken verwendet.
r2: -0.15471382925960686, rmse: 2.34729184379117
Mit dieser Methode bleiben 33312 NaNs bestehen.

12333 Werte wurden für die Metriken verwendet.
r2: -0.38762500614250417, rmse: 2.1233606768359277
Mit dieser Methode bleiben 33191 NaNs bestehen.

12365 Werte wurden für die Metriken verwendet.
r2: -0.12638032635757868, rmse: 1.86746347988



Mit dieser Methode bleiben 0 NaNs bestehen.

12773 Werte wurden für die Metriken verwendet.
r2: 0.9803957089742923, rmse: 0.19838140357980827




Mit dieser Methode bleiben 0 NaNs bestehen.

12773 Werte wurden für die Metriken verwendet.
r2: 0.975549424471611, rmse: 0.22712199686442056




Mit dieser Methode bleiben 0 NaNs bestehen.

12773 Werte wurden für die Metriken verwendet.
r2: 0.9937200129141266, rmse: 0.17013101617828086




Mit dieser Methode bleiben 0 NaNs bestehen.

12773 Werte wurden für die Metriken verwendet.
r2: 0.98316196355757, rmse: 0.2298396172745244




Mit dieser Methode bleiben 0 NaNs bestehen.

12773 Werte wurden für die Metriken verwendet.
r2: 0.9973721004530116, rmse: 0.08877467829614669
[[0.9803957089742923, 0.19838140357980827, 0, 10.540566682815552], [0.975549424471611, 0.22712199686442056, 0, 10.575110912322998], [0.9937200129141266, 0.17013101617828086, 0, 10.457128524780273], [0.98316196355757, 0.2298396172745244, 0, 10.557584285736084], [0.9973721004530116, 0.08877467829614669, 0, 10.619830131530762]]
ICE 2
Mit dieser Methode bleiben 0 NaNs bestehen.

12773 Werte wurden für die Metriken verwendet.
r2: 0.983850565221154, rmse: 0.1800545297095781
Mit dieser Methode bleiben 0 NaNs bestehen.

12773 Werte wurden für die Metriken verwendet.
r2: 0.9878728233694023, rmse: 0.15995377947804115
Mit dieser Methode bleiben 0 NaNs bestehen.

12773 Werte wurden für die Metriken verwendet.
r2: 0.8221999907384432, rmse: 0.9052530783284753
Mit dieser Methode bleiben 0 NaNs bestehen.

12773 Werte wurden für die Metriken verwendet.
r2: 0.9391



Mit dieser Methode bleiben 0 NaNs bestehen.

12773 Werte wurden für die Metriken verwendet.
r2: 0.8627743784501933, rmse: 0.5248594142473385
[IterativeImputer] Completing matrix with shape (4898, 165)
[IterativeImputer] Change: 3.0814033887278932e+16, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 4049601941154759.5, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 1704981817870652.8, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 575654038663918.1, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 326801395026729.4, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 398497549856261.3, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 343697382081806.56, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 194503543217342.3, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 150577209398347.97, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 1463147



Mit dieser Methode bleiben 0 NaNs bestehen.

12773 Werte wurden für die Metriken verwendet.
r2: 0.9235632286905754, rmse: 0.40157421757145706
[IterativeImputer] Completing matrix with shape (4898, 165)
[IterativeImputer] Change: 3.9815915719371384e+16, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 3283528362995315.5, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 1417978103456871.8, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 1195177266616931.5, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 980831053524627.5, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 678208598410445.6, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 437629738609650.75, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 214987798954959.03, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 235975528872455.78, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 2131



Mit dieser Methode bleiben 0 NaNs bestehen.

12773 Werte wurden für die Metriken verwendet.
r2: 0.42566153957947106, rmse: 1.6270019390377553
[IterativeImputer] Completing matrix with shape (4898, 165)
[IterativeImputer] Change: 3.0028657868453636e+16, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 2186893233775652.0, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 1841838145209969.2, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 1144503912188055.8, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 739046084409491.6, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 266280235620721.6, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 227573724886647.6, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 3509556655874191.0, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 1643665201033639.0, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 84973



Mit dieser Methode bleiben 0 NaNs bestehen.

12773 Werte wurden für die Metriken verwendet.
r2: 0.8704849531420322, rmse: 0.6374399535293
[IterativeImputer] Completing matrix with shape (4898, 165)
[IterativeImputer] Change: 2.736174011481228e+16, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 2779253917331579.0, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 1876123179609837.0, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 1164027701359070.2, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 843606429797648.2, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 421285453560502.5, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 4152459440664505.5, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 1804981003284089.2, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 823946290993471.6, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 5385640505



Mit dieser Methode bleiben 0 NaNs bestehen.

12773 Werte wurden für die Metriken verwendet.
r2: 0.8358781586296624, rmse: 0.7015650048239355
[[0.8627743784501933, 0.5248594142473385, 0, 72.85075306892395], [0.9235632286905754, 0.40157421757145706, 0, 72.2787914276123], [0.42566153957947106, 1.6270019390377553, 0, 72.4583535194397], [0.8704849531420322, 0.6374399535293, 0, 74.48256134986877], [0.8358781586296624, 0.7015650048239355, 0, 72.73682904243469]]
MICE
Imputation round 0
[IterativeImputer] Completing matrix with shape (26070, 31)
[IterativeImputer] Ending imputation round 1/10, elapsed time 14.70
[IterativeImputer] Ending imputation round 2/10, elapsed time 29.18
[IterativeImputer] Ending imputation round 3/10, elapsed time 43.87
[IterativeImputer] Ending imputation round 4/10, elapsed time 58.42
[IterativeImputer] Ending imputation round 5/10, elapsed time 72.90
[IterativeImputer] Ending imputation round 6/10, elapsed time 87.32
[IterativeImputer] Ending imputation round 7/10, 

[IterativeImputer] Ending imputation round 2/10, elapsed time 29.51
[IterativeImputer] Ending imputation round 3/10, elapsed time 44.26
[IterativeImputer] Ending imputation round 4/10, elapsed time 58.65
[IterativeImputer] Ending imputation round 5/10, elapsed time 73.14
[IterativeImputer] Ending imputation round 6/10, elapsed time 87.90
[IterativeImputer] Ending imputation round 7/10, elapsed time 102.43
[IterativeImputer] Ending imputation round 8/10, elapsed time 116.83
[IterativeImputer] Ending imputation round 9/10, elapsed time 131.27
[IterativeImputer] Ending imputation round 10/10, elapsed time 145.81
Imputation round 11
[IterativeImputer] Completing matrix with shape (26070, 31)
[IterativeImputer] Ending imputation round 1/10, elapsed time 14.48
[IterativeImputer] Ending imputation round 2/10, elapsed time 28.96
[IterativeImputer] Ending imputation round 3/10, elapsed time 43.42
[IterativeImputer] Ending imputation round 4/10, elapsed time 58.06
[IterativeImputer] Ending imput

[IterativeImputer] Ending imputation round 9/10, elapsed time 130.71
[IterativeImputer] Ending imputation round 10/10, elapsed time 145.46
Imputation round 9
[IterativeImputer] Completing matrix with shape (26070, 31)
[IterativeImputer] Ending imputation round 1/10, elapsed time 14.65
[IterativeImputer] Ending imputation round 2/10, elapsed time 29.21
[IterativeImputer] Ending imputation round 3/10, elapsed time 43.73
[IterativeImputer] Ending imputation round 4/10, elapsed time 58.37
[IterativeImputer] Ending imputation round 5/10, elapsed time 72.80
[IterativeImputer] Ending imputation round 6/10, elapsed time 87.74
[IterativeImputer] Ending imputation round 7/10, elapsed time 102.26
[IterativeImputer] Ending imputation round 8/10, elapsed time 116.85
[IterativeImputer] Ending imputation round 9/10, elapsed time 131.47
[IterativeImputer] Ending imputation round 10/10, elapsed time 146.11
Imputation round 10
[IterativeImputer] Completing matrix with shape (26070, 31)
[IterativeImputer

[IterativeImputer] Ending imputation round 4/10, elapsed time 58.21
[IterativeImputer] Ending imputation round 5/10, elapsed time 72.98
[IterativeImputer] Ending imputation round 6/10, elapsed time 87.45
[IterativeImputer] Ending imputation round 7/10, elapsed time 102.07
[IterativeImputer] Ending imputation round 8/10, elapsed time 116.67
[IterativeImputer] Ending imputation round 9/10, elapsed time 131.17
[IterativeImputer] Ending imputation round 10/10, elapsed time 145.86
Imputation round 8
[IterativeImputer] Completing matrix with shape (26070, 31)
[IterativeImputer] Ending imputation round 1/10, elapsed time 14.61
[IterativeImputer] Ending imputation round 2/10, elapsed time 29.18
[IterativeImputer] Ending imputation round 3/10, elapsed time 43.90
[IterativeImputer] Ending imputation round 4/10, elapsed time 58.24
[IterativeImputer] Ending imputation round 5/10, elapsed time 72.65
[IterativeImputer] Ending imputation round 6/10, elapsed time 87.18
[IterativeImputer] Ending imputa

[IterativeImputer] Ending imputation round 10/10, elapsed time 146.90
Imputation round 6
[IterativeImputer] Completing matrix with shape (26070, 31)
[IterativeImputer] Ending imputation round 1/10, elapsed time 14.86
[IterativeImputer] Ending imputation round 2/10, elapsed time 29.43
[IterativeImputer] Ending imputation round 3/10, elapsed time 43.94
[IterativeImputer] Ending imputation round 4/10, elapsed time 58.31
[IterativeImputer] Ending imputation round 5/10, elapsed time 72.99
[IterativeImputer] Ending imputation round 6/10, elapsed time 87.61
[IterativeImputer] Ending imputation round 7/10, elapsed time 102.28
[IterativeImputer] Ending imputation round 8/10, elapsed time 116.80
[IterativeImputer] Ending imputation round 9/10, elapsed time 131.36
[IterativeImputer] Ending imputation round 10/10, elapsed time 146.01
Imputation round 7
[IterativeImputer] Completing matrix with shape (26070, 31)
[IterativeImputer] Ending imputation round 1/10, elapsed time 14.50
[IterativeImputer] 

[IterativeImputer] Ending imputation round 5/10, elapsed time 72.68
[IterativeImputer] Ending imputation round 6/10, elapsed time 87.37
[IterativeImputer] Ending imputation round 7/10, elapsed time 102.24
[IterativeImputer] Ending imputation round 8/10, elapsed time 116.89
[IterativeImputer] Ending imputation round 9/10, elapsed time 131.63
[IterativeImputer] Ending imputation round 10/10, elapsed time 146.15
Imputation round 5
[IterativeImputer] Completing matrix with shape (26070, 31)
[IterativeImputer] Ending imputation round 1/10, elapsed time 15834.50
[IterativeImputer] Ending imputation round 2/10, elapsed time 15849.68
[IterativeImputer] Ending imputation round 3/10, elapsed time 15865.57
[IterativeImputer] Ending imputation round 4/10, elapsed time 15881.59
[IterativeImputer] Ending imputation round 5/10, elapsed time 15910.75
[IterativeImputer] Ending imputation round 6/10, elapsed time 15948.41
[IterativeImputer] Ending imputation round 7/10, elapsed time 15984.18
[IterativeI

In [None]:
r2

In [None]:
r3, r3_detail = run_all(0.1)

In [None]:
r3