# Vergleich von Imputation Methoden

An dieser Stelle sollen verschiede Methoden zum interpolieren von fehlenden Werten betrachtet und verglichen werden.

### Vorbereitung

In [1]:
import pandas as pd
import numpy as np
import math
import time

from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer, KNNImputer

In [2]:
results = []

In [3]:
def reset_base():
    base= pd.read_csv('additional_data/base.csv') 
    base.set_index(['Country Name', 'Indicator Name'], inplace=True)
    base = base.sort_index(level=['Country Name', 'Indicator Name'])
    return base
base = reset_base()

def get_cords(frac, rnd_state):
    n = int(base.isna().sum().sum()*frac)
    print(f'Testdaten mit {frac*100}% fehlenden Werten (absolut: {n})')
    #random state to ensure reproducibility
    rnds = np.random.RandomState(rnd_state)

    #coordinates for data entries to be removed randomly
    #5000 entries are selected
    cords = pd.DataFrame([[rnds.randint(0, len(base), size=n*4)[i], 
                  rnds.randint(0, len(base.columns), size=n*4)[i]]
                  for i in range(n*4)])

    #all coordinates pointing to NaN entries are removed and
    #first 1000 remaining entries are selected
    cords['value'] = [base.iloc[cords[0][i], cords[1][i]] for i in cords.index]
    cords = cords.dropna()[:n].reset_index(drop=True)
    
    return cords

def reset_train(cords):
    train = reset_base()
    for i in cords.index:
        train.iloc[cords[0][i], cords[1][i]] = None
    return train

def evaluate(df, t, cords):
    
    
    #scaling original data and imputed data
    #necessary ?????????????????????????????????????
    train = reset_train(cords)
    scaler = StandardScaler().fit(train) #fitting on train?
    norm_base = pd.DataFrame(scaler.transform(base))
    df = pd.DataFrame(scaler.transform(df))

    #getting imputed values for simulated NaNs and true value 
    res =pd.DataFrame({'y_true': [norm_base.iloc[cords[0][i], cords[1][i]] for i in cords.index],
                       'y_pred': [df.iloc[cords[0][i], cords[1][i]] for i in cords.index]
                      })
    res = res.dropna()

   
    #calculate evaluation metrics
    r2 = r2_score(res['y_true'], res['y_pred'])
    rmse = math.sqrt(mean_squared_error(res['y_true'], res['y_pred']))
    still_missing = df.isna().sum().sum()
    
    print(f'Mit dieser Methode bleiben {still_missing} NaNs bestehen.')
    print(f'{len(res)} Werte wurden für die Metriken verwendet.')
    print(f'r2: {r2}, rmse: {rmse}')
    print('')
    
    return [r2, rmse, still_missing, t]



In [4]:
def impute_backfill(df):
    df = df.fillna(method='bfill', limit=3)
    return df

def impute_overall_means(df):
    #fill NaNs with overall mean of that indicator
    values = pd.DataFrame(df.stack()).groupby('Indicator Name')[0].mean()
    df = pd.DataFrame(df.stack(dropna=False))
    
    df[0] = df[0].fillna(df.groupby('Indicator Name')[0].transform('mean'))
    df = df.unstack()
    df.columns = df.columns.droplevel(0)
    df = df.sort_index(level=['Country Name', 'Indicator Name'])
        
    return df

def impute_yearly_means(df):
    #fill NaNs with overall mean of that indicator
    
    for i in df.columns:
        df[i] = df[i].fillna(df.groupby('Indicator Name')[i].transform('mean'))
            
    return df

def impute_yearly_means_per_region(df):
    country_data = pd.read_csv('../Data/WDICountry.csv')
    country_data = country_data.loc[:,['Table Name', 'Region']]
    df = pd.merge(df.reset_index(), country_data, how='left', left_on='Country Name', right_on='Table Name').drop('Table Name', axis=1)
    df = df.set_index(['Country Name', 'Indicator Name', 'Region'])

    for i in df.columns:
        df[i] = df[i].fillna(df.groupby(['Indicator Name', 'Region'])[i].transform('mean'))

    df = df.reset_index().set_index(['Country Name', 'Indicator Name']).drop('Region', axis=1)
    return df

def interpolate3(df):
    df = df.interpolate(limit=3)
    return df

def interpolate_all(df):
    df = df.interpolate()
    return df

def iterative_imputer_1(df):
    col = df.columns
    idx = df.index
    
    iter_imp = IterativeImputer(random_state=999)
    df= iter_imp.fit_transform(df)
    df= pd.DataFrame(df, columns=col, index=idx)
    return df

def iterative_imputer_2(df):
    df = df.unstack().T
    col = df.columns
    idx = df.index

    iter_imp = IterativeImputer(random_state=999)
    df= iter_imp.fit_transform(df)

    df = pd.DataFrame(df, columns=col, index=idx)
    df = df.unstack().T
    df = df.sort_index(level=['Country Name', 'Indicator Name'])
    
    return df

def iterative_imputer_3(df):

    df = df.reset_index()
    df = df.set_index(['Indicator Name', 'Country Name'])
    df = df.unstack().T

    col = df.columns
    idx = df.index

    iter_imp = IterativeImputer(random_state=999, verbose=True)
    df= iter_imp.fit_transform(df)

    df = pd.DataFrame(df, columns=col, index=idx)
    df = df.unstack().T
    df = df.reset_index()
    df = df.set_index(['Country Name', 'Indicator Name'])
    df = df.sort_index(level=['Country Name', 'Indicator Name'])
    
    return df

def mice_imputer(df):
    n_imputations =  12
    dfs = []
    col = df.columns
    idx = df.index
    
    for i in range(n_imputations): 
        print(f'Imputation round {i}')
        iter_imp = IterativeImputer(random_state=i, sample_posterior=True, verbose=2)
        df_temp = iter_imp.fit_transform(df)
        dfs.append(df_temp)
    
    df = np.mean(np.array(dfs), axis=0)
    df = pd.DataFrame(df, columns=col, index=idx)
    return df

def knn_imputer1(df):
    col = df.columns
    idx = df.index
    
    knn_imp = KNNImputer(n_neighbors=2)
    df= knn_imp.fit_transform(df)
    df = pd.DataFrame(df, columns=col, index=idx)
    return df

def knn_imputer2(df, n=4):
    
    df = df.reset_index()
    df = df.set_index(['Indicator Name', 'Country Name'])
    df = df.unstack().T

    col = df.columns
    idx = df.index

    knn_imp = KNNImputer(n_neighbors=n)
    df= knn_imp.fit_transform(df)
    df = pd.DataFrame(df, columns=col, index=idx)

    df = df.unstack().T
    df = df.reset_index()
    df = df.set_index(['Country Name', 'Indicator Name'])
    df = df.sort_index(level=['Country Name', 'Indicator Name'])
    
    return df

In [5]:


functions =  {'Backfill':impute_backfill, 'Overall Mean':impute_overall_means, 
              'Yearly Mean':impute_yearly_means, 'Yearly Mean per Region':impute_yearly_means_per_region, 
              'Interpolate 3':interpolate3, 'Interpolate all':interpolate_all, 
              'ICE 1':iterative_imputer_1, 'ICE 2':iterative_imputer_2, 
              'ICE3':iterative_imputer_3, 'MICE':mice_imputer, 
              'KNN 1':knn_imputer1, 'KNN 2':knn_imputer2
             }



In [6]:
def run_all(frac):
    
    cords_list =[]
    for i in np.arange(1,6):
        cords_list.append(get_cords(frac, i))
    
    results = {}
    detailed_results = {}    
    
    for name, func in functions.items():
        func_runs = []
        print(name)
        
        for cords in cords_list:
            
            t0 = time.time()
            df= func(reset_train(cords)) 
            t1 = time.time()

            t = t1-t0

            func_runs.append(evaluate(df, t, cords))
        
        print(func_runs)
        detailed_results[name] = func_runs
        results[name] = np.mean(np.array(func_runs), axis=0)
        
    return results, detailed_results
r1, r1_detail = run_all(0.05)

Testdaten mit 5.0% fehlenden Werten (absolut: 8515)
Testdaten mit 5.0% fehlenden Werten (absolut: 8515)
Testdaten mit 5.0% fehlenden Werten (absolut: 8515)
Testdaten mit 5.0% fehlenden Werten (absolut: 8515)
Testdaten mit 5.0% fehlenden Werten (absolut: 8515)
Backfill
Mit dieser Methode bleiben 32924 NaNs bestehen.

8195 Werte wurden für die Metriken verwendet.
r2: -0.2630386067986965, rmse: 3.1264580984032304
Mit dieser Methode bleiben 32825 NaNs bestehen.

8263 Werte wurden für die Metriken verwendet.
r2: -2.77533240911633, rmse: 1.1896157293266358
Mit dieser Methode bleiben 32824 NaNs bestehen.

8250 Werte wurden für die Metriken verwendet.
r2: -1.688280205196218, rmse: 1.0226797653748734
Mit dieser Methode bleiben 32841 NaNs bestehen.

8249 Werte wurden für die Metriken verwendet.
r2: -0.6672076075877178, rmse: 2.085863590967128
Mit dieser Methode bleiben 32828 NaNs bestehen.

8240 Werte wurden für die Metriken verwendet.
r2: -0.07720522781720884, rmse: 3.533882957257935
[[-0.26303



Mit dieser Methode bleiben 0 NaNs bestehen.

8515 Werte wurden für die Metriken verwendet.
r2: 0.9942158016960598, rmse: 0.20756410705991854




Mit dieser Methode bleiben 0 NaNs bestehen.

8515 Werte wurden für die Metriken verwendet.
r2: 0.7827329807258437, rmse: 0.2820413273792655




Mit dieser Methode bleiben 0 NaNs bestehen.

8515 Werte wurden für die Metriken verwendet.
r2: 0.988236531487906, rmse: 0.06661977159134161




Mit dieser Methode bleiben 0 NaNs bestehen.

8515 Werte wurden für die Metriken verwendet.
r2: 0.9877198111831341, rmse: 0.1761993484346218




Mit dieser Methode bleiben 0 NaNs bestehen.

8515 Werte wurden für die Metriken verwendet.
r2: 0.9908166506864295, rmse: 0.320979945536532
[[0.9942158016960598, 0.20756410705991854, 0, 11.710053205490112], [0.7827329807258437, 0.2820413273792655, 0, 10.201523065567017], [0.988236531487906, 0.06661977159134161, 0, 10.328932046890259], [0.9877198111831341, 0.1761993484346218, 0, 10.431110858917236], [0.9908166506864295, 0.320979945536532, 0, 10.261184930801392]]
ICE 2
Mit dieser Methode bleiben 0 NaNs bestehen.

8515 Werte wurden für die Metriken verwendet.
r2: 0.897000119367959, rmse: 0.8758889134010717
Mit dieser Methode bleiben 0 NaNs bestehen.

8515 Werte wurden für die Metriken verwendet.
r2: 0.9664793394870981, rmse: 0.1107827472103113
Mit dieser Methode bleiben 0 NaNs bestehen.

8515 Werte wurden für die Metriken verwendet.
r2: 0.8501371093540149, rmse: 0.2377839268308384
Mit dieser Methode bleiben 0 NaNs bestehen.

8515 Werte wurden für die Metriken verwendet.
r2: 0.9653675649847



Mit dieser Methode bleiben 0 NaNs bestehen.

8515 Werte wurden für die Metriken verwendet.
r2: 0.6724652756518652, rmse: 1.5619220044933946
[IterativeImputer] Completing matrix with shape (4898, 165)
[IterativeImputer] Change: 3.042905975534203e+16, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 2952750620995729.0, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 2637618919460105.0, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 839975951480219.5, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 416741465011542.9, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 206536252721299.34, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 257581722062894.0, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 3676756867963236.0, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 2184993869866275.8, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 35310162



Mit dieser Methode bleiben 0 NaNs bestehen.

8515 Werte wurden für die Metriken verwendet.
r2: 0.8649439154415968, rmse: 0.22236832474634613
[IterativeImputer] Completing matrix with shape (4898, 165)
[IterativeImputer] Change: 3.0843589729908572e+16, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 1979474405978225.0, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 2801112307662757.0, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 1574030763879201.2, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 970053503110370.4, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 444197716419550.9, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 120489078070655.97, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 250815913508324.62, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 334711927449455.7, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 169377



Mit dieser Methode bleiben 0 NaNs bestehen.

8515 Werte wurden für die Metriken verwendet.
r2: 0.4368929726829536, rmse: 0.4609256913736006
[IterativeImputer] Completing matrix with shape (4898, 165)
[IterativeImputer] Change: 4.07467735367784e+16, scaled tolerance: 24412569472460.902 
[IterativeImputer] Change: 1722959034065735.5, scaled tolerance: 24412569472460.902 
[IterativeImputer] Change: 2090673085752552.0, scaled tolerance: 24412569472460.902 
[IterativeImputer] Change: 1048890708859029.4, scaled tolerance: 24412569472460.902 
[IterativeImputer] Change: 477907701737168.3, scaled tolerance: 24412569472460.902 
[IterativeImputer] Change: 247554233211173.34, scaled tolerance: 24412569472460.902 
[IterativeImputer] Change: 186004828520420.94, scaled tolerance: 24412569472460.902 
[IterativeImputer] Change: 115911094752149.7, scaled tolerance: 24412569472460.902 
[IterativeImputer] Change: 4551435341310723.0, scaled tolerance: 24412569472460.902 
[IterativeImputer] Change: 29505081



Mit dieser Methode bleiben 0 NaNs bestehen.

8515 Werte wurden für die Metriken verwendet.
r2: 0.7933548811158273, rmse: 0.7227936884335278
[IterativeImputer] Completing matrix with shape (4898, 165)
[IterativeImputer] Change: 2.9978395215752304e+16, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 3003152652961603.5, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 2710129489422781.5, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 1503311021595322.8, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 637890235948702.4, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 193512071993254.84, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 95235849980947.98, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 3799024907012754.5, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 2165780169160924.2, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 758361



Mit dieser Methode bleiben 0 NaNs bestehen.

8515 Werte wurden für die Metriken verwendet.
r2: 0.7509497996987184, rmse: 1.6715554758531586
[[0.6724652756518652, 1.5619220044933946, 0, 85.83509731292725], [0.8649439154415968, 0.22236832474634613, 0, 85.7069501876831], [0.4368929726829536, 0.4609256913736006, 0, 84.84199070930481], [0.7933548811158273, 0.7227936884335278, 0, 85.68876910209656], [0.7509497996987184, 1.6715554758531586, 0, 86.77166724205017]]
MICE
Imputation round 0
[IterativeImputer] Completing matrix with shape (26070, 31)
[IterativeImputer] Ending imputation round 1/10, elapsed time 1.17
[IterativeImputer] Ending imputation round 2/10, elapsed time 2.32
[IterativeImputer] Ending imputation round 3/10, elapsed time 3.47
[IterativeImputer] Ending imputation round 4/10, elapsed time 4.61
[IterativeImputer] Ending imputation round 5/10, elapsed time 5.73
[IterativeImputer] Ending imputation round 6/10, elapsed time 6.89
[IterativeImputer] Ending imputation round 7/10, elap

[IterativeImputer] Ending imputation round 3/10, elapsed time 3.41
[IterativeImputer] Ending imputation round 4/10, elapsed time 4.56
[IterativeImputer] Ending imputation round 5/10, elapsed time 5.77
[IterativeImputer] Ending imputation round 6/10, elapsed time 7.01
[IterativeImputer] Ending imputation round 7/10, elapsed time 8.14
[IterativeImputer] Ending imputation round 8/10, elapsed time 9.28
[IterativeImputer] Ending imputation round 9/10, elapsed time 10.40
[IterativeImputer] Ending imputation round 10/10, elapsed time 11.57
Imputation round 11
[IterativeImputer] Completing matrix with shape (26070, 31)
[IterativeImputer] Ending imputation round 1/10, elapsed time 1.14
[IterativeImputer] Ending imputation round 2/10, elapsed time 2.29
[IterativeImputer] Ending imputation round 3/10, elapsed time 3.44
[IterativeImputer] Ending imputation round 4/10, elapsed time 4.59
[IterativeImputer] Ending imputation round 5/10, elapsed time 5.69
[IterativeImputer] Ending imputation round 6/1

[IterativeImputer] Ending imputation round 1/10, elapsed time 1.13
[IterativeImputer] Ending imputation round 2/10, elapsed time 2.26
[IterativeImputer] Ending imputation round 3/10, elapsed time 3.38
[IterativeImputer] Ending imputation round 4/10, elapsed time 4.62
[IterativeImputer] Ending imputation round 5/10, elapsed time 5.82
[IterativeImputer] Ending imputation round 6/10, elapsed time 6.96
[IterativeImputer] Ending imputation round 7/10, elapsed time 8.10
[IterativeImputer] Ending imputation round 8/10, elapsed time 9.25
[IterativeImputer] Ending imputation round 9/10, elapsed time 10.37
[IterativeImputer] Ending imputation round 10/10, elapsed time 11.55
Imputation round 10
[IterativeImputer] Completing matrix with shape (26070, 31)
[IterativeImputer] Ending imputation round 1/10, elapsed time 1.14
[IterativeImputer] Ending imputation round 2/10, elapsed time 2.27
[IterativeImputer] Ending imputation round 3/10, elapsed time 3.40
[IterativeImputer] Ending imputation round 4/1

[IterativeImputer] Ending imputation round 9/10, elapsed time 10.34
[IterativeImputer] Ending imputation round 10/10, elapsed time 11.49
Imputation round 8
[IterativeImputer] Completing matrix with shape (26070, 31)
[IterativeImputer] Ending imputation round 1/10, elapsed time 1.14
[IterativeImputer] Ending imputation round 2/10, elapsed time 2.32
[IterativeImputer] Ending imputation round 3/10, elapsed time 3.52
[IterativeImputer] Ending imputation round 4/10, elapsed time 4.72
[IterativeImputer] Ending imputation round 5/10, elapsed time 5.82
[IterativeImputer] Ending imputation round 6/10, elapsed time 6.98
[IterativeImputer] Ending imputation round 7/10, elapsed time 8.10
[IterativeImputer] Ending imputation round 8/10, elapsed time 9.28
[IterativeImputer] Ending imputation round 9/10, elapsed time 10.41
[IterativeImputer] Ending imputation round 10/10, elapsed time 11.54
Imputation round 9
[IterativeImputer] Completing matrix with shape (26070, 31)
[IterativeImputer] Ending imputa

[IterativeImputer] Ending imputation round 6/10, elapsed time 6.95
[IterativeImputer] Ending imputation round 7/10, elapsed time 8.10
[IterativeImputer] Ending imputation round 8/10, elapsed time 9.30
[IterativeImputer] Ending imputation round 9/10, elapsed time 10.48
[IterativeImputer] Ending imputation round 10/10, elapsed time 11.74
Imputation round 7
[IterativeImputer] Completing matrix with shape (26070, 31)
[IterativeImputer] Ending imputation round 1/10, elapsed time 1.30
[IterativeImputer] Ending imputation round 2/10, elapsed time 2.51
[IterativeImputer] Ending imputation round 3/10, elapsed time 3.70
[IterativeImputer] Ending imputation round 4/10, elapsed time 4.95
[IterativeImputer] Ending imputation round 5/10, elapsed time 6.13
[IterativeImputer] Ending imputation round 6/10, elapsed time 7.32
[IterativeImputer] Ending imputation round 7/10, elapsed time 8.54
[IterativeImputer] Ending imputation round 8/10, elapsed time 9.72
[IterativeImputer] Ending imputation round 9/10

[IterativeImputer] Ending imputation round 3/10, elapsed time 3.45
[IterativeImputer] Ending imputation round 4/10, elapsed time 4.59
[IterativeImputer] Ending imputation round 5/10, elapsed time 5.75
[IterativeImputer] Ending imputation round 6/10, elapsed time 6.90
[IterativeImputer] Ending imputation round 7/10, elapsed time 8.06
[IterativeImputer] Ending imputation round 8/10, elapsed time 9.24
[IterativeImputer] Ending imputation round 9/10, elapsed time 10.48
[IterativeImputer] Ending imputation round 10/10, elapsed time 11.64
Imputation round 6
[IterativeImputer] Completing matrix with shape (26070, 31)
[IterativeImputer] Ending imputation round 1/10, elapsed time 1.15
[IterativeImputer] Ending imputation round 2/10, elapsed time 2.29
[IterativeImputer] Ending imputation round 3/10, elapsed time 3.43
[IterativeImputer] Ending imputation round 4/10, elapsed time 4.56
[IterativeImputer] Ending imputation round 5/10, elapsed time 5.71
[IterativeImputer] Ending imputation round 6/10

In [18]:
r1 = pd.DataFrame(r1).T
r1

Unnamed: 0,0,1,2,3
Backfill,-1.094213,2.1917,32848.4,0.855777
Overall Mean,-0.251541,1.805936,0.0,1.097199
Yearly Mean,0.009964,1.768969,52298.0,0.941444
Yearly Mean per Region,0.016326,1.770021,58352.6,1.063025
Interpolate 3,-0.557857,1.999203,32879.0,0.924791
Interpolate all,-0.557853,1.966919,58.0,0.890814
ICE 1,0.948744,0.210681,0.0,10.586561
ICE 2,0.926469,0.448741,0.0,24.251318
ICE3,0.703721,0.927913,0.0,85.768895
MICE,0.951523,0.236147,0.0,140.065102


In [12]:
r1_detail = pd.DataFrame(pd.DataFrame(r1_detail).T.stack().to_dict()).T
r1_detail.columns = ['r2', 'rmse', 'still_missing', 't']
r1_detail

Unnamed: 0,Unnamed: 1,r2,rmse,still_missing,t
Backfill,0,-0.263039,3.126458,32924.0,0.823884
Backfill,1,-2.775332,1.189616,32825.0,0.856179
Backfill,2,-1.68828,1.02268,32824.0,0.919926
Backfill,3,-0.667208,2.085864,32841.0,0.82685
Backfill,4,-0.077205,3.533883,32828.0,0.852047
Overall Mean,0,0.056692,2.650681,0.0,1.077665
Overall Mean,1,-0.79021,0.809595,0.0,1.16497
Overall Mean,2,-0.553206,0.765509,0.0,1.078683
Overall Mean,3,-0.089972,1.660006,0.0,1.066062
Overall Mean,4,0.118993,3.143888,0.0,1.098613


In [8]:
r2 , r2_detail =  run_all(0.075)

Testdaten mit 7.5% fehlenden Werten (absolut: 12773)
Testdaten mit 7.5% fehlenden Werten (absolut: 12773)
Testdaten mit 7.5% fehlenden Werten (absolut: 12773)
Testdaten mit 7.5% fehlenden Werten (absolut: 12773)
Testdaten mit 7.5% fehlenden Werten (absolut: 12773)
Backfill
Mit dieser Methode bleiben 33252 NaNs bestehen.

12384 Werte wurden für die Metriken verwendet.
r2: -0.7622360204251646, rmse: 1.9101575370444133
Mit dieser Methode bleiben 33280 NaNs bestehen.

12322 Werte wurden für die Metriken verwendet.
r2: -0.13326755926364164, rmse: 1.5732663625326984
Mit dieser Methode bleiben 33292 NaNs bestehen.

12334 Werte wurden für die Metriken verwendet.
r2: -0.15471382925960686, rmse: 2.34729184379117
Mit dieser Methode bleiben 33312 NaNs bestehen.

12333 Werte wurden für die Metriken verwendet.
r2: -0.38762500614250417, rmse: 2.1233606768359277
Mit dieser Methode bleiben 33191 NaNs bestehen.

12365 Werte wurden für die Metriken verwendet.
r2: -0.12638032635757868, rmse: 1.86746347988



Mit dieser Methode bleiben 0 NaNs bestehen.

12773 Werte wurden für die Metriken verwendet.
r2: 0.9803957096839419, rmse: 0.19838139998923512




Mit dieser Methode bleiben 0 NaNs bestehen.

12773 Werte wurden für die Metriken verwendet.
r2: 0.9755494244776735, rmse: 0.22712199683626336




Mit dieser Methode bleiben 0 NaNs bestehen.

12773 Werte wurden für die Metriken verwendet.
r2: 0.9937200129848803, rmse: 0.17013101521988602




Mit dieser Methode bleiben 0 NaNs bestehen.

12773 Werte wurden für die Metriken verwendet.
r2: 0.9831619635029917, rmse: 0.229839617647022




Mit dieser Methode bleiben 0 NaNs bestehen.

12773 Werte wurden für die Metriken verwendet.
r2: 0.9973721005760114, rmse: 0.08877467621858194
[[0.9803957096839419, 0.19838139998923512, 0, 10.18648386001587], [0.9755494244776735, 0.22712199683626336, 0, 10.117796421051025], [0.9937200129848803, 0.17013101521988602, 0, 10.078530073165894], [0.9831619635029917, 0.229839617647022, 0, 10.344917058944702], [0.9973721005760114, 0.08877467621858194, 0, 10.964144706726074]]
ICE 2
Mit dieser Methode bleiben 0 NaNs bestehen.

12773 Werte wurden für die Metriken verwendet.
r2: 0.9838505622042556, rmse: 0.18005454652769592
Mit dieser Methode bleiben 0 NaNs bestehen.

12773 Werte wurden für die Metriken verwendet.
r2: 0.9878728227407547, rmse: 0.15995378362387666
Mit dieser Methode bleiben 0 NaNs bestehen.

12773 Werte wurden für die Metriken verwendet.
r2: 0.8221999876264665, rmse: 0.905253086250653
Mit dieser Methode bleiben 0 NaNs bestehen.

12773 Werte wurden für die Metriken verwendet.
r2: 0.93



Mit dieser Methode bleiben 0 NaNs bestehen.

12773 Werte wurden für die Metriken verwendet.
r2: 0.8623209307706443, rmse: 0.5257258705130095
[IterativeImputer] Completing matrix with shape (4898, 165)
[IterativeImputer] Change: 3.0814034864734116e+16, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 4057322175223235.0, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 1704338202139627.8, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 563607608696027.9, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 366278041760450.8, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 420490792342915.0, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 492161727699741.25, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 304111116287310.9, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 212560707125846.62, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 1648503



Mit dieser Methode bleiben 0 NaNs bestehen.

12773 Werte wurden für die Metriken verwendet.
r2: 0.9248967198209728, rmse: 0.39805593856601507
[IterativeImputer] Completing matrix with shape (4898, 165)
[IterativeImputer] Change: 3.981588887350839e+16, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 3272447647919451.5, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 1418769504483334.8, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 1185290704657987.8, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 984878014627611.0, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 699725792920155.2, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 446267193102673.9, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 282067752214580.44, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 181505497690240.4, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 2004434



Mit dieser Methode bleiben 0 NaNs bestehen.

12773 Werte wurden für die Metriken verwendet.
r2: 0.411709223258674, rmse: 1.646645609287345
[IterativeImputer] Completing matrix with shape (4898, 165)
[IterativeImputer] Change: 3.0028660204699064e+16, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 2187714572436955.5, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 1841927527231725.5, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 1182919697590624.2, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 723035118930045.4, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 196045068836978.94, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 3405236684923543.0, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 1806700450486974.2, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 1133879646810791.5, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 509388



Mit dieser Methode bleiben 0 NaNs bestehen.

12773 Werte wurden für die Metriken verwendet.
r2: 0.8924299870525689, rmse: 0.5809313016449393
[IterativeImputer] Completing matrix with shape (4898, 165)
[IterativeImputer] Change: 2.7361713732346948e+16, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 2776178018187145.5, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 1863927789523974.8, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 1148278767394427.5, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 879431074863914.5, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 424335362795948.2, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 4163317329132051.0, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 1885594848492097.5, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 902792927245435.2, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 422566



Mit dieser Methode bleiben 0 NaNs bestehen.

12773 Werte wurden für die Metriken verwendet.
r2: 0.834380973318203, rmse: 0.7047577188321924
[[0.8623209307706443, 0.5257258705130095, 0, 94.02702283859253], [0.9248967198209728, 0.39805593856601507, 0, 90.36988401412964], [0.411709223258674, 1.646645609287345, 0, 87.37838196754456], [0.8924299870525689, 0.5809313016449393, 0, 88.13741517066956], [0.834380973318203, 0.7047577188321924, 0, 87.00606322288513]]
MICE
Imputation round 0
[IterativeImputer] Completing matrix with shape (26070, 31)
[IterativeImputer] Ending imputation round 1/10, elapsed time 1.24
[IterativeImputer] Ending imputation round 2/10, elapsed time 2.34
[IterativeImputer] Ending imputation round 3/10, elapsed time 3.46
[IterativeImputer] Ending imputation round 4/10, elapsed time 4.57
[IterativeImputer] Ending imputation round 5/10, elapsed time 5.71
[IterativeImputer] Ending imputation round 6/10, elapsed time 6.82
[IterativeImputer] Ending imputation round 7/10, elapse

[IterativeImputer] Ending imputation round 3/10, elapsed time 3.58
[IterativeImputer] Ending imputation round 4/10, elapsed time 4.82
[IterativeImputer] Ending imputation round 5/10, elapsed time 6.15
[IterativeImputer] Ending imputation round 6/10, elapsed time 7.53
[IterativeImputer] Ending imputation round 7/10, elapsed time 8.92
[IterativeImputer] Ending imputation round 8/10, elapsed time 10.42
[IterativeImputer] Ending imputation round 9/10, elapsed time 11.74
[IterativeImputer] Ending imputation round 10/10, elapsed time 12.89
Imputation round 11
[IterativeImputer] Completing matrix with shape (26070, 31)
[IterativeImputer] Ending imputation round 1/10, elapsed time 1.13
[IterativeImputer] Ending imputation round 2/10, elapsed time 2.26
[IterativeImputer] Ending imputation round 3/10, elapsed time 3.39
[IterativeImputer] Ending imputation round 4/10, elapsed time 4.54
[IterativeImputer] Ending imputation round 5/10, elapsed time 5.70
[IterativeImputer] Ending imputation round 6/

[IterativeImputer] Ending imputation round 1/10, elapsed time 1.15
[IterativeImputer] Ending imputation round 2/10, elapsed time 2.31
[IterativeImputer] Ending imputation round 3/10, elapsed time 3.47
[IterativeImputer] Ending imputation round 4/10, elapsed time 4.60
[IterativeImputer] Ending imputation round 5/10, elapsed time 5.80
[IterativeImputer] Ending imputation round 6/10, elapsed time 7.05
[IterativeImputer] Ending imputation round 7/10, elapsed time 8.25
[IterativeImputer] Ending imputation round 8/10, elapsed time 9.36
[IterativeImputer] Ending imputation round 9/10, elapsed time 10.49
[IterativeImputer] Ending imputation round 10/10, elapsed time 11.61
Imputation round 10
[IterativeImputer] Completing matrix with shape (26070, 31)
[IterativeImputer] Ending imputation round 1/10, elapsed time 1.13
[IterativeImputer] Ending imputation round 2/10, elapsed time 2.23
[IterativeImputer] Ending imputation round 3/10, elapsed time 3.37
[IterativeImputer] Ending imputation round 4/1

[IterativeImputer] Ending imputation round 9/10, elapsed time 10.35
[IterativeImputer] Ending imputation round 10/10, elapsed time 11.47
Imputation round 8
[IterativeImputer] Completing matrix with shape (26070, 31)
[IterativeImputer] Ending imputation round 1/10, elapsed time 1.15
[IterativeImputer] Ending imputation round 2/10, elapsed time 2.25
[IterativeImputer] Ending imputation round 3/10, elapsed time 3.43
[IterativeImputer] Ending imputation round 4/10, elapsed time 4.68
[IterativeImputer] Ending imputation round 5/10, elapsed time 5.85
[IterativeImputer] Ending imputation round 6/10, elapsed time 6.98
[IterativeImputer] Ending imputation round 7/10, elapsed time 8.12
[IterativeImputer] Ending imputation round 8/10, elapsed time 9.23
[IterativeImputer] Ending imputation round 9/10, elapsed time 10.41
[IterativeImputer] Ending imputation round 10/10, elapsed time 11.59
Imputation round 9
[IterativeImputer] Completing matrix with shape (26070, 31)
[IterativeImputer] Ending imputa

[IterativeImputer] Ending imputation round 6/10, elapsed time 6.85
[IterativeImputer] Ending imputation round 7/10, elapsed time 8.01
[IterativeImputer] Ending imputation round 8/10, elapsed time 9.15
[IterativeImputer] Ending imputation round 9/10, elapsed time 10.33
[IterativeImputer] Ending imputation round 10/10, elapsed time 11.55
Imputation round 7
[IterativeImputer] Completing matrix with shape (26070, 31)
[IterativeImputer] Ending imputation round 1/10, elapsed time 1.22
[IterativeImputer] Ending imputation round 2/10, elapsed time 2.32
[IterativeImputer] Ending imputation round 3/10, elapsed time 3.47
[IterativeImputer] Ending imputation round 4/10, elapsed time 4.62
[IterativeImputer] Ending imputation round 5/10, elapsed time 5.76
[IterativeImputer] Ending imputation round 6/10, elapsed time 6.87
[IterativeImputer] Ending imputation round 7/10, elapsed time 8.03
[IterativeImputer] Ending imputation round 8/10, elapsed time 9.15
[IterativeImputer] Ending imputation round 9/10

[IterativeImputer] Ending imputation round 3/10, elapsed time 3.37
[IterativeImputer] Ending imputation round 4/10, elapsed time 4.48
[IterativeImputer] Ending imputation round 5/10, elapsed time 5.64
[IterativeImputer] Ending imputation round 6/10, elapsed time 6.79
[IterativeImputer] Ending imputation round 7/10, elapsed time 8.01
[IterativeImputer] Ending imputation round 8/10, elapsed time 9.26
[IterativeImputer] Ending imputation round 9/10, elapsed time 10.38
[IterativeImputer] Ending imputation round 10/10, elapsed time 11.51
Imputation round 6
[IterativeImputer] Completing matrix with shape (26070, 31)
[IterativeImputer] Ending imputation round 1/10, elapsed time 1.10
[IterativeImputer] Ending imputation round 2/10, elapsed time 2.28
[IterativeImputer] Ending imputation round 3/10, elapsed time 3.42
[IterativeImputer] Ending imputation round 4/10, elapsed time 4.59
[IterativeImputer] Ending imputation round 5/10, elapsed time 5.70
[IterativeImputer] Ending imputation round 6/10

In [19]:
r2 = pd.DataFrame(r2).T
r2

Unnamed: 0,0,1,2,3
Backfill,-0.312845,1.964308,33265.4,1.128651
Overall Mean,-0.052291,1.740381,0.0,1.382833
Yearly Mean,0.014764,1.692044,52298.0,1.204775
Yearly Mean per Region,0.049003,1.681218,58425.0,1.320798
Interpolate 3,-0.219687,1.909891,33296.8,1.190733
Interpolate all,-0.221227,1.880009,59.6,1.167769
ICE 1,0.98604,0.18285,0.0,10.338374
ICE 2,0.945163,0.365983,0.0,26.355917
ICE3,0.785148,0.771223,0.0,89.383753
MICE,0.988788,0.168885,0.0,141.106209


In [13]:
r2_detail = pd.DataFrame(pd.DataFrame(r3_detail).T.stack().to_dict()).T
r2_detail.columns = ['r2', 'rmse', 'still_missing', 't']
r2_detail

Unnamed: 0,Unnamed: 1,r2,rmse,still_missing,t
Backfill,0,-3.96422,1.152188,33619.0,1.433451
Backfill,1,-1.837371,1.679247,33786.0,1.440451
Backfill,2,-1.908139,1.523993,33654.0,1.44645
Backfill,3,-0.897788,1.538364,33643.0,1.433474
Backfill,4,-1.176092,1.413485,33679.0,1.445665
Overall Mean,0,-1.083659,0.734942,0.0,1.674192
Overall Mean,1,-0.203727,1.073588,0.0,1.738185
Overall Mean,2,-0.364231,1.025791,0.0,1.666184
Overall Mean,3,-0.17854,1.191847,0.0,1.675212
Overall Mean,4,-0.280922,1.066565,0.0,1.658654


In [10]:
r3, r3_detail = run_all(0.1)

Testdaten mit 10.0% fehlenden Werten (absolut: 17030)
Testdaten mit 10.0% fehlenden Werten (absolut: 17030)
Testdaten mit 10.0% fehlenden Werten (absolut: 17030)
Testdaten mit 10.0% fehlenden Werten (absolut: 17030)
Testdaten mit 10.0% fehlenden Werten (absolut: 17030)
Backfill
Mit dieser Methode bleiben 33619 NaNs bestehen.

16464 Werte wurden für die Metriken verwendet.
r2: -3.964219549140708, rmse: 1.1521878884113992
Mit dieser Methode bleiben 33786 NaNs bestehen.

16407 Werte wurden für die Metriken verwendet.
r2: -1.8373707535864638, rmse: 1.6792466658151293
Mit dieser Methode bleiben 33654 NaNs bestehen.

16447 Werte wurden für die Metriken verwendet.
r2: -1.9081389310564236, rmse: 1.5239926108614468
Mit dieser Methode bleiben 33643 NaNs bestehen.

16460 Werte wurden für die Metriken verwendet.
r2: -0.8977876217879388, rmse: 1.5383638773729555
Mit dieser Methode bleiben 33679 NaNs bestehen.

16467 Werte wurden für die Metriken verwendet.
r2: -1.1760918948068961, rmse: 1.413484968



Mit dieser Methode bleiben 0 NaNs bestehen.

17030 Werte wurden für die Metriken verwendet.
r2: 0.9370202334659707, rmse: 0.12777336992410396




Mit dieser Methode bleiben 0 NaNs bestehen.

17030 Werte wurden für die Metriken verwendet.
r2: 0.9521427024799756, rmse: 0.2140660895293748




Mit dieser Methode bleiben 0 NaNs bestehen.

17030 Werte wurden für die Metriken verwendet.
r2: 0.9893949691492818, rmse: 0.09044214578504266




Mit dieser Methode bleiben 0 NaNs bestehen.

17030 Werte wurden für die Metriken verwendet.
r2: 0.9872547817984083, rmse: 0.1239430322939622




Mit dieser Methode bleiben 0 NaNs bestehen.

17030 Werte wurden für die Metriken verwendet.
r2: 0.9145067510943837, rmse: 0.27554462283829784
[[0.9370202334659707, 0.12777336992410396, 0, 10.421709060668945], [0.9521427024799756, 0.2140660895293748, 0, 10.416086673736572], [0.9893949691492818, 0.09044214578504266, 0, 10.494918823242188], [0.9872547817984083, 0.1239430322939622, 0, 10.54473614692688], [0.9145067510943837, 0.27554462283829784, 0, 10.655496597290039]]
ICE 2
Mit dieser Methode bleiben 0 NaNs bestehen.

17030 Werte wurden für die Metriken verwendet.
r2: 0.9353924573529703, rmse: 0.12941405261701314
Mit dieser Methode bleiben 0 NaNs bestehen.

17030 Werte wurden für die Metriken verwendet.
r2: 0.9754346077997191, rmse: 0.1533683133456013
Mit dieser Methode bleiben 0 NaNs bestehen.

17030 Werte wurden für die Metriken verwendet.
r2: 0.9360508486955306, rmse: 0.22209167631387725
Mit dieser Methode bleiben 0 NaNs bestehen.

17030 Werte wurden für die Metriken verwendet.
r2: 0.9



Mit dieser Methode bleiben 0 NaNs bestehen.

17030 Werte wurden für die Metriken verwendet.
r2: 0.8656510287868295, rmse: 0.18661949011636286
[IterativeImputer] Completing matrix with shape (4898, 165)
[IterativeImputer] Change: 3.0793701416671844e+16, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 1965670947388250.5, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 2055781807062901.5, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 1202414568308490.8, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 693042073549476.0, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 378724405818521.25, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 255856467206931.5, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 267666823038194.47, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 154395120361844.72, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 1481



Mit dieser Methode bleiben 0 NaNs bestehen.

17030 Werte wurden für die Metriken verwendet.
r2: 0.7873761250218165, rmse: 0.4512111309121132
[IterativeImputer] Completing matrix with shape (4898, 165)
[IterativeImputer] Change: 2.767695010894046e+16, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 2215172710472481.0, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 1272751790088711.5, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 1112711149673679.8, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 802807918332657.8, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 493386041322739.56, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 430603857658830.25, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 332478603549620.6, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 342605605246312.56, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 441335



Mit dieser Methode bleiben 0 NaNs bestehen.

17030 Werte wurden für die Metriken verwendet.
r2: 0.6804537315552623, rmse: 0.4964571441735188
[IterativeImputer] Completing matrix with shape (4898, 165)
[IterativeImputer] Change: 2.9054460292016464e+16, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 2686861492950140.0, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 2020346442314162.2, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 966537983426960.9, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 582942911181494.8, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 420352635594125.2, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 330099304310940.4, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 358375333903242.25, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 3634484630414166.0, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 1767168



Mit dieser Methode bleiben 0 NaNs bestehen.

17030 Werte wurden für die Metriken verwendet.
r2: 0.47922093799031384, rmse: 0.7922736954811134
[IterativeImputer] Completing matrix with shape (4898, 165)
[IterativeImputer] Change: 2.846033471882781e+16, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 2443889493057179.5, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 1118613781447626.5, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 1656357204430435.2, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 1009176985672603.5, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 559421305245813.94, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 312335935597925.06, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 220725728023061.8, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 164621507370038.3, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 82701



Mit dieser Methode bleiben 0 NaNs bestehen.

17030 Werte wurden für die Metriken verwendet.
r2: 0.8722507778212176, rmse: 0.3368256541499248
[[0.8656510287868295, 0.18661949011636286, 0, 86.82750248908997], [0.7873761250218165, 0.4512111309121132, 0, 86.8275876045227], [0.6804537315552623, 0.4964571441735188, 0, 88.22525382041931], [0.47922093799031384, 0.7922736954811134, 0, 87.28822231292725], [0.8722507778212176, 0.3368256541499248, 0, 87.29826974868774]]
MICE
Imputation round 0
[IterativeImputer] Completing matrix with shape (26070, 31)
[IterativeImputer] Ending imputation round 1/10, elapsed time 1.25
[IterativeImputer] Ending imputation round 2/10, elapsed time 2.49
[IterativeImputer] Ending imputation round 3/10, elapsed time 3.74
[IterativeImputer] Ending imputation round 4/10, elapsed time 4.92
[IterativeImputer] Ending imputation round 5/10, elapsed time 6.07
[IterativeImputer] Ending imputation round 6/10, elapsed time 7.20
[IterativeImputer] Ending imputation round 7/10, el

[IterativeImputer] Ending imputation round 3/10, elapsed time 3.36
[IterativeImputer] Ending imputation round 4/10, elapsed time 4.50
[IterativeImputer] Ending imputation round 5/10, elapsed time 5.62
[IterativeImputer] Ending imputation round 6/10, elapsed time 6.73
[IterativeImputer] Ending imputation round 7/10, elapsed time 7.85
[IterativeImputer] Ending imputation round 8/10, elapsed time 8.98
[IterativeImputer] Ending imputation round 9/10, elapsed time 10.07
[IterativeImputer] Ending imputation round 10/10, elapsed time 11.25
Imputation round 11
[IterativeImputer] Completing matrix with shape (26070, 31)
[IterativeImputer] Ending imputation round 1/10, elapsed time 1.17
[IterativeImputer] Ending imputation round 2/10, elapsed time 2.28
[IterativeImputer] Ending imputation round 3/10, elapsed time 3.35
[IterativeImputer] Ending imputation round 4/10, elapsed time 4.48
[IterativeImputer] Ending imputation round 5/10, elapsed time 5.55
[IterativeImputer] Ending imputation round 6/1

[IterativeImputer] Ending imputation round 1/10, elapsed time 1.14
[IterativeImputer] Ending imputation round 2/10, elapsed time 2.24
[IterativeImputer] Ending imputation round 3/10, elapsed time 3.37
[IterativeImputer] Ending imputation round 4/10, elapsed time 4.47
[IterativeImputer] Ending imputation round 5/10, elapsed time 5.61
[IterativeImputer] Ending imputation round 6/10, elapsed time 6.72
[IterativeImputer] Ending imputation round 7/10, elapsed time 7.86
[IterativeImputer] Ending imputation round 8/10, elapsed time 9.07
[IterativeImputer] Ending imputation round 9/10, elapsed time 10.31
[IterativeImputer] Ending imputation round 10/10, elapsed time 11.42
Imputation round 10
[IterativeImputer] Completing matrix with shape (26070, 31)
[IterativeImputer] Ending imputation round 1/10, elapsed time 1.12
[IterativeImputer] Ending imputation round 2/10, elapsed time 2.24
[IterativeImputer] Ending imputation round 3/10, elapsed time 3.40
[IterativeImputer] Ending imputation round 4/1

[IterativeImputer] Ending imputation round 9/10, elapsed time 10.28
[IterativeImputer] Ending imputation round 10/10, elapsed time 11.40
Imputation round 8
[IterativeImputer] Completing matrix with shape (26070, 31)
[IterativeImputer] Ending imputation round 1/10, elapsed time 1.19
[IterativeImputer] Ending imputation round 2/10, elapsed time 2.34
[IterativeImputer] Ending imputation round 3/10, elapsed time 3.53
[IterativeImputer] Ending imputation round 4/10, elapsed time 4.65
[IterativeImputer] Ending imputation round 5/10, elapsed time 5.82
[IterativeImputer] Ending imputation round 6/10, elapsed time 7.00
[IterativeImputer] Ending imputation round 7/10, elapsed time 8.28
[IterativeImputer] Ending imputation round 8/10, elapsed time 9.51
[IterativeImputer] Ending imputation round 9/10, elapsed time 10.68
[IterativeImputer] Ending imputation round 10/10, elapsed time 11.84
Imputation round 9
[IterativeImputer] Completing matrix with shape (26070, 31)
[IterativeImputer] Ending imputa

[IterativeImputer] Ending imputation round 6/10, elapsed time 7.28
[IterativeImputer] Ending imputation round 7/10, elapsed time 8.47
[IterativeImputer] Ending imputation round 8/10, elapsed time 9.63
[IterativeImputer] Ending imputation round 9/10, elapsed time 10.78
[IterativeImputer] Ending imputation round 10/10, elapsed time 11.96
Imputation round 7
[IterativeImputer] Completing matrix with shape (26070, 31)
[IterativeImputer] Ending imputation round 1/10, elapsed time 1.20
[IterativeImputer] Ending imputation round 2/10, elapsed time 2.44
[IterativeImputer] Ending imputation round 3/10, elapsed time 3.67
[IterativeImputer] Ending imputation round 4/10, elapsed time 4.81
[IterativeImputer] Ending imputation round 5/10, elapsed time 5.97
[IterativeImputer] Ending imputation round 6/10, elapsed time 7.10
[IterativeImputer] Ending imputation round 7/10, elapsed time 8.32
[IterativeImputer] Ending imputation round 8/10, elapsed time 9.50
[IterativeImputer] Ending imputation round 9/10

[IterativeImputer] Ending imputation round 3/10, elapsed time 3.27
[IterativeImputer] Ending imputation round 4/10, elapsed time 4.38
[IterativeImputer] Ending imputation round 5/10, elapsed time 5.57
[IterativeImputer] Ending imputation round 6/10, elapsed time 6.81
[IterativeImputer] Ending imputation round 7/10, elapsed time 7.92
[IterativeImputer] Ending imputation round 8/10, elapsed time 9.05
[IterativeImputer] Ending imputation round 9/10, elapsed time 10.13
[IterativeImputer] Ending imputation round 10/10, elapsed time 11.24
Imputation round 6
[IterativeImputer] Completing matrix with shape (26070, 31)
[IterativeImputer] Ending imputation round 1/10, elapsed time 1.06
[IterativeImputer] Ending imputation round 2/10, elapsed time 2.17
[IterativeImputer] Ending imputation round 3/10, elapsed time 3.27
[IterativeImputer] Ending imputation round 4/10, elapsed time 4.38
[IterativeImputer] Ending imputation round 5/10, elapsed time 5.49
[IterativeImputer] Ending imputation round 6/10

In [20]:
r3 = pd.DataFrame(r3).T
r3

Unnamed: 0,0,1,2,3
Backfill,-1.956722,1.461455,33676.2,1.439898
Overall Mean,-0.422216,1.018547,0.0,1.682485
Yearly Mean,0.00362,0.877554,52329.6,1.519553
Yearly Mean per Region,-0.067946,0.905732,58630.2,1.623332
Interpolate 3,-0.89111,1.178038,33706.8,1.519524
Interpolate all,-0.892725,1.159193,58.4,1.505378
ICE 1,0.956064,0.166354,0.0,10.506589
ICE 2,0.958219,0.167993,0.0,24.462234
ICE3,0.736991,0.452677,0.0,87.293367
MICE,0.967982,0.144287,0.0,140.492019


In [14]:
r3_detail = pd.DataFrame(pd.DataFrame(r3_detail).T.stack().to_dict()).T
r3_detail.columns = ['r2', 'rmse', 'still_missing', 't']
r3_detail

Unnamed: 0,Unnamed: 1,r2,rmse,still_missing,t
Backfill,0,-3.96422,1.152188,33619.0,1.433451
Backfill,1,-1.837371,1.679247,33786.0,1.440451
Backfill,2,-1.908139,1.523993,33654.0,1.44645
Backfill,3,-0.897788,1.538364,33643.0,1.433474
Backfill,4,-1.176092,1.413485,33679.0,1.445665
Overall Mean,0,-1.083659,0.734942,0.0,1.674192
Overall Mean,1,-0.203727,1.073588,0.0,1.738185
Overall Mean,2,-0.364231,1.025791,0.0,1.666184
Overall Mean,3,-0.17854,1.191847,0.0,1.675212
Overall Mean,4,-0.280922,1.066565,0.0,1.658654


In [21]:
sets = {'r1':r1, 'r1_detail':r1_detail,
       'r2':r2, 'r2_detail':r2_detail,
       'r3':r3, 'r3_detail':r3_detail,}
for name, df in sets.items():
    df.to_csv(f'additional_data/results/{name}.csv')