# Vergleich von Imputation Methoden

An dieser Stelle sollen verschiede Methoden zum interpolieren von fehlenden Werten betrachtet und verglichen werden.

### Vorbereitung

In [1]:
import pandas as pd
import numpy as np
import math
import time

from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer, KNNImputer

t0 = time.time()

In [2]:
results = []

In [3]:
def reset_base():
    base= pd.read_csv('additional_data/base.csv') 
    base.set_index(['Country Name', 'Indicator Name'], inplace=True)
    base = base.sort_index(level=['Country Name', 'Indicator Name'])
    return base
base = reset_base()

def get_cords(frac, rnd_state):
    n = int(base.isna().sum().sum()*frac)
    print(f'Testdaten mit {frac*100}% fehlenden Werten (absolut: {n})')
    #random state to ensure reproducibility
    rnds = np.random.RandomState(rnd_state)

    #coordinates for data entries to be removed randomly
    #5000 entries are selected
    cords = pd.DataFrame([[rnds.randint(0, len(base), size=n*4)[i], 
                  rnds.randint(0, len(base.columns), size=n*4)[i]]
                  for i in range(n*4)])

    #all coordinates pointing to NaN entries are removed and
    #first 1000 remaining entries are selected
    cords['value'] = [base.iloc[cords[0][i], cords[1][i]] for i in cords.index]
    cords = cords.dropna()[:n].reset_index(drop=True)
    
    return cords

def reset_train(cords):
    train = reset_base()
    for i in cords.index:
        train.iloc[cords[0][i], cords[1][i]] = None
    return train

def evaluate(df, t, cords):
    
    
    #scaling original data and imputed data
    #necessary ?????????????????????????????????????
    train = reset_train(cords)
    scaler = StandardScaler().fit(train) #fitting on train?
    norm_base = pd.DataFrame(scaler.transform(base))
    df = pd.DataFrame(scaler.transform(df))

    #getting imputed values for simulated NaNs and true value 
    res =pd.DataFrame({'y_true': [norm_base.iloc[cords[0][i], cords[1][i]] for i in cords.index],
                       'y_pred': [df.iloc[cords[0][i], cords[1][i]] for i in cords.index]
                      })
    res = res.dropna()

   
    #calculate evaluation metrics
    r2 = r2_score(res['y_true'], res['y_pred'])
    rmse = math.sqrt(mean_squared_error(res['y_true'], res['y_pred']))
    still_missing = df.isna().sum().sum()
    

    print(f'r2: {r2}, rmse: {rmse}, t: {t}')
    print('')
    
    return [r2, rmse, still_missing, t]



In [4]:
def impute_backfill(df):
    df = df.fillna(method='bfill', limit=3)
    return df

def impute_overall_means(df):
    #fill NaNs with overall mean of that indicator
    values = pd.DataFrame(df.stack()).groupby('Indicator Name')[0].mean()
    df = pd.DataFrame(df.stack(dropna=False))
    
    df[0] = df[0].fillna(df.groupby('Indicator Name')[0].transform('mean'))
    df = df.unstack()
    df.columns = df.columns.droplevel(0)
    df = df.sort_index(level=['Country Name', 'Indicator Name'])
        
    return df

def impute_yearly_means(df):
    #fill NaNs with overall mean of that indicator
    
    for i in df.columns:
        df[i] = df[i].fillna(df.groupby('Indicator Name')[i].transform('mean'))
            
    return df

def impute_yearly_means_per_region(df):
    country_data = pd.read_csv('../Data/WDICountry.csv')
    country_data = country_data.loc[:,['Table Name', 'Region']]
    df = pd.merge(df.reset_index(), country_data, how='left', left_on='Country Name', right_on='Table Name').drop('Table Name', axis=1)
    df = df.set_index(['Country Name', 'Indicator Name', 'Region'])

    for i in df.columns:
        df[i] = df[i].fillna(df.groupby(['Indicator Name', 'Region'])[i].transform('mean'))

    df = df.reset_index().set_index(['Country Name', 'Indicator Name']).drop('Region', axis=1)
    return df

def interpolate3(df):
    df = df.interpolate(limit=3)
    return df

def interpolate_all(df):
    df = df.interpolate()
    return df

def iterative_imputer_1(df):
    col = df.columns
    idx = df.index
    
    iter_imp = IterativeImputer(random_state=999)
    df= iter_imp.fit_transform(df)
    df= pd.DataFrame(df, columns=col, index=idx)
    return df

def iterative_imputer_2(df):
    df = df.unstack().T
    col = df.columns
    idx = df.index

    iter_imp = IterativeImputer(random_state=999)
    df= iter_imp.fit_transform(df)

    df = pd.DataFrame(df, columns=col, index=idx)
    df = df.unstack().T
    df = df.sort_index(level=['Country Name', 'Indicator Name'])
    
    return df

def iterative_imputer_3(df):

    df = df.reset_index()
    df = df.set_index(['Indicator Name', 'Country Name'])
    df = df.unstack().T

    col = df.columns
    idx = df.index

    iter_imp = IterativeImputer(random_state=999, verbose=True)
    df= iter_imp.fit_transform(df)

    df = pd.DataFrame(df, columns=col, index=idx)
    df = df.unstack().T
    df = df.reset_index()
    df = df.set_index(['Country Name', 'Indicator Name'])
    df = df.sort_index(level=['Country Name', 'Indicator Name'])
    
    return df

def mice_imputer(df):
    n_imputations =  12
    dfs = []
    col = df.columns
    idx = df.index
    
    for i in range(n_imputations): 
        print(f'Imputation round {i}')
        iter_imp = IterativeImputer(random_state=i, sample_posterior=True, verbose=2)
        df_temp = iter_imp.fit_transform(df)
        dfs.append(df_temp)
    
    df = np.mean(np.array(dfs), axis=0)
    df = pd.DataFrame(df, columns=col, index=idx)
    return df

def knn_imputer1(df):
    col = df.columns
    idx = df.index
    
    knn_imp = KNNImputer(n_neighbors=2)
    df= knn_imp.fit_transform(df)
    df = pd.DataFrame(df, columns=col, index=idx)
    return df

def knn_imputer2(df, n=4):
    
    df = df.reset_index()
    df = df.set_index(['Indicator Name', 'Country Name'])
    df = df.unstack().T

    col = df.columns
    idx = df.index

    knn_imp = KNNImputer(n_neighbors=n)
    df= knn_imp.fit_transform(df)
    df = pd.DataFrame(df, columns=col, index=idx)

    df = df.unstack().T
    df = df.reset_index()
    df = df.set_index(['Country Name', 'Indicator Name'])
    df = df.sort_index(level=['Country Name', 'Indicator Name'])
    
    return df

In [5]:
functions =  {'Backfill':impute_backfill, 'Overall Mean':impute_overall_means, 
              'Yearly Mean':impute_yearly_means, 'Yearly Mean per Region':impute_yearly_means_per_region, 
              'Interpolate 3':interpolate3, 'Interpolate all':interpolate_all, 
              'ICE 1':iterative_imputer_1, 'ICE 2':iterative_imputer_2, 
              'ICE 3':iterative_imputer_3, 'MICE':mice_imputer, 
              'KNN 1':knn_imputer1, 'KNN 2':knn_imputer2
             }

In [6]:
def run_all(frac):
    
    cords_list =[]
    n = 7
    for i in np.arange(1,n+1):
        cords_list.append(get_cords(frac, i))
    
    results = {}
    detailed_results = {}    
    
    for name, func in functions.items():
        func_runs = []
        print(name)
        
        for count, cords in enumerate(cords_list):
            
            print(f'run: {count+1}/{n}')
            t0 = time.time()
            df= func(reset_train(cords)) 
            t1 = time.time()

            t = t1-t0

            func_runs.append(evaluate(df, t, cords))
        
        print(func_runs)
        print ('')
        detailed_results[name] = func_runs
        results[name] = np.mean(np.array(func_runs), axis=0)
        
    return results, detailed_results


In [7]:
r1, r1_detail = run_all(0.05)

Testdaten mit 5.0% fehlenden Werten (absolut: 8515)
Testdaten mit 5.0% fehlenden Werten (absolut: 8515)
Testdaten mit 5.0% fehlenden Werten (absolut: 8515)
Testdaten mit 5.0% fehlenden Werten (absolut: 8515)
Testdaten mit 5.0% fehlenden Werten (absolut: 8515)
Backfill
run: 0
r2: -0.2630386067986965, rmse: 3.1264580984032304, t: 0.8018028736114502

run: 1
r2: -2.77533240911633, rmse: 1.1896157293266358, t: 0.7937955856323242

run: 2
r2: -1.688280205196218, rmse: 1.0226797653748734, t: 0.793506383895874

run: 3
r2: -0.6672076075877178, rmse: 2.085863590967128, t: 0.7936129570007324

run: 4
r2: -0.07720522781720884, rmse: 3.533882957257935, t: 0.7985427379608154

[[-0.2630386067986965, 3.1264580984032304, 32924, 0.8018028736114502], [-2.77533240911633, 1.1896157293266358, 32825, 0.7937955856323242], [-1.688280205196218, 1.0226797653748734, 32824, 0.793506383895874], [-0.6672076075877178, 2.085863590967128, 32841, 0.7936129570007324], [-0.07720522781720884, 3.533882957257935, 32828, 0.7985



r2: 0.9942158016960598, rmse: 0.20756410705991854, t: 9.451332092285156

run: 1




r2: 0.7827329807258437, rmse: 0.2820413273792655, t: 9.43781852722168

run: 2




r2: 0.988236531487906, rmse: 0.06661977159134161, t: 9.369900703430176

run: 3




r2: 0.9877198111831341, rmse: 0.1761993484346218, t: 9.42542839050293

run: 4




r2: 0.9908166506864295, rmse: 0.320979945536532, t: 9.420212507247925

[[0.9942158016960598, 0.20756410705991854, 0, 9.451332092285156], [0.7827329807258437, 0.2820413273792655, 0, 9.43781852722168], [0.988236531487906, 0.06661977159134161, 0, 9.369900703430176], [0.9877198111831341, 0.1761993484346218, 0, 9.42542839050293], [0.9908166506864295, 0.320979945536532, 0, 9.420212507247925]]
ICE 2
run: 0
r2: 0.897000119367959, rmse: 0.8758889134010717, t: 22.742194652557373

run: 1
r2: 0.9664793394870981, rmse: 0.1107827472103113, t: 22.920295238494873

run: 2
r2: 0.8501371093540149, rmse: 0.2377839268308384, t: 22.966735124588013

run: 3
r2: 0.965367564984733, rmse: 0.29589885856832354, t: 23.002361059188843

run: 4
r2: 0.9533613958864062, rmse: 0.7233527197482782, t: 22.765857696533203

[[0.897000119367959, 0.8758889134010717, 0, 22.742194652557373], [0.9664793394870981, 0.1107827472103113, 0, 22.920295238494873], [0.8501371093540149, 0.2377839268308384, 0, 22.966735124588013], [0.9653675



r2: 0.6724652756518652, rmse: 1.5619220044933946, t: 80.13416600227356

run: 1
[IterativeImputer] Completing matrix with shape (4898, 165)
[IterativeImputer] Change: 3.042905975534203e+16, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 2952750620995729.0, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 2637618919460105.0, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 839975951480219.5, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 416741465011542.9, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 206536252721299.34, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 257581722062894.0, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 3676756867963236.0, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 2184993869866275.8, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 3531016219250393.0, scaled tolerance: 35084726045503.402 




r2: 0.8649439154415968, rmse: 0.22236832474634613, t: 82.42843461036682

run: 2
[IterativeImputer] Completing matrix with shape (4898, 165)
[IterativeImputer] Change: 3.0843589729908572e+16, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 1979474405978225.0, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 2801112307662757.0, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 1574030763879201.2, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 970053503110370.4, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 444197716419550.9, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 120489078070655.97, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 250815913508324.62, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 334711927449455.7, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 169377876238841.16, scaled tolerance: 35084726045503.402 




r2: 0.4368929726829536, rmse: 0.4609256913736006, t: 82.52171683311462

run: 3
[IterativeImputer] Completing matrix with shape (4898, 165)
[IterativeImputer] Change: 4.07467735367784e+16, scaled tolerance: 24412569472460.902 
[IterativeImputer] Change: 1722959034065735.5, scaled tolerance: 24412569472460.902 
[IterativeImputer] Change: 2090673085752552.0, scaled tolerance: 24412569472460.902 
[IterativeImputer] Change: 1048890708859029.4, scaled tolerance: 24412569472460.902 
[IterativeImputer] Change: 477907701737168.3, scaled tolerance: 24412569472460.902 
[IterativeImputer] Change: 247554233211173.34, scaled tolerance: 24412569472460.902 
[IterativeImputer] Change: 186004828520420.94, scaled tolerance: 24412569472460.902 
[IterativeImputer] Change: 115911094752149.7, scaled tolerance: 24412569472460.902 
[IterativeImputer] Change: 4551435341310723.0, scaled tolerance: 24412569472460.902 
[IterativeImputer] Change: 2950508177613738.0, scaled tolerance: 24412569472460.902 




r2: 0.7933548811158273, rmse: 0.7227936884335278, t: 82.35985994338989

run: 4
[IterativeImputer] Completing matrix with shape (4898, 165)
[IterativeImputer] Change: 2.9978395215752304e+16, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 3003152652961603.5, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 2710129489422781.5, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 1503311021595322.8, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 637890235948702.4, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 193512071993254.84, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 95235849980947.98, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 3799024907012754.5, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 2165780169160924.2, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 758361314303099.8, scaled tolerance: 35084726045503.402 




r2: 0.7509497996987184, rmse: 1.6715554758531586, t: 82.6233344078064

[[0.6724652756518652, 1.5619220044933946, 0, 80.13416600227356], [0.8649439154415968, 0.22236832474634613, 0, 82.42843461036682], [0.4368929726829536, 0.4609256913736006, 0, 82.52171683311462], [0.7933548811158273, 0.7227936884335278, 0, 82.35985994338989], [0.7509497996987184, 1.6715554758531586, 0, 82.6233344078064]]
MICE
run: 0
Imputation round 0
[IterativeImputer] Completing matrix with shape (26070, 31)
[IterativeImputer] Ending imputation round 1/10, elapsed time 1.11
[IterativeImputer] Ending imputation round 2/10, elapsed time 2.20
[IterativeImputer] Ending imputation round 3/10, elapsed time 3.30
[IterativeImputer] Ending imputation round 4/10, elapsed time 4.36
[IterativeImputer] Ending imputation round 5/10, elapsed time 5.45
[IterativeImputer] Ending imputation round 6/10, elapsed time 6.53
[IterativeImputer] Ending imputation round 7/10, elapsed time 7.62
[IterativeImputer] Ending imputation round 8/10,

[IterativeImputer] Ending imputation round 4/10, elapsed time 4.38
[IterativeImputer] Ending imputation round 5/10, elapsed time 5.50
[IterativeImputer] Ending imputation round 6/10, elapsed time 6.57
[IterativeImputer] Ending imputation round 7/10, elapsed time 7.67
[IterativeImputer] Ending imputation round 8/10, elapsed time 8.72
[IterativeImputer] Ending imputation round 9/10, elapsed time 9.81
[IterativeImputer] Ending imputation round 10/10, elapsed time 10.89
Imputation round 11
[IterativeImputer] Completing matrix with shape (26070, 31)
[IterativeImputer] Ending imputation round 1/10, elapsed time 1.07
[IterativeImputer] Ending imputation round 2/10, elapsed time 2.12
[IterativeImputer] Ending imputation round 3/10, elapsed time 3.21
[IterativeImputer] Ending imputation round 4/10, elapsed time 4.27
[IterativeImputer] Ending imputation round 5/10, elapsed time 5.33
[IterativeImputer] Ending imputation round 6/10, elapsed time 6.41
[IterativeImputer] Ending imputation round 7/10

[IterativeImputer] Ending imputation round 2/10, elapsed time 2.14
[IterativeImputer] Ending imputation round 3/10, elapsed time 3.26
[IterativeImputer] Ending imputation round 4/10, elapsed time 4.34
[IterativeImputer] Ending imputation round 5/10, elapsed time 5.45
[IterativeImputer] Ending imputation round 6/10, elapsed time 6.53
[IterativeImputer] Ending imputation round 7/10, elapsed time 7.60
[IterativeImputer] Ending imputation round 8/10, elapsed time 8.69
[IterativeImputer] Ending imputation round 9/10, elapsed time 9.78
[IterativeImputer] Ending imputation round 10/10, elapsed time 10.85
Imputation round 10
[IterativeImputer] Completing matrix with shape (26070, 31)
[IterativeImputer] Ending imputation round 1/10, elapsed time 1.08
[IterativeImputer] Ending imputation round 2/10, elapsed time 2.16
[IterativeImputer] Ending imputation round 3/10, elapsed time 3.28
[IterativeImputer] Ending imputation round 4/10, elapsed time 4.38
[IterativeImputer] Ending imputation round 5/10

[IterativeImputer] Ending imputation round 1/10, elapsed time 1.09
[IterativeImputer] Ending imputation round 2/10, elapsed time 2.19
[IterativeImputer] Ending imputation round 3/10, elapsed time 3.25
[IterativeImputer] Ending imputation round 4/10, elapsed time 4.33
[IterativeImputer] Ending imputation round 5/10, elapsed time 5.39
[IterativeImputer] Ending imputation round 6/10, elapsed time 6.48
[IterativeImputer] Ending imputation round 7/10, elapsed time 7.55
[IterativeImputer] Ending imputation round 8/10, elapsed time 8.63
[IterativeImputer] Ending imputation round 9/10, elapsed time 9.72
[IterativeImputer] Ending imputation round 10/10, elapsed time 10.81
Imputation round 9
[IterativeImputer] Completing matrix with shape (26070, 31)
[IterativeImputer] Ending imputation round 1/10, elapsed time 1.08
[IterativeImputer] Ending imputation round 2/10, elapsed time 2.19
[IterativeImputer] Ending imputation round 3/10, elapsed time 3.24
[IterativeImputer] Ending imputation round 4/10,

[IterativeImputer] Ending imputation round 10/10, elapsed time 10.90
Imputation round 7
[IterativeImputer] Completing matrix with shape (26070, 31)
[IterativeImputer] Ending imputation round 1/10, elapsed time 1.07
[IterativeImputer] Ending imputation round 2/10, elapsed time 2.16
[IterativeImputer] Ending imputation round 3/10, elapsed time 3.26
[IterativeImputer] Ending imputation round 4/10, elapsed time 4.35
[IterativeImputer] Ending imputation round 5/10, elapsed time 5.41
[IterativeImputer] Ending imputation round 6/10, elapsed time 6.49
[IterativeImputer] Ending imputation round 7/10, elapsed time 7.59
[IterativeImputer] Ending imputation round 8/10, elapsed time 8.68
[IterativeImputer] Ending imputation round 9/10, elapsed time 9.78
[IterativeImputer] Ending imputation round 10/10, elapsed time 10.94
Imputation round 8
[IterativeImputer] Completing matrix with shape (26070, 31)
[IterativeImputer] Ending imputation round 1/10, elapsed time 1.08
[IterativeImputer] Ending imputati

[IterativeImputer] Ending imputation round 8/10, elapsed time 8.80
[IterativeImputer] Ending imputation round 9/10, elapsed time 9.92
[IterativeImputer] Ending imputation round 10/10, elapsed time 11.00
Imputation round 6
[IterativeImputer] Completing matrix with shape (26070, 31)
[IterativeImputer] Ending imputation round 1/10, elapsed time 1.10
[IterativeImputer] Ending imputation round 2/10, elapsed time 2.19
[IterativeImputer] Ending imputation round 3/10, elapsed time 3.32
[IterativeImputer] Ending imputation round 4/10, elapsed time 4.42
[IterativeImputer] Ending imputation round 5/10, elapsed time 5.52
[IterativeImputer] Ending imputation round 6/10, elapsed time 6.59
[IterativeImputer] Ending imputation round 7/10, elapsed time 7.70
[IterativeImputer] Ending imputation round 8/10, elapsed time 8.78
[IterativeImputer] Ending imputation round 9/10, elapsed time 9.85
[IterativeImputer] Ending imputation round 10/10, elapsed time 10.97
Imputation round 7
[IterativeImputer] Completi

In [8]:
r1 = pd.DataFrame(r1).T
r1

Unnamed: 0,0,1,2,3
Backfill,-1.094213,2.1917,32848.4,0.796252
Overall Mean,-0.251541,1.805936,0.0,1.041184
Yearly Mean,0.009964,1.768969,52298.0,0.879176
Yearly Mean per Region,0.016326,1.770021,58352.6,0.992067
Interpolate 3,-0.557857,1.999203,32879.0,0.861664
Interpolate all,-0.557853,1.966919,58.0,0.840356
ICE 1,0.948744,0.210681,0.0,9.420938
ICE 2,0.926469,0.448741,0.0,22.879489
ICE3,0.703721,0.927913,0.0,82.013502
MICE,0.951523,0.236147,0.0,132.102118


In [9]:
r1_detail = pd.DataFrame(pd.DataFrame(r1_detail).T.stack().to_dict()).T
r1_detail.columns = ['r2', 'rmse', 'still_missing', 't']
r1_detail

Unnamed: 0,Unnamed: 1,r2,rmse,still_missing,t
Backfill,0,-0.263039,3.126458,32924.0,0.801803
Backfill,1,-2.775332,1.189616,32825.0,0.793796
Backfill,2,-1.68828,1.02268,32824.0,0.793506
Backfill,3,-0.667208,2.085864,32841.0,0.793613
Backfill,4,-0.077205,3.533883,32828.0,0.798543
Overall Mean,0,0.056692,2.650681,0.0,1.042257
Overall Mean,1,-0.79021,0.809595,0.0,1.045049
Overall Mean,2,-0.553206,0.765509,0.0,1.039034
Overall Mean,3,-0.089972,1.660006,0.0,1.036537
Overall Mean,4,0.118993,3.143888,0.0,1.043042


In [10]:
r2 , r2_detail =  run_all(0.075)

Testdaten mit 7.5% fehlenden Werten (absolut: 12773)
Testdaten mit 7.5% fehlenden Werten (absolut: 12773)
Testdaten mit 7.5% fehlenden Werten (absolut: 12773)
Testdaten mit 7.5% fehlenden Werten (absolut: 12773)
Testdaten mit 7.5% fehlenden Werten (absolut: 12773)
Backfill
run: 0
r2: -0.7622360204251646, rmse: 1.9101575370444133, t: 1.104684829711914

run: 1
r2: -0.13326755926364164, rmse: 1.5732663625326984, t: 1.1138923168182373

run: 2
r2: -0.15471382925960686, rmse: 2.34729184379117, t: 1.1242446899414062

run: 3
r2: -0.38762500614250417, rmse: 2.1233606768359277, t: 1.1117191314697266

run: 4
r2: -0.12638032635757868, rmse: 1.8674634798834449, t: 1.1321732997894287

[[-0.7622360204251646, 1.9101575370444133, 33252, 1.104684829711914], [-0.13326755926364164, 1.5732663625326984, 33280, 1.1138923168182373], [-0.15471382925960686, 2.34729184379117, 33292, 1.1242446899414062], [-0.38762500614250417, 2.1233606768359277, 33312, 1.1117191314697266], [-0.12638032635757868, 1.86746347988344



r2: 0.9803957096839419, rmse: 0.19838139998923512, t: 9.555473566055298

run: 1




r2: 0.9755494244776735, rmse: 0.22712199683626336, t: 9.513757944107056

run: 2




r2: 0.9937200129848803, rmse: 0.17013101521988602, t: 9.543735980987549

run: 3




r2: 0.9831619635029917, rmse: 0.229839617647022, t: 9.572115659713745

run: 4




r2: 0.9973721005760114, rmse: 0.08877467621858194, t: 9.56584882736206

[[0.9803957096839419, 0.19838139998923512, 0, 9.555473566055298], [0.9755494244776735, 0.22712199683626336, 0, 9.513757944107056], [0.9937200129848803, 0.17013101521988602, 0, 9.543735980987549], [0.9831619635029917, 0.229839617647022, 0, 9.572115659713745], [0.9973721005760114, 0.08877467621858194, 0, 9.56584882736206]]
ICE 2
run: 0
r2: 0.9838505622042556, rmse: 0.18005454652769592, t: 23.370723247528076

run: 1
r2: 0.9878728227407547, rmse: 0.15995378362387666, t: 23.471527099609375

run: 2
r2: 0.8221999876264665, rmse: 0.905253086250653, t: 23.305643320083618

run: 3
r2: 0.9391753592620334, rmse: 0.4368365247925406, t: 23.0691020488739

run: 4
r2: 0.992714126005562, rmse: 0.14781739867029778, t: 23.236597299575806

[[0.9838505622042556, 0.18005454652769592, 0, 23.370723247528076], [0.9878728227407547, 0.15995378362387666, 0, 23.471527099609375], [0.8221999876264665, 0.905253086250653, 0, 23.305643320083618], [0.



r2: 0.8623209307706443, rmse: 0.5257258705130095, t: 82.11966681480408

run: 1
[IterativeImputer] Completing matrix with shape (4898, 165)
[IterativeImputer] Change: 3.0814034864734116e+16, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 4057322175223235.0, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 1704338202139627.8, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 563607608696027.9, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 366278041760450.8, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 420490792342915.0, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 492161727699741.25, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 304111116287310.9, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 212560707125846.62, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 164850327307588.88, scaled tolerance: 35084726045503.402 




r2: 0.9248967198209728, rmse: 0.39805593856601507, t: 82.02079224586487

run: 2
[IterativeImputer] Completing matrix with shape (4898, 165)
[IterativeImputer] Change: 3.981588887350839e+16, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 3272447647919451.5, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 1418769504483334.8, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 1185290704657987.8, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 984878014627611.0, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 699725792920155.2, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 446267193102673.9, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 282067752214580.44, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 181505497690240.4, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 200443426804860.38, scaled tolerance: 35084726045503.402 




r2: 0.411709223258674, rmse: 1.646645609287345, t: 81.23988032341003

run: 3
[IterativeImputer] Completing matrix with shape (4898, 165)
[IterativeImputer] Change: 3.0028660204699064e+16, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 2187714572436955.5, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 1841927527231725.5, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 1182919697590624.2, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 723035118930045.4, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 196045068836978.94, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 3405236684923543.0, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 1806700450486974.2, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 1133879646810791.5, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 509388943169015.9, scaled tolerance: 35084726045503.402 




r2: 0.8924299870525689, rmse: 0.5809313016449393, t: 80.14908242225647

run: 4
[IterativeImputer] Completing matrix with shape (4898, 165)
[IterativeImputer] Change: 2.7361713732346948e+16, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 2776178018187145.5, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 1863927789523974.8, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 1148278767394427.5, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 879431074863914.5, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 424335362795948.2, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 4163317329132051.0, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 1885594848492097.5, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 902792927245435.2, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 422566241255128.2, scaled tolerance: 35084726045503.402 




r2: 0.834380973318203, rmse: 0.7047577188321924, t: 79.6113588809967

[[0.8623209307706443, 0.5257258705130095, 0, 82.11966681480408], [0.9248967198209728, 0.39805593856601507, 0, 82.02079224586487], [0.411709223258674, 1.646645609287345, 0, 81.23988032341003], [0.8924299870525689, 0.5809313016449393, 0, 80.14908242225647], [0.834380973318203, 0.7047577188321924, 0, 79.6113588809967]]
MICE
run: 0
Imputation round 0
[IterativeImputer] Completing matrix with shape (26070, 31)
[IterativeImputer] Ending imputation round 1/10, elapsed time 1.05
[IterativeImputer] Ending imputation round 2/10, elapsed time 2.13
[IterativeImputer] Ending imputation round 3/10, elapsed time 3.20
[IterativeImputer] Ending imputation round 4/10, elapsed time 4.29
[IterativeImputer] Ending imputation round 5/10, elapsed time 5.35
[IterativeImputer] Ending imputation round 6/10, elapsed time 6.42
[IterativeImputer] Ending imputation round 7/10, elapsed time 7.49
[IterativeImputer] Ending imputation round 8/10, ela

[IterativeImputer] Ending imputation round 5/10, elapsed time 5.36
[IterativeImputer] Ending imputation round 6/10, elapsed time 6.46
[IterativeImputer] Ending imputation round 7/10, elapsed time 7.52
[IterativeImputer] Ending imputation round 8/10, elapsed time 8.61
[IterativeImputer] Ending imputation round 9/10, elapsed time 9.69
[IterativeImputer] Ending imputation round 10/10, elapsed time 10.77
Imputation round 11
[IterativeImputer] Completing matrix with shape (26070, 31)
[IterativeImputer] Ending imputation round 1/10, elapsed time 1.07
[IterativeImputer] Ending imputation round 2/10, elapsed time 2.13
[IterativeImputer] Ending imputation round 3/10, elapsed time 3.20
[IterativeImputer] Ending imputation round 4/10, elapsed time 4.27
[IterativeImputer] Ending imputation round 5/10, elapsed time 5.31
[IterativeImputer] Ending imputation round 6/10, elapsed time 6.39
[IterativeImputer] Ending imputation round 7/10, elapsed time 7.48
[IterativeImputer] Ending imputation round 8/10

[IterativeImputer] Ending imputation round 3/10, elapsed time 3.25
[IterativeImputer] Ending imputation round 4/10, elapsed time 4.33
[IterativeImputer] Ending imputation round 5/10, elapsed time 5.40
[IterativeImputer] Ending imputation round 6/10, elapsed time 6.46
[IterativeImputer] Ending imputation round 7/10, elapsed time 7.55
[IterativeImputer] Ending imputation round 8/10, elapsed time 8.62
[IterativeImputer] Ending imputation round 9/10, elapsed time 9.72
[IterativeImputer] Ending imputation round 10/10, elapsed time 10.80
Imputation round 10
[IterativeImputer] Completing matrix with shape (26070, 31)
[IterativeImputer] Ending imputation round 1/10, elapsed time 1.09
[IterativeImputer] Ending imputation round 2/10, elapsed time 2.14
[IterativeImputer] Ending imputation round 3/10, elapsed time 3.24
[IterativeImputer] Ending imputation round 4/10, elapsed time 4.31
[IterativeImputer] Ending imputation round 5/10, elapsed time 5.40
[IterativeImputer] Ending imputation round 6/10

[IterativeImputer] Ending imputation round 1/10, elapsed time 1.07
[IterativeImputer] Ending imputation round 2/10, elapsed time 2.16
[IterativeImputer] Ending imputation round 3/10, elapsed time 3.21
[IterativeImputer] Ending imputation round 4/10, elapsed time 4.30
[IterativeImputer] Ending imputation round 5/10, elapsed time 5.36
[IterativeImputer] Ending imputation round 6/10, elapsed time 6.45
[IterativeImputer] Ending imputation round 7/10, elapsed time 7.54
[IterativeImputer] Ending imputation round 8/10, elapsed time 8.64
[IterativeImputer] Ending imputation round 9/10, elapsed time 9.71
[IterativeImputer] Ending imputation round 10/10, elapsed time 10.81
Imputation round 9
[IterativeImputer] Completing matrix with shape (26070, 31)
[IterativeImputer] Ending imputation round 1/10, elapsed time 1.06
[IterativeImputer] Ending imputation round 2/10, elapsed time 2.14
[IterativeImputer] Ending imputation round 3/10, elapsed time 3.20
[IterativeImputer] Ending imputation round 4/10,

[IterativeImputer] Ending imputation round 10/10, elapsed time 10.65
Imputation round 7
[IterativeImputer] Completing matrix with shape (26070, 31)
[IterativeImputer] Ending imputation round 1/10, elapsed time 1.05
[IterativeImputer] Ending imputation round 2/10, elapsed time 2.11
[IterativeImputer] Ending imputation round 3/10, elapsed time 3.18
[IterativeImputer] Ending imputation round 4/10, elapsed time 4.24
[IterativeImputer] Ending imputation round 5/10, elapsed time 5.34
[IterativeImputer] Ending imputation round 6/10, elapsed time 6.40
[IterativeImputer] Ending imputation round 7/10, elapsed time 7.49
[IterativeImputer] Ending imputation round 8/10, elapsed time 8.57
[IterativeImputer] Ending imputation round 9/10, elapsed time 9.64
[IterativeImputer] Ending imputation round 10/10, elapsed time 10.70
Imputation round 8
[IterativeImputer] Completing matrix with shape (26070, 31)
[IterativeImputer] Ending imputation round 1/10, elapsed time 1.08
[IterativeImputer] Ending imputati

[IterativeImputer] Ending imputation round 8/10, elapsed time 8.51
[IterativeImputer] Ending imputation round 9/10, elapsed time 9.58
[IterativeImputer] Ending imputation round 10/10, elapsed time 10.66
Imputation round 6
[IterativeImputer] Completing matrix with shape (26070, 31)
[IterativeImputer] Ending imputation round 1/10, elapsed time 1.07
[IterativeImputer] Ending imputation round 2/10, elapsed time 2.15
[IterativeImputer] Ending imputation round 3/10, elapsed time 3.22
[IterativeImputer] Ending imputation round 4/10, elapsed time 4.32
[IterativeImputer] Ending imputation round 5/10, elapsed time 5.40
[IterativeImputer] Ending imputation round 6/10, elapsed time 6.48
[IterativeImputer] Ending imputation round 7/10, elapsed time 7.56
[IterativeImputer] Ending imputation round 8/10, elapsed time 8.65
[IterativeImputer] Ending imputation round 9/10, elapsed time 9.71
[IterativeImputer] Ending imputation round 10/10, elapsed time 10.80
Imputation round 7
[IterativeImputer] Completi

In [11]:
r2 = pd.DataFrame(r2).T
r2

Unnamed: 0,0,1,2,3
Backfill,-0.312845,1.964308,33265.4,1.117343
Overall Mean,-0.052291,1.740381,0.0,1.360476
Yearly Mean,0.014764,1.692044,52298.0,1.200444
Yearly Mean per Region,0.049003,1.681218,58425.0,1.309644
Interpolate 3,-0.219687,1.909891,33296.8,1.203304
Interpolate all,-0.221227,1.880009,59.6,1.168807
ICE 1,0.98604,0.18285,0.0,9.550186
ICE 2,0.945163,0.365983,0.0,23.290719
ICE3,0.785148,0.771223,0.0,81.028156
MICE,0.988788,0.168885,0.0,130.558931


In [12]:
r2_detail = pd.DataFrame(pd.DataFrame(r2_detail).T.stack().to_dict()).T
r2_detail.columns = ['r2', 'rmse', 'still_missing', 't']
r2_detail

Unnamed: 0,Unnamed: 1,r2,rmse,still_missing,t
Backfill,0,-0.762236,1.910158,33252.0,1.104685
Backfill,1,-0.133268,1.573266,33280.0,1.113892
Backfill,2,-0.154714,2.347292,33292.0,1.124245
Backfill,3,-0.387625,2.123361,33312.0,1.111719
Backfill,4,-0.12638,1.867463,33191.0,1.132173
Overall Mean,0,-0.122387,1.501055,0.0,1.353251
Overall Mean,1,-0.088914,1.515694,0.0,1.36347
Overall Mean,2,0.032755,2.111409,0.0,1.354509
Overall Mean,3,-0.007757,1.778103,0.0,1.378921
Overall Mean,4,-0.075152,1.795642,0.0,1.352227


In [13]:
r3, r3_detail = run_all(0.1)

Testdaten mit 10.0% fehlenden Werten (absolut: 17030)
Testdaten mit 10.0% fehlenden Werten (absolut: 17030)
Testdaten mit 10.0% fehlenden Werten (absolut: 17030)
Testdaten mit 10.0% fehlenden Werten (absolut: 17030)
Testdaten mit 10.0% fehlenden Werten (absolut: 17030)
Backfill
run: 0
r2: -3.964219549140708, rmse: 1.1521878884113992, t: 1.4466934204101562

run: 1
r2: -1.8373707535864638, rmse: 1.6792466658151293, t: 1.436490535736084

run: 2
r2: -1.9081389310564236, rmse: 1.5239926108614468, t: 1.4265525341033936

run: 3
r2: -0.8977876217879388, rmse: 1.5383638773729555, t: 1.4446756839752197

run: 4
r2: -1.1760918948068961, rmse: 1.4134849682042556, t: 1.466137409210205

[[-3.964219549140708, 1.1521878884113992, 33619, 1.4466934204101562], [-1.8373707535864638, 1.6792466658151293, 33786, 1.436490535736084], [-1.9081389310564236, 1.5239926108614468, 33654, 1.4265525341033936], [-0.8977876217879388, 1.5383638773729555, 33643, 1.4446756839752197], [-1.1760918948068961, 1.4134849682042556



r2: 0.9370202334659707, rmse: 0.12777336992410396, t: 9.834977388381958

run: 1




r2: 0.9521427024799756, rmse: 0.2140660895293748, t: 9.787731409072876

run: 2




r2: 0.9893949691492818, rmse: 0.09044214578504266, t: 9.85459280014038

run: 3




r2: 0.9872547817984083, rmse: 0.1239430322939622, t: 9.84728479385376

run: 4




r2: 0.9145067510943837, rmse: 0.27554462283829784, t: 9.864806413650513

[[0.9370202334659707, 0.12777336992410396, 0, 9.834977388381958], [0.9521427024799756, 0.2140660895293748, 0, 9.787731409072876], [0.9893949691492818, 0.09044214578504266, 0, 9.85459280014038], [0.9872547817984083, 0.1239430322939622, 0, 9.84728479385376], [0.9145067510943837, 0.27554462283829784, 0, 9.864806413650513]]
ICE 2
run: 0
r2: 0.9353924573529703, rmse: 0.12941405261701314, t: 23.000120162963867

run: 1
r2: 0.9754346077997191, rmse: 0.1533683133456013, t: 23.263725519180298

run: 2
r2: 0.9360508486955306, rmse: 0.22209167631387725, t: 23.077012538909912

run: 3
r2: 0.9788027743296512, rmse: 0.1598409276001701, t: 23.27027440071106

run: 4
r2: 0.9654162535346161, rmse: 0.17525162313588508, t: 24.333306550979614

[[0.9353924573529703, 0.12941405261701314, 0, 23.000120162963867], [0.9754346077997191, 0.1533683133456013, 0, 23.263725519180298], [0.9360508486955306, 0.22209167631387725, 0, 23.077012538909912],



r2: 0.8656510287868295, rmse: 0.18661949011636286, t: 82.09881234169006

run: 1
[IterativeImputer] Completing matrix with shape (4898, 165)
[IterativeImputer] Change: 3.0793701416671844e+16, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 1965670947388250.5, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 2055781807062901.5, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 1202414568308490.8, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 693042073549476.0, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 378724405818521.25, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 255856467206931.5, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 267666823038194.47, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 154395120361844.72, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 148140249946584.5, scaled tolerance: 35084726045503.402 




r2: 0.7873761250218165, rmse: 0.4512111309121132, t: 81.2285966873169

run: 2
[IterativeImputer] Completing matrix with shape (4898, 165)
[IterativeImputer] Change: 2.767695010894046e+16, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 2215172710472481.0, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 1272751790088711.5, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 1112711149673679.8, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 802807918332657.8, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 493386041322739.56, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 430603857658830.25, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 332478603549620.6, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 342605605246312.56, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 441335845081200.56, scaled tolerance: 35084726045503.402 




r2: 0.6804537315552623, rmse: 0.4964571441735188, t: 81.7561821937561

run: 3
[IterativeImputer] Completing matrix with shape (4898, 165)
[IterativeImputer] Change: 2.9054460292016464e+16, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 2686861492950140.0, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 2020346442314162.2, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 966537983426960.9, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 582942911181494.8, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 420352635594125.2, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 330099304310940.4, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 358375333903242.25, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 3634484630414166.0, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 1767168676599967.5, scaled tolerance: 35084726045503.402 




r2: 0.47922093799031384, rmse: 0.7922736954811134, t: 81.32916283607483

run: 4
[IterativeImputer] Completing matrix with shape (4898, 165)
[IterativeImputer] Change: 2.846033471882781e+16, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 2443889493057179.5, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 1118613781447626.5, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 1656357204430435.2, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 1009176985672603.5, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 559421305245813.94, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 312335935597925.06, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 220725728023061.8, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 164621507370038.3, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 82701702766819.66, scaled tolerance: 35084726045503.402 




r2: 0.8722507778212176, rmse: 0.3368256541499248, t: 81.10620617866516

[[0.8656510287868295, 0.18661949011636286, 0, 82.09881234169006], [0.7873761250218165, 0.4512111309121132, 0, 81.2285966873169], [0.6804537315552623, 0.4964571441735188, 0, 81.7561821937561], [0.47922093799031384, 0.7922736954811134, 0, 81.32916283607483], [0.8722507778212176, 0.3368256541499248, 0, 81.10620617866516]]
MICE
run: 0
Imputation round 0
[IterativeImputer] Completing matrix with shape (26070, 31)
[IterativeImputer] Ending imputation round 1/10, elapsed time 1.07
[IterativeImputer] Ending imputation round 2/10, elapsed time 2.15
[IterativeImputer] Ending imputation round 3/10, elapsed time 3.23
[IterativeImputer] Ending imputation round 4/10, elapsed time 4.32
[IterativeImputer] Ending imputation round 5/10, elapsed time 5.42
[IterativeImputer] Ending imputation round 6/10, elapsed time 6.47
[IterativeImputer] Ending imputation round 7/10, elapsed time 7.55
[IterativeImputer] Ending imputation round 8/10

[IterativeImputer] Ending imputation round 4/10, elapsed time 4.32
[IterativeImputer] Ending imputation round 5/10, elapsed time 5.39
[IterativeImputer] Ending imputation round 6/10, elapsed time 6.48
[IterativeImputer] Ending imputation round 7/10, elapsed time 7.53
[IterativeImputer] Ending imputation round 8/10, elapsed time 8.62
[IterativeImputer] Ending imputation round 9/10, elapsed time 9.69
[IterativeImputer] Ending imputation round 10/10, elapsed time 10.79
Imputation round 11
[IterativeImputer] Completing matrix with shape (26070, 31)
[IterativeImputer] Ending imputation round 1/10, elapsed time 1.07
[IterativeImputer] Ending imputation round 2/10, elapsed time 2.16
[IterativeImputer] Ending imputation round 3/10, elapsed time 3.23
[IterativeImputer] Ending imputation round 4/10, elapsed time 4.30
[IterativeImputer] Ending imputation round 5/10, elapsed time 5.35
[IterativeImputer] Ending imputation round 6/10, elapsed time 6.43
[IterativeImputer] Ending imputation round 7/10

[IterativeImputer] Ending imputation round 2/10, elapsed time 2.15
[IterativeImputer] Ending imputation round 3/10, elapsed time 3.23
[IterativeImputer] Ending imputation round 4/10, elapsed time 4.31
[IterativeImputer] Ending imputation round 5/10, elapsed time 5.39
[IterativeImputer] Ending imputation round 6/10, elapsed time 6.47
[IterativeImputer] Ending imputation round 7/10, elapsed time 7.57
[IterativeImputer] Ending imputation round 8/10, elapsed time 8.64
[IterativeImputer] Ending imputation round 9/10, elapsed time 9.72
[IterativeImputer] Ending imputation round 10/10, elapsed time 10.79
Imputation round 10
[IterativeImputer] Completing matrix with shape (26070, 31)
[IterativeImputer] Ending imputation round 1/10, elapsed time 1.09
[IterativeImputer] Ending imputation round 2/10, elapsed time 2.16
[IterativeImputer] Ending imputation round 3/10, elapsed time 3.25
[IterativeImputer] Ending imputation round 4/10, elapsed time 4.34
[IterativeImputer] Ending imputation round 5/10

[IterativeImputer] Ending imputation round 1/10, elapsed time 1.07
[IterativeImputer] Ending imputation round 2/10, elapsed time 2.15
[IterativeImputer] Ending imputation round 3/10, elapsed time 3.23
[IterativeImputer] Ending imputation round 4/10, elapsed time 4.30
[IterativeImputer] Ending imputation round 5/10, elapsed time 5.41
[IterativeImputer] Ending imputation round 6/10, elapsed time 6.49
[IterativeImputer] Ending imputation round 7/10, elapsed time 7.59
[IterativeImputer] Ending imputation round 8/10, elapsed time 8.64
[IterativeImputer] Ending imputation round 9/10, elapsed time 9.73
[IterativeImputer] Ending imputation round 10/10, elapsed time 10.79
Imputation round 9
[IterativeImputer] Completing matrix with shape (26070, 31)
[IterativeImputer] Ending imputation round 1/10, elapsed time 1.08
[IterativeImputer] Ending imputation round 2/10, elapsed time 2.15
[IterativeImputer] Ending imputation round 3/10, elapsed time 3.25
[IterativeImputer] Ending imputation round 4/10,

[IterativeImputer] Ending imputation round 10/10, elapsed time 10.79
Imputation round 7
[IterativeImputer] Completing matrix with shape (26070, 31)
[IterativeImputer] Ending imputation round 1/10, elapsed time 1.08
[IterativeImputer] Ending imputation round 2/10, elapsed time 2.18
[IterativeImputer] Ending imputation round 3/10, elapsed time 3.26
[IterativeImputer] Ending imputation round 4/10, elapsed time 4.34
[IterativeImputer] Ending imputation round 5/10, elapsed time 5.40
[IterativeImputer] Ending imputation round 6/10, elapsed time 6.48
[IterativeImputer] Ending imputation round 7/10, elapsed time 7.54
[IterativeImputer] Ending imputation round 8/10, elapsed time 8.63
[IterativeImputer] Ending imputation round 9/10, elapsed time 9.69
[IterativeImputer] Ending imputation round 10/10, elapsed time 10.77
Imputation round 8
[IterativeImputer] Completing matrix with shape (26070, 31)
[IterativeImputer] Ending imputation round 1/10, elapsed time 1.07
[IterativeImputer] Ending imputati

[IterativeImputer] Ending imputation round 8/10, elapsed time 8.66
[IterativeImputer] Ending imputation round 9/10, elapsed time 9.73
[IterativeImputer] Ending imputation round 10/10, elapsed time 10.83
Imputation round 6
[IterativeImputer] Completing matrix with shape (26070, 31)
[IterativeImputer] Ending imputation round 1/10, elapsed time 1.07
[IterativeImputer] Ending imputation round 2/10, elapsed time 2.16
[IterativeImputer] Ending imputation round 3/10, elapsed time 3.23
[IterativeImputer] Ending imputation round 4/10, elapsed time 4.32
[IterativeImputer] Ending imputation round 5/10, elapsed time 5.37
[IterativeImputer] Ending imputation round 6/10, elapsed time 6.47
[IterativeImputer] Ending imputation round 7/10, elapsed time 7.51
[IterativeImputer] Ending imputation round 8/10, elapsed time 8.60
[IterativeImputer] Ending imputation round 9/10, elapsed time 9.69
[IterativeImputer] Ending imputation round 10/10, elapsed time 10.78
Imputation round 7
[IterativeImputer] Completi

In [14]:
r3 = pd.DataFrame(r3).T
r3

Unnamed: 0,0,1,2,3
Backfill,-1.956722,1.461455,33676.2,1.44411
Overall Mean,-0.422216,1.018547,0.0,1.68665
Yearly Mean,0.00362,0.877554,52329.6,1.516319
Yearly Mean per Region,-0.067946,0.905732,58630.2,1.620215
Interpolate 3,-0.89111,1.178038,33706.8,1.502049
Interpolate all,-0.892725,1.159193,58.4,1.485679
ICE 1,0.956064,0.166354,0.0,9.837879
ICE 2,0.958219,0.167993,0.0,23.388888
ICE3,0.736991,0.452677,0.0,81.503792
MICE,0.967982,0.144287,0.0,131.616426


In [15]:
r3_detail = pd.DataFrame(pd.DataFrame(r3_detail).T.stack().to_dict()).T
r3_detail.columns = ['r2', 'rmse', 'still_missing', 't']
r3_detail

Unnamed: 0,Unnamed: 1,r2,rmse,still_missing,t
Backfill,0,-3.96422,1.152188,33619.0,1.446693
Backfill,1,-1.837371,1.679247,33786.0,1.436491
Backfill,2,-1.908139,1.523993,33654.0,1.426553
Backfill,3,-0.897788,1.538364,33643.0,1.444676
Backfill,4,-1.176092,1.413485,33679.0,1.466137
Overall Mean,0,-1.083659,0.734942,0.0,1.685571
Overall Mean,1,-0.203727,1.073588,0.0,1.687289
Overall Mean,2,-0.364231,1.025791,0.0,1.689113
Overall Mean,3,-0.17854,1.191847,0.0,1.684852
Overall Mean,4,-0.280922,1.066565,0.0,1.686424


In [16]:
t1 = time.time()
t = t1-t0
print(f'comlete runtime: {t/60}min')

comlete runtime: 108.46655972798665min


In [17]:
sets = {'r1':r1, 'r1_detail':r1_detail,
       'r2':r2, 'r2_detail':r2_detail,
       'r3':r3, 'r3_detail':r3_detail,}
for name, df in sets.items():
    df.to_csv(f'additional_data/results/{name}.csv')