In [1]:
import pandas as pd
import numpy as np
import math
import time

from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer, KNNImputer

In [2]:
def reset_base():
    base= pd.read_csv('additional_data/base.csv') 
    base.set_index(['Country Name', 'Indicator Name'], inplace=True)
    base = base.sort_index(level=['Country Name', 'Indicator Name'])
    return base

In [3]:
base = reset_base()
base.isna().sum().sum()

170307

In [4]:
def get_cords(frac):
    n = int(base.isna().sum().sum()*frac)
    print(f'Testdaten mit {frac*100}% fehlenden Werten (absolut: {n})')
    #random state to ensure reproducibility
    rnds = np.random.RandomState(n)

    #coordinates for data entries to be removed randomly
    #5000 entries are selected
    cords = pd.DataFrame([[rnds.randint(0, len(base), size=n*4)[i], 
                  rnds.randint(0, len(base.columns), size=n*4)[i]]
                  for i in range(n*4)])

    #all coordinates pointing to NaN entries are removed and
    #first 1000 remaining entries are selected
    cords['value'] = [base.iloc[cords[0][i], cords[1][i]] for i in cords.index]
    cords = cords.dropna()[:n].reset_index(drop=True)
    
    return cords

In [5]:
cords = get_cords(0.05)

Testdaten mit 5.0% fehlenden Werten (absolut: 8515)


In [6]:
results = []

In [7]:
def evaluate(method, df, t):
    
        
    #scaling original data and imputed data
    #necessary ?????????????????????????????????????
    train = reset_train(cords)
    scaler = StandardScaler().fit(train) #fitting on train?
    norm_base = pd.DataFrame(scaler.transform(base))
    df = pd.DataFrame(scaler.transform(df))

    #getting imputed values for simulated NaNs and true value 
    res =pd.DataFrame({'y_true': [norm_base.iloc[cords[0][i], cords[1][i]] for i in cords.index],
                       'y_pred': [df.iloc[cords[0][i], cords[1][i]] for i in cords.index]
                      })
    res = res.dropna()

   
    #calculate evaluation metrics
    r2 = r2_score(res['y_true'], res['y_pred'])
    rmse = math.sqrt(mean_squared_error(res['y_true'], res['y_pred']))
    still_missing = df.isna().sum().sum()
    
    print(f'Mit dieser Methode bleiben {still_missing} NaNs bestehen.')
    print('')
    print(f'{len(res)} Werte wurden für die Metriken verwendet.')
    print(f'r2: {r2}, rmse: {rmse}')
    
    results.append([method, r2, rmse, still_missing, t])


In [8]:
#getting train data by changing randomly chosen values to NaN
def reset_train(cords):
    train = base.copy()
    for i in cords.index:
        train.iloc[cords[0][i], cords[1][i]] = None
    return train

In [9]:
def iterative_imputer_1(df):
    col = df.columns
    idx = df.index
    
    iter_imp = IterativeImputer(random_state=999)
    df= iter_imp.fit_transform(df)
    df= pd.DataFrame(df, columns=col, index=idx)
    return df

def iterative_imputer_2(df):
    df = df.unstack().T
    col = df.columns
    idx = df.index

    iter_imp = IterativeImputer(random_state=999)
    df= iter_imp.fit_transform(df)

    df = pd.DataFrame(df, columns=col, index=idx)
    df = df.unstack().T
    df = df.sort_index(level=['Country Name', 'Indicator Name'])
    
    return df

def iterative_imputer_3(df):

    df = df.reset_index()
    df = df.set_index(['Indicator Name', 'Country Name'])
    df = df.unstack().T

    col = df.columns
    idx = df.index

    iter_imp = IterativeImputer(random_state=999, verbose=True)
    df= iter_imp.fit_transform(df)

    df = pd.DataFrame(df, columns=col, index=idx)
    df = df.unstack().T
    df = df.reset_index()
    df = df.set_index(['Country Name', 'Indicator Name'])
    df = df.sort_index(level=['Country Name', 'Indicator Name'])
    
    return df

def mice_imputer2(df):
    n_imputations =  12
    dfs = []

    df = df.reset_index()
    df = df.set_index(['Indicator Name', 'Country Name'])
    df = df.unstack().T

    col = df.columns
    idx = df.index
    
    for i in range(n_imputations): 
        print(f'Imputation round {i}')
        iter_imp = IterativeImputer(random_state=i+200, sample_posterior=True, verbose=2)
        df_temp = iter_imp.fit_transform(df)
        dfs.append(df_temp)
    
    df = np.mean(np.array(dfs), axis=0)
    df = pd.DataFrame(df, columns=col, index=idx)
    df = df.unstack().T
    df = df.reset_index()
    df = df.set_index(['Country Name', 'Indicator Name'])
    df = df.sort_index(level=['Country Name', 'Indicator Name'])
    return dfs, df

In [10]:
base = reset_base()
train = reset_train(cords)

t0 = time.time()
df = iterative_imputer_1(train)
t1 = time.time()

t = t1-t0

#df.to_csv('additional_data/imputed_sets/ice.csv')
evaluate('Iterative Imputer 1', df, t)



Mit dieser Methode bleiben 0 NaNs bestehen.

8515 Werte wurden für die Metriken verwendet.
r2: 0.9609051476884861, rmse: 0.07921973204963073


In [11]:
base = reset_base()
train = reset_train(cords)

t0 = time.time()
df = iterative_imputer_3(train)
t1 = time.time()

t = t1-t0

#df.to_csv('additional_data/imputed_sets/ice.csv')
evaluate('Iterative Imputer 3', df, t)

[IterativeImputer] Completing matrix with shape (4898, 165)
[IterativeImputer] Change: 3.147511124721088e+16, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 1034671076716780.0, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 659123757395787.5, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 616363768648911.4, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 2396881809792168.5, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 603334623851478.6, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 3452555317561406.0, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 1566687283686987.5, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 562306276969730.8, scaled tolerance: 35084726045503.402 
[IterativeImputer] Change: 263990229283100.84, scaled tolerance: 35084726045503.402 




Mit dieser Methode bleiben 0 NaNs bestehen.

8515 Werte wurden für die Metriken verwendet.
r2: 0.662632017747099, rmse: 0.2327156083233093


In [12]:
df = df.reset_index()
sample = 'Primary education, duration (years)'

df.loc[df['Indicator Name']==sample].head(5)

Unnamed: 0,Country Name,Indicator Name,1990,1991,1992,1993,1994,1995,1996,1997,...,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020
118,Afghanistan,"Primary education, duration (years)",6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,...,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0
283,Albania,"Primary education, duration (years)",4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,...,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,4.534271
448,Algeria,"Primary education, duration (years)",6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,...,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0
613,Angola,"Primary education, duration (years)",4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,...,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0
778,Argentina,"Primary education, duration (years)",7.0,7.0,7.0,7.0,7.0,7.0,7.0,7.0,...,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0


In [17]:
base = reset_base()
train = reset_train(cords)

t0 = time.time()

n_imputations =  12
dfs = []

df= train
df = df.reset_index()
df = df.set_index(['Indicator Name', 'Country Name'])
df = df.unstack().T

col = df.columns
idx = df.index
    
for i in range(n_imputations): 
    print(f'Imputation round {i}')
    iter_imp = IterativeImputer(random_state=i+100, sample_posterior=True, verbose=2)
    df_temp = iter_imp.fit_transform(df)
    dfs.append(df_temp)
    
df = np.mean(np.array(dfs), axis=0)
df = pd.DataFrame(df, columns=col, index=idx)
df = df.unstack().T
df = df.reset_index()
df = df.set_index(['Country Name', 'Indicator Name'])
df = df.sort_index(level=['Country Name', 'Indicator Name'])

t1 = time.time()

t = t1-t0

#df.to_csv('additional_data/imputed_sets/ice3.csv')
evaluate('mice2', df, t)

Imputation round 0
[IterativeImputer] Completing matrix with shape (4898, 165)
[IterativeImputer] Ending imputation round 1/10, elapsed time 26.15
[IterativeImputer] Ending imputation round 2/10, elapsed time 53.23
[IterativeImputer] Ending imputation round 3/10, elapsed time 81.74
[IterativeImputer] Ending imputation round 4/10, elapsed time 110.18
[IterativeImputer] Ending imputation round 5/10, elapsed time 138.93
[IterativeImputer] Ending imputation round 6/10, elapsed time 167.25
[IterativeImputer] Ending imputation round 7/10, elapsed time 195.50
[IterativeImputer] Ending imputation round 8/10, elapsed time 223.48
[IterativeImputer] Ending imputation round 9/10, elapsed time 251.85
[IterativeImputer] Ending imputation round 10/10, elapsed time 279.77
Imputation round 1
[IterativeImputer] Completing matrix with shape (4898, 165)
[IterativeImputer] Ending imputation round 1/10, elapsed time 26.21
[IterativeImputer] Ending imputation round 2/10, elapsed time 47.82
[IterativeImputer]

[IterativeImputer] Ending imputation round 8/10, elapsed time 241.11
[IterativeImputer] Ending imputation round 9/10, elapsed time 271.19
[IterativeImputer] Ending imputation round 10/10, elapsed time 301.00
Imputation round 11
[IterativeImputer] Completing matrix with shape (4898, 165)
[IterativeImputer] Ending imputation round 1/10, elapsed time 30.90
[IterativeImputer] Ending imputation round 2/10, elapsed time 61.92
[IterativeImputer] Ending imputation round 3/10, elapsed time 91.91
[IterativeImputer] Ending imputation round 4/10, elapsed time 121.00
[IterativeImputer] Ending imputation round 5/10, elapsed time 152.08
[IterativeImputer] Ending imputation round 6/10, elapsed time 182.78
[IterativeImputer] Ending imputation round 7/10, elapsed time 212.42
[IterativeImputer] Ending imputation round 8/10, elapsed time 242.89
[IterativeImputer] Ending imputation round 9/10, elapsed time 273.36
[IterativeImputer] Ending imputation round 10/10, elapsed time 303.11
Mit dieser Methode bleib

In [26]:
for n, df in enumerate(dfs):
    
    df = pd.DataFrame(df, columns=col, index=idx)
    df = df.unstack().T
    df = df.reset_index()
    df = df.set_index(['Country Name', 'Indicator Name'])
    df = df.sort_index(level=['Country Name', 'Indicator Name'])
    
    print( df)
    evaluate(n, df, None)

                                                                         1990  \
Country Name Indicator Name                                                     
Afghanistan  Access to clean fuels and technologies for cook...  2.924508e+01   
             Access to electricity (% of population)            -8.945553e+01   
             Access to electricity, rural (% of rural popula... -8.088296e+01   
             Access to electricity, urban (% of urban popula...  1.319779e+01   
             Adjusted savings: education expenditure (curren...  1.760480e+10   
...                                                                       ...   
Zimbabwe     Unemployment, youth total (% of total labor for...  1.292959e+01   
             Urban population                                    3.024147e+06   
             Urban population (% of total population)            2.898800e+01   
             Urban population growth (annual %)                  5.285272e+00   
             Vulnerable empl

Mit dieser Methode bleiben 0 NaNs bestehen.

8515 Werte wurden für die Metriken verwendet.
r2: -10.255851314770759, rmse: 1.3441964822259558
                                                                         1990  \
Country Name Indicator Name                                                     
Afghanistan  Access to clean fuels and technologies for cook...  4.995379e+01   
             Access to electricity (% of population)            -6.766353e+00   
             Access to electricity, rural (% of rural popula... -3.260051e+01   
             Access to electricity, urban (% of urban popula...  4.186040e+01   
             Adjusted savings: education expenditure (curren...  1.078368e+10   
...                                                                       ...   
Zimbabwe     Unemployment, youth total (% of total labor for...  2.370812e+01   
             Urban population                                    3.024147e+06   
             Urban population (% of total populat

Mit dieser Methode bleiben 0 NaNs bestehen.

8515 Werte wurden für die Metriken verwendet.
r2: -15.19636688271721, rmse: 1.612435620613404
                                                                         1990  \
Country Name Indicator Name                                                     
Afghanistan  Access to clean fuels and technologies for cook...  7.570907e+01   
             Access to electricity (% of population)             7.022433e+01   
             Access to electricity, rural (% of rural popula... -8.939071e+00   
             Access to electricity, urban (% of urban popula...  8.940663e+01   
             Adjusted savings: education expenditure (curren...  9.046298e+09   
...                                                                       ...   
Zimbabwe     Unemployment, youth total (% of total labor for...  2.176429e+01   
             Urban population                                    3.024147e+06   
             Urban population (% of total populatio

Mit dieser Methode bleiben 0 NaNs bestehen.

8515 Werte wurden für die Metriken verwendet.
r2: -11.299108483490118, rmse: 1.405110233567266
                                                                         1990  \
Country Name Indicator Name                                                     
Afghanistan  Access to clean fuels and technologies for cook...  5.945385e+01   
             Access to electricity (% of population)             2.643124e+01   
             Access to electricity, rural (% of rural popula...  1.928861e+00   
             Access to electricity, urban (% of urban popula...  7.645104e+01   
             Adjusted savings: education expenditure (curren... -2.680707e+10   
...                                                                       ...   
Zimbabwe     Unemployment, youth total (% of total labor for... -3.064036e+00   
             Urban population                                    3.024147e+06   
             Urban population (% of total populati

Mit dieser Methode bleiben 0 NaNs bestehen.

8515 Werte wurden für die Metriken verwendet.
r2: -11.204048787140213, rmse: 1.3996696586011013
                                                                         1990  \
Country Name Indicator Name                                                     
Afghanistan  Access to clean fuels and technologies for cook...  4.971552e+00   
             Access to electricity (% of population)            -5.105192e+01   
             Access to electricity, rural (% of rural popula... -4.150226e+01   
             Access to electricity, urban (% of urban popula...  1.590103e+00   
             Adjusted savings: education expenditure (curren... -9.773772e+09   
...                                                                       ...   
Zimbabwe     Unemployment, youth total (% of total labor for...  3.310756e+01   
             Urban population                                    3.024147e+06   
             Urban population (% of total populat

Mit dieser Methode bleiben 0 NaNs bestehen.

8515 Werte wurden für die Metriken verwendet.
r2: -12.935053089871001, rmse: 1.4956428340729728
                                                                         1990  \
Country Name Indicator Name                                                     
Afghanistan  Access to clean fuels and technologies for cook...  4.647173e+01   
             Access to electricity (% of population)             8.449316e+01   
             Access to electricity, rural (% of rural popula...  7.259987e+01   
             Access to electricity, urban (% of urban popula...  8.298106e+01   
             Adjusted savings: education expenditure (curren...  9.240181e+09   
...                                                                       ...   
Zimbabwe     Unemployment, youth total (% of total labor for... -2.006483e+01   
             Urban population                                    3.024147e+06   
             Urban population (% of total populat

Mit dieser Methode bleiben 0 NaNs bestehen.

8515 Werte wurden für die Metriken verwendet.
r2: -18.942852260900466, rmse: 1.789234306551852
                                                                         1990  \
Country Name Indicator Name                                                     
Afghanistan  Access to clean fuels and technologies for cook...  3.165441e+01   
             Access to electricity (% of population)             2.026994e+01   
             Access to electricity, rural (% of rural popula...  2.307038e+01   
             Access to electricity, urban (% of urban popula...  8.695414e+00   
             Adjusted savings: education expenditure (curren...  3.327580e+09   
...                                                                       ...   
Zimbabwe     Unemployment, youth total (% of total labor for...  1.047815e+01   
             Urban population                                    3.024147e+06   
             Urban population (% of total populati

Mit dieser Methode bleiben 0 NaNs bestehen.

8515 Werte wurden für die Metriken verwendet.
r2: -9.126864504593847, rmse: 1.2750026291878873
                                                                         1990  \
Country Name Indicator Name                                                     
Afghanistan  Access to clean fuels and technologies for cook...  5.530080e+01   
             Access to electricity (% of population)            -1.118956e+02   
             Access to electricity, rural (% of rural popula... -7.137223e+01   
             Access to electricity, urban (% of urban popula... -8.677200e+01   
             Adjusted savings: education expenditure (curren... -3.163626e+09   
...                                                                       ...   
Zimbabwe     Unemployment, youth total (% of total labor for...  2.739021e+01   
             Urban population                                    3.024147e+06   
             Urban population (% of total populati

Mit dieser Methode bleiben 0 NaNs bestehen.

8515 Werte wurden für die Metriken verwendet.
r2: -19.669599050132977, rmse: 1.821543750854179
                                                                         1990  \
Country Name Indicator Name                                                     
Afghanistan  Access to clean fuels and technologies for cook...  7.349913e+01   
             Access to electricity (% of population)             8.260771e+01   
             Access to electricity, rural (% of rural popula...  7.151801e+01   
             Access to electricity, urban (% of urban popula...  6.010149e+01   
             Adjusted savings: education expenditure (curren...  1.307567e+10   
...                                                                       ...   
Zimbabwe     Unemployment, youth total (% of total labor for... -2.384391e+01   
             Urban population                                    3.024147e+06   
             Urban population (% of total populati

Mit dieser Methode bleiben 0 NaNs bestehen.

8515 Werte wurden für die Metriken verwendet.
r2: -17.464524683277723, rmse: 1.7216411994985008
                                                                         1990  \
Country Name Indicator Name                                                     
Afghanistan  Access to clean fuels and technologies for cook...  6.669653e+01   
             Access to electricity (% of population)             4.729998e+00   
             Access to electricity, rural (% of rural popula... -2.050136e+01   
             Access to electricity, urban (% of urban popula...  8.807048e+01   
             Adjusted savings: education expenditure (curren...  1.514534e+10   
...                                                                       ...   
Zimbabwe     Unemployment, youth total (% of total labor for... -3.287987e+01   
             Urban population                                    3.024147e+06   
             Urban population (% of total populat

Mit dieser Methode bleiben 0 NaNs bestehen.

8515 Werte wurden für die Metriken verwendet.
r2: -15.07855538253611, rmse: 1.6065605324254386
                                                                         1990  \
Country Name Indicator Name                                                     
Afghanistan  Access to clean fuels and technologies for cook...  6.206874e+01   
             Access to electricity (% of population)             6.644400e+00   
             Access to electricity, rural (% of rural popula...  4.131132e+01   
             Access to electricity, urban (% of urban popula...  1.189029e+01   
             Adjusted savings: education expenditure (curren...  4.947963e+09   
...                                                                       ...   
Zimbabwe     Unemployment, youth total (% of total labor for...  7.313374e+00   
             Urban population                                    3.024147e+06   
             Urban population (% of total populati

Mit dieser Methode bleiben 0 NaNs bestehen.

8515 Werte wurden für die Metriken verwendet.
r2: -11.205644613254337, rmse: 1.3997611674281143
                                                                         1990  \
Country Name Indicator Name                                                     
Afghanistan  Access to clean fuels and technologies for cook...  3.454872e+01   
             Access to electricity (% of population)            -6.064774e+01   
             Access to electricity, rural (% of rural popula... -2.817413e+01   
             Access to electricity, urban (% of urban popula...  2.338104e+01   
             Adjusted savings: education expenditure (curren...  2.245518e+10   
...                                                                       ...   
Zimbabwe     Unemployment, youth total (% of total labor for... -3.500794e+00   
             Urban population                                    3.024147e+06   
             Urban population (% of total populat

Mit dieser Methode bleiben 0 NaNs bestehen.

8515 Werte wurden für die Metriken verwendet.
r2: -15.27298706446307, rmse: 1.616245096591005


In [24]:
results

[['mice2', 0.1338974995721145, 0.3728706732644282, 0, 5156.868764162064],
 ['mice2', -1.148990984652209, 0.5873417562280595, 0, 5437.622924089432],
 [0, -10.255851314770759, 1.3441964822259558, 0, None],
 [0, -10.255851314770759, 1.3441964822259558, 0, None],
 [1, -15.19636688271721, 1.612435620613404, 0, None],
 [2, -11.299108483490118, 1.405110233567266, 0, None],
 [3, -11.204048787140213, 1.3996696586011013, 0, None],
 [4, -12.935053089871001, 1.4956428340729728, 0, None],
 [5, -18.942852260900466, 1.789234306551852, 0, None],
 [6, -9.126864504593847, 1.2750026291878873, 0, None],
 [7, -19.669599050132977, 1.821543750854179, 0, None],
 [8, -17.464524683277723, 1.7216411994985008, 0, None],
 [9, -15.07855538253611, 1.6065605324254386, 0, None],
 [10, -11.205644613254337, 1.3997611674281143, 0, None],
 [11, -15.27298706446307, 1.616245096591005, 0, None]]

In [14]:
df = df.reset_index()
sample = 'Primary education, duration (years)'

df.loc[df['Indicator Name']==sample].head(5)

Unnamed: 0,Country Name,Indicator Name,1990,1991,1992,1993,1994,1995,1996,1997,...,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020
118,Afghanistan,"Primary education, duration (years)",6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,...,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0
283,Albania,"Primary education, duration (years)",4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,...,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.844722
448,Algeria,"Primary education, duration (years)",6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,...,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0
613,Angola,"Primary education, duration (years)",4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,...,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0
778,Argentina,"Primary education, duration (years)",7.0,7.0,7.0,7.0,7.0,7.0,7.0,7.0,...,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0


In [19]:
train

Unnamed: 0_level_0,Unnamed: 1_level_0,1990,1991,1992,1993,1994,1995,1996,1997,1998,1999,...,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020
Country Name,Indicator Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
Afghanistan,Access to clean fuels and technologies for cooking (% of population),,,,,,,,,,,...,21.5000,23.0000,24.8000,26.7000,28.6000,30.3000,32.2000,34.1000,36.0000,
Afghanistan,Access to electricity (% of population),,,,,,,,,,,...,43.2220,69.1000,68.9829,89.5000,71.5000,97.7000,97.7000,98.7156,97.7000,
Afghanistan,"Access to electricity, rural (% of rural population)",,,,,,,,,,,...,29.5729,60.8492,61.3158,86.5005,64.5734,,97.0920,98.3096,96.9022,
Afghanistan,"Access to electricity, urban (% of urban population)",,,,,,,,,,,...,86.5678,95.0000,92.7734,98.7000,,99.5000,99.5000,99.9021,100.0000,
Afghanistan,Adjusted savings: education expenditure (current US$),,,,,,,,,,,...,509167006.6272,442309235.9284,577464375.5885,611295312.2687,520962469.5818,471950680.4330,495836072.9351,480958390.7067,508272010.1646,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Zimbabwe,"Unemployment, youth total (% of total labor force ages 15-24) (modeled ILO estimate)",,10.0940,10.2330,10.2350,10.2380,11.6680,12.9900,14.4190,13.2110,12.0350,...,8.4850,7.8940,7.3330,6.7070,6.7610,6.8250,6.8720,6.9260,7.0050,7.6330
Zimbabwe,Urban population,3024147.0000,3176318.0000,3324547.0000,3432105.0000,3528870.0000,3620850.0000,3708423.0000,3791270.0000,3869370.0000,3942523.0000,...,4257061.0000,4306228.0000,4359432.0000,4416224.0000,4473872.0000,4531238.0000,4589452.0000,4650597.0000,4717307.0000,4792105.0000
Zimbabwe,Urban population (% of total population),28.9880,29.7380,30.4990,30.9400,31.3350,31.7320,32.1320,32.5340,32.9390,33.3470,...,33.0150,32.8340,32.6540,32.5040,32.3850,32.2960,32.2370,32.2090,32.2100,32.2420
Zimbabwe,Urban population growth (annual %),5.2853,4.9094,4.5611,3.1840,2.7804,2.5731,2.3898,2.2094,2.0391,1.8729,...,0.9897,1.1483,1.2279,1.2943,1.2969,1.2741,1.2765,1.3235,1.4242,1.5732


In [18]:
pd.set_option('display.float_format', lambda x: '%.4f' % x)
df

Unnamed: 0,Country Name,Indicator Name,1990,1991,1992,1993,1994,1995,1996,1997,...,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020
0,Afghanistan,Access to clean fuels and technologies for coo...,29.4575,22.0750,23.5862,21.9614,10.3508,20.7365,16.9366,18.7364,...,21.5000,23.0000,24.8000,26.7000,28.6000,30.3000,32.2000,34.1000,36.0000,59.5495
1,Afghanistan,Access to electricity (% of population),11.8792,35.2282,-2.3580,-3.2978,8.1572,9.0876,0.9951,3.3190,...,43.2220,69.1000,68.9829,89.5000,71.5000,97.7000,97.7000,98.7156,97.7000,63.3983
2,Afghanistan,"Access to electricity, rural (% of rural popul...",-2.4899,17.2622,-33.6331,-15.1792,12.4947,9.1568,6.3015,-27.7207,...,29.5729,60.8492,61.3158,86.5005,64.5734,80.1732,97.0920,98.3096,96.9022,53.6242
3,Afghanistan,"Access to electricity, urban (% of urban popul...",52.9144,53.8749,22.5756,27.6089,30.8971,39.6573,38.2673,47.4518,...,86.5678,95.0000,92.7734,98.7000,104.4756,99.5000,99.5000,99.9021,100.0000,99.0683
4,Afghanistan,Adjusted savings: education expenditure (curre...,8218881142.2973,11941693048.8438,6018443786.5547,10591592596.6165,5760399048.2563,5725885166.7034,14446148649.6670,8182677094.4393,...,509167006.6272,442309235.9284,577464375.5885,611295312.2687,520962469.5818,471950680.4330,495836072.9351,480958390.7067,508272010.1646,-1199140666.8109
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26065,Zimbabwe,"Unemployment, youth total (% of total labor fo...",2.3868,10.0940,10.2330,10.2350,10.2380,11.6680,12.9900,14.4190,...,8.4850,7.8940,7.3330,6.7070,6.7610,6.8250,6.8720,6.9260,7.0050,7.6330
26066,Zimbabwe,Urban population,3024147.0000,3176318.0000,3324547.0000,3432105.0000,3528870.0000,3620850.0000,3708423.0000,3791270.0000,...,4257061.0000,4306228.0000,4359432.0000,4416224.0000,4473872.0000,4531238.0000,4589452.0000,4650597.0000,4717307.0000,4792105.0000
26067,Zimbabwe,Urban population (% of total population),28.9880,29.7380,30.4990,30.9400,31.3350,31.7320,32.1320,32.5340,...,33.0150,32.8340,32.6540,32.5040,32.3850,32.2960,32.2370,32.2090,32.2100,32.2420
26068,Zimbabwe,Urban population growth (annual %),5.2853,4.9094,4.5611,3.1840,2.7804,2.5731,2.3898,2.2094,...,0.9897,1.1483,1.2279,1.2943,1.2969,1.2741,1.2765,1.3235,1.4242,1.5732


In [15]:
results = pd.DataFrame(results, columns=['Methode', 'r2', 'RSME', 'Remaining NaNs', 'Time'])
results = results.set_index('Methode')

In [16]:
results

Unnamed: 0_level_0,r2,RSME,Remaining NaNs,Time
Methode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Iterative Imputer 1,0.960905,0.07922,0,21.638224
Iterative Imputer 3,0.662632,0.232716,0,158.91465
mice2,0.00698,0.399257,0,5666.134484
