In [None]:
# Specific Libraries
import datawig

# Basic Libraries
import numpy as np
import pandas as pd
import seaborn as sb
import matplotlib.pyplot as plt # we only need pyplot
sb.set() # switch plotting to Seaborn's default style for all graphics drawn in this notebook

In [None]:
# Read the raw workbook into a pandas DataFrame.
df = pd.read_excel('16raw.xlsx')

# Partition the rows on the `Missing` flag column:
# rows flagged 0 are used to fit the imputers, rows flagged 1 are the ones to predict for.
df_train = df[df['Missing'].eq(0)]
df_test = df[df['Missing'].eq(1)]

In [None]:
# Column names taken from the World Happiness Report data; used as the
# imputers' input columns in the cells below.
predictors = [
    "Life Ladder",
    "Log GDP per capita",
    "Social Support",
    "Healthy life expectancy at birth",
    "Freedom to make life choices",
    "Generosity",
    "Perceptions of corruption",
    "Happiness",
]

In [None]:
# NumPy array holding the predictor column names.
array = np.asarray(predictors)

# 1. `Log GDP per capita` Imputation

In [None]:
# Impute `Log GDP per capita` with a datawig SimpleImputer fitted on df_train,
# then compare the predictions for df_test against the recorded values.
target_column = 'Log GDP per capita'
imputed_column = target_column + '_imputed' # prediction column read back from `imputed` below

# Leave the target out of the inputs: `predictors` lists the target itself, and
# feeding the model the very value it has to predict makes the reported error meaningless.
input_columns = [col for col in predictors if col != target_column]

#Initialize a SimpleImputer model
imputer = datawig.SimpleImputer(
    input_columns=input_columns, # column(s) containing information about the column we want to impute
    output_column=target_column, # the column we'd like to impute values for
    output_path = 'imputer_model' # stores model data and metrics
    # NOTE(review): every imputation cell uses this same output_path - confirm overwriting is intended
    )

#Fit an imputer model on the train data
imputer.fit(train_df=df_train, num_epochs=50)

#Impute missing values and return original dataframe with predictions
imputed = imputer.predict(df_test)

outputdf = imputed[['Country', target_column, imputed_column]]

# Absolute percentage error between the recorded and the imputed value.
# Dividing by the magnitude keeps the error non-negative even for negative recorded values.
y_errs = 100 * (imputed[target_column] - imputed[imputed_column]).abs() / imputed[target_column].abs()
# Turn the error Series into a one-column frame named "Error" (shares the index of outputdf)
y_errs = y_errs.to_frame("Error")
# Append the error column to the output frame
outputdf_final = pd.concat([outputdf, y_errs], axis = 1)

#Outputs to excel with imputed values (the path must end in '.xlsx')
# NOTE(review): absolute, machine-specific path - consider a path relative to the notebook
outputdf_final.to_excel(r'C:\Users\SAMARITAN\Documents\Jupyter_Thunder\Final_Project\1. Log GDP per capita_imputed.xlsx', index = None, header=True)

#Print output: must be the last expression of the cell for the notebook to render it
outputdf_final

# 2. `Social Support` Imputation
Not required, because all data is available.

# 3. Healthy life expectancy at birth Imputation

In [None]:
# Impute `Healthy life expectancy at birth` with a datawig SimpleImputer fitted on df_train,
# then compare the predictions for df_test against the recorded values.
target_column = 'Healthy life expectancy at birth'
imputed_column = target_column + '_imputed' # prediction column read back from `imputed` below

# Leave the target out of the inputs: `predictors` lists the target itself, and
# feeding the model the very value it has to predict makes the reported error meaningless.
input_columns = [col for col in predictors if col != target_column]

#Initialize a SimpleImputer model
imputer = datawig.SimpleImputer(
    input_columns=input_columns, # column(s) containing information about the column we want to impute
    output_column=target_column, # the column we'd like to impute values for
    output_path = 'imputer_model' # stores model data and metrics
    # NOTE(review): every imputation cell uses this same output_path - confirm overwriting is intended
    )

#Fit an imputer model on the train data
imputer.fit(train_df=df_train, num_epochs=50)

#Impute missing values and return original dataframe with predictions
imputed = imputer.predict(df_test)

outputdf = imputed[['Country', target_column, imputed_column]]

# Absolute percentage error between the recorded and the imputed value.
# Dividing by the magnitude keeps the error non-negative even for negative recorded values.
y_errs = 100 * (imputed[target_column] - imputed[imputed_column]).abs() / imputed[target_column].abs()
# Turn the error Series into a one-column frame named "Error" (shares the index of outputdf)
y_errs = y_errs.to_frame("Error")
# Append the error column to the output frame
outputdf_final = pd.concat([outputdf, y_errs], axis = 1)

#Outputs to excel with imputed values (the path must end in '.xlsx')
# NOTE(review): absolute, machine-specific path - consider a path relative to the notebook
outputdf_final.to_excel(r'C:\Users\SAMARITAN\Documents\Jupyter_Thunder\Final_Project\3. Healthy life expectancy at birth_imputed.xlsx', index = None, header=True)

#Print output: must be the last expression of the cell for the notebook to render it
outputdf_final

# 4. `Freedom to make life choices` Imputation

In [None]:
# Impute `Freedom to make life choices` with a datawig SimpleImputer fitted on df_train,
# then compare the predictions for df_test against the recorded values.
target_column = 'Freedom to make life choices'
imputed_column = target_column + '_imputed' # prediction column read back from `imputed` below

# Leave the target out of the inputs: `predictors` lists the target itself, and
# feeding the model the very value it has to predict makes the reported error meaningless.
input_columns = [col for col in predictors if col != target_column]

#Initialize a SimpleImputer model
imputer = datawig.SimpleImputer(
    input_columns=input_columns, # column(s) containing information about the column we want to impute
    output_column=target_column, # the column we'd like to impute values for
    output_path = 'imputer_model' # stores model data and metrics
    # NOTE(review): every imputation cell uses this same output_path - confirm overwriting is intended
    )

#Fit an imputer model on the train data
imputer.fit(train_df=df_train, num_epochs=50)

#Impute missing values and return original dataframe with predictions
imputed = imputer.predict(df_test)

outputdf = imputed[['Country', target_column, imputed_column]]

# Absolute percentage error between the recorded and the imputed value.
# Dividing by the magnitude keeps the error non-negative even for negative recorded values.
y_errs = 100 * (imputed[target_column] - imputed[imputed_column]).abs() / imputed[target_column].abs()
# Turn the error Series into a one-column frame named "Error" (shares the index of outputdf)
y_errs = y_errs.to_frame("Error")
# Append the error column to the output frame
outputdf_final = pd.concat([outputdf, y_errs], axis = 1)

#Outputs to excel with imputed values (the path must end in '.xlsx')
# NOTE(review): absolute, machine-specific path - consider a path relative to the notebook
outputdf_final.to_excel(r'C:\Users\SAMARITAN\Documents\Jupyter_Thunder\Final_Project\4. Freedom to make life choices_imputed.xlsx', index = None, header=True)

#Print output: must be the last expression of the cell for the notebook to render it
outputdf_final

# 5. 'Generosity' Imputation

In [None]:
# Impute `Generosity` with a datawig SimpleImputer fitted on df_train,
# then compare the predictions for df_test against the recorded values.
target_column = 'Generosity'
imputed_column = target_column + '_imputed' # prediction column read back from `imputed` below

# Leave the target out of the inputs: `predictors` lists the target itself, and
# feeding the model the very value it has to predict makes the reported error meaningless.
input_columns = [col for col in predictors if col != target_column]

#Initialize a SimpleImputer model
imputer = datawig.SimpleImputer(
    input_columns=input_columns, # column(s) containing information about the column we want to impute
    output_column=target_column, # the column we'd like to impute values for
    output_path = 'imputer_model' # stores model data and metrics
    # NOTE(review): every imputation cell uses this same output_path - confirm overwriting is intended
    )

#Fit an imputer model on the train data
imputer.fit(train_df=df_train, num_epochs=50)

#Impute missing values and return original dataframe with predictions
imputed = imputer.predict(df_test)

outputdf = imputed[['Country', target_column, imputed_column]]

# Absolute percentage error between the recorded and the imputed value.
# Dividing by the magnitude (not the signed value) keeps the error non-negative
# when the recorded value is negative.
y_errs = 100 * (imputed[target_column] - imputed[imputed_column]).abs() / imputed[target_column].abs()
# Turn the error Series into a one-column frame named "Error" (shares the index of outputdf)
y_errs = y_errs.to_frame("Error")
# Append the error column to the output frame
outputdf_final = pd.concat([outputdf, y_errs], axis = 1)

#Outputs to excel with imputed values (the path must end in '.xlsx')
# NOTE(review): absolute, machine-specific path - consider a path relative to the notebook
outputdf_final.to_excel(r'C:\Users\SAMARITAN\Documents\Jupyter_Thunder\Final_Project\5. Generosity_imputed.xlsx', index = None, header=True)

#Print output: must be the last expression of the cell for the notebook to render it
outputdf_final

# 6. 'Perceptions of corruption' Imputation

In [None]:
# Impute `Perceptions of corruption` with a datawig SimpleImputer fitted on df_train,
# then compare the predictions for df_test against the recorded values.
target_column = 'Perceptions of corruption'
imputed_column = target_column + '_imputed' # prediction column read back from `imputed` below

# Leave the target out of the inputs: `predictors` lists the target itself, and
# feeding the model the very value it has to predict makes the reported error meaningless.
input_columns = [col for col in predictors if col != target_column]

#Initialize a SimpleImputer model
imputer = datawig.SimpleImputer(
    input_columns=input_columns, # column(s) containing information about the column we want to impute
    output_column=target_column, # the column we'd like to impute values for
    output_path = 'imputer_model' # stores model data and metrics
    # NOTE(review): every imputation cell uses this same output_path - confirm overwriting is intended
    )

#Fit an imputer model on the train data
imputer.fit(train_df=df_train, num_epochs=50)

#Impute missing values and return original dataframe with predictions
imputed = imputer.predict(df_test)

outputdf = imputed[['Country', target_column, imputed_column]]

# Absolute percentage error between the recorded and the imputed value.
# Dividing by the magnitude keeps the error non-negative even for negative recorded values.
y_errs = 100 * (imputed[target_column] - imputed[imputed_column]).abs() / imputed[target_column].abs()
# Turn the error Series into a one-column frame named "Error" (shares the index of outputdf)
y_errs = y_errs.to_frame("Error")
# Append the error column to the output frame
outputdf_final = pd.concat([outputdf, y_errs], axis = 1)

#Outputs to excel with imputed values (the path must end in '.xlsx')
# NOTE(review): absolute, machine-specific path - consider a path relative to the notebook
outputdf_final.to_excel(r'C:\Users\SAMARITAN\Documents\Jupyter_Thunder\Final_Project\6. Perceptions of corruption_imputed.xlsx', index = None, header=True)

#Print output: must be the last expression of the cell for the notebook to render it
outputdf_final

# 7. 'Happiness' Imputation
For `Hong Kong`.

In [None]:
# Impute `Happiness` with a datawig SimpleImputer fitted on df_train,
# then compare the predictions for df_test against the recorded values.
target_column = 'Happiness'
imputed_column = target_column + '_imputed' # prediction column read back from `imputed` below

# Leave the target out of the inputs: `predictors` lists the target itself, and
# feeding the model the very value it has to predict makes the reported error meaningless.
input_columns = [col for col in predictors if col != target_column]

#Initialize a SimpleImputer model
imputer = datawig.SimpleImputer(
    input_columns=input_columns, # column(s) containing information about the column we want to impute
    output_column=target_column, # the column we'd like to impute values for
    output_path = 'imputer_model' # stores model data and metrics
    # NOTE(review): every imputation cell uses this same output_path - confirm overwriting is intended
    )

#Fit an imputer model on the train data
imputer.fit(train_df=df_train, num_epochs=50)

#Impute missing values and return original dataframe with predictions
imputed = imputer.predict(df_test)

outputdf = imputed[['Country', target_column, imputed_column]]

# Absolute percentage error between the recorded and the imputed value.
# Dividing by the magnitude keeps the error non-negative even for negative recorded values.
y_errs = 100 * (imputed[target_column] - imputed[imputed_column]).abs() / imputed[target_column].abs()
# Turn the error Series into a one-column frame named "Error" (shares the index of outputdf)
y_errs = y_errs.to_frame("Error")
# Append the error column to the output frame
outputdf_final = pd.concat([outputdf, y_errs], axis = 1)

#Outputs to excel with imputed values (the path must end in '.xlsx')
# NOTE(review): absolute, machine-specific path - consider a path relative to the notebook
outputdf_final.to_excel(r'C:\Users\SAMARITAN\Documents\Jupyter_Thunder\Final_Project\7. Happiness_imputed.xlsx', index = None, header=True)

#Print output: must be the last expression of the cell for the notebook to render it
outputdf_final