# Imputation Research Project <img src="https://chroniclesofai.com/content/images/2021/05/file-20201210-18-elk4m.jpg" alt="Alt text image not displaying" width="360" align="right" />
## Notebook 3.1: Autoencoder Performance Evaluation

**Author:** Chike Odenigbo

**Date:** November 25th, 2022

**Notebook Structure:**

* 1.0 Preprocessing

* **1.1 Exploratory Data Analysis**

* 1.2 Masking

* 2.* Models


**Target nutrients:** water, sugars, lutein_zeaxanthin, alcohol

In [113]:
import pandas as pd
import os
from src.visualization.visualize import histogram, box_plot, bar_plot
from pathlib import Path
from notebook_config import ROOT_DIR  # setup.py file changed the root of the project so it is set in the config file

ROOT_DIR = ROOT_DIR.as_posix()  # convert root path to windows readable path (i.e. change backslash to forward slash)
from joblib import load
import tensorflow as tf
from keras import backend as K
import keras
import numpy as np
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
import matplotlib.pyplot as plt
import pickle
from sklearn.metrics import mean_squared_error

In [76]:
# Notebook identifier; output_prefix is an alias of the same string.
notebook_nm = output_prefix = "3.1-autoencoder-perf-eval"

# Project folders, all rooted at ROOT_DIR (each keeps its trailing slash).
scaler_dir = f'{ROOT_DIR}/models/scalers/'
model_dir = f"{ROOT_DIR}/models/autoencoders/"
fig_dir = f"{ROOT_DIR}/reports/figures/"

In [77]:
# Show every column when a wide frame is displayed.
pd.set_option('display.max_columns', None)

# Ground Truth Included
water_df = pd.read_csv(f'{ROOT_DIR}/data/processed/water.csv')
sugars_df = pd.read_csv(f'{ROOT_DIR}/data/processed/sugars.csv')
lutein_df = pd.read_csv(f'{ROOT_DIR}/data/processed/lutein.csv')

# Scaled Data without Ground Truth to prevent Data Leakage, rows are included with NaN
# (each file is read exactly once; the first CSV column becomes the index)
water_mcar_scaled_df = pd.read_csv(f'{ROOT_DIR}/data/processed/water_mcar_scaled.csv', index_col = 0)
water_mar_scaled_df = pd.read_csv(f'{ROOT_DIR}/data/processed/water_mar_scaled.csv', index_col = 0)

lutein_mar_scaled_df = pd.read_csv(f'{ROOT_DIR}/data/processed/lutein_mar_scaled.csv', index_col = 0)
# NOTE(review): file name follows the `<nutrient>_<mechanism>_scaled.csv` scheme used by the
# sibling files; confirm the file on disk is named `lutein_mcar_scaled.csv`.
lutein_mcar_scaled_df = pd.read_csv(f'{ROOT_DIR}/data/processed/lutein_mcar_scaled.csv', index_col = 0)

sugars_mar_scaled_df = pd.read_csv(f'{ROOT_DIR}/data/processed/sugars_mar_scaled.csv', index_col = 0)
sugars_mcar_scaled_df = pd.read_csv(f'{ROOT_DIR}/data/processed/sugars_mcar_scaled.csv', index_col = 0)

# Scalers to return back to origin scale for model evaluation
scaler_lutein_mar = load(f'{scaler_dir}/scaler_lutein_mar.joblib')
scaler_lutein_mcar = load(f'{scaler_dir}/scaler_lutein_mcar.joblib')

scaler_sugars_mar = load(f'{scaler_dir}/scaler_sugars_mar.joblib')
scaler_sugars_mcar = load(f'{scaler_dir}/scaler_sugars_mcar.joblib')

scaler_water_mcar = load(f'{scaler_dir}/scaler_water_mcar.joblib')
scaler_water_mar = load(f'{scaler_dir}/scaler_water_mar.joblib')

In [89]:
def scale_ground_truth(scaler,raw_df,target_col,non_target_na_col):
    """
    Scale the ground-truth rows whose `target_col` entry is null.

    Rows of `raw_df` with a null `target_col` are kept, both `target_col` and
    `non_target_na_col` are dropped, and 'name' becomes the index. The remaining
    columns are passed through `scaler.transform` and returned as a DataFrame
    carrying the same index and column labels.
    """
    masked_rows = raw_df.loc[raw_df[target_col].isna()]
    features = masked_rows.drop(columns = [target_col, non_target_na_col]).set_index('name')
    scaled_values = scaler.transform(features)
    return pd.DataFrame(scaled_values, index = features.index, columns = features.columns)

def train_test_split_scaled(scaled_df,drop_cols = ['name','dataset_type']):
    """
    Split a frame into its training and validation rows.

    Rows are selected by the value of the `dataset_type` column ('training' or
    'validation'); `drop_cols` are removed from both parts.

    Returns a `(train_df, val_df)` tuple.
    """
    def _rows_for(split_label):
        # Keep only the rows of one split, without the bookkeeping columns.
        return scaled_df.loc[scaled_df['dataset_type'] == split_label].drop(columns = drop_cols)

    return _rows_for('training'), _rows_for('validation')

def load_autoencoder(model_name,path = model_dir):
    """
    Load a saved Keras model and its pickled training history.

    Parameters
    ----------
    model_name : str
        Base file name; `{model_name}.h5` and `{model_name}_history.pkl` are read.
    path : str
        Directory containing both files. Defaults to the notebook's `model_dir`.

    Returns
    -------
    tuple
        `(model, history)`: the object returned by `keras.models.load_model`
        and the object unpickled from the history file.
    """
    # Read the model from `path` (not the global model_dir) so that a caller-supplied
    # directory is honoured for both files.
    model = keras.models.load_model(f'{path}/{model_name}.h5')
    # NOTE: pickle.load can execute arbitrary code; only load history files you trust.
    # The context manager guarantees the file handle is closed.
    with open(f'{path}/{model_name}_history.pkl','rb') as file:
        history = pickle.load(file)
    return model,history

def impute_validation_for_prediction(train_data,val_data,target_col,constant_impute_val = 0,random_state = None):
    """
    Fill the target column of the validation data so it can be fed to a model.

    can't pass null value to the tensorflow predict method so we impute with constant and random
    and take the aggregate performance of the two

    Parameters
    ----------
    train_data : pd.DataFrame
        Training rows; only `train_data[target_col]` is read, to get the min/max
        bounds for the random draw.
    val_data : pd.DataFrame
        Validation rows. Not modified; both outputs are copies.
    target_col : str
        Column to overwrite in the copies.
    constant_impute_val : scalar
        Value written to every row of the constant-imputed copy.
    random_state : int or None
        Seed for the random draw. With None (the default) the global `np.random`
        state is used, exactly as before; pass an int for reproducible output.

    Returns
    -------
    tuple
        `(val_data_random, val_data_constant)`.
    """
    max_target = train_data[target_col].max()
    min_target = train_data[target_col].min()

    # A dedicated generator is only created when a seed is requested, so existing
    # callers that rely on np.random.seed(...) keep their behaviour.
    rng = np.random if random_state is None else np.random.default_rng(random_state)

    val_data_constant = val_data.copy()
    val_data_random = val_data.copy()
    val_data_random[target_col] = rng.uniform(min_target, max_target, size=len(val_data))
    val_data_constant[target_col] = constant_impute_val
    return val_data_random, val_data_constant

def predict(model,test_data):
    """
    Run `model.predict` on `test_data` and return the result as a DataFrame
    labelled with the index and columns of `test_data`.
    """
    raw_output = model.predict(test_data)
    return pd.DataFrame(raw_output, index = test_data.index, columns = test_data.columns)

def inverse_scale_predictions(scaler,predictions):
    """
    Apply `scaler.inverse_transform` to `predictions` and return the result as a
    DataFrame that keeps the index and column labels of `predictions`.
    """
    restored_values = scaler.inverse_transform(predictions)
    return pd.DataFrame(restored_values, index = predictions.index, columns = predictions.columns)

def mse_all_predictions(comparisons = None):
    """
    Build a table of per-column mean squared errors for several prediction sets.

    Parameters
    ----------
    comparisons : dict or None
        Maps an output column label (e.g. 'mse_mcar_rand_scaled') to a
        `(ground_truth_df, prediction_df)` pair. The two frames of a pair must
        have the same shape; they are compared position by position (labels are
        not aligned), matching what
        `mean_squared_error(..., multioutput='raw_values')` computes.

    Returns
    -------
    pd.DataFrame
        A 'columns' column holding the column labels of the first ground-truth
        frame, followed by one column of per-column MSE values for each entry of
        `comparisons`. An empty frame is returned when nothing is passed.

    Raises
    ------
    ValueError
        If the two frames of a pair differ in shape.

    Example
    -------
    mse_all_predictions({
        'mse_mcar_rand_scaled': (water_mcar_ground_truth_scaled_df, water_mcar_rand_pred),
        'mse_mar_rand_scaled': (water_mar_ground_truth_scaled_df, water_mar_rand_pred),
    })
    """
    table = {}
    for label, (truth, pred) in (comparisons or {}).items():
        truth_values = np.asarray(truth, dtype = float)
        pred_values = np.asarray(pred, dtype = float)
        if truth_values.shape != pred_values.shape:
            raise ValueError(
                f"{label}: ground truth shape {truth_values.shape} does not match "
                f"prediction shape {pred_values.shape}"
            )
        if 'columns' not in table:
            # Row labels of the result come from the first ground-truth frame.
            table['columns'] = list(truth.columns)
        # Mean over rows gives one squared-error value per column.
        table[label] = ((truth_values - pred_values) ** 2).mean(axis = 0)
    return pd.DataFrame(table)

In [111]:
# Unscaled ground truth: rows whose masked column is null, with both mask
# columns dropped and 'name' used as the index.
water_mcar_ground_truth_unscaled_df = (
    water_df.loc[water_df['water_mcar'].isna()]
    .drop(columns = ['water_mcar', 'water_mar'])
    .set_index('name')
)
water_mar_ground_truth_unscaled_df = (
    water_df.loc[water_df['water_mar'].isna()]
    .drop(columns = ['water_mcar', 'water_mar'])
    .set_index('name')
)

lutein_mcar_ground_truth_unscaled_df = (
    lutein_df.loc[lutein_df['lutein_zeaxanthin_mcar'].isna()]
    .drop(columns = ['lutein_zeaxanthin_mcar', 'lutein_zeaxanthin_mar'])
    .set_index('name')
)
lutein_mar_ground_truth_unscaled_df = (
    lutein_df.loc[lutein_df['lutein_zeaxanthin_mar'].isna()]
    .drop(columns = ['lutein_zeaxanthin_mcar', 'lutein_zeaxanthin_mar'])
    .set_index('name')
)

sugars_mcar_ground_truth_unscaled_df = (
    sugars_df.loc[sugars_df['sugars_mcar'].isna()]
    .drop(columns = ['sugars_mcar', 'sugars_mar'])
    .set_index('name')
)
sugars_mar_ground_truth_unscaled_df = (
    sugars_df.loc[sugars_df['sugars_mar'].isna()]
    .drop(columns = ['sugars_mcar', 'sugars_mar'])
    .set_index('name')
)

In [79]:
# For every dataset: training rows (without 'name'/'dataset_type') supply the range
# for the random fill; validation rows (indexed by 'name') are the frames that get
# their target column filled. Call order is kept because each call draws random numbers.
water_mar_val_rand, water_mar_val_const = impute_validation_for_prediction(
    water_mar_scaled_df.loc[water_mar_scaled_df['dataset_type'] == 'training'].drop(columns = ['name', 'dataset_type']),
    water_mar_scaled_df.loc[water_mar_scaled_df['dataset_type'] == 'validation'].drop(columns = ['dataset_type']).set_index('name'),
    target_col = 'water_mar',
    constant_impute_val = 0,
)
water_mcar_val_rand, water_mcar_val_const = impute_validation_for_prediction(
    water_mcar_scaled_df.loc[water_mcar_scaled_df['dataset_type'] == 'training'].drop(columns = ['name', 'dataset_type']),
    water_mcar_scaled_df.loc[water_mcar_scaled_df['dataset_type'] == 'validation'].drop(columns = ['dataset_type']).set_index('name'),
    target_col = 'water_mcar',
    constant_impute_val = 0,
)

sugars_mar_val_rand, sugars_mar_val_const = impute_validation_for_prediction(
    sugars_mar_scaled_df.loc[sugars_mar_scaled_df['dataset_type'] == 'training'].drop(columns = ['name', 'dataset_type']),
    sugars_mar_scaled_df.loc[sugars_mar_scaled_df['dataset_type'] == 'validation'].drop(columns = ['dataset_type']).set_index('name'),
    target_col = 'sugars_mar',
    constant_impute_val = 0,
)
sugars_mcar_val_rand, sugars_mcar_val_const = impute_validation_for_prediction(
    sugars_mcar_scaled_df.loc[sugars_mcar_scaled_df['dataset_type'] == 'training'].drop(columns = ['name', 'dataset_type']),
    sugars_mcar_scaled_df.loc[sugars_mcar_scaled_df['dataset_type'] == 'validation'].drop(columns = ['dataset_type']).set_index('name'),
    target_col = 'sugars_mcar',
    constant_impute_val = 0,
)

lutein_mar_val_rand, lutein_mar_val_const = impute_validation_for_prediction(
    lutein_mar_scaled_df.loc[lutein_mar_scaled_df['dataset_type'] == 'training'].drop(columns = ['name', 'dataset_type']),
    lutein_mar_scaled_df.loc[lutein_mar_scaled_df['dataset_type'] == 'validation'].drop(columns = ['dataset_type']).set_index('name'),
    target_col = 'lutein_zeaxanthin_mar',
    constant_impute_val = 0,
)
lutein_mcar_val_rand, lutein_mcar_val_const = impute_validation_for_prediction(
    lutein_mcar_scaled_df.loc[lutein_mcar_scaled_df['dataset_type'] == 'training'].drop(columns = ['name', 'dataset_type']),
    lutein_mcar_scaled_df.loc[lutein_mcar_scaled_df['dataset_type'] == 'validation'].drop(columns = ['dataset_type']).set_index('name'),
    target_col = 'lutein_zeaxanthin_mcar',
    constant_impute_val = 0,
)

In [80]:
# Load each saved autoencoder together with its training history from model_dir.
lutein_mcar_model, lutein_mcar_history = load_autoencoder('lutein_mcar_model', model_dir)
lutein_mar_model, lutein_mar_history = load_autoencoder('lutein_mar_model', model_dir)

water_mcar_model, water_mcar_history = load_autoencoder('water_mcar_model', model_dir)
water_mar_model, water_mar_history = load_autoencoder('water_mar_model', model_dir)

sugars_mcar_model, sugars_mcar_history = load_autoencoder('sugars_mcar_model', model_dir)
sugars_mar_model, sugars_mar_history = load_autoencoder('sugars_mar_model', model_dir)

Keras model archive loading:
File Name                                             Modified             Size
config.json                                    2022-11-28 17:04:08         1884
metadata.json                                  2022-11-28 17:04:08           64
variables.h5                                   2022-11-28 17:04:08        36504
Keras weights file (<HDF5 file "variables.h5" (mode r)>) loading:
...layers\dense
......vars
.........0
.........1
...layers\dense_1
......vars
.........0
.........1
...layers\input_layer
......vars
...metrics\mean
......vars
.........0
.........1
...vars
Keras model archive loading:
File Name                                             Modified             Size
config.json                                    2022-11-28 17:04:08         1878
metadata.json                                  2022-11-28 17:04:08           64
variables.h5                                   2022-11-28 17:04:08        36504
Keras weights file (<HDF5 file "variables.h5" 

In [81]:
# Each model predicts twice: first on the randomly imputed validation frame,
# then on the constant-imputed one.
water_mar_rand_pred, water_mar_const_pred = (
    predict(water_mar_model, val_frame) for val_frame in (water_mar_val_rand, water_mar_val_const)
)
water_mcar_rand_pred, water_mcar_const_pred = (
    predict(water_mcar_model, val_frame) for val_frame in (water_mcar_val_rand, water_mcar_val_const)
)
sugars_mar_rand_pred, sugars_mar_const_pred = (
    predict(sugars_mar_model, val_frame) for val_frame in (sugars_mar_val_rand, sugars_mar_val_const)
)
sugars_mcar_rand_pred, sugars_mcar_const_pred = (
    predict(sugars_mcar_model, val_frame) for val_frame in (sugars_mcar_val_rand, sugars_mcar_val_const)
)
lutein_mar_rand_pred, lutein_mar_const_pred = (
    predict(lutein_mar_model, val_frame) for val_frame in (lutein_mar_val_rand, lutein_mar_val_const)
)
lutein_mcar_rand_pred, lutein_mcar_const_pred = (
    predict(lutein_mcar_model, val_frame) for val_frame in (lutein_mcar_val_rand, lutein_mcar_val_const)
)



In [82]:
import warnings

# Scale the ground-truth rows with the scaler fitted for each masked dataset.
# Warnings are silenced only inside this block, so later cells still surface theirs.
# NOTE(review): presumably this hides scikit-learn feature-name warnings raised by
# scaler.transform; confirm which warning is expected here.
with warnings.catch_warnings():
    warnings.simplefilter('ignore')

    water_mcar_ground_truth_scaled_df = scale_ground_truth(scaler = scaler_water_mcar,raw_df = water_df,target_col = 'water_mcar',non_target_na_col = 'water_mar')
    water_mar_ground_truth_scaled_df = scale_ground_truth(scaler = scaler_water_mar,raw_df = water_df,target_col = 'water_mar',non_target_na_col = 'water_mcar')

    sugars_mcar_ground_truth_scaled_df = scale_ground_truth(scaler = scaler_sugars_mcar,raw_df = sugars_df,target_col = 'sugars_mcar',non_target_na_col = 'sugars_mar')
    sugars_mar_ground_truth_scaled_df = scale_ground_truth(scaler = scaler_sugars_mar,raw_df = sugars_df,target_col = 'sugars_mar',non_target_na_col = 'sugars_mcar')

    lutein_mcar_ground_truth_scaled_df = scale_ground_truth(scaler = scaler_lutein_mcar,raw_df = lutein_df,target_col = 'lutein_zeaxanthin_mcar',non_target_na_col = 'lutein_zeaxanthin_mar')
    lutein_mar_ground_truth_scaled_df = scale_ground_truth(scaler = scaler_lutein_mar,raw_df = lutein_df,target_col = 'lutein_zeaxanthin_mar',non_target_na_col = 'lutein_zeaxanthin_mcar')

In [83]:
# Display the scaled predictions of water_mcar_model on the randomly imputed validation frame.
water_mcar_rand_pred

Unnamed: 0_level_0,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,niacin,pantothenic_acid,riboflavin,thiamin,vitamin_a,vitamin_a_rae,carotene_alpha,carotene_beta,cryptoxanthin_beta,lutein_zeaxanthin,vitamin_b12,vitamin_b6,vitamin_c,vitamin_d,vitamin_e,tocopherol_alpha,vitamin_k,calcium,copper,irom,magnesium,manganese,phosphorous,potassium,selenium,zink,protein,alanine,arginine,aspartic_acid,cystine,glutamic_acid,glycine,histidine,hydroxyproline,isoleucine,leucine,lysine,methionine,phenylalanine,proline,serine,threonine,tryptophan,tyrosine,valine,carbohydrate,fiber,sugars,fructose,galactose,glucose,lactose,maltose,sucrose,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water_mcar
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1
"Teff, uncooked",-0.668610,-0.506727,-0.297671,-0.479004,-0.177100,-0.336905,-0.173793,-0.003349,0.176143,0.111424,0.250534,0.074841,0.093365,-0.167304,-0.118624,-0.009097,-0.128199,-0.421896,0.331531,-0.177140,-0.354079,-0.247404,-0.240003,0.008265,0.648202,0.911958,0.804547,2.351649,1.328213,1.501520,0.375981,-0.049553,0.462020,-0.361292,-0.122656,0.464544,-0.026350,0.635988,0.657930,-0.113494,-0.009034,-0.397631,0.079801,0.134077,-0.302205,0.046085,0.386867,0.410375,0.393218,-0.006995,0.590201,0.180074,0.191409,1.480768,1.266319,-0.290603,0.490229,-0.099414,0.337317,-0.024743,-0.167921,0.087397,-0.665297,-0.504298,-0.714917,-0.223788,-0.283060,0.080129,0.255639,-0.270325,-0.103887,-0.916746
"Sherbet, orange",-0.441350,-0.344566,-0.574796,-0.333786,-0.478964,-0.311282,-0.341568,-0.656914,-0.139757,-0.491206,-0.321426,-0.049235,-0.071133,-0.026859,-0.050432,-0.074524,-0.054386,-0.343510,-0.576605,0.009495,0.128581,-0.286975,-0.290524,-0.092425,-0.273541,-0.128583,-0.568568,-0.338575,-0.059123,-0.676219,-0.311893,-0.431494,-0.413333,-0.881761,-0.848999,-0.864516,-0.887101,-0.896417,-0.878738,-0.753587,-0.859162,-0.271333,-0.894836,-0.907825,-0.828771,-0.845865,-0.928441,-0.863634,-0.928456,-0.884501,-0.886815,-0.911433,-0.924980,0.883834,0.000298,1.315100,-0.328354,0.045506,-0.270487,-0.209524,-0.191918,-0.368016,-0.439152,-0.344946,-0.295304,-0.480593,-0.573193,-0.087388,-0.561130,0.168365,-0.231341,-0.253182
"Cauliflower, raw",-0.294124,-0.287516,-0.725589,-0.269311,-0.093651,-0.004898,-0.470878,-0.332102,0.276594,-0.521384,-0.086509,0.222372,0.117846,-0.173485,-0.001218,-0.340422,0.032956,-0.526780,-0.184319,1.090239,0.164633,-0.240889,-0.248447,0.187789,-0.705540,-0.094694,-0.631756,0.073055,-0.181275,-0.408053,0.432714,-0.159970,-0.301107,-0.618424,-0.643977,-0.751460,-0.621854,-0.591342,-0.654350,-0.614210,-0.630451,0.050507,-0.711123,-0.690000,-0.634240,-0.676896,-0.720617,-0.671388,-0.726952,-0.689828,-0.747254,-0.760251,-0.706946,0.540765,0.601339,-0.083026,0.450553,0.111814,0.252909,-0.363784,-0.183763,-0.564985,-0.293036,-0.288313,-0.018502,-0.425323,-0.724102,-0.184639,-0.326441,0.445486,-0.322932,-0.120332
"Winged bean tuber, raw",-0.421538,-0.370283,-0.713767,-0.064781,-0.599554,0.120670,-0.596527,-0.372008,-0.012220,-0.163376,0.390720,0.462592,0.418206,-0.135960,-0.101224,-0.169043,0.030794,-0.297827,-0.812274,0.022576,-0.180221,-0.299501,-0.301398,-0.118017,-0.631490,1.295192,0.123659,0.663575,-0.505190,-0.336604,0.924096,0.072984,-0.008932,-0.065381,-0.074545,0.203964,0.103512,0.437790,0.155950,-0.170036,-0.068703,-0.249859,0.050653,-0.011824,-0.160603,-0.107809,0.165816,-0.274165,0.129434,0.035228,0.315667,-0.003013,0.043665,0.706672,0.259302,0.068683,-0.294805,-0.210789,-0.316780,-0.170690,-0.049468,-0.473714,-0.420106,-0.376677,-0.309104,-0.336918,-0.712574,-0.046025,0.144667,0.377760,-0.326556,-0.421384
"Chewing gum, sugarless",-1.035013,-0.697422,-0.065841,-0.257632,-0.439802,-0.354744,0.095095,-0.890581,-0.757751,-0.408670,-0.524147,-0.218943,-0.147373,-0.004646,-0.176554,-0.070962,-0.339265,-0.303019,-0.567380,-0.467214,-0.513468,-0.163311,-0.160930,-0.080215,-0.307935,-0.299666,0.008197,-0.755570,0.142071,-0.844547,-1.158117,-0.167849,-1.016521,-1.307756,-0.856084,-0.944205,-0.937979,-0.608021,-0.719743,-0.920147,-0.814386,-0.538981,-0.776902,-0.787632,-0.810768,-0.701238,-0.796615,-0.682412,-0.775280,-0.812624,-0.629503,-0.789161,-0.793919,0.717161,-0.195171,0.013921,0.370329,-0.358958,0.220578,0.018078,-0.082090,0.103601,-1.034385,-0.697946,-1.193214,-0.455207,-0.059606,0.067230,-0.688342,-0.596336,0.104354,0.396253
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"Beef, broiled, cooked, choice, trimmed to 1/8""fat, separable lean only, small end (ribs 10-12), rib",-0.185035,-0.067617,0.292679,-0.384925,1.429637,-0.275035,-0.221243,0.881480,0.096511,-0.230168,-0.332056,-0.110426,-0.081653,-0.063733,-0.112801,-0.065286,-0.116632,-0.013940,0.553748,-0.093200,-0.066406,-0.120862,-0.138075,-0.102185,-0.475526,-0.175396,-0.108739,-0.211712,-0.086144,0.379880,0.299807,0.627139,0.852184,1.580477,1.733872,1.430027,1.474441,1.277617,1.441182,1.821421,1.535500,3.172192,1.356303,1.455768,1.535172,1.482102,1.325051,1.491662,1.357215,1.447189,0.897070,1.365572,1.360387,-0.738986,-0.460414,-0.462812,-0.156347,-0.174531,-0.102239,-0.112513,-0.199581,-0.232433,-0.184271,-0.071345,-0.073802,-0.367824,0.286097,-0.033129,-0.351674,-0.009819,-0.139159,0.296884
"Lamb, braised, cooked, choice, trimmed to 1/8"" fat, separable lean and fat, arm, shoulder, domestic",0.783904,0.832677,0.315734,-0.287672,-0.705307,-0.147595,-0.404855,0.648403,0.241845,-0.198488,-0.222440,-0.017937,0.019615,0.029015,-0.090366,-0.169561,-0.080878,0.214337,-0.398062,-0.043736,0.034055,-0.286195,-0.304099,-0.046536,-0.597933,0.005713,0.002872,-0.215637,-0.098653,0.461129,0.401467,0.931171,1.181218,1.952859,1.716251,1.621821,1.493845,1.584114,1.660627,1.464162,1.679635,-0.282307,1.801550,1.752322,1.764718,1.827549,1.734782,1.396539,1.659227,1.786497,1.777933,1.731752,1.774773,-0.226975,-0.312126,-0.433366,-0.169538,-0.058484,-0.222401,-0.232051,-0.193478,-0.454036,0.784444,0.824749,0.933257,-0.168685,0.305702,-0.097791,-0.249471,0.206179,-0.221591,-0.869582
"Beef, grilled, cooked, choice, trimmed to 0"" fat, separable lean and fat, boneless, chuck eye steak",0.710389,0.815041,0.033916,-0.255993,0.726451,-0.416659,-0.592250,0.393653,0.200520,-0.316734,-0.175895,0.025327,-0.011685,-0.133166,-0.062058,0.023839,0.006189,0.011751,0.188709,-0.138264,-0.047726,-0.191348,-0.203681,-0.067792,-0.424083,0.067096,-0.123380,-0.146661,-0.015863,0.354473,0.367033,0.633980,1.908643,1.294218,1.252001,1.067333,0.995747,0.961814,1.095128,1.152125,1.281899,1.509116,1.173753,1.201657,1.299969,1.322409,1.067501,0.915474,1.055275,1.198702,0.942273,1.158153,1.159698,-0.332973,-0.241914,-0.266573,-0.257015,0.032846,-0.207932,-0.192230,-0.155115,-0.301539,0.712337,0.814279,0.880400,-0.231280,0.034087,-0.183864,-0.195918,-0.040560,-0.237180,-0.498834
"Beef, broiled, cooked, all grades, trimmed to 0"" fat, separable lean only, mock tender steak, chuck",-0.504811,-0.410217,0.277975,-0.433911,1.234877,-0.256251,-0.055633,-0.063187,-0.542639,-0.185506,-0.318449,-0.224010,-0.193122,-0.080266,-0.096388,-0.139267,-0.137032,0.347540,0.220165,-0.103718,-0.148563,-0.295213,-0.297471,-0.080653,-0.259568,0.060267,-0.028648,-0.473824,0.059759,0.124112,0.053169,0.639115,1.388445,1.018737,1.387394,1.237695,1.229054,1.137389,1.230230,1.198173,1.377961,0.934455,1.391382,1.386608,1.454110,1.478076,1.287660,1.066736,1.341679,1.398982,1.269595,1.388536,1.363993,-1.061276,-0.768611,-0.495807,-0.081652,-0.094477,-0.062752,-0.083978,-0.261719,-0.111815,-0.503680,-0.409839,-0.474541,-0.262310,0.282832,0.029716,-0.338277,-0.123600,-0.054559,0.985117


In [93]:
# Map every prediction frame back to the original units with the scaler of its dataset.
water_mcar_pred_const_unscaled_df = inverse_scale_predictions(scaler_water_mcar, water_mcar_const_pred)
water_mcar_pred_rand_unscaled_df = inverse_scale_predictions(scaler_water_mcar, water_mcar_rand_pred)
water_mar_pred_const_unscaled_df = inverse_scale_predictions(scaler_water_mar, water_mar_const_pred)
water_mar_pred_rand_unscaled_df = inverse_scale_predictions(scaler_water_mar, water_mar_rand_pred)

sugars_mcar_pred_const_unscaled_df = inverse_scale_predictions(scaler_sugars_mcar, sugars_mcar_const_pred)
sugars_mcar_pred_rand_unscaled_df = inverse_scale_predictions(scaler_sugars_mcar, sugars_mcar_rand_pred)
sugars_mar_pred_const_unscaled_df = inverse_scale_predictions(scaler_sugars_mar, sugars_mar_const_pred)
sugars_mar_pred_rand_unscaled_df = inverse_scale_predictions(scaler_sugars_mar, sugars_mar_rand_pred)

lutein_mcar_pred_const_unscaled_df = inverse_scale_predictions(scaler_lutein_mcar, lutein_mcar_const_pred)
lutein_mcar_pred_rand_unscaled_df = inverse_scale_predictions(scaler_lutein_mcar, lutein_mcar_rand_pred)
lutein_mar_pred_const_unscaled_df = inverse_scale_predictions(scaler_lutein_mar, lutein_mar_const_pred)
lutein_mar_pred_rand_unscaled_df = inverse_scale_predictions(scaler_lutein_mar, lutein_mar_rand_pred)

In [109]:
# Display the random-imputation MCAR water predictions after inverse scaling with scaler_water_mcar.
water_mcar_pred_rand_unscaled_df

Unnamed: 0_level_0,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,niacin,pantothenic_acid,riboflavin,thiamin,vitamin_a,vitamin_a_rae,carotene_alpha,carotene_beta,cryptoxanthin_beta,lutein_zeaxanthin,vitamin_b12,vitamin_b6,vitamin_c,vitamin_d,vitamin_e,tocopherol_alpha,vitamin_k,calcium,copper,irom,magnesium,manganese,phosphorous,potassium,selenium,zink,protein,alanine,arginine,aspartic_acid,cystine,glutamic_acid,glycine,histidine,hydroxyproline,isoleucine,leucine,lysine,methionine,phenylalanine,proline,serine,threonine,tryptophan,tyrosine,valine,carbohydrate,fiber,sugars,fructose,galactose,glucose,lactose,maltose,sucrose,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water_mcar
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1
"Teff, uncooked",1.586582,0.708267,8.764428,-32.286152,17.961695,5.277362,2.531662,3.906757,0.783826,0.308297,0.358730,846.288452,179.822800,-24.731606,-12.414214,3.449653,-8.905613,-0.505658,0.454844,-2.612990,-30.696802,-0.015092,0.015839,9.008528,194.909668,0.724198,7.799089,174.806534,8.987095,439.313660,397.424316,13.322851,3.927631,9.537534,0.497161,1.046197,0.881304,0.214854,2.968096,0.452085,0.321211,-0.002599,0.511794,0.954434,0.524353,0.273865,0.623105,0.783596,0.628453,0.429403,0.190826,0.428740,0.632776,64.804832,7.830595,2.675920,0.647594,-0.002818,0.680227,0.109827,0.002344,0.841651,1.636724,0.723293,-0.617048,1.341046,10.635510,0.047975,2.348922,-11.976479,-1.378532,22.041119
"Sherbet, orange",5.338634,1.823372,-26.723379,75.612495,1.183014,8.641212,-14.925651,0.897972,0.363621,0.064694,0.083137,413.613373,41.092861,9.032392,55.539478,-4.022089,53.637424,-0.144665,0.017069,5.991105,34.199368,-0.180453,-0.195285,2.779926,29.426039,0.109883,-0.415913,15.626378,0.116799,57.924301,180.556229,2.201436,0.824646,4.198254,0.007206,-0.020145,-0.049851,-0.001775,0.102801,0.017609,-0.001895,0.008122,-0.001932,-0.002907,0.005348,0.003322,-0.006293,0.011408,-0.010202,-0.000766,-0.002361,-0.011478,-0.011122,48.015186,2.151540,24.223820,-0.165146,0.008234,-0.181785,-0.266357,-0.008272,-0.731060,5.369590,1.818758,2.440822,-0.064939,-26.518072,-0.028886,0.576457,10.126916,-9.943694,41.735260
"Cauliflower, raw",7.769320,2.215676,-46.033363,123.518486,22.600094,48.864014,-28.380610,2.393294,0.917446,0.052495,0.196330,1360.748535,200.469009,-26.217789,104.581413,-34.387440,127.644165,-0.988683,0.206174,55.814671,39.046726,0.012137,-0.019448,20.113779,-48.132175,0.129891,-0.793954,39.982479,-0.664212,104.888474,415.310577,10.107697,1.222470,6.899715,0.145504,0.070564,0.237092,0.041352,0.521197,0.112214,0.085030,0.035440,0.094902,0.197239,0.197086,0.054574,0.093154,0.127930,0.087168,0.094666,0.015893,0.049495,0.114634,38.365864,4.847658,5.461523,0.608202,0.013290,0.560515,-0.580408,-0.004664,-1.411265,7.781465,2.208079,4.457981,0.237661,-45.843010,-0.073507,1.085754,24.089685,-16.098837,45.678165
"Winged bean tuber, raw",5.665727,1.646525,-44.519547,275.486664,-5.519812,65.348785,-41.454624,2.209583,0.533269,0.197214,0.426277,2198.434814,453.778107,-17.196274,4.925652,-14.816096,125.811905,0.065722,-0.096537,6.594151,-7.320594,-0.232798,-0.240726,1.196805,-34.837734,0.950451,3.725508,74.923454,-2.735250,117.401413,570.230164,16.890913,2.258183,12.573159,0.529614,0.837127,1.021788,0.186835,2.032096,0.413706,0.298533,0.009944,0.496430,0.820374,0.663921,0.227186,0.517328,0.368690,0.500988,0.450102,0.154918,0.354899,0.547562,43.032238,3.313368,7.497402,-0.131836,-0.011312,-0.247439,-0.187297,0.054746,-1.096074,5.683973,1.600619,2.340261,0.721665,-44.366829,-0.009907,2.108104,20.677288,-16.342377,36.743149
"Chewing gum, sugarless",-4.462701,-0.603056,38.451885,132.196091,3.359773,2.935398,30.510017,-0.177747,-0.458427,0.098058,-0.014542,-178.182266,-23.203939,14.372697,-70.140488,-3.615275,-187.746429,0.041813,0.021516,-15.985726,-52.127422,0.336329,0.346279,3.535225,23.251131,0.008879,3.034727,-9.047175,1.403182,28.444834,-86.235512,9.878295,-1.313558,-0.171856,0.002427,-0.084081,-0.104890,0.038994,0.399264,-0.095446,0.015123,-0.014596,0.060230,0.107530,0.023092,0.047190,0.056788,0.121248,0.063815,0.034469,0.031294,0.037835,0.064471,43.327259,1.274713,6.762516,0.528550,-0.022612,0.514663,0.197004,0.040314,0.897607,-4.455624,-0.607933,-4.102567,0.074049,39.250401,0.042056,0.300394,-28.402609,12.615692,61.010063
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"Beef, broiled, cooked, choice, trimmed to 1/8""fat, separable lean only, small end (ribs 10-12), rib",9.570363,3.727818,84.362961,37.616192,107.269997,13.399754,-2.405571,7.980199,0.677901,0.170214,0.078015,200.233963,32.221260,0.167605,-6.610916,-2.967058,0.895039,1.373121,0.561966,1.256719,7.982399,0.513718,0.441788,2.176202,-6.837003,0.082246,2.335126,23.132818,-0.055968,242.879898,373.408722,33.026897,5.310701,29.457336,1.749482,1.820832,2.504851,0.305558,4.428561,1.765445,0.908239,0.300411,1.184631,2.168856,2.335346,0.709432,1.072042,1.438973,1.094272,1.142271,0.230963,0.906867,1.307007,2.370981,0.084896,0.364943,0.005633,-0.008547,0.056832,-0.068859,-0.011662,-0.262842,9.576793,3.699617,4.054992,0.552459,83.520081,-0.003990,1.030995,1.149109,-3.748889,58.060860
"Lamb, braised, cooked, choice, trimmed to 1/8"" fat, separable lean and fat, arm, shoulder, domestic",25.567469,9.918724,87.315254,109.876137,-11.397970,30.130405,-21.510801,6.907194,0.871223,0.183020,0.130833,522.756409,117.625793,22.465101,15.744843,-14.875205,31.190090,2.424413,0.103137,3.537083,21.489784,-0.177196,-0.252012,5.618562,-28.813200,0.189169,3.002867,22.900602,-0.135948,257.109161,405.459229,41.879757,6.477074,33.277451,1.737595,1.974713,2.525842,0.348886,4.837740,1.522949,0.963021,0.007190,1.419318,2.441342,2.561596,0.814211,1.268105,1.381318,1.240209,1.308606,0.346176,1.054552,1.546012,16.772062,0.750081,0.760099,-0.007464,0.000303,-0.113587,-0.312221,-0.008962,-1.028119,25.566916,9.859797,11.393786,1.642728,86.030617,-0.033659,1.252785,12.032197,-9.288476,23.440928
"Beef, grilled, cooked, choice, trimmed to 0"" fat, separable lean and fat, boneless, chuck eye steak",24.353745,9.797443,51.226501,133.413513,68.184357,-5.192845,-41.009636,5.734415,0.816253,0.135221,0.153260,673.622559,91.229126,-16.524685,43.954102,7.210892,104.963814,1.491436,0.385995,-0.820746,10.493967,0.219162,0.167625,4.303701,2.398691,0.225408,2.247532,26.981895,0.393393,238.430374,394.603149,33.226101,9.055676,26.520723,1.424435,1.529832,1.987003,0.260914,3.783302,1.311148,0.811854,0.159248,1.088410,1.935369,2.103521,0.660994,0.948800,1.089741,0.948369,1.020458,0.236875,0.823212,1.191255,13.790692,1.065036,2.998401,-0.094317,0.007268,-0.093066,-0.231151,0.008009,-0.501490,24.376678,9.787820,11.008600,1.300024,51.248363,-0.073151,1.369001,-0.399780,-10.336136,34.444477
"Beef, broiled, cooked, all grades, trimmed to 0"" fat, separable lean only, mock tender steak, chuck",4.290896,1.371922,82.479950,1.218485,96.444542,15.865883,14.826507,3.631287,-0.172289,0.188268,0.084572,-195.850601,-61.786697,-3.806907,9.743958,-11.415711,-16.389729,3.037858,0.401159,0.771841,-3.064088,-0.214880,-0.224317,3.508097,31.934618,0.221377,2.814292,7.623715,0.876901,198.087021,295.650238,33.375610,7.211659,23.694677,1.515765,1.666518,2.239393,0.285735,4.035215,1.342404,0.848364,0.110470,1.203121,2.105309,2.255449,0.708211,1.054150,1.181422,1.086765,1.118639,0.279688,0.916128,1.309086,-6.693895,-1.297597,-0.077836,0.079794,-0.002442,0.112834,-0.010766,-0.039151,0.153697,4.304462,1.372652,1.134658,1.130141,83.101967,0.024844,1.060068,-4.583754,1.936415,78.487160


In [112]:
from sklearn.metrics import mean_squared_error

# Per-column MSE summary for the water models: each result column below is one
# (ground truth, prediction) pairing, evaluated on both the scaled and the
# unscaled frames.
# NOTE(review): "rand"/"const" presumably name the strategy used to pre-fill the
# missing values before the autoencoder ran -- confirm against the cells that
# build these prediction frames.
mse_pairs = {
    # scaled space
    'mse_mcar_rand_scaled': (water_mcar_ground_truth_scaled_df, water_mcar_rand_pred),
    'mse_mar_rand_scaled': (water_mar_ground_truth_scaled_df, water_mar_rand_pred),
    'mse_mcar_const_scaled': (water_mcar_ground_truth_scaled_df, water_mcar_const_pred),
    'mse_mar_const_scaled': (water_mar_ground_truth_scaled_df, water_mar_const_pred),
    # original (unscaled) units
    'mse_mcar_rand_unscaled': (water_mcar_ground_truth_unscaled_df, water_mcar_pred_rand_unscaled_df),
    'mse_mar_rand_unscaled': (water_mar_ground_truth_unscaled_df, water_mar_pred_rand_unscaled_df),
    'mse_mcar_const_unscaled': (water_mcar_ground_truth_unscaled_df, water_mcar_pred_const_unscaled_df),
    'mse_mar_const_unscaled': (water_mar_ground_truth_unscaled_df, water_mar_pred_const_unscaled_df),
}

# multioutput='raw_values' makes mean_squared_error return one error per output
# column instead of a single averaged score.
mse_columns = {
    label: list(mean_squared_error(truth, pred, multioutput='raw_values'))
    for label, (truth, pred) in mse_pairs.items()
}

# Column labels are taken from the scaled MCAR ground truth; 'columns' stays the
# first field so the layout of the displayed table is unchanged.
pd.DataFrame({'columns': water_mcar_ground_truth_scaled_df.columns, **mse_columns})

Unnamed: 0,columns,mse_mcar_rand_scaled,mse_mar_rand_scaled,mse_mcar_const_scaled,mse_mar_const_scaled,mse_mcar_rand_unscaled,mse_mar_rand_unscaled,mse_mcar_const_unscaled,mse_mar_const_unscaled
0,total_fat,0.049978,0.101386,0.027505,0.072619,13.622911,27.635586,7.497311,19.794182
1,saturated_fat,0.027722,0.100853,0.017252,0.096547,1.310903,4.768993,0.815804,4.565399
2,cholesterol,0.053075,0.044921,0.026007,0.040372,870.355468,736.646012,426.482711,662.046743
3,sodium,0.058239,0.073987,0.053064,0.073868,32151.737970,40845.569698,29294.827443,40780.095474
4,choline,0.026710,0.320105,0.019590,0.283489,82.522324,988.978966,60.523644,875.850509
...,...,...,...,...,...,...,...,...,...
67,alcohol,0.018797,0.060884,0.014223,0.052197,0.003957,0.012817,0.002994,0.010988
68,ash,0.100599,0.126921,0.098296,0.114551,0.473754,0.597711,0.462907,0.539457
69,caffeine,0.087774,0.030727,0.065425,0.030563,222.827621,78.005599,166.092153,77.590152
70,theobromine,0.032601,0.235013,0.021111,0.228568,147.228022,1061.342886,95.338251,1032.240467


In [87]:
# Scaled values for the rows of water_df whose `water_mcar` entry is null, with
# the non-feature columns removed.
# NOTE(review): presumably these are the rows that were masked under MCAR, so
# this frame serves as the scaled ground truth for them -- confirm.
non_feature_cols = ['name', 'water_mcar', 'water_mar']

# Select the rows first, then drop the columns the scaler is not given.
mcar_null_rows = water_df[water_df.water_mcar.isnull()]
mcar_null_features = mcar_null_rows.drop(non_feature_cols, axis='columns')

# transform() output is wrapped in a fresh DataFrame, so the index restarts at 0;
# the column labels are restored from water_df minus the dropped columns.
feature_names = water_df.drop(non_feature_cols, axis='columns').columns
water_mcar_true = pd.DataFrame(scaler_water_mcar.transform(mcar_null_features), columns=feature_names)
water_mcar_true

Unnamed: 0,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,niacin,pantothenic_acid,riboflavin,thiamin,vitamin_a,vitamin_a_rae,carotene_alpha,carotene_beta,cryptoxanthin_beta,lutein_zeaxanthin,vitamin_b12,vitamin_b6,vitamin_c,vitamin_d,vitamin_e,tocopherol_alpha,vitamin_k,calcium,copper,irom,magnesium,manganese,phosphorous,potassium,selenium,zink,protein,alanine,arginine,aspartic_acid,cystine,glutamic_acid,glycine,histidine,hydroxyproline,isoleucine,leucine,lysine,methionine,phenylalanine,proline,serine,threonine,tryptophan,tyrosine,valine,carbohydrate,fiber,sugars,fructose,galactose,glucose,lactose,maltose,sucrose,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
0,-0.619342,-0.551556,-0.366113,-0.419401,-0.264566,-0.377104,-0.198124,-0.121463,0.295053,0.016684,0.315431,-0.165265,-0.119859,-0.06443,-0.101149,-0.039304,-0.039796,-0.312098,0.387865,-0.120460,-0.125774,-0.224649,-0.224649,-0.106650,0.565155,1.057292,0.776284,2.507023,1.367768,1.442629,0.469791,-0.355989,0.378058,0.005471,0.247723,-0.195034,-0.083019,0.785573,0.862210,-0.076787,-0.062212,-0.367017,0.059323,0.257674,-0.452719,0.554252,0.543382,0.213058,0.379865,0.157415,0.193964,0.252622,0.283687,1.776759,1.304084,-0.352894,0.311358,-0.062457,0.372412,-0.07869,-0.150615,0.023213,-0.620268,-0.544198,-0.549419,-0.273112,-0.366113,-0.024432,0.265352,-0.032626,-0.083373,-1.362212
1,-0.643570,-0.435218,-0.358304,-0.373641,-0.361717,-0.346635,-0.198124,-0.838286,-0.244720,-0.411286,-0.437932,-0.154655,-0.105630,-0.06443,-0.105163,0.004479,-0.109427,-0.283870,-0.564302,-0.070570,-0.125774,-0.241400,-0.241400,-0.137364,-0.136664,-0.267279,-0.475648,-0.467465,-0.075670,-0.778567,-0.580093,-0.455583,-0.510558,-1.183777,-0.859682,-0.839409,-0.841020,-0.883861,-0.933870,-0.779530,-0.854175,-0.367017,-0.891171,-0.904661,-0.834197,-0.856819,-0.915290,-0.882456,-0.907343,-0.882938,-0.868764,-0.882973,-0.905697,0.257549,-0.189534,1.322267,-0.162020,-0.062457,-0.142311,-0.07869,-0.173220,-0.156322,-0.643289,-0.440772,-0.557515,-0.454119,-0.358304,-0.024432,-0.642443,-0.032626,-0.083373,0.567750
2,-0.746538,-0.595182,-0.366113,-0.395175,0.296750,0.057075,-0.198124,-0.741841,0.088316,-0.502818,-0.390198,-0.167846,-0.119859,-0.06443,-0.106167,-0.039304,-0.116508,-0.312098,-0.230317,0.925066,-0.125774,-0.224649,-0.224649,0.113204,-0.314904,-0.248647,-0.428847,-0.349161,-0.053148,-0.755727,0.063794,-0.486491,-0.569799,-1.103844,-0.687716,-0.732221,-0.677402,-0.742383,-0.796040,-0.674929,-0.706833,-0.367017,-0.756470,-0.789298,-0.614036,-0.790881,-0.779454,-0.765315,-0.729369,-0.727905,-0.715853,-0.756520,-0.688973,-0.646582,-0.033484,-0.347678,0.814953,-0.062457,0.520483,-0.07869,-0.173220,-0.156322,-0.747491,-0.590602,-0.625578,-0.463069,-0.366113,-0.024432,-0.476551,-0.032626,-0.083373,1.442770
3,-0.710196,-0.580640,-0.366113,-0.388446,-0.500247,-0.232377,-0.198124,-0.495732,-0.325912,-0.282648,0.292602,-0.167846,-0.119859,-0.06443,-0.106167,-0.039304,-0.117689,-0.312098,-0.456431,-0.120460,-0.125774,-0.243793,-0.243793,-0.137364,-0.270344,2.032935,-0.164755,-0.197057,0.005816,-0.750017,0.974116,-0.483057,-0.253846,-0.160244,-0.271142,-0.325903,0.762798,0.509692,-0.504827,-0.204961,-0.220078,-0.367017,-0.084863,-0.208130,-0.233572,-0.385363,0.027205,0.237806,0.052889,0.037060,1.057909,-0.007722,0.132847,0.175776,-0.479340,-0.490007,-0.162020,-0.062457,-0.142311,-0.07869,-0.173220,-0.156322,-0.709930,-0.577219,-0.598133,-0.436950,-0.366113,-0.024432,0.094852,-0.032626,-0.083373,0.274617
4,-0.740481,-0.595182,-0.366113,-0.426130,-0.500247,-0.377104,-0.198124,-0.851971,-0.413117,-0.651247,-0.493967,-0.167846,-0.119859,-0.06443,-0.106167,-0.039304,-0.117689,-0.312098,-0.612014,-0.120460,-0.125774,-0.243793,-0.243793,-0.137364,-0.326044,-0.314706,-0.499049,-0.602669,-0.077391,-1.006968,-0.884591,-0.489926,-0.645966,-1.291004,-0.859682,-0.839409,-0.841020,-0.883861,-0.933870,-0.779530,-0.854175,-0.367017,-0.891171,-0.904661,-0.834197,-0.856819,-0.915290,-0.882456,-0.907343,-0.882938,-0.868764,-0.882973,-0.905697,2.547208,0.055687,-0.490007,-0.162020,-0.062457,-0.142311,-0.07869,-0.173220,-0.156322,-0.740221,-0.601075,-0.617482,-0.426356,-0.366113,-0.024432,-0.227714,-0.032626,-0.083373,-1.541461
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1471,-0.213525,-0.115289,0.321081,-0.357491,1.439172,-0.308549,-0.198124,0.961156,0.004869,-0.292543,-0.338314,-0.167846,-0.119859,-0.06443,-0.106167,-0.039304,-0.117689,0.072238,0.609829,-0.120460,-0.081150,-0.143288,-0.143288,-0.113116,-0.348324,-0.163956,-0.178126,-0.197057,-0.075670,0.243526,0.231902,0.609044,0.851987,1.466686,1.690166,1.440208,1.541135,1.698103,1.343811,1.758889,1.521714,3.132008,1.550517,1.544079,1.591637,1.572992,1.419004,1.343219,1.398041,1.422158,0.553304,1.351026,1.528554,-0.823283,-0.479340,-0.490007,-0.162020,-0.062457,-0.142311,-0.07869,-0.173220,-0.156322,-0.216185,-0.108383,-0.134591,-0.406265,0.321081,-0.024432,-0.319876,-0.032626,-0.083373,0.437020
1472,0.628393,0.728159,0.570970,-0.338648,-0.500247,-0.232377,-0.198124,0.583847,0.045465,-0.032792,-0.348691,-0.167846,-0.119859,-0.06443,-0.106167,-0.039304,-0.117689,0.250292,-0.383826,-0.120460,-0.125774,-0.243793,-0.243793,-0.137364,-0.298194,-0.075877,-0.092881,-0.163256,-0.073793,0.192135,0.101856,0.794495,1.114340,1.740603,1.912537,1.462643,1.689038,1.740546,1.485931,1.458345,1.737465,-0.367017,1.954619,1.728007,1.951808,1.774102,1.730382,1.268974,1.484959,1.832179,1.906562,1.708069,2.003614,-0.823283,-0.479340,-0.490007,-0.162020,-0.062457,-0.142311,-0.07869,-0.173220,-0.156322,0.607731,0.730226,0.691223,-0.176489,0.570970,-0.024432,-0.310660,-0.032626,-0.083373,-0.145877
1473,0.446684,0.655448,0.305463,-0.339994,0.863463,-0.331401,-0.198124,0.157880,0.147706,-0.186169,-0.356992,-0.160964,-0.111559,-0.06443,-0.106167,-0.039304,-0.117689,0.315434,0.128560,-0.120460,-0.081150,-0.219863,-0.219863,-0.111499,-0.353894,-0.180894,-0.087867,-0.247758,-0.075514,0.089355,0.171637,0.413290,1.788559,1.141105,1.249873,1.207136,1.226842,0.969494,1.181847,0.897034,1.271759,1.635792,1.121751,1.279615,1.378577,1.480679,1.067919,0.808661,1.075205,1.356881,1.279629,1.249367,1.048292,-0.823283,-0.479340,-0.490007,-0.162020,-0.062457,-0.142311,-0.07869,-0.173220,-0.156322,0.468392,0.652693,0.670502,-0.322793,0.305463,-0.024432,-0.393606,-0.032626,-0.083373,0.184993
1474,-0.437633,-0.347965,0.125856,-0.339994,1.273656,-0.316166,-0.198124,-0.062380,-0.413117,-0.082268,-0.263600,-0.167846,-0.119859,-0.06443,-0.106167,-0.039304,-0.117689,0.424004,0.055955,-0.120460,-0.125774,-0.205506,-0.205506,-0.137364,-0.392884,-0.114835,-0.009308,-0.213957,-0.077391,0.289206,0.044762,0.361776,1.562879,1.233710,1.460384,1.335513,1.545757,1.082676,1.292862,1.034047,1.342799,1.046731,1.645377,1.587612,1.632219,1.566398,1.431543,0.924152,1.242832,1.558831,1.271983,1.470040,1.521619,-0.823283,-0.479340,-0.490007,-0.162020,-0.062457,-0.142311,-0.07869,-0.173220,-0.156322,-0.436098,-0.343601,-0.272363,-0.398593,0.125856,-0.024432,-0.200066,-0.032626,-0.083373,0.573478


In [88]:
from sklearn.metrics import mean_squared_error

# Ten columns with the largest per-column MSE between the scaled MCAR ground
# truth and the model predictions.
# NOTE(review): this cell currently fails with
#   NameError: name 'water_mcar_pred' is not defined
# No cell visible here defines `water_mcar_pred`; the MSE summary cell uses
# `water_mcar_rand_pred` / `water_mcar_const_pred` instead. Confirm which
# prediction frame is intended (and that its columns line up with
# `water_mcar_true`) before renaming, or remove this cell if it is superseded.
per_column_mse = mean_squared_error(water_mcar_true, water_mcar_pred, multioutput='raw_values')
mse_by_column = pd.DataFrame({'columns': water_mcar_true.columns, 'mse': list(per_column_mse)})
mse_by_column.nlargest(10, 'mse')

NameError: name 'water_mcar_pred' is not defined