# Imputation Research Project <img src="https://chroniclesofai.com/content/images/2021/05/file-20201210-18-elk4m.jpg" alt="Alt text image not displaying" width="500" align="right" />
## Notebook 3.0: Autoencoder Model

**Author:** Chike Odenigbo

**Date:** November 25th, 2022

**Notebook Structure:**

* 1.0 Preprocessing

* **1.1 Exploratory Data Analysis**

* 1.2 Masking

* 2.* Models


Water Sugar lutein_zeaxanthin Alcohol

In [3]:
import pandas as pd
import os
from src.visualization.visualize import histogram, box_plot, bar_plot
from pathlib import Path
from notebook_config import ROOT_DIR  # setup.py file changed the root of the project so it is set in the config file

ROOT_DIR = ROOT_DIR.as_posix()  # convert root path to windows readable path (i.e. change backslash to forward slash)
from joblib import load
import tensorflow as tf
from keras import backend as K
import keras
import numpy as np
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
import matplotlib.pyplot as plt

In [4]:
notebook_nm = "3.0-autoencoder"
fig_dir = f"{ROOT_DIR}/reports/figures/"
output_prefix = notebook_nm

In [5]:
pd.set_option('display.max_columns', None)

# Ground Truth Included
water_df = pd.read_csv(f'{ROOT_DIR}/data/processed/water.csv')
sugars_df = pd.read_csv(f'{ROOT_DIR}/data/processed/sugars.csv')
lutein_df = pd.read_csv(f'{ROOT_DIR}/data/processed/lutein.csv')

# Scaled Data without Ground Truth to prevent Data Leakage
water_mcar_scaled_df = pd.read_csv(f'{ROOT_DIR}/data/processed/water_mcar_scaled.csv', index_col = 0)
water_mar_scaled_df = pd.read_csv(f'{ROOT_DIR}/data/processed/water_mar_scaled.csv', index_col = 0)

lutein_mar_scaled_df = pd.read_csv(f'{ROOT_DIR}/data/processed/lutein_mar_scaled.csv', index_col = 0)
lutein_mcar_scaled_df = pd.read_csv(f'{ROOT_DIR}/data/processed/utein_mcar_scaled.csv', index_col = 0)

sugars_mar_scaled_df = pd.read_csv(f'{ROOT_DIR}/data/processed/sugars_mar_scaled.csv', index_col = 0)
sugars_mcar_scaled_df = pd.read_csv(f'{ROOT_DIR}/data/processed/sugars_mcar_scaled.csv', index_col = 0)

water_mcar_scaled_df = pd.read_csv(f'{ROOT_DIR}/data/processed/water_mcar_scaled.csv', index_col = 0)
water_mar_scaled_df = pd.read_csv(f'{ROOT_DIR}/data/processed/water_mar_scaled.csv', index_col = 0)

# Scalers to return back to origin scale for model evaluation
scaler_lutein_mar = load(f'{ROOT_DIR}/models/scaler_lutein_mar.joblib')
scaler_lutein_mcar = load(f'{ROOT_DIR}/models/scaler_lutein_mcar.joblib')

scaler_sugars_mar = load(f'{ROOT_DIR}/models/scaler_sugars_mar.joblib')
scaler_sugars_mcar = load(f'{ROOT_DIR}/models/scaler_sugars_mcar.joblib')

scaler_water_mcar = load(f'{ROOT_DIR}/models/scaler_water_mcar.joblib')
scaler_water_mar = load(f'{ROOT_DIR}/models/scaler_water_mar.joblib')

In [7]:
water_mcar_scaled_df = pd.DataFrame(scaler_water_mcar.transform(water_df[water_df.water_mcar.isnull()].drop(['name','water_mcar', 'water_mar'], axis = 'columns')),columns = water_df.drop(['name','water_mcar', 'water_mar'], axis = 'columns').columns)
# a = np.array(x) # your x
# b = np.array(y) # your y
# mses = ((a-b)**2).mean(axis=1)

Feature names unseen at fit time:
- water
Feature names seen at fit time, yet now missing:
- water_mcar



In [41]:
scaler_water_mcar.transformwater_df[water_df.water_mcar.isnull()]['water']

1        8.82
2       66.10
3       92.07
13      57.40
20       3.50
        ...  
7166    62.22
7167    44.92
7170    54.74
7186    66.27
7187    47.33
Name: water, Length: 1476, dtype: float64

In [20]:
water_mcar_train_df = water_mcar_scaled_df[water_mcar_scaled_df.dataset_type == 'training'].drop(['name','dataset_type','serving_size'], axis = 'columns') 
water_mcar_test_df = water_mcar_scaled_df[water_mcar_scaled_df.dataset_type == 'validation'].drop(['name','dataset_type','serving_size'], axis = 'columns') 

water_mcar_train_df

Unnamed: 0,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,niacin,pantothenic_acid,riboflavin,thiamin,vitamin_a,vitamin_a_rae,carotene_alpha,carotene_beta,cryptoxanthin_beta,lutein_zeaxanthin,vitamin_b12,vitamin_b6,vitamin_c,vitamin_d,vitamin_e,tocopherol_alpha,vitamin_k,calcium,copper,irom,magnesium,manganese,phosphorous,potassium,selenium,zink,protein,alanine,arginine,aspartic_acid,cystine,glutamic_acid,glycine,histidine,hydroxyproline,isoleucine,leucine,lysine,methionine,phenylalanine,proline,serine,threonine,tryptophan,tyrosine,valine,carbohydrate,fiber,sugars,fructose,galactose,glucose,lactose,maltose,sucrose,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water_mcar
0,3.596307,0.291893,-0.366113,-0.435551,0.228384,-0.209526,-0.198124,-0.598476,0.235663,-0.329650,0.875784,-0.151787,-0.116302,-0.06443,-0.077065,0.039505,-0.097625,-0.312098,-0.176382,-0.096600,-0.125774,0.091222,0.091222,-0.080784,-0.047544,1.717884,-0.076167,1.442291,0.626420,0.574706,0.415869,-0.376594,0.631948,-0.397119,-0.271142,0.627573,0.017739,0.191368,0.047029,-0.112145,-0.164825,-0.367017,-0.253714,-0.253840,-0.543016,-0.253487,-0.025040,-0.283553,0.073584,-0.258726,-0.157730,-0.349888,-0.193107,-0.330509,1.660769,-0.194171,-0.121733,-0.062457,-0.114107,-0.07869,-0.17322,0.973011,3.595641,0.289465,4.968632,3.479097,-0.366113,-0.024432,-0.140161,-0.032626,-0.083373,-1.540787
4,-0.722310,-0.580640,-0.366113,-0.431514,-0.269964,0.582661,-0.198124,-0.523319,-0.349968,0.476815,-0.060213,1.215804,0.165906,-0.06443,2.799010,-0.039304,2.162443,-0.312098,-0.309146,1.007493,-0.125774,0.239586,0.239586,1.618233,0.158546,0.142626,-0.122968,0.157854,0.034281,-0.664367,1.170771,-0.476189,-0.530305,-0.805557,-0.859682,-0.565207,-0.841020,-0.431133,-0.933870,-0.779530,-0.554229,-0.367017,-0.397900,-0.478036,-0.584613,-0.596364,-0.507781,-0.882456,-0.907343,-0.542273,-0.501779,-0.441628,-0.461845,-0.585074,0.345494,-0.265708,-0.162020,-0.062457,-0.142311,-0.07869,-0.17322,-0.156322,-0.719623,-0.587547,-0.622010,-0.412658,-0.366113,-0.024432,0.057987,-0.032626,-0.083373,1.226795
5,0.628393,0.844497,0.203946,-0.356145,0.746522,-0.239995,-0.198124,0.442654,0.075535,-0.131745,-0.265675,-0.167846,-0.119859,-0.06443,-0.106167,-0.039304,-0.117689,0.189493,-0.342337,-0.120460,-0.110899,-0.195934,-0.195934,-0.079168,-0.348324,-0.143630,-0.239971,-0.247758,-0.074419,-0.110495,-0.180439,0.138548,0.315996,0.323254,0.616858,0.387023,0.505818,0.516766,0.354331,0.412333,0.524525,-0.367017,0.624687,0.497107,0.649102,0.544361,0.493228,0.262554,0.365378,0.563356,0.606823,0.495610,0.642583,-0.823283,-0.479340,-0.490007,-0.162020,-0.062457,-0.142311,-0.07869,-0.17322,-0.156322,0.653774,0.872782,0.687107,-0.130826,0.203946,-0.024432,-0.425862,-0.032626,-0.083373,0.344363
6,0.688963,1.571608,0.196137,0.697674,-0.223187,0.095161,-0.198124,-0.715123,0.612302,0.555977,-0.435857,0.067303,0.165906,-0.06443,-0.094124,-0.039304,-0.117689,-0.029817,-0.141117,-0.120460,0.008100,-0.193541,-0.193541,-0.105033,1.723714,-0.279136,-0.443890,-0.264659,-0.071447,0.974408,-0.291454,-0.009126,0.025433,0.639086,0.354461,0.034299,0.349596,-0.112809,1.311633,-0.221166,0.942871,-0.367017,0.945312,1.097864,0.957531,1.005926,1.393926,2.988139,1.398041,0.579676,1.478413,1.956015,1.311829,-0.806929,-0.479340,-0.455729,-0.162020,-0.062457,-0.142311,-0.07869,-0.17322,-0.156322,0.705269,1.610147,0.333480,-0.336492,0.196137,-0.024432,0.869012,-0.032626,-0.083373,0.085934
7,0.325545,-0.202543,-0.366113,0.223927,0.975007,0.399848,-0.198124,1.754657,-0.413117,1.575191,1.788951,-0.167846,-0.119859,-0.06443,-0.106167,-0.039304,-0.117689,0.599886,2.499641,-0.120460,-0.125774,0.581780,0.581780,-0.137364,0.091706,1.252082,-0.164755,-0.213957,-0.077391,1.562539,1.018522,-0.472754,-0.251025,0.951020,-0.859682,-0.839409,-0.841020,-0.883861,-0.933870,-0.779530,-0.854175,-0.367017,-0.891171,-0.904661,-0.834197,-0.856819,-0.915290,-0.882456,-0.907343,-0.882938,-0.868764,-0.882973,-0.905697,-0.503300,0.880521,-0.430393,-0.162020,-0.062457,-0.142311,-0.07869,-0.17322,-0.156322,0.326024,-0.195081,-0.029751,1.235772,-0.366113,-0.024432,1.477280,-0.032626,-0.083373,-0.143182
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7194,-0.552715,-0.406134,0.118047,-0.362874,0.660166,-0.346635,-0.198124,0.543009,-0.145486,-0.072373,-0.363218,-0.164692,-0.116302,-0.06443,-0.106167,-0.039304,-0.117689,0.044010,0.696956,-0.120460,-0.118337,-0.188755,-0.188755,-0.113116,-0.365034,-0.233403,-0.109596,-0.399863,-0.076765,0.243526,0.101856,0.251879,0.389342,0.994886,1.295829,1.151049,1.271213,0.806795,1.122317,0.920606,1.458568,1.517980,1.180564,1.294852,1.444524,1.236709,1.051201,0.853207,1.091761,1.371160,1.134364,1.284080,1.126313,-0.823283,-0.479340,-0.490007,-0.162020,-0.062457,-0.142311,-0.07869,-0.17322,-0.156322,-0.552416,-0.412697,-0.416998,-0.424165,0.118047,-0.024432,-0.315268,-0.032626,-0.083373,0.783726
7195,-0.225639,-0.042578,0.485071,-0.368257,-0.500247,-0.377104,-0.198124,0.816271,0.022911,0.585663,-0.224168,-0.167846,-0.119859,-0.06443,-0.106167,-0.039304,-0.117689,0.328462,-0.321593,-0.120460,-0.125774,-0.198327,-0.198327,-0.137364,-0.365034,-0.121610,-0.106253,-0.230858,-0.072855,0.397696,-0.288282,-0.438411,0.567065,1.593409,1.779115,1.351715,1.567018,1.613217,1.369017,1.349324,1.611172,-0.367017,1.818021,1.600672,1.816871,1.645524,1.602905,1.165033,1.369069,1.701625,1.776588,1.584095,1.863177,-0.823283,-0.479340,-0.490007,-0.162020,-0.062457,-0.142311,-0.07869,-0.17322,-0.156322,-0.227696,-0.048015,-0.152704,-0.373753,0.485071,-0.024432,-0.089472,-0.032626,-0.083373,0.360536
7196,0.628393,1.135341,0.242991,-0.383062,-0.500247,-0.369487,-0.198124,0.570813,-0.022195,0.140375,-0.224168,-0.167846,-0.119859,-0.06443,-0.106167,-0.039304,-0.117689,0.213379,-0.383826,-0.120460,-0.125774,-0.193541,-0.193541,-0.137364,-0.365034,-0.174119,-0.250000,-0.349161,-0.074575,-0.047685,-0.453219,-0.462451,0.028254,0.340800,0.633165,0.399486,0.520608,0.530914,0.368811,0.425593,0.540311,-0.367017,0.641761,0.512343,0.665335,0.560846,0.507856,0.275753,0.379865,0.577636,0.629759,0.512966,0.659921,-0.823283,-0.479340,-0.490007,-0.162020,-0.062457,-0.142311,-0.07869,-0.17322,-0.156322,0.613184,1.073525,0.566349,-0.289733,0.242991,-0.024432,-0.402822,-0.032626,-0.083373,0.355482
7197,-0.583000,-0.449760,0.102428,-0.364220,0.654769,-0.346635,-0.198124,0.607741,-0.146238,-0.196064,-0.363218,-0.166699,-0.118673,-0.06443,-0.106167,-0.039304,-0.117689,0.087438,0.723923,-0.120460,-0.118337,-0.186362,-0.186362,-0.113116,-0.365034,-0.243566,-0.256686,-0.399863,-0.077234,0.260656,0.127231,0.269050,0.318817,0.987088,1.401085,1.296875,1.440376,0.927051,1.300906,0.842524,1.640114,1.023168,1.370284,1.487486,1.658598,1.381772,1.212115,0.887855,1.238693,1.566991,1.325502,1.482437,1.277153,-0.823283,-0.479340,-0.490007,-0.162020,-0.062457,-0.142311,-0.07869,-0.17322,-0.156322,-0.580284,-0.451537,-0.456518,-0.426174,0.102428,-0.024432,-0.319876,-0.032626,-0.083373,0.814724


In [32]:
input_shape = 72
latent_shape = 40
enc_input = keras.Input(shape = (input_shape,),name = 'input_data')
latent_layer = keras.layers.Dense(latent_shape, activation = 'relu')(enc_input)
encoder = keras.Model(enc_input,latent_layer, name = "encoder")

# dec_output = keras.layers.Dense(input_shape, activation = 'relu')(latent_layer)
dec_output = keras.layers.Dense(input_shape)(latent_layer)
# dec_output_reshaped = keras.layers.Reshape((input_shape,))(dec_output)
opt = keras.optimizers.Adam(lr = 0.0001)
auto_encoder = keras.Model(enc_input,dec_output, name="auto_encoder")
auto_encoder.summary()

Model: "auto_encoder"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_data (InputLayer)     [(None, 72)]              0         
                                                                 
 dense_2 (Dense)             (None, 40)                2920      
                                                                 
 dense_3 (Dense)             (None, 72)                2952      
                                                                 
Total params: 5,872
Trainable params: 5,872
Non-trainable params: 0
_________________________________________________________________


  super().__init__(name, **kwargs)


In [33]:
callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)
auto_encoder.compile(opt, loss = "mse")
auto_encoder.fit(water_mcar_train_df,water_mcar_train_df, epochs = 1000, validation_split = 0.1, callbacks=[callback])

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
E

Epoch 160/1000
Epoch 161/1000
Epoch 162/1000
Epoch 163/1000
Epoch 164/1000
Epoch 165/1000
Epoch 166/1000
Epoch 167/1000
Epoch 168/1000
Epoch 169/1000
Epoch 170/1000
Epoch 171/1000
Epoch 172/1000
Epoch 173/1000
Epoch 174/1000
Epoch 175/1000
Epoch 176/1000
Epoch 177/1000
Epoch 178/1000
Epoch 179/1000
Epoch 180/1000
Epoch 181/1000
Epoch 182/1000
Epoch 183/1000
Epoch 184/1000
Epoch 185/1000
Epoch 186/1000
Epoch 187/1000
Epoch 188/1000
Epoch 189/1000
Epoch 190/1000
Epoch 191/1000
Epoch 192/1000
Epoch 193/1000
Epoch 194/1000
Epoch 195/1000
Epoch 196/1000
Epoch 197/1000
Epoch 198/1000
Epoch 199/1000
Epoch 200/1000
Epoch 201/1000
Epoch 202/1000
Epoch 203/1000
Epoch 204/1000
Epoch 205/1000
Epoch 206/1000
Epoch 207/1000
Epoch 208/1000
Epoch 209/1000
Epoch 210/1000
Epoch 211/1000
Epoch 212/1000
Epoch 213/1000
Epoch 214/1000
Epoch 215/1000
Epoch 216/1000
Epoch 217/1000
Epoch 218/1000
Epoch 219/1000
Epoch 220/1000
Epoch 221/1000
Epoch 222/1000


<keras.callbacks.History at 0x216d711b7f0>

In [16]:
water_mcar_train_df[water_mcar_train_df.index == 0] 

Unnamed: 0,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,niacin,pantothenic_acid,riboflavin,thiamin,vitamin_a,vitamin_a_rae,carotene_alpha,carotene_beta,cryptoxanthin_beta,lutein_zeaxanthin,vitamin_b12,vitamin_b6,vitamin_c,vitamin_d,vitamin_e,tocopherol_alpha,vitamin_k,calcium,copper,irom,magnesium,manganese,phosphorous,potassium,selenium,zink,protein,alanine,arginine,aspartic_acid,cystine,glutamic_acid,glycine,histidine,hydroxyproline,isoleucine,leucine,lysine,methionine,phenylalanine,proline,serine,threonine,tryptophan,tyrosine,valine,carbohydrate,fiber,sugars,fructose,galactose,glucose,lactose,maltose,sucrose,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water_mcar
0,3.596307,0.291893,-0.366113,-0.435551,0.228384,-0.209526,-0.198124,-0.598476,0.235663,-0.32965,0.875784,-0.151787,-0.116302,-0.06443,-0.077065,0.039505,-0.097625,-0.312098,-0.176382,-0.0966,-0.125774,0.091222,0.091222,-0.080784,-0.047544,1.717884,-0.076167,1.442291,0.62642,0.574706,0.415869,-0.376594,0.631948,-0.397119,-0.271142,0.627573,0.017739,0.191368,0.047029,-0.112145,-0.164825,-0.367017,-0.253714,-0.25384,-0.543016,-0.253487,-0.02504,-0.283553,0.073584,-0.258726,-0.15773,-0.349888,-0.193107,-0.330509,1.660769,-0.194171,-0.121733,-0.062457,-0.114107,-0.07869,-0.17322,0.973011,3.595641,0.289465,4.968632,3.479097,-0.366113,-0.024432,-0.140161,-0.032626,-0.083373,-1.540787


In [34]:
max_water_mcar = water_mcar_train_df.water_mcar.max()
min_water_mcar = water_mcar_train_df.water_mcar.min()

In [67]:
water_mcar_test_df['water_mcar'] = 0#np.random.uniform(min_water_mcar, max_water_mcar, size=len(water_mcar_test_df))

In [37]:
water_mcar_test_df

Unnamed: 0,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,niacin,pantothenic_acid,riboflavin,thiamin,vitamin_a,vitamin_a_rae,carotene_alpha,carotene_beta,cryptoxanthin_beta,lutein_zeaxanthin,vitamin_b12,vitamin_b6,vitamin_c,vitamin_d,vitamin_e,tocopherol_alpha,vitamin_k,calcium,copper,irom,magnesium,manganese,phosphorous,potassium,selenium,zink,protein,alanine,arginine,aspartic_acid,cystine,glutamic_acid,glycine,histidine,hydroxyproline,isoleucine,leucine,lysine,methionine,phenylalanine,proline,serine,threonine,tryptophan,tyrosine,valine,carbohydrate,fiber,sugars,fructose,galactose,glucose,lactose,maltose,sucrose,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water_mcar
1,-0.619342,-0.551556,-0.366113,-0.419401,-0.264566,-0.377104,-0.198124,-0.121463,0.295053,0.016684,0.315431,-0.165265,-0.119859,-0.06443,-0.101149,-0.039304,-0.039796,-0.312098,0.387865,-0.120460,-0.125774,-0.224649,-0.224649,-0.106650,0.565155,1.057292,0.776284,2.507023,1.367768,1.442629,0.469791,-0.355989,0.378058,0.005471,0.247723,-0.195034,-0.083019,0.785573,0.862210,-0.076787,-0.062212,-0.367017,0.059323,0.257674,-0.452719,0.554252,0.543382,0.213058,0.379865,0.157415,0.193964,0.252622,0.283687,1.776759,1.304084,-0.352894,0.311358,-0.062457,0.372412,-0.07869,-0.150615,0.023213,-0.620268,-0.544198,-0.549419,-0.273112,-0.366113,-0.024432,0.265352,-0.032626,-0.083373,-0.003348
2,-0.643570,-0.435218,-0.358304,-0.373641,-0.361717,-0.346635,-0.198124,-0.838286,-0.244720,-0.411286,-0.437932,-0.154655,-0.105630,-0.06443,-0.105163,0.004479,-0.109427,-0.283870,-0.564302,-0.070570,-0.125774,-0.241400,-0.241400,-0.137364,-0.136664,-0.267279,-0.475648,-0.467465,-0.075670,-0.778567,-0.580093,-0.455583,-0.510558,-1.183777,-0.859682,-0.839409,-0.841020,-0.883861,-0.933870,-0.779530,-0.854175,-0.367017,-0.891171,-0.904661,-0.834197,-0.856819,-0.915290,-0.882456,-0.907343,-0.882938,-0.868764,-0.882973,-0.905697,0.257549,-0.189534,1.322267,-0.162020,-0.062457,-0.142311,-0.07869,-0.173220,-0.156322,-0.643289,-0.440772,-0.557515,-0.454119,-0.358304,-0.024432,-0.642443,-0.032626,-0.083373,1.408102
3,-0.746538,-0.595182,-0.366113,-0.395175,0.296750,0.057075,-0.198124,-0.741841,0.088316,-0.502818,-0.390198,-0.167846,-0.119859,-0.06443,-0.106167,-0.039304,-0.116508,-0.312098,-0.230317,0.925066,-0.125774,-0.224649,-0.224649,0.113204,-0.314904,-0.248647,-0.428847,-0.349161,-0.053148,-0.755727,0.063794,-0.486491,-0.569799,-1.103844,-0.687716,-0.732221,-0.677402,-0.742383,-0.796040,-0.674929,-0.706833,-0.367017,-0.756470,-0.789298,-0.614036,-0.790881,-0.779454,-0.765315,-0.729369,-0.727905,-0.715853,-0.756520,-0.688973,-0.646582,-0.033484,-0.347678,0.814953,-0.062457,0.520483,-0.07869,-0.173220,-0.156322,-0.747491,-0.590602,-0.625578,-0.463069,-0.366113,-0.024432,-0.476551,-0.032626,-0.083373,1.651227
13,-0.710196,-0.580640,-0.366113,-0.388446,-0.500247,-0.232377,-0.198124,-0.495732,-0.325912,-0.282648,0.292602,-0.167846,-0.119859,-0.06443,-0.106167,-0.039304,-0.117689,-0.312098,-0.456431,-0.120460,-0.125774,-0.243793,-0.243793,-0.137364,-0.270344,2.032935,-0.164755,-0.197057,0.005816,-0.750017,0.974116,-0.483057,-0.253846,-0.160244,-0.271142,-0.325903,0.762798,0.509692,-0.504827,-0.204961,-0.220078,-0.367017,-0.084863,-0.208130,-0.233572,-0.385363,0.027205,0.237806,0.052889,0.037060,1.057909,-0.007722,0.132847,0.175776,-0.479340,-0.490007,-0.162020,-0.062457,-0.142311,-0.07869,-0.173220,-0.156322,-0.709930,-0.577219,-0.598133,-0.436950,-0.366113,-0.024432,0.094852,-0.032626,-0.083373,-1.192960
20,-0.740481,-0.595182,-0.366113,-0.426130,-0.500247,-0.377104,-0.198124,-0.851971,-0.413117,-0.651247,-0.493967,-0.167846,-0.119859,-0.06443,-0.106167,-0.039304,-0.117689,-0.312098,-0.612014,-0.120460,-0.125774,-0.243793,-0.243793,-0.137364,-0.326044,-0.314706,-0.499049,-0.602669,-0.077391,-1.006968,-0.884591,-0.489926,-0.645966,-1.291004,-0.859682,-0.839409,-0.841020,-0.883861,-0.933870,-0.779530,-0.854175,-0.367017,-0.891171,-0.904661,-0.834197,-0.856819,-0.915290,-0.882456,-0.907343,-0.882938,-0.868764,-0.882973,-0.905697,2.547208,0.055687,-0.490007,-0.162020,-0.062457,-0.142311,-0.07869,-0.173220,-0.156322,-0.740221,-0.601075,-0.617482,-0.426356,-0.366113,-0.024432,-0.227714,-0.032626,-0.083373,1.330437
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7166,-0.213525,-0.115289,0.321081,-0.357491,1.439172,-0.308549,-0.198124,0.961156,0.004869,-0.292543,-0.338314,-0.167846,-0.119859,-0.06443,-0.106167,-0.039304,-0.117689,0.072238,0.609829,-0.120460,-0.081150,-0.143288,-0.143288,-0.113116,-0.348324,-0.163956,-0.178126,-0.197057,-0.075670,0.243526,0.231902,0.609044,0.851987,1.466686,1.690166,1.440208,1.541135,1.698103,1.343811,1.758889,1.521714,3.132008,1.550517,1.544079,1.591637,1.572992,1.419004,1.343219,1.398041,1.422158,0.553304,1.351026,1.528554,-0.823283,-0.479340,-0.490007,-0.162020,-0.062457,-0.142311,-0.07869,-0.173220,-0.156322,-0.216185,-0.108383,-0.134591,-0.406265,0.321081,-0.024432,-0.319876,-0.032626,-0.083373,-0.744752
7167,0.628393,0.728159,0.570970,-0.338648,-0.500247,-0.232377,-0.198124,0.583847,0.045465,-0.032792,-0.348691,-0.167846,-0.119859,-0.06443,-0.106167,-0.039304,-0.117689,0.250292,-0.383826,-0.120460,-0.125774,-0.243793,-0.243793,-0.137364,-0.298194,-0.075877,-0.092881,-0.163256,-0.073793,0.192135,0.101856,0.794495,1.114340,1.740603,1.912537,1.462643,1.689038,1.740546,1.485931,1.458345,1.737465,-0.367017,1.954619,1.728007,1.951808,1.774102,1.730382,1.268974,1.484959,1.832179,1.906562,1.708069,2.003614,-0.823283,-0.479340,-0.490007,-0.162020,-0.062457,-0.142311,-0.07869,-0.173220,-0.156322,0.607731,0.730226,0.691223,-0.176489,0.570970,-0.024432,-0.310660,-0.032626,-0.083373,0.690429
7170,0.446684,0.655448,0.305463,-0.339994,0.863463,-0.331401,-0.198124,0.157880,0.147706,-0.186169,-0.356992,-0.160964,-0.111559,-0.06443,-0.106167,-0.039304,-0.117689,0.315434,0.128560,-0.120460,-0.081150,-0.219863,-0.219863,-0.111499,-0.353894,-0.180894,-0.087867,-0.247758,-0.075514,0.089355,0.171637,0.413290,1.788559,1.141105,1.249873,1.207136,1.226842,0.969494,1.181847,0.897034,1.271759,1.635792,1.121751,1.279615,1.378577,1.480679,1.067919,0.808661,1.075205,1.356881,1.279629,1.249367,1.048292,-0.823283,-0.479340,-0.490007,-0.162020,-0.062457,-0.142311,-0.07869,-0.173220,-0.156322,0.468392,0.652693,0.670502,-0.322793,0.305463,-0.024432,-0.393606,-0.032626,-0.083373,1.537408
7186,-0.437633,-0.347965,0.125856,-0.339994,1.273656,-0.316166,-0.198124,-0.062380,-0.413117,-0.082268,-0.263600,-0.167846,-0.119859,-0.06443,-0.106167,-0.039304,-0.117689,0.424004,0.055955,-0.120460,-0.125774,-0.205506,-0.205506,-0.137364,-0.392884,-0.114835,-0.009308,-0.213957,-0.077391,0.289206,0.044762,0.361776,1.562879,1.233710,1.460384,1.335513,1.545757,1.082676,1.292862,1.034047,1.342799,1.046731,1.645377,1.587612,1.632219,1.566398,1.431543,0.924152,1.242832,1.558831,1.271983,1.470040,1.521619,-0.823283,-0.479340,-0.490007,-0.162020,-0.062457,-0.142311,-0.07869,-0.173220,-0.156322,-0.436098,-0.343601,-0.272363,-0.398593,0.125856,-0.024432,-0.200066,-0.032626,-0.083373,0.186553


In [58]:
water_mcar_test_df

Unnamed: 0,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,niacin,pantothenic_acid,riboflavin,thiamin,vitamin_a,vitamin_a_rae,carotene_alpha,carotene_beta,cryptoxanthin_beta,lutein_zeaxanthin,vitamin_b12,vitamin_b6,vitamin_c,vitamin_d,vitamin_e,tocopherol_alpha,vitamin_k,calcium,copper,irom,magnesium,manganese,phosphorous,potassium,selenium,zink,protein,alanine,arginine,aspartic_acid,cystine,glutamic_acid,glycine,histidine,hydroxyproline,isoleucine,leucine,lysine,methionine,phenylalanine,proline,serine,threonine,tryptophan,tyrosine,valine,carbohydrate,fiber,sugars,fructose,galactose,glucose,lactose,maltose,sucrose,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water_mcar
1,-0.619342,-0.551556,-0.366113,-0.419401,-0.264566,-0.377104,-0.198124,-0.121463,0.295053,0.016684,0.315431,-0.165265,-0.119859,-0.06443,-0.101149,-0.039304,-0.039796,-0.312098,0.387865,-0.120460,-0.125774,-0.224649,-0.224649,-0.106650,0.565155,1.057292,0.776284,2.507023,1.367768,1.442629,0.469791,-0.355989,0.378058,0.005471,0.247723,-0.195034,-0.083019,0.785573,0.862210,-0.076787,-0.062212,-0.367017,0.059323,0.257674,-0.452719,0.554252,0.543382,0.213058,0.379865,0.157415,0.193964,0.252622,0.283687,1.776759,1.304084,-0.352894,0.311358,-0.062457,0.372412,-0.07869,-0.150615,0.023213,-0.620268,-0.544198,-0.549419,-0.273112,-0.366113,-0.024432,0.265352,-0.032626,-0.083373,-0.003348
2,-0.643570,-0.435218,-0.358304,-0.373641,-0.361717,-0.346635,-0.198124,-0.838286,-0.244720,-0.411286,-0.437932,-0.154655,-0.105630,-0.06443,-0.105163,0.004479,-0.109427,-0.283870,-0.564302,-0.070570,-0.125774,-0.241400,-0.241400,-0.137364,-0.136664,-0.267279,-0.475648,-0.467465,-0.075670,-0.778567,-0.580093,-0.455583,-0.510558,-1.183777,-0.859682,-0.839409,-0.841020,-0.883861,-0.933870,-0.779530,-0.854175,-0.367017,-0.891171,-0.904661,-0.834197,-0.856819,-0.915290,-0.882456,-0.907343,-0.882938,-0.868764,-0.882973,-0.905697,0.257549,-0.189534,1.322267,-0.162020,-0.062457,-0.142311,-0.07869,-0.173220,-0.156322,-0.643289,-0.440772,-0.557515,-0.454119,-0.358304,-0.024432,-0.642443,-0.032626,-0.083373,1.408102
3,-0.746538,-0.595182,-0.366113,-0.395175,0.296750,0.057075,-0.198124,-0.741841,0.088316,-0.502818,-0.390198,-0.167846,-0.119859,-0.06443,-0.106167,-0.039304,-0.116508,-0.312098,-0.230317,0.925066,-0.125774,-0.224649,-0.224649,0.113204,-0.314904,-0.248647,-0.428847,-0.349161,-0.053148,-0.755727,0.063794,-0.486491,-0.569799,-1.103844,-0.687716,-0.732221,-0.677402,-0.742383,-0.796040,-0.674929,-0.706833,-0.367017,-0.756470,-0.789298,-0.614036,-0.790881,-0.779454,-0.765315,-0.729369,-0.727905,-0.715853,-0.756520,-0.688973,-0.646582,-0.033484,-0.347678,0.814953,-0.062457,0.520483,-0.07869,-0.173220,-0.156322,-0.747491,-0.590602,-0.625578,-0.463069,-0.366113,-0.024432,-0.476551,-0.032626,-0.083373,1.651227
13,-0.710196,-0.580640,-0.366113,-0.388446,-0.500247,-0.232377,-0.198124,-0.495732,-0.325912,-0.282648,0.292602,-0.167846,-0.119859,-0.06443,-0.106167,-0.039304,-0.117689,-0.312098,-0.456431,-0.120460,-0.125774,-0.243793,-0.243793,-0.137364,-0.270344,2.032935,-0.164755,-0.197057,0.005816,-0.750017,0.974116,-0.483057,-0.253846,-0.160244,-0.271142,-0.325903,0.762798,0.509692,-0.504827,-0.204961,-0.220078,-0.367017,-0.084863,-0.208130,-0.233572,-0.385363,0.027205,0.237806,0.052889,0.037060,1.057909,-0.007722,0.132847,0.175776,-0.479340,-0.490007,-0.162020,-0.062457,-0.142311,-0.07869,-0.173220,-0.156322,-0.709930,-0.577219,-0.598133,-0.436950,-0.366113,-0.024432,0.094852,-0.032626,-0.083373,-1.192960
20,-0.740481,-0.595182,-0.366113,-0.426130,-0.500247,-0.377104,-0.198124,-0.851971,-0.413117,-0.651247,-0.493967,-0.167846,-0.119859,-0.06443,-0.106167,-0.039304,-0.117689,-0.312098,-0.612014,-0.120460,-0.125774,-0.243793,-0.243793,-0.137364,-0.326044,-0.314706,-0.499049,-0.602669,-0.077391,-1.006968,-0.884591,-0.489926,-0.645966,-1.291004,-0.859682,-0.839409,-0.841020,-0.883861,-0.933870,-0.779530,-0.854175,-0.367017,-0.891171,-0.904661,-0.834197,-0.856819,-0.915290,-0.882456,-0.907343,-0.882938,-0.868764,-0.882973,-0.905697,2.547208,0.055687,-0.490007,-0.162020,-0.062457,-0.142311,-0.07869,-0.173220,-0.156322,-0.740221,-0.601075,-0.617482,-0.426356,-0.366113,-0.024432,-0.227714,-0.032626,-0.083373,1.330437
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7166,-0.213525,-0.115289,0.321081,-0.357491,1.439172,-0.308549,-0.198124,0.961156,0.004869,-0.292543,-0.338314,-0.167846,-0.119859,-0.06443,-0.106167,-0.039304,-0.117689,0.072238,0.609829,-0.120460,-0.081150,-0.143288,-0.143288,-0.113116,-0.348324,-0.163956,-0.178126,-0.197057,-0.075670,0.243526,0.231902,0.609044,0.851987,1.466686,1.690166,1.440208,1.541135,1.698103,1.343811,1.758889,1.521714,3.132008,1.550517,1.544079,1.591637,1.572992,1.419004,1.343219,1.398041,1.422158,0.553304,1.351026,1.528554,-0.823283,-0.479340,-0.490007,-0.162020,-0.062457,-0.142311,-0.07869,-0.173220,-0.156322,-0.216185,-0.108383,-0.134591,-0.406265,0.321081,-0.024432,-0.319876,-0.032626,-0.083373,-0.744752
7167,0.628393,0.728159,0.570970,-0.338648,-0.500247,-0.232377,-0.198124,0.583847,0.045465,-0.032792,-0.348691,-0.167846,-0.119859,-0.06443,-0.106167,-0.039304,-0.117689,0.250292,-0.383826,-0.120460,-0.125774,-0.243793,-0.243793,-0.137364,-0.298194,-0.075877,-0.092881,-0.163256,-0.073793,0.192135,0.101856,0.794495,1.114340,1.740603,1.912537,1.462643,1.689038,1.740546,1.485931,1.458345,1.737465,-0.367017,1.954619,1.728007,1.951808,1.774102,1.730382,1.268974,1.484959,1.832179,1.906562,1.708069,2.003614,-0.823283,-0.479340,-0.490007,-0.162020,-0.062457,-0.142311,-0.07869,-0.173220,-0.156322,0.607731,0.730226,0.691223,-0.176489,0.570970,-0.024432,-0.310660,-0.032626,-0.083373,0.690429
7170,0.446684,0.655448,0.305463,-0.339994,0.863463,-0.331401,-0.198124,0.157880,0.147706,-0.186169,-0.356992,-0.160964,-0.111559,-0.06443,-0.106167,-0.039304,-0.117689,0.315434,0.128560,-0.120460,-0.081150,-0.219863,-0.219863,-0.111499,-0.353894,-0.180894,-0.087867,-0.247758,-0.075514,0.089355,0.171637,0.413290,1.788559,1.141105,1.249873,1.207136,1.226842,0.969494,1.181847,0.897034,1.271759,1.635792,1.121751,1.279615,1.378577,1.480679,1.067919,0.808661,1.075205,1.356881,1.279629,1.249367,1.048292,-0.823283,-0.479340,-0.490007,-0.162020,-0.062457,-0.142311,-0.07869,-0.173220,-0.156322,0.468392,0.652693,0.670502,-0.322793,0.305463,-0.024432,-0.393606,-0.032626,-0.083373,1.537408
7186,-0.437633,-0.347965,0.125856,-0.339994,1.273656,-0.316166,-0.198124,-0.062380,-0.413117,-0.082268,-0.263600,-0.167846,-0.119859,-0.06443,-0.106167,-0.039304,-0.117689,0.424004,0.055955,-0.120460,-0.125774,-0.205506,-0.205506,-0.137364,-0.392884,-0.114835,-0.009308,-0.213957,-0.077391,0.289206,0.044762,0.361776,1.562879,1.233710,1.460384,1.335513,1.545757,1.082676,1.292862,1.034047,1.342799,1.046731,1.645377,1.587612,1.632219,1.566398,1.431543,0.924152,1.242832,1.558831,1.271983,1.470040,1.521619,-0.823283,-0.479340,-0.490007,-0.162020,-0.062457,-0.142311,-0.07869,-0.173220,-0.156322,-0.436098,-0.343601,-0.272363,-0.398593,0.125856,-0.024432,-0.200066,-0.032626,-0.083373,0.186553


In [69]:
water_mcar_pred = pd.DataFrame(auto_encoder.predict(water_mcar_test_df), columns = water_mcar_test_df.columns)



In [71]:
water_mcar_true = pd.DataFrame(scaler_water_mcar.transform(water_df[water_df.water_mcar.isnull()].drop(['name','water_mcar', 'water_mar'], axis = 'columns')),columns = water_df.drop(['name','water_mcar', 'water_mar'], axis = 'columns').columns).drop('serving_size',axis = 'columns')# a = np.array(x) # your x
# b = np.array(y) # your y
# mses = ((a-b)**2).mean(axis=1)

Feature names unseen at fit time:
- water
Feature names seen at fit time, yet now missing:
- water_mcar



In [77]:
from sklearn.metrics import mean_squared_error
pd.DataFrame({'columns':water_mcar_true.columns,
              'mse':list(mean_squared_error(water_mcar_true, water_mcar_pred, multioutput='raw_values'))}).nlargest(10,'mse')

Unnamed: 0,columns,mse
31,selenium,1.593192
57,galactose,0.66808
58,glucose,0.558205
29,phosphorous,0.404909
6,folic_acid,0.256179
56,fructose,0.250337
71,water,0.208576
5,folate,0.201098
33,protein,0.189536
17,vitamin_b12,0.161144
