# Variational autoencoder
This notebook will investiga

In [1]:
%matplotlib
import numpy as np
import numpy.random as rnd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
import pandas as pd
import seaborn as sns
from variational_autoencoder import VariationalAutoencoder
from autoencoder import Autoencoder
from cont_bern_loss import cont_bern_loss

# For dimension reduction
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA

# for visualization and evaluation
from latent_plane_mosaic import LatentPlaneMosaic
from latent_interpolation_mosaic import LatentInterpolationMosaic
from sample_scatter_gui import SampleScatterGUI

Using matplotlib backend: Qt5Agg


In [2]:
model_df = pd.DataFrame(data = {
    'path'    : ["models\\ae", "models\\vae_10", "models\\vae_100" , "models\\vae_1000"],
    'ae_type' : ['ae', 'vae', 'vae', 'vae'],
    },
    index = ['ae', 'vae_10', 'vae_100', 'vae_1000'],
)


In [3]:
models = []
for name, model in model_df.iterrows():
    if model['ae_type'] == "ae":
        models.append(keras.models.load_model(model['path'], 
                         custom_objects={"Autoencoder": Autoencoder,
                                         "cont_bern_loss":  cont_bern_loss}))
    elif model['ae_type'] == "vae":
        models.append(keras.models.load_model(model['path'], 
                                 custom_objects={"VariationalAutoencoder": VariationalAutoencoder,
                                                 "cont_bern_loss":  cont_bern_loss}))
model_df['model'] = models    

In [4]:
num_models = model_df.shape[0]
latent_dim = 10

In [5]:
# Global settings
np.set_printoptions(precision=0)

## Preprocess data


In [6]:
num_samples = 10000
(train_digits, train_labels), (test_digits, test_labels) = keras.datasets.mnist.load_data()
train_digits = np.expand_dims(train_digits, -1).astype("float32") / 255
test_digits = np.expand_dims(test_digits, -1).astype("float32") / 255
input_shape = train_digits.shape[1:]
num_digits = len(train_labels)
sample_index = rnd.randint(0, num_digits, size = (num_samples,))
train_digits = train_digits[sample_index]
train_labels = train_labels[sample_index]
num_train = train_labels.shape[0]
num_test = test_labels.shape[0]
# Here we get the pure data
# train_digits
# test_digits
# train_labels
# test_labels

## Load Variational autoencoder

### Print setup details

## Reconstruct and dimension reductions

In [7]:
# Fill
df_list = []
latent_mean_list = []
latent_std_list =[]
latent_base_list = []

for name, model in model_df.iterrows():
    if model['ae_type'] == "ae":
        code = model['model'].encoder(test_digits).numpy()
        code_std = np.empty(shape = code.shape)*np.nan
        
    elif model['ae_type'] == "vae":
        digit_distribution = model['model'].encoder(test_digits).numpy()
        code = digit_distribution[:,0,:]
        code_std = np.sqrt(np.exp(digit_distribution[:,1,:]))
        
    latent_mean = np.mean(code)
    mean_distance = np.linalg.norm(code-latent_mean, axis = 1)
    code_tsne = TSNE(n_components = 2).fit_transform(code)
    
    temp_df = pd.DataFrame(data = name, columns = ["model_name"], index = range(num_test))
    temp_df['label'] = test_labels
    temp_df['distance'] = mean_distance
    temp_df[["latent_"+str(i) for i in range(latent_dim)]] = code
    temp_df[["latent_std_"+str(i) for i in range(latent_dim)]] = code_std
    temp_df[['tsne_0', 'tsne_1']] = code_tsne
    df_list.append(temp_df)
    
    # Calculate global code information
    pca = PCA().fit(code)
    latent_mean_list.append(pca.mean_)
    latent_std_list.append(np.sqrt(pca.explained_variance_))
    latent_base_list.append(pca.components_)
        
latent_df = pd.concat(df_list)
model_df['latent_mean'] = latent_mean_list
model_df['latent_std'] = latent_std_list
model_df['latent_base'] = latent_base_list


## Investigation

### Show reconstructions

In [47]:
num_col = 4
num_row = 3
num_img = num_row*num_col
rec_index = rnd.randint(num_samples,size =(num_row*num_col,) )
digits = test_digits
digits = digits[rec_index]

In [48]:

model = model_df.loc['ae','model']

reconstructions = model(digits)
i = 0
for col in range(num_col):
    for row in range(num_row): 
        pair = np.concatenate((digits[i], reconstructions[i]), axis = 1)
        if row == 0: 
            ver_img = pair
        else:
            ver_img = np.concatenate((ver_img,
                                 pair), axis = 0)
        i = i + 1
    if col == 0:
        img = ver_img
    else: 
        img = np.concatenate((img,
                              ver_img), axis = 1)

sns.heatmap(img[:,:,0], vmin=0, vmax = 1)

<AxesSubplot:>

In [51]:

num_dig = 14
rec_index = rnd.randint(num_test,size =(num_dig,) )
digits = test_digits
digits = digits[rec_index]

reconstructions = []
for name, model in model_df.iterrows():
    reconstructions.append(model['model'](digits))

for dig_i in range(num_dig):
    rec_im = digits[dig_i]
    model_i = 0
    for name, model in model_df.iterrows():
        
        rec_im = np.concatenate((rec_im,
                         reconstructions[model_i][dig_i]),
                         axis = 0)
        model_i = model_i + 1
    if dig_i == 0:
        im = rec_im
    else:
        im = np.concatenate((im, rec_im), axis = 1)
    
sns.heatmap(im[:,:,0], vmin=0, vmax = 1)

<AxesSubplot:>

(1, 28, 28, 1)

### Latent

In [52]:

indeces = rnd.randint(num_samples,size = (3,))
indeces = [1,2,3]
ul = test_digits[indeces[0]]
ur = test_digits[indeces[1]]
dl = test_digits[indeces[2]]
z =  np.zeros(dl.shape)
corner_image = np.concatenate( (np.concatenate((ul,ur),axis = 1),
                                np.concatenate((dl,z ),axis = 1)), axis = 0)


In [54]:

model = model_df.loc['vae_10','model']
mosaic = LatentInterpolationMosaic(
                          model.encode,
                          model.decoder,
                          test_digits,
                          indeces,
                          num_row = 15,
                          num_col = 15).mosaic



sns.heatmap(mosaic[:,:,0], vmin = 0, vmax = 1)

<AxesSubplot:>

### Scatter

In [34]:
model_name = 'vae_1000'
scatter = latent_df[latent_df['model_name']==model_name][['tsne_0', 'tsne_1']].to_numpy()
SampleScatterGUI(scatter, test_labels, test_digits)


<sample_scatter_gui.SampleScatterGUI at 0x149c4e2ba90>

In [56]:
model_name = "vae_1000"
sns.jointplot(data = latent_df[latent_df['model_name'] == model_name ], x = 'tsne_0', y = 'tsne_1',
              hue = 'label',
              palette = 'colorblind')

<seaborn.axisgrid.JointGrid at 0x149e964e9a0>

In [57]:
model_name = "vae_1000"
sns.relplot(data = latent_df[latent_df['model_name'] == model_name ], x = 'tsne_0', y = 'tsne_1',
              hue = 'label',
              palette = 'colorblind',
           kind = 'scatter')

<seaborn.axisgrid.FacetGrid at 0x149f2ca4460>

### Spread

Here we have that the first 6 principal axises are almost one and the remaining 4 are almost zero (e-3). This is combination with that the cross covariance seem to be super much zero (e-15), indicates that we have a six dimesnioal sphere in this ten dimesional latent space. Interesting to see is also if you create a latent vector in the pca-vector space. Then comparing the latent value in each dimension times the spread in each dimension, gives you the importance of that dimension in that latent point. A low importance will not have an effect on the output (kind of like a low derivative...), while a hight value will give a big difference.

In [58]:
axis_index = [0,-1]
scaling_factors = np.array([5,5])

In [59]:
model_name = 'ae'
model = model_df.loc[model_name, 'model']
latent_vectors = model_df.loc[model_name, 'latent_base'][axis_index]
latent_origin  = model_df.loc[model_name, 'latent_mean']
mosaic = LatentPlaneMosaic(model.decoder,
                  latent_vectors = (scaling_factors*latent_vectors.T).T,
                  latent_origin = latent_origin,
                  num_row = 20,
                  num_col = 20).mosaic

#fig,ax = plt.subplots(1,1)
#ax.imshow(mosaic)
sns.heatmap(mosaic[:,:,0], vmin = 0, vmax = 1)

<AxesSubplot:>

## Spred 

In [23]:
## TODO
# Make a grid work with seaborn to plot the things nice
# Maybe do a pca on the 

In [41]:
latent_df.head()

Unnamed: 0,model_name,label,distance,latent_0,latent_1,latent_2,latent_3,latent_4,latent_5,latent_6,...,latent_std_2,latent_std_3,latent_std_4,latent_std_5,latent_std_6,latent_std_7,latent_std_8,latent_std_9,tsne_0,tsne_1
0,ae,7,16.084698,8.920861,2.935239,0.10104,3.835251,5.167331,0.044287,-0.737711,...,,,,,,,,,60.183018,2.488432
1,ae,2,9.284235,-4.63037,0.333097,0.051023,1.97057,-0.373991,-7.604153,-2.687356,...,,,,,,,,,1.227274,34.697578
2,ae,1,24.278584,-5.574288,9.404783,10.561562,3.208399,4.680079,-1.272882,-8.597842,...,,,,,,,,,49.863174,62.129429
3,ae,0,7.559016,-1.396393,-3.778943,-0.89094,1.304242,2.89106,-0.907814,-5.144459,...,,,,,,,,,-59.530056,-11.259049
4,ae,4,14.996903,0.393196,3.45392,1.677722,10.656785,-0.166942,-2.436691,-7.303797,...,,,,,,,,,25.870169,-68.021873


In [40]:
sns.catplot(data = latent_df.query("model_name != 'ae'"), x = 'label',y = 'distance', col = 'model_name', kind = 'violin' )

<seaborn.axisgrid.FacetGrid at 0x149a1bc3be0>

In [61]:
sns.catplot(data= latent_df,
            x = "model_name", y = 'distance',
            kind =  'violin')

<seaborn.axisgrid.FacetGrid at 0x149f29db370>

In [73]:
sns.relplot(data = latent_df,
            x = 'latent_8',
            y = 'latent_std_8',
           hue = 'model_name')

<seaborn.axisgrid.FacetGrid at 0x14a07e83460>

In [62]:
x_vars = ['latent_' + str(i) for i in range(0,3)]
y_vars = ['latent_std_' + str(i) for i in range(0,1)]
sns.pairplot(data = latent_df,
             x_vars = x_vars,
            kind  = 'hist',
            )

<seaborn.axisgrid.PairGrid at 0x149fa5dfca0>

In [75]:
latent_df.head()

Unnamed: 0,model_name,label,distance,latent_0,latent_1,latent_2,latent_3,latent_4,latent_5,latent_6,...,latent_std_2,latent_std_3,latent_std_4,latent_std_5,latent_std_6,latent_std_7,latent_std_8,latent_std_9,tsne_0,tsne_1
0,ae,7,16.084698,8.920861,2.935239,0.10104,3.835251,5.167331,0.044287,-0.737711,...,,,,,,,,,60.183018,2.488432
1,ae,2,9.284235,-4.63037,0.333097,0.051023,1.97057,-0.373991,-7.604153,-2.687356,...,,,,,,,,,1.227274,34.697578
2,ae,1,24.278584,-5.574288,9.404783,10.561562,3.208399,4.680079,-1.272882,-8.597842,...,,,,,,,,,49.863174,62.129429
3,ae,0,7.559016,-1.396393,-3.778943,-0.89094,1.304242,2.89106,-0.907814,-5.144459,...,,,,,,,,,-59.530056,-11.259049
4,ae,4,14.996903,0.393196,3.45392,1.677722,10.656785,-0.166942,-2.436691,-7.303797,...,,,,,,,,,25.870169,-68.021873


In [32]:
sns.set()
values = ['latent_std_'+str(i) for i in range(5,10)]
rows = len(values)
fig, axs = plt.subplots(rows,4, sharey = False, sharex = False)
fig.suptitle("Best title")
for row in range(rows):
    col = 0
    for name, model in model_df.iterrows():
        ax = axs[row,col]
        col = col+1
        sns.histplot(latent_df[latent_df['model_name']== name], x=values[row],
                     ax = ax,
                     binrange = (0,1.1))

In [33]:
sns.set()
values = ['latent_'+str(i) for i in range(5,10)]
rows = len(values)
fig, axs = plt.subplots(rows,4, sharey = False, sharex = False)
fig.suptitle("Best title")
for row in range(rows):
    col = 0
    for name, model in model_df.iterrows():
        ax = axs[row,col]
        col = col+1
        sns.histplot(latent_df[latent_df['model_name']== name], x=values[row], ax = ax)

In [20]:
for i in range(4,10):
    sns.pairplot(
        latent_df,
        x_vars=["latent_"+str(i)],
        y_vars=['latent_std_'+str(i)],
        hue = 'model_name',
    )

In [11]:
sns.histplot(latent_df, x='distance', hue = 'model_name')

<AxesSubplot:xlabel='distance', ylabel='Count'>

## Generative power

In [12]:
model_name = 'ae'
mean = np.zeros(latent_dim)
mean = model_df.loc[model_name,'latent_mean']
cov  = 1*np.eye(latent_dim)
num_col = 15
num_row = 10
num_img = num_row*num_col
latent_points = rnd.multivariate_normal(mean, cov, size = (num_img))



In [14]:
model = model_df.loc[model_name, 'model']
title = "Reconstruction with autoencoder"
fig, ax = plt.subplots()
reconstructions = model.decoder(latent_points)
i = 0
for col in range(num_col):
    for row in range(num_row): 
        if row == 0: 
            ver_img = reconstructions[i]
        else:
            ver_img = np.concatenate((ver_img,
                                 reconstructions[i]), axis = 0)
        i = i + 1
    if col == 0:
        img = ver_img
    else: 
        img = np.concatenate((img,
                              ver_img), axis = 1)
sns.heatmap(img[:,:,0])



<AxesSubplot:>

In [24]:
gb = latent_df.groupby('model_name')
gb.std()

Unnamed: 0_level_0,label,distance,latent_0,latent_1,latent_2,latent_3,latent_4,latent_5,latent_6,latent_7,...,latent_std_2,latent_std_3,latent_std_4,latent_std_5,latent_std_6,latent_std_7,latent_std_8,latent_std_9,tsne_0,tsne_1
model_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ae,2.895865,4.616839,3.059542,3.437961,2.503023,2.846906,2.707609,3.406957,2.828791,3.319199,...,,,,,,,,,41.793837,36.97304
vae_10,2.895865,0.584376,0.950227,0.917103,0.033998,0.027231,0.019836,0.04132,0.020561,0.022091,...,0.011546,0.018479,0.012477,0.011973,0.015912,0.014269,0.012981,0.062115,39.707425,43.493389
vae_100,2.895865,0.608067,0.402621,1.0197,1.003607,0.997669,0.961496,0.219146,0.139178,0.122261,...,0.016091,0.022199,0.051068,0.036448,0.038823,0.03779,0.061752,0.021849,41.366605,38.618522
vae_1000,2.895865,0.704744,1.261804,0.908298,0.652684,0.910759,0.858636,0.903943,1.185087,1.365042,...,0.014704,0.016647,0.011964,0.014934,0.013046,0.011084,0.011746,0.010003,36.963504,42.377751


In [12]:
mean = np.zeros(latent_dim)
cov  = 1*np.eye(latent_dim)
num_dig = 5
latent_points = rnd.multivariate_normal(mean, cov, size = (num_dig))

reconstructions = []
for name, model in model_df.iterrows():
    reconstructions.append(model['model'].decoder(latent_points))
    
fig, axs = plt.subplots(num_models, num_dig, sharex = True, sharey = True)
for model_i in range(num_models):
    for dig_i in range(num_dig):
        ax = axs[model_i, dig_i]
        sns.heatmap(reconstructions[model_i][dig_i][:,:,0], ax = ax)



In [26]:
tips = sns.load_dataset("tips")
tips.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [30]:
sns.relplot(data = tips, x = "total_bill", y = "sex",
            units ="day")

<seaborn.axisgrid.FacetGrid at 0x149c4bbf7f0>

In [31]:
ax = sns.swarmplot(x="day", y="total_bill", hue="smoker",

                   data=tips, palette="Set2", dodge=True)

In [32]:
ax

<AxesSubplot:xlabel='day', ylabel='total_bill'>