# Evaluate model
Takes a saved model and creates pandas DataFrames (saved as CSV files) that summarize the behavior of the model.

## Check if Colab

In [8]:
import os

# Detect whether the notebook runs on Google Colab by probing for the
# `google.colab` package. Only a failed import means "not Colab"; any other
# error is allowed to surface instead of being silently swallowed.
try:
    import google.colab
    IN_COLAB = True
except ImportError:
    IN_COLAB = False

print("Is in Colab: ", IN_COLAB)
if IN_COLAB:
    import site
    import subprocess
    import sys

    # Skip the clone when the checkout already exists so that re-running
    # this cell does not fail.
    if not os.path.isdir('JL-ML'):
        subprocess.run(
            ['git', 'clone', 'https://github.com/AllaVinner/JL-ML.git'],
            check=True,  # stop here if the clone fails
        )
    # Install into the interpreter that runs this kernel.
    subprocess.run(
        [sys.executable, '-m', 'pip', 'install', '-e', 'JL-ML'],
        check=True,  # stop here if the install fails
    )
    # Needed for colab to find the JL-ML directory
    site.main()

# Directory holding the saved models: inside the cloned repository on Colab,
# one level above the working directory otherwise.
if IN_COLAB:
    saved_path = os.path.join('JL-ML', 'saved-models')
else:
    saved_path = os.path.join('..', 'saved-models')

Is in Colab:  False


In [25]:
import yaml
import os
import keras

import pandas as pd
import numpy as np
import tensorflow as tf

from os.path import join
from sklearn.manifold import TSNE

from jlauto.models.variational_autoencoder import VariationalAutoencoder
from jlauto.models.autoencoder import Autoencoder
from jlauto.models.continuous_bernoulli_loss import continuous_bernoulli_loss


In [10]:
# Name of the saved model to evaluate; it is joined with `saved_path` to locate the model's directory.
model_name = 'ae_latent_dim_3'

In [12]:
# Locate the model directory and parse the YAML configuration stored in it.
model_path = join(saved_path, model_name)
config_file_path = join(model_path, 'config.yaml')
with open(config_file_path, mode='r') as config_file:
    config = yaml.safe_load(config_file)

In [21]:
# Load model
# Keras needs the custom model class to deserialize the saved model; which
# class is needed depends on the `model_type` recorded in the config.
custom_model_classes = {
    "autoencoder": {"Autoencoder": Autoencoder},
    "variational_autoencoder": {"VariationalAutoencoder": VariationalAutoencoder},
}

# Fail here with a clear message rather than leaving `model` undefined for
# an unrecognized model type.
if config['model_type'] not in custom_model_classes:
    raise ValueError(
        f"Unsupported model_type {config['model_type']!r}; "
        f"expected one of {sorted(custom_model_classes)}"
    )

model = keras.models.load_model(
    model_path,
    custom_objects={
        **custom_model_classes[config['model_type']],
        "continuous_bernoulli_loss": continuous_bernoulli_loss,
    },
)
  

# Get test data

In [23]:
# Only the MNIST test split is used; the training split is discarded.
(_, _), (raw_test_digits, test_labels) = keras.datasets.mnist.load_data()

# Append a trailing channel axis and rescale the pixel values by 1/255.
test_digits = raw_test_digits[..., np.newaxis].astype("float32") / 255

# Shape of a single input sample and the number of test samples.
input_shape = test_digits.shape[1:]
num_test = test_labels.shape[0]

In [26]:
# Encode digits
# `code` is the latent position of every test digit and `code_std` the
# per-dimension standard deviation around it.
if config['model_type'] == "autoencoder":
    code = model.encoder(test_digits).numpy()
    # A plain autoencoder produces no spread; fill with NaN so the
    # downstream columns exist but are marked as missing.
    code_std = np.full(code.shape, np.nan)

elif config['model_type'] == "variational_autoencoder":
    digit_distribution = model.encoder(test_digits).numpy()
    # NOTE(review): assumes index 0 on axis 1 is the mean and index 1 the
    # log-variance -- confirm against VariationalAutoencoder's encoder.
    code = digit_distribution[:, 0, :]
    code_std = np.sqrt(np.exp(digit_distribution[:, 1, :]))

else:
    # Without this branch `code` would stay undefined and the failure would
    # only show up later as a NameError.
    raise ValueError(f"Unsupported model_type {config['model_type']!r}")
    
# Calculate latent data (one row per test digit)
TSNE_SEED = 0  # fixed seed so the 2-D embedding is reproducible between runs

# Distance of each code from the origin.
latent_distance = np.linalg.norm(code, axis=1)

# Average radius of each code: the geometric mean of its per-dimension
# standard deviations. Computed in log space because the plain product of
# many small values can underflow to 0. NaN inputs still give NaN.
# Equals prod(code_std) ** (1 / latent_dim) when code_std has latent_dim columns.
latent_radius = np.exp(np.mean(np.log(code_std), axis=1))

# Binary cross-entropy between each digit and the model output, averaged
# down to a single value per digit.
reconstruction = model(test_digits)
pixel_loss = tf.keras.losses.binary_crossentropy(test_digits, reconstruction)
latent_loss = tf.reduce_mean(tf.reduce_mean(pixel_loss, axis=-1), axis=-1).numpy()

# 2-D t-SNE embedding of the codes.
latent_tsne = TSNE(n_components=2, random_state=TSNE_SEED).fit_transform(code)

latent_df = pd.DataFrame(data={
    'model_name': config['name'],
    'label': test_labels,
    'loss': latent_loss,
    'distance': latent_distance,
    'radius': latent_radius,
    'tsne_0': latent_tsne[:, 0],
    'tsne_1': latent_tsne[:, 1],
})


# Build the per-dimension summary (one row per latent dimension).
# NaN entries are ignored by both statistics.
dim_std = np.nanstd(code, axis=0)          # spread of the codes along each dimension
dim_radius = np.nanmean(code_std, axis=0)  # mean standard deviation along each dimension
dim_index = np.arange(config['latent_dim'])

dimension_columns = dict(
    model_name=config['name'],
    dim_index=dim_index,
    code_spread=dim_std,
    mean_radius=dim_radius,
)
dimension_df = pd.DataFrame(dimension_columns)



  dim_radius = np.nanmean(code_std, axis = 0)


In [33]:
latent_df.head()

Unnamed: 0,model_name,label,loss,distance,radius,tsne_0,tsne_1
0,ae_latent_dim_3,7,0.229921,11.130809,,-34.155605,54.997852
1,ae_latent_dim_3,2,0.348439,10.334489,,-68.249336,14.420111
2,ae_latent_dim_3,1,0.153093,14.271111,,80.883606,13.235858
3,ae_latent_dim_3,0,0.286345,8.028056,,4.561124,-61.365135
4,ae_latent_dim_3,4,0.232498,8.965589,,-37.893227,-54.385307


In [29]:
dimension_df.head()

Unnamed: 0,model_name,dim_index,coda_std,mean_radius
0,ae_latent_dim_3,0,1.547557,
1,ae_latent_dim_3,1,1.342508,
2,ae_latent_dim_3,2,1.874117,


## Save data frames

In [31]:
latent_df.to_csv(join(saved_path,model_name,'latent_df.csv'), index=False)
dimension_df.to_csv(join(saved_path,model_name,'dimension_df.csv'), index=False)