In [1]:
# !git clone https://github.com/jxnl/dirichlet-vae.git

# Import Packages

In [2]:
import os

import numpy as np
import pandas as pd
from keras.datasets import mnist
from sklearn import datasets
from sklearn.model_selection import StratifiedKFold,StratifiedShuffleSplit,train_test_split
from tqdm import tqdm_notebook
from tqdm.notebook import tqdm

from vae.dirichlet import SamplingReparamLaplace, SamplingReparamKL, DirVae
from vae.gauss import GaussVae

Using TensorFlow backend.


# Load MNIST Data (disabled)

In [3]:
# def get_data():
#     (x_train, y_train), (x_test, y_test) = mnist.load_data()

#     x_train = x_train.astype('float32') / 255.
#     x_test = x_test.astype('float32') / 255.
#     x_train = x_train.reshape((len(x_train), np.prod(x_train.shape[1:])))
#     x_test = x_test.reshape((len(x_test), np.prod(x_test.shape[1:])))
#     return x_train, x_test, y_train, y_test

# Load Iris Dataset

In [4]:


# Load the Iris dataset object through scikit-learn's `datasets` module.
iris = datasets.load_iris()

In [5]:
# Bind the 'data' and 'target' entries of the loaded dataset to short names.
# Plain indexing is used, so X and y refer to the very objects stored in
# `iris` (no copy is made here).
X, y = iris['data'], iris['target']

## Scale Data

In [6]:
# Scale every column by its own maximum so each feature's largest value is 1.
# X.max(axis=0) yields one maximum per column and broadcasting divides each
# column by its maximum in a single vectorised step.
#
# A new array is built on purpose: X was bound directly to iris['data'], so
# writing the scaled values back into X in place would silently modify the
# loaded dataset object as well.
X = X / X.max(axis=0)

## Duplicate Data

In [7]:
from copy import deepcopy

# Stack 2**5 = 32 copies of the samples (and of the labels, in the same
# order) end to end, i.e. the result of doubling the arrays five times.
X = np.tile(X, (2 ** 5, 1))
y = np.tile(y, 2 ** 5)

In [8]:
# Number of rows in X after the duplication step.
len(X)

4800

In [9]:
# List the fields available on the loaded dataset object.
iris.keys()

dict_keys(['data', 'target', 'target_names', 'DESCR', 'feature_names', 'filename'])

# Train-Test Split

In [10]:
# Split into train and test sets, stratified on the class labels so each
# class keeps the same proportion on both sides. No test_size is given, so
# scikit-learn's default applies (the recorded run shows 3600 train / 1200
# test rows). random_state is pinned so that the partition, and everything
# trained on it, is reproducible across kernel restarts.
#
# NOTE(review): X was enlarged by stacking copies of the same rows before this
# split, so identical samples can land in both train and test; the test set is
# therefore not an independent hold-out.
x_train, x_test, y_train, y_test = train_test_split(
    X, y, stratify=y, random_state=42)

## Check Stratification

In [11]:
# Distinct class labels in the test split and the number of samples of each.
np.unique(y_test, return_counts=True)

(array([0, 1, 2]), array([400, 400, 400]))

In [12]:
# Distinct class labels in the training split and the number of samples of each.
np.unique(y_train, return_counts=True)

(array([0, 1, 2]), array([1200, 1200, 1200]))

Labels are evenly divided within both splits: 1200 samples per class in the training set and 400 per class in the test set.

# Init Models & Train

In [13]:
from keras import callbacks as cb

# Registry of model objects keyed by model-type name; populated further down.
models = dict()

# Shared early-stopping callback watching the validation loss. Per the Keras
# documentation, min_delta is the smallest change that counts as an
# improvement and patience is how many epochs without one are tolerated.
cb_earlystop = cb.EarlyStopping(
    monitor='val_loss',
    mode='auto',
    min_delta=0.1,
    patience=4,
    verbose=0,
)

## Model Parameters

In [14]:
# Layer sizes used when the models are constructed:
#   o_dim -> original_dim, h_dim -> encoder_widths and decoder_width,
#   c_dim -> latent_dim.
o_dim, h_dim, c_dim = 4, 4, 2

# Batch size handed to the model constructors.
BATCH_SIZE = 16

# Alternative settings kept for reference (presumably for the flattened MNIST
# input handled by the commented-out loader; TODO confirm):
# o_dim = 784
# h_dim = 300
# c_dim = 15
# BATCH_SIZE = 16

In [15]:
def c(model_name):
    """Build the callback list for one training run.

    Parameters
    ----------
    model_name
        Value formatted into the log file name (the training loop passes the
        model-type key, e.g. "normal").

    Returns
    -------
    list
        The shared ``cb_earlystop`` callback followed by a ``cb.CSVLogger``
        that writes to ``./log_results/log.<model_name>.csv``.
    """
    log_path = "./log_results/log.{}.csv".format(model_name)
    # Create the log directory up front so training on a fresh checkout does
    # not fail when the logger tries to open its file.
    os.makedirs(os.path.dirname(log_path), exist_ok=True)
    return [cb_earlystop, cb.CSVLogger(log_path)]


# Keys of the `models` dict, in the order the models are trained.
model_types = ["dirichlet_laplace", "dirichlet_kl", "logit", "normal"]

In [16]:
# Keyword arguments shared by all four models, so the architectures differ
# only in the model class and its specific options.
shared_kwargs = dict(
    batch_size=BATCH_SIZE,
    original_dim=o_dim,
    encoder_widths=h_dim,
    latent_dim=c_dim,
    decoder_width=h_dim,
)

# Dirichlet VAEs, one per reparameterisation. The "dirichlet_kl" entry uses
# SamplingReparamKL; it was previously built with SamplingReparamLaplace,
# which made it an exact duplicate of "dirichlet_laplace".
# NOTE: log_alpha=True and dropout=True are left out for both Dirichlet
# models because passing them raised errors; TODO investigate.
models["dirichlet_laplace"] = DirVae(
    reparam=SamplingReparamLaplace, **shared_kwargs)

models["dirichlet_kl"] = DirVae(
    reparam=SamplingReparamKL, **shared_kwargs)

# Gaussian VAEs, with and without the `logit` option.
models["logit"] = GaussVae(logit=True, **shared_kwargs)

models["normal"] = GaussVae(logit=False, **shared_kwargs)

In [17]:
# Show the model-type names that will be trained.
model_types

['dirichlet_laplace', 'dirichlet_kl', 'logit', 'normal']

## Fit Data

In [18]:
# Train every registered model in turn, with a progress bar over the models.
# Uses tqdm.notebook.tqdm, the replacement named by the deprecation warning
# that tqdm_notebook emits.
for model_name in tqdm(model_types, desc="models"):
    print("Training {} model".format(model_name))
    models[model_name].fit(
        x_train,
        # The recorded log reports "validate on 1200 samples", so this second
        # argument appears to act as validation data; TODO confirm against
        # the fit() signature in the vae package.
        x_test,
        # Per-model callbacks: shared early stopping plus a per-model CSV log.
        callbacks=c(model_name),
        nb_epoch=30)

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  """Entry point for launching an IPython kernel.


HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))

  **kwargs))


Trianing dirichlet_laplacemodel
Train on 3600 samples, validate on 1200 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Trianing dirichlet_klmodel
Train on 3600 samples, validate on 1200 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Trianing logitmodel


  **kwargs))


Train on 3600 samples, validate on 1200 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Trianing normalmodel
Train on 3600 samples, validate on 1200 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30



# Evaluation

In [19]:
from vae.evaluate import MnistMetrics

%matplotlib inline

In [22]:
# FIXME: in the recorded run this call fails with
#   AttributeError: 'GaussVae' object has no attribute 'encoder'
# so MnistMetrics apparently reads an `encoder` attribute that GaussVae does
# not provide; confirm against vae.evaluate and vae.gauss before relying on it.
# model_types[3] is "normal" in the model_types list defined earlier.
m = MnistMetrics(models[model_types[3]], x_test, y_test, logit=False)

AttributeError: 'GaussVae' object has no attribute 'encoder'

In [24]:
# Leftover interactive inspection: IPython `?` help on the model stored under
# model_types[3]. Not part of the analysis; drop it from the finished notebook.
?models[model_types[3]]

In [21]:
# model_types[2]

'logit'