In [1]:
import math
import numpy as np


class Value:
    """ stores a single scalar value and its gradient """

    def __init__(self, data, _children=(), _op=''):
        self.data = data
        self.grad = 0
        # internal variables used for autograd graph construction
        self._backward = lambda: None
        self._prev = set(_children)
        self._op = _op  # the op that produced this node, for graphviz / debugging / etc

    def __add__(self, other):
        other = other if isinstance(other, Value) else Value(other)
        out = Value(self.data + other.data, (self, other), '+')

        def _backward():
            self.grad += out.grad
            other.grad += out.grad
        out._backward = _backward

        return out

    def __mul__(self, other):
        other = other if isinstance(other, Value) else Value(other)
        out = Value(self.data * other.data, (self, other), '*')

        def _backward():
            self.grad += other.data * out.grad
            other.grad += self.data * out.grad
        out._backward = _backward

        return out

    def __pow__(self, other):
        assert isinstance(other, (int, float)
                          ), "only supporting int/float powers for now"
        out = Value(self.data**other, (self,), f'**{other}')

        def _backward():
            self.grad += (other * self.data**(other-1)) * out.grad
        out._backward = _backward

        return out

    def relu(self):
        out = Value(0 if self.data < 0 else self.data, (self,), 'ReLU')

        def _backward():
            self.grad += (out.data > 0) * out.grad
        out._backward = _backward

        return out

    # fix dead neuron problem
    def leaky_relu(self):
        out = Value(self.data * 0.01 if self.data <
                    0 else self.data, (self,), 'ReLU')

        def _backward():
            local_grad = 1.0 if self.data > 0 else 0.01
            self.grad += local_grad * out.grad 
        out._backward = _backward
        return out

    def log(self):

        out = Value(math.log(self.data), (self, ), 'log')

        def _backward():
            self.grad += (1 / self.data) * out.grad
        out._backward = _backward

        return out

    def exp(self):
        x = self.data
        out = Value(math.exp(x), (self, ), 'exp')

        def _backward():
            self.grad += out.data * out.grad
        out._backward = _backward

        return out

    def sigmoid(self):
        x = self.data
        t = 1 / (1 + (np.exp(-x)))

        out = Value(t, (self, ), 'sigmoid')

        def _backward():
            self.grad += (out.data * (1 - out.data)) * out.grad
        out._backward = _backward

        return out

    def backward(self):

        # topological order all of the children in the graph
        topo = []
        visited = set()

        def build_topo(v):
            if v not in visited:
                visited.add(v)
                for child in v._prev:
                    build_topo(child)
                topo.append(v)
        build_topo(self)

        # go one variable at a time and apply the chain rule to get its gradient
        self.grad = 1
        for v in reversed(topo):
            v._backward()

    def __ge__(self, other):
        return self.data >= other.data

    def __le__(self, other):
        return self.data <= other.data

    def __gt__(self, other):
        return self.data > other.data

    def __lt__(self, other):
        return self.data < other.data

    def __neg__(self):  # -self
        return self * -1

    def __radd__(self, other):  # other + self
        return self + other

    def __sub__(self, other):  # self - other
        return self + (-other)

    def __rsub__(self, other):  # other - self
        return other + (-self)

    def __rmul__(self, other):  # other * self
        return self * other

    def __truediv__(self, other):  # self / other
        return self * other**-1

    def __rtruediv__(self, other):  # other / self
        return other * self**-1

    def __repr__(self):
        return f"Value(data={self.data}, grad={self.grad})"


In [2]:
import random
import math


class Module:

    def zero_grad(self):
        for p in self.parameters():
            p.grad = 0

    def parameters(self):
        return []

    def layers(self):
        return []

    def summary(self):
        return f"{len(self.layers())} layers, {len(self.parameters())} parameters"


class Neuron(Module):

    # I want to introduce weight sharing, which means I need to be able to
    # initialise a neuron with pre defined weights, but leave the bias?

    def __init__(self, nin, nonlin=True, **kwargs):
        tied_weights = kwargs.get('tied_weights', None)
        self.w = tied_weights if tied_weights is not None else [Value(random.uniform(-1, 1)) for _ in range(nin)]
        self.b = Value(random.uniform(-1, 1))
        self.nonlin = nonlin
        self.activate = kwargs.get('activate', None)

    def __call__(self, x):
        if isinstance(x, (Value, float, int)):
            # This is for a single input, likely at the start of a layer
            act = (self.w[0] * x) + self.b
        else:
            act = sum((wi*xi for wi, xi in zip(self.w, x)), self.b)
        if self.activate and self.nonlin == False:
            return self.activate(act)
        else:
            return act.leaky_relu() if self.nonlin else act

    def parameters(self):
        return self.w + [self.b] if isinstance(self.w[0], Value) else [p for w_list in self.w for p in w_list] + [self.b]

    def __repr__(self):
        return f"{'ReLU' if self.nonlin else '{self.activate}'}Neuron({len(self.w)})"


class Layer(Module):
    def __init__(self, nin, nout, tied_to_layer=None, **kwargs):
        if tied_to_layer is None:
            # Standard Layer initialization
            self.neurons = [Neuron(nin, **kwargs) for _ in range(nout)]
        else:
            # Tied Layer initialization
            # The weights for this layer are the transpose of the tied_to_layer's weights
            # This requires careful construction.
            # Number of inputs for this layer = number of outputs of the tied layer
            # Number of outputs for this layer = number of inputs of the tied layer
            self.tied_to_layer = tied_to_layer
            self.neurons = [Neuron(nin, **kwargs) for _ in range(nout)]
            
    def __call__(self, x):
        if hasattr(self, 'tied_to_layer'):
            # The weights are conceptually transposed.
            # So, the output of a neuron is sum(w_ji * x_i), which means summing over the neurons of the previous layer.
            # This is hard to do cleanly with the current structure.
            # The simpler approach is to loop manually.
            out = []
            for j in range(len(self.neurons)):
                # The j-th neuron of this layer uses the j-th weight of every neuron in the tied layer.
                # For each output neuron (j), sum the weighted inputs.
                # The weight connecting input `i` to output `j` is the same as the weight connecting
                # input `j` of the encoder layer to output `i`.
                act = sum(self.tied_to_layer.neurons[i].w[j] * x[i] for i in range(len(x))) + self.neurons[j].b if isinstance(x, list) else self.tied_to_layer.neurons[0].w[j] * x + self.neurons[j].b
                
                # Apply activation
                if self.neurons[j].activate and self.neurons[j].nonlin is False:
                    act = self.neurons[j].activate(act)
                else:
                    act = act.relu() if self.neurons[j].nonlin else act
                out.append(act)
            return out[0] if len(out) == 1 else out
        else:
            # Standard layer behavior
            out = [n(x) for n in self.neurons]
            return out

    def parameters(self):
        # In a tied layer, the weights are shared, but the biases are not.
        if hasattr(self, 'tied_to_layer'):
            return [n.b for n in self.neurons]
        else:
            return [p for n in self.neurons for p in n.parameters()]
        
    def __repr__(self):
        return f"Layer of [{', '.join(str(n) for n in self.neurons)}]"


class MLP(Module):
    def __init__(self, nin, nouts, tied_weights_from=None, **kwargs):
        sz = [nin] + nouts
        self.layers = []
        if tied_weights_from is None:
            # Standard MLP initialization
            self.layers = [Layer(sz[i], sz[i+1], nonlin=i != len(nouts)-1, **kwargs) for i in range(len(nouts))]
        else:
            # Tied-weight MLP initialization
            tied_layers = list(reversed(tied_weights_from))
            for i in range(len(nouts)):
                # Pass the encoder's layer directly to the decoder's layer.
                # The decoder layer will use the encoder's weights.
                self.layers.append(Layer(sz[i], sz[i+1], tied_to_layer=tied_layers[i], nonlin=i != len(nouts)-1, **kwargs))

    def __call__(self, x):
        for layer in self.layers:
            x = layer(x)
        return x

    def parameters(self):
        return [p for layer in self.layers for p in layer.parameters()]
        
    def __repr__(self):
        return f"MLP of [{', '.join(str(layer) for layer in self.layers)}]"

class AutoEncoder(Module):
    def __init__(self, in_embeds, n_hidden_layers, compressed, act_func=None, tied=False):
        n_hidden_layers = [math.ceil(in_embeds / i)
                           for i in range(2, n_hidden_layers + 2)]
        
        
        self.act_func = act_func
        
        # Create encoder
        self.encoder = MLP(in_embeds, n_hidden_layers + [compressed])
        
        
        
        # Create decoder, passing encoder layers for tied weights
        if tied:
            self.decoder = MLP(compressed, list(reversed(n_hidden_layers)) + [in_embeds], tied_weights_from=self.encoder.layers, activate=act_func)
        else:
            self.decoder = MLP(compressed, list(reversed(n_hidden_layers)) + [in_embeds], activate=act_func)
        
        
    
    def __call__(self, x):
        compressed = self.encoder(x)
        out = self.decoder(compressed)
        return out

    def parameters(self):
        return self.encoder.parameters() + self.decoder.parameters()

    def layers(self):
        return self.encoder.layers + self.decoder.layers

    def pretty(self):
        if self.act_func != None:
            hey = str(self.act_func)
            return hey.split()[1][6:]
        else:
            return "no function"

    def __repr__(self):
        return f"encoder has {self.summary()}, decoder has {self.summary()} activated with {self.pretty()}"


class VariationalAutoEncoder(Module):
    """
    Simple Variational Autoencoder implementation.
    The encoder outputs mean and log-variance for each latent dimension.
    Uses reparameterization trick to sample from the latent distribution.
    """
    
    def __init__(self, in_embeds, n_hidden_layers, latent_dim, act_func=None, tied=False):
        n_hidden_layers = [math.ceil(in_embeds / i)
                           for i in range(2, n_hidden_layers + 2)]
        
        self.latent_dim = latent_dim
        self.act_func = act_func
        
        # Encoder outputs 2 * latent_dim: mean and log-variance for each dimension
        # Last layer outputs 2*latent_dim (no activation on this layer)
        self.encoder = MLP(in_embeds, n_hidden_layers + [2 * latent_dim])
        
        # Decoder takes latent_dim as input
        if tied:
            # For tied weights, we'd need to handle the 2*latent_dim -> latent_dim transition
            # For simplicity, we'll skip tied weights in VAE for now
            self.decoder = MLP(latent_dim, list(reversed(n_hidden_layers)) + [in_embeds], activate=act_func)
        else:
            self.decoder = MLP(latent_dim, list(reversed(n_hidden_layers)) + [in_embeds], activate=act_func)
    
    def encode(self, x):
        """Encode input to mean and log-variance"""
        encoded = self.encoder(x)
        
        # Ensure encoded is a list
        if not isinstance(encoded, list):
            encoded = [encoded]
        
        # Split the output into mean and log_var
        # encoded should be a list of 2*latent_dim values
        if len(encoded) != 2 * self.latent_dim:
            raise ValueError(f"Encoder output dimension {len(encoded)} doesn't match expected 2*latent_dim={2*self.latent_dim}")
        
        mu = encoded[:self.latent_dim]
        log_var = encoded[self.latent_dim:]
        
        return mu, log_var
    
    def reparameterize(self, mu, log_var):
        """
        Reparameterization trick: z = mu + sigma * epsilon
        where epsilon ~ N(0,1) and sigma = exp(0.5 * log_var)
        
        Note: epsilon is sampled and treated as a constant during backprop
        """
        # Sample epsilon from standard normal (treated as constant in backprop)
        epsilon = [Value(random.gauss(0, 1)) for _ in range(len(mu))]
        
        # Compute sigma = exp(0.5 * log_var) more efficiently
        # sigma = exp(0.5 * log_var) = sqrt(exp(log_var))
        sigma = [(log_var_i * 0.5).exp() for log_var_i in log_var]
        
        # z = mu + sigma * epsilon
        z = [mu_i + sigma_i * eps_i for mu_i, sigma_i, eps_i in zip(mu, sigma, epsilon)]
        
        return z
    
    def decode(self, z):
        """Decode latent sample to reconstruction"""
        return self.decoder(z)
    
    def __call__(self, x):
        """Forward pass: encode, sample, decode"""
        mu, log_var = self.encode(x)
        z = self.reparameterize(mu, log_var)
        reconstruction = self.decode(z)
        return reconstruction, mu, log_var
    
    def parameters(self):
        return self.encoder.parameters() + self.decoder.parameters()
    
    
    def layers(self):
        return self.encoder.layers + self.decoder.layers
    
    def __repr__(self):
        return f"VAE(encoder: {len(self.encoder.layers)} layers, decoder: {len(self.decoder.layers)} layers, latent_dim: {self.latent_dim})"



In [3]:
class Optimizer:
    """Base class for optimizers"""

    def __init__(self, parameters):
        self.parameters = parameters
    
    def zero_grad(self):
        for p in self.parameters:
            p.grad = 0

  
    def step(self):
        """Take a step of gradient descent"""

        raise NotImplementedError


class SGD(Optimizer):
    """Stochastic Gradient Descent optimizer"""

    def __init__(self, parameters, learning_rate = 0.01):
        super().__init__(parameters)
        self.count = 0
        self.learning_rate = learning_rate
        
    def increment(self):
        self.count += 1

    def step(self):
        self.increment()
        # Calculate the base LR
        new_lr = 1.0 - 0.9 * self.count / 100
        
        # Ensure the learning rate is never negative (or zero, to stop training)
        self.learning_rate = max(0.00001, new_lr) 
        
        """Update model parameters in the opposite direction of their gradient"""

        if self.learning_rate > 0: # Only update if the LR is positive
            for p in self.parameters:
                p.data -= self.learning_rate * p.grad

In [4]:
"""
Heavily inspired by https://github.com/joelgrus/joelnet/blob/master/joelnet/data.py
"""
import random


# Batch = NamedTuple("Batch", [("inputs", List[Vector]), ("targets", Vector)])

class BatchIterator:
    """Iterates on data by batches"""

    def __init__(self,inputs,targets,batch_size=32):
        self.inputs  = inputs
        self.targets = targets
        self.batch_size = batch_size

    def __call__(self):
        starts = list(range(0, len(self.inputs), self.batch_size))
            
        for start in starts:
            end = start + self.batch_size
            batch_inputs = self.inputs[start:end]
            batch_targets = self.targets[start:end]
            yield (batch_inputs, batch_targets)

In [5]:
def mean_squared_error(y_true, y_pred):
    total_loss = sum([(true - pred)**2 for true, pred in zip(y_true, y_pred)])
    mean_loss = total_loss / len(y_true)
    
    return mean_loss


def vae_loss(reconstruction, target, mu, log_var, beta=1.0):
    """
    Variational Autoencoder loss = Reconstruction Loss + beta * KL Divergence
    
    Args:
        reconstruction: Value or List of Value objects (reconstructed output)
        target: List of Value objects (original input)
        mu: List of Value objects (mean of latent distribution)
        log_var: List of Value objects (log variance of latent distribution)
        beta: Weight for KL divergence term (default 1.0)
    
    Returns:
        Total VAE loss as a Value object
    """
    # Ensure reconstruction is a list
    if not isinstance(reconstruction, list):
        reconstruction = [reconstruction]
    if not isinstance(target, list):
        target = [target]
    
    # Reconstruction loss (MSE)
    recon_loss = mean_squared_error(target, reconstruction)
    
    # KL divergence: -0.5 * sum(1 + log_var - mu^2 - exp(log_var))
    # This encourages the latent distribution to be close to N(0,1)
    kl_terms = []
    for mu_i, log_var_i in zip(mu, log_var):
        # KL term for one dimension: -0.5 * (1 + log_var - mu^2 - exp(log_var))
        kl_term = -0.5 * (Value(1.0) + log_var_i - mu_i**2 - log_var_i.exp())
        kl_terms.append(kl_term)
    
    kl_loss = sum(kl_terms) / len(kl_terms) if kl_terms else Value(0.0)
    
    # Total loss
    total_loss = recon_loss + beta * kl_loss
    
    return total_loss

    


In [6]:
 # from pyfit.engine import Vector, Scalar
# from pyfit.nn import Module
# from pyfit.optim import Optimizer
# from pyfit.data import BatchIterator
# from pyfit.metrics import binary_accuracy

# Used to record training history for metrics
History = {}


class Trainer:
    """Encapsulates the model training loop"""

    def __init__(self, model, optimizer, loss):
        self.model = model
        self.optimizer = optimizer
        self.loss = loss

    def fit(self, data_iterator, num_epochs=500, verbose=False):
        """Fits the model to the data"""

        history = {"loss": []}
        epoch_loss = 0
        epoch_y_true = []
        epoch_y_pred = []
        for epoch in range(num_epochs):
            # Reset the gradients of model parameters
            self.optimizer.zero_grad()
            # Reset epoch data
            epoch_loss = 0
            epoch_y_true = []
            epoch_y_pred = []

            for batch in data_iterator():
                # Forward pass
                outputs = list(map(self.model, batch[0]))
                
                batch_y_true = [Value(val) for sublist in batch[1] for val in sublist]
                batch_y_pred = [val for sublist in outputs for val in sublist]
                # Loss computation
                # [item for sublist in outputs[0] for item in sublist]
                batch_loss = self.loss(batch_y_true, batch_y_pred)
                epoch_loss += batch_loss.data

                # Store batch predictions and ground truth for computing epoch metrics
                epoch_y_pred.extend(batch_y_pred)
                epoch_y_true.extend(batch[1])

                # Backprop and gradient descent
                batch_loss.backward()
                self.optimizer.step()

            # Accuracy computation for epoch
            
            

            # Record training history
            history["loss"].append(epoch_loss)
            if verbose:
                print(
                    f"Epoch [{epoch+1}/{num_epochs}], "
                    f"loss: {epoch_loss:.6f}, "
                )

        return history

In [7]:

# Example usage of VariationalAutoEncoder

# Create a simple VAE
vae = VariationalAutoEncoder(
    in_embeds=10,        # Input dimension
    n_hidden_layers=2,   # Number of hidden layers
    latent_dim=3,        # Latent space dimension
    act_func=None,       # Activation function (None uses default leaky_relu)
    tied=False           # Whether to use tied weights
)

optimizer = SGD(vae.parameters(), learning_rate=0.01)

# Example input (list of Values)
x = [Value(random.uniform(0, 1)) for _ in range(10)]
target = [Value(val.data) for val in x] 
for i in range(100):
    optimizer.zero_grad()
    # Forward pass returns: (reconstruction, mu, log_var)
    reconstruction, mu, log_var = vae(x)

    # Ensure reconstruction is a list for loss computation
    if not isinstance(reconstruction, list):
        reconstruction = [reconstruction]

    # Compute VAE loss
     # Target is the input itself (autoencoder)
    loss = vae_loss(reconstruction, target, mu, log_var, beta=1.0)

    # Backward pass
    loss.backward()

    
    optimizer.step()

    print(f"VAE Loss: {loss.data:.12f}")


# Note: To use VAE with the Trainer class, you'll need to create a wrapper
# or modify the training loop to handle the (reconstruction, mu, log_var) output
# and use vae_loss instead of the standard MSE loss.


VAE Loss: 1.885692220844
VAE Loss: 0.775253382145
VAE Loss: 0.388295866018
VAE Loss: 0.243936953579
VAE Loss: 0.159898954791
VAE Loss: 0.113929933732
VAE Loss: 0.743840775263
VAE Loss: 0.441804987907
VAE Loss: 0.230553340726
VAE Loss: 0.125320135904
VAE Loss: 0.071456777775
VAE Loss: 0.042591899699
VAE Loss: 0.026575101850
VAE Loss: 0.017439395863
VAE Loss: 0.011621689478
VAE Loss: 0.008075181871
VAE Loss: 0.005686644723
VAE Loss: 0.004081616450
VAE Loss: 0.049005339617
VAE Loss: 0.007548395786
VAE Loss: 0.004662716209
VAE Loss: 0.002952894746
VAE Loss: 0.001952988214
VAE Loss: 0.001342187352
VAE Loss: 0.000926823349
VAE Loss: 0.000671867040
VAE Loss: 0.000486797917
VAE Loss: 0.000359053988
VAE Loss: 0.000267787015
VAE Loss: 0.000201657980
VAE Loss: 0.000152519990
VAE Loss: 0.000116479037
VAE Loss: 0.000089446124
VAE Loss: 0.000069512354
VAE Loss: 0.000053671708
VAE Loss: 0.000042077098
VAE Loss: 0.000032660843
VAE Loss: 0.014389295911
VAE Loss: 0.001375019987
VAE Loss: 0.000910263630


In [8]:
# to construct a training loop, I need a loss function
# forward pass
# backward pass
# batches if im feeling cheeky
# update (Stochastic gradient descent)



In [9]:

# Example usage of VariationalAutoEncoder

# Create a simple VAE
auto = AutoEncoder(
    in_embeds=10,        # Input dimension
    n_hidden_layers=2,   # Number of hidden layers
    compressed=3,        # Latent space dimension
    act_func=Value.sigmoid,       # Activation function (None uses default leaky_relu)
    tied=True           # Whether to use tied weights
)

optimizer = SGD(auto.parameters())

# Example input (list of Values)
x = [Value(random.uniform(0, 1)) for _ in range(10)]
target = [Value(val.data) for val in x]  # Target is the input itself (autoencoder)
for i in range(100):
    optimizer.zero_grad()
    
    # Forward pass returns: (reconstruction, mu, log_var)
    reconstruction = auto(x)

    # Ensure reconstruction is a list for loss computation

    # Compute VAE loss
    
    loss = mean_squared_error(reconstruction, target)

    # Backward pass
    loss.backward()
    
    optimizer.step()

    print(f"AutoEncoder Loss: {loss.data:.12f}")


# Note: To use VAE with the Trainer class, you'll need to create a wrapper
# or modify the training loop to handle the (reconstruction, mu, log_var) output
# and use vae_loss instead of the standard MSE loss.


AutoEncoder Loss: 0.154100578898
AutoEncoder Loss: 0.121172180946
AutoEncoder Loss: 0.108503458121
AutoEncoder Loss: 0.098586663214
AutoEncoder Loss: 0.092146694514
AutoEncoder Loss: 0.088537622307
AutoEncoder Loss: 0.084915297342
AutoEncoder Loss: 0.083621397654
AutoEncoder Loss: 0.080006730797
AutoEncoder Loss: 0.077217073003
AutoEncoder Loss: 0.076242318730
AutoEncoder Loss: 0.080102734315
AutoEncoder Loss: 0.072887421172
AutoEncoder Loss: 0.069148290593
AutoEncoder Loss: 0.066283086624
AutoEncoder Loss: 0.063735622323
AutoEncoder Loss: 0.062401797382
AutoEncoder Loss: 0.061932117944
AutoEncoder Loss: 0.058127429064
AutoEncoder Loss: 0.055364448549
AutoEncoder Loss: 0.053791074059
AutoEncoder Loss: 0.053383041223
AutoEncoder Loss: 0.049881004690
AutoEncoder Loss: 0.047505002879
AutoEncoder Loss: 0.047413136493
AutoEncoder Loss: 0.044312554137
AutoEncoder Loss: 0.042458256759
AutoEncoder Loss: 0.042094161924
AutoEncoder Loss: 0.039283335822
AutoEncoder Loss: 0.037694878208
AutoEncode

In [10]:
from sklearn.datasets import load_digits
from sklearn.preprocessing import MinMaxScaler

In [11]:
X, y = load_digits(return_X_y=True)
t = MinMaxScaler()
t.fit(X)
X_train = t.transform(X)

In [14]:

# Example usage of VariationalAutoEncoder

# Create a simple VAE
auto = AutoEncoder(
    in_embeds=64,        # Input dimension
    n_hidden_layers=1,   # Number of hidden layers
    compressed=16,        # Latent space dimension
    act_func=Value.sigmoid,       # Activation function (None uses default leaky_relu)
    tied=True           # Whether to use tied weights
)

optimizer = SGD(auto.parameters())

# Example input (list of Values)
target = X_train[0]
target = list(map(Value, target)) # Target is the input itself (autoencoder)
for i in range(150):
    optimizer.zero_grad()
    
    # Forward pass returns: (reconstruction, mu, log_var)
    
    # reconstruction = [list(map(Value, xrow)) for xrow in X_train]
    
    reconstruction = map(Value, X_train[0])
    
    scores = auto(reconstruction)
    # Ensure reconstruction is a list for loss computation

    # Compute VAE loss
    
    loss = mean_squared_error(target, scores)

    alpha = 1e-4
    reg_loss = alpha * sum((p*p for p in auto.parameters()))
    total_loss = loss + reg_loss

    # Backward pass
    loss.backward()
    
    optimizer.step()

    print(f"Iteration : {i}, AutoEncoder Loss: {total_loss.data:.12f}")
    print(auto.parameters()[:30])


# Note: To use VAE with the Trainer class, you'll need to create a wrapper
# or modify the training loop to handle the (reconstruction, mu, log_var) output
# and use vae_loss instead of the standard MSE loss.


(64,)
Iteration : 0, AutoEncoder Loss: 0.243101277896
(64,)
Iteration : 1, AutoEncoder Loss: 0.209554474847
(64,)
Iteration : 2, AutoEncoder Loss: 0.195153729115
(64,)
Iteration : 3, AutoEncoder Loss: 0.191140189781
(64,)
Iteration : 4, AutoEncoder Loss: 0.188718272333
(64,)
Iteration : 5, AutoEncoder Loss: 0.193006403770
(64,)
Iteration : 6, AutoEncoder Loss: 0.178557645838
(64,)
Iteration : 7, AutoEncoder Loss: 0.166324824406
(64,)
Iteration : 8, AutoEncoder Loss: 0.157395371741
(64,)
Iteration : 9, AutoEncoder Loss: 0.155887547776
(64,)
Iteration : 10, AutoEncoder Loss: 0.155665978985
(64,)
Iteration : 11, AutoEncoder Loss: 0.155607102259
(64,)
Iteration : 12, AutoEncoder Loss: 0.156550397051
(64,)
Iteration : 13, AutoEncoder Loss: 0.155992145061
(64,)
Iteration : 14, AutoEncoder Loss: 0.155478549465
(64,)
Iteration : 15, AutoEncoder Loss: 0.153316993607
(64,)
Iteration : 16, AutoEncoder Loss: 0.151037202368


KeyboardInterrupt: 