### Variational Autoencoder

In [11]:
### importing modules that are needed

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import torch
import torch.nn as nn 
import torch.nn.functional as f 
import torch.optim as optim 
from torch.utils.data import Dataset, DataLoader

from sklearn.model_selection import train_test_split



##### **Loading the data**

In [12]:
data_f = pd.read_csv(\
    '~/ml_J1-J2_supervised/all_phase/af/augumented_dataL24.csv',index_col=[0])

X_train,X_test,y_train,y_test = train_test_split(data_f.iloc[:,:-1],data_f.iloc[:,-1:], \
                    random_state=42,test_size=0.2,stratify=data_f.iloc[:,-1:])

data_train = pd.concat([X_train,y_train],axis=1)
data_test = pd.concat([X_test,y_test],axis=1)



FileNotFoundError: [Errno 2] No such file or directory: '/Users/shashi/ml_J1-J2_supervised/all_phase/af/augumented_dataL24.csv'

**Set the device**

In [13]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

##### Class definition that is used to load the dataset.

In [14]:
### class to load the data

class LoadData(Dataset):
    def __init__(self,data,L,device=device):
        self.L = L
        self.x_data = torch.tensor(data.iloc[:,:-1].values,dtype=torch.float32).to(device=device)
        self.y_data = torch.tensor(data.iloc[:,-1:].values,dtype=torch.long).to(device=device)

    ### length of the dataset
    ### function one has to use if you want to define a custom dataset class
    def __len__(self):
        return len(self.y_data)

    
    ## get the image and label
    def __getitem__(self,idx):
        if torch.is_tensor(idx):
            idx = idx.to_list()
        
        image = self.x_data[idx,:]
        label = self.y_data[idx]

        return {'data':image,'label':label}

##### **Model definition for the variational autoencoder**

In [15]:
### definition of the network
class encoder(nn.Module):
    
    def __init__(self,input_size,latent_dim):
        super(encoder,self).__init__()
        self.latent_dim = latent_dim
        self.input_size = input_size
        ### encoder network
        self.encoder1 = nn.Linear(in_features=self.input_size,out_features=256)
        self.encoder2 = nn.Linear(in_features=256,out_features=128)
        self.encoder3 = nn.Linear(in_features=128,out_features=64)
        self.encoder4 = nn.Linear(in_features=64,out_features=32)

        self.mu = nn.Linear(in_features=32,out_features=self.latent_dim)
        self.var = nn.Linear(in_features=32,out_features=self.latent_dim)

    ### feedforward network 
    def forward(self,x):
        ##  passing the input through the network
        x = self.encoder1(x)
        x = self.encoder2(x)
        x = self.encoder3(x)
        x = self.encoder4(x)

        ## passing input x to layer mu and var, mux = g(x), sigmax = f(x) 
        x1 = self.mu(x)
        x2 = self.var(x)

        ## combining mu and sigma to a normal distribution reparameterization trick
        ##  z = mu + sigma * N(0,1)
        #zi = torch.distributions.Normal(0,1).rsample([self.latent_dim])
        #x = x1 + x2*zi

        return x,x1,x2



### definition of the network
class decoder(nn.Module):

    def __init__(self, input_size, latent_dim):
        super(decoder, self).__init__()
        self.latent_dim = latent_dim
        self.input_size = input_size
       
        ### decoder network
        self.decoder1 = nn.Linear(in_features=self.latent_dim, out_features=32)
        self.decoder2 = nn.Linear(in_features=32, out_features=64)
        self.decoder3 = nn.Linear(in_features=64, out_features=128)
        self.decoder4 = nn.Linear(in_features=128, out_features=self.input_size)

    ### feedforward network
    def forward(self, x):
        ##  passing the input through the network
       
        x = self.decoder1(x)
        x = self.decoder2(x)
        x = self.decoder3(x)
        x = self.decoder4(x)

        return x


### Steps of the process
* Pass the input $X$ through the encoder stage $x_enc$
* To get estimate of $\mu$ and $\sigma$ use two neural network and pass $x_enc$ through both of them
    * $\mu_{x} = f_{2}(f_{1}(x))$
    * $\sigma_{x} = g_{2}(f_{1}(x))$
    * $\mu_{x},\sigma_{x}$ are vectors of dimension $d_{enc}$(encoding dimension)
    * Network $f_{1}(x)$ represents the input that is passed through various stages (neural network) and $f_{2},g_{2}$ represents two different neural network.
* To generate the distribution $q(z|x)$ use $\mu_{x},\sigma_{x}$ to generate a normal distribution.
    * $q(z|x)$ is a multidimensional distribution with means given by $\mu_{x}$ vector and variance given by $\sigma_{x}$ vector. 
    * $q(z|x) = \mathcal{N}(\mu_{x},\sigma_{x})$
    * Sample a point from this distribution $q(z|x)$
* Now using $z$ we have to generate $p(x|z)$.
* The error one is trying to minimize is 
\begin{equation}
min E_{q} \left [ \log q(z|x) - \log p(z) \right ] -  {E}_{q} \log p(x|z) 
\end{equation}
* Here $E_{q}$ means average value for a given distribution of $q(z|x)$
* Distribution $p(z) = \mathcal{N} (0,1)$ is a standard normal.
* Distribution $p(x|z) = \mathcal{N}(f(z),cI) = \mathcal{N} (decoder(z),cI)$ 

* $x --[encoder]-->x_{enc},\mu_{x},\sigma_{x} -->\mathcal{N}(\mu_{x},\sigma_{x})--> [decoder]--->x$

### **Steps for the computation**
1. I will use the vector notation where the data $\vec{x}$ is cosidered as a vector.
1. Instantiate encoder object
1. The data $\vec{x}$ passed through the encoder network will give us $\vec{\mu}_{x}, \vec{\sigma}_{x}$.
1. $\vec{x}$ is the vector in the space of the data and $\vec{\mu}_{x},\vec{\sigma}_{x}$ vector in the latent/encoding dimension.
1. The distribution $\bold{q}(z|x)$ is obtained from $\vec{\mu}_{x}$ and $\vec{\sigma}_{x}$.
    * $\bold{q}(z|x) \equiv \mathcal{N}(\vec{\mu}_{x},\vec{\sigma}_{x})$
    * $\vec{z}$ is sampled from the distribution $\bold{q}(z|x)$
1. $\vec{z}$ is passed through the decoder network to give $\hat{x}$
1. We need to compute $\bold{p}(\hat{x}|z)$.
    * This distribution is $\bold{p}(\hat{x}|z) \equiv \mathcal{N}(f(z),cI)$

In [37]:
input_size = 32
latent_dim = 32
x = torch.randn(input_size)

### create an instance of encoder network
encdr = encoder(input_size,latent_dim)

## generate encoded output and mu,sigma values
x_enc,mu,sigma = encdr(x)

## create an instance of decoder network
decdr = decoder(input_size,latent_dim)
## generate multidimensional distribution q(z|x) using mu(x) and sigma(x) 
## z is passed as an input to the decoder object
q_zx  = torch.distributions.Normal(mu.data,sigma.data)

## prior distribution of z p(z) = N(0,1)
## will be used to calculate the KL divergence

#p_z = torch.distributions.Normal(torch.zeros_like(mu), torch.ones_like(sigma))

## sampling z from the q_zx

#z = q_zx.rsample()

## measuring the log probability of z being in q(z|x) and p(z)

#log_qzx = q_zx.log_prob(z)
#log_pz = p_z.log_prob(z)


### passing z to decoder network to get xnew 

#x_new = decdr(z)

### generate distribution p(x|z) 
#p_xz = torch.distributions.Normal(x_new,torch.ones_like(x_new))

### measure log p(x|z) 
#log_pxz = p_xz.log(x)












tensor([ 0.0401,  0.1875, -0.0988,  0.2141, -0.1828,  0.2008, -0.0694,  0.1027,
        -0.1705,  0.1471,  0.1004,  0.2217, -0.0063, -0.0665, -0.0971, -0.3043,
        -0.1484, -0.1096, -0.0980,  0.0626,  0.0152,  0.1768,  0.0393, -0.1000,
        -0.0330, -0.1884, -0.0192, -0.1777,  0.0384,  0.1635, -0.1496, -0.1226],
       grad_fn=<AddBackward0>)


ValueError: Expected parameter scale (Tensor of shape (32,)) of distribution Normal(loc: torch.Size([32]), scale: torch.Size([32])) to satisfy the constraint GreaterThan(lower_bound=0.0), but found invalid values:
tensor([-0.0888, -0.1067,  0.1868, -0.0179, -0.0805, -0.0762, -0.0806,  0.0584,
        -0.1148,  0.1615, -0.1582, -0.1890,  0.0938, -0.1230,  0.1164, -0.0443,
        -0.0380,  0.2295,  0.0536,  0.0275, -0.0710,  0.1306,  0.1789,  0.0417,
        -0.0263,  0.2756,  0.1133, -0.0234, -0.0112, -0.1482, -0.0625,  0.0670])

In [26]:
xi = torch.rand([10])
p_xz = torch.distributions.Normal(xi,xi)


In [36]:
mu.data

tensor([ 0.0690, -0.0463, -0.0693,  0.0466,  0.1630, -0.1311,  0.1725, -0.0606,
         0.0797, -0.1888, -0.1515, -0.0336,  0.1242,  0.0949,  0.1362, -0.0273,
        -0.0293, -0.1463,  0.0584, -0.0516,  0.0967,  0.2002,  0.1508,  0.0385,
         0.0708, -0.0302, -0.0302, -0.1748, -0.0062, -0.1858, -0.2742,  0.0707])

RuntimeError: tensor.H is only supported on matrices (2-D tensors). Got 1-D tensor.