In [1]:
from __future__ import print_function
import torch
import pandas as pd
import pickle
import matplotlib.pyplot as plt
import numpy as np
import os
import torch.utils.data
import torch.nn.init as init
from torch import nn, optim
from torch.autograd import Variable
from torch.nn import functional as F
from torchvision import datasets, transforms
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
import VAE

print( 'torch:', torch.__version__, 
      'cuda:', torch.cuda.is_available())

torch: 1.3.0+cpu cuda: False


## Load & Re-Scale Data Sets

In [2]:
path = "train_2_1000Samples.csv"
train_data = pd.read_csv(path).iloc[:,1:]
test_data = pd.read_csv(path[:-42]+str('Test_Data_Sets\\test_2_200Samples.csv')).iloc[:,1:]
train_data = train_data.iloc[:,:-1]
test_data = test_data.iloc[:,:-1]
train_data.columns  = test_data.columns
cols = test_data.columns
scalar = MinMaxScaler().fit(pd.concat([train_data, test_data]))
train_data = pd.DataFrame(scalar.transform (train_data)) 
test_data = pd.DataFrame(scalar.transform (test_data)) 
train_data.columns = cols
test_data.columns = cols

## Fix the Global Variables for Learning

In [3]:
#hyperparameters
x_dim = train_data.shape[1]
h_dim = 250
z_dim = 25
n_epochs = 40
clip = 10
learning_rate = 1e-3
batch_size = 10
seed = 100
print_every = 10
save_every = 10
#manual seed
torch.manual_seed(seed)
#init model + optimizer + datasets
train_loader = torch.utils.data.DataLoader ( dataset = train_data.values ,  batch_size = batch_size , shuffle= True)
test_loader = torch.utils.data.DataLoader (  dataset = test_data.values , shuffle= True)
model = VAE.VAE(x_dim, h_dim, z_dim)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

## Train and Test the VAE

In [4]:
def train(epoch):
    train_loss = 0
    epoch_loss = np.zeros(int(len (train_data) / batch_size ))
    epoch_div = np.zeros(int(len (train_data) / batch_size))
    for batch_idx, (data) in enumerate(train_loader):
        
        data = Variable(data)
        #forward + backward + optimize
        optimizer.zero_grad()
        kld_loss, nll_loss, lat, recon, _ = model(data)
        epoch_loss [batch_idx] = nll_loss
        epoch_div [batch_idx] = kld_loss
        loss = kld_loss + nll_loss
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), clip)
        optimizer.step()
        #printing
        if batch_idx % print_every == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\t KLD Loss: {:.6f} \t NLL Loss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader),
                kld_loss.data / batch_size,
                nll_loss.data / batch_size))

            

        train_loss += loss.data
    print('====> Epoch: {} Average loss: {:.4f}'.format(
        epoch, train_loss / len(train_loader.dataset)))
    return epoch_loss, epoch_div
    
def test(epoch):
    """uses test data to evaluate 
    likelihood of the model"""
    mean_kld_loss, mean_nll_loss = 0, 0
    epoch_loss = np.zeros(len(test_data))
    epoch_div = np.zeros(len(test_data))
    for i, (data) in enumerate(test_loader):                                           
        
        data = Variable(data.reshape(1,-1))
        kld_loss, nll_loss, _, _, _ = model(data)
        epoch_div [i] = kld_loss
        epoch_loss [i] = nll_loss
        mean_kld_loss += kld_loss.data
        mean_nll_loss += nll_loss.data

    mean_kld_loss /= len(test_loader.dataset)
    mean_nll_loss /= len(test_loader.dataset)

    print('====> Test set loss: KLD Loss = {:.4f}, NLL Loss = {:.4f} '.format(
        mean_kld_loss, mean_nll_loss))
    return epoch_loss, epoch_div

## Train and Test the Model

In [5]:
train_error = np.zeros([n_epochs , int(train_data.shape[0] / batch_size ) ])
train_div = np.zeros([n_epochs , int(train_data.shape[0] / batch_size ) ])
test_error , test_div  = np.zeros([n_epochs , test_data.shape[0]]) , np.zeros([n_epochs , test_data.shape[0]]) 
for epoch in range(1, n_epochs + 1):
    #training + testing
    tr = train(epoch)
    train_error [epoch-1 , :] = tr [0]
    train_div [epoch-1 , :] = tr [1] 
    te = test(epoch)
    test_error [epoch-1 , :] = te [0]
    test_div [epoch-1 , :] = te [1]

====> Epoch: 1 Average loss: 19.9136
====> Test set loss: KLD Loss = 1.3069, NLL Loss = 8.9413 
====> Epoch: 2 Average loss: 7.4871
====> Test set loss: KLD Loss = 0.9885, NLL Loss = 3.9383 
====> Epoch: 3 Average loss: 1.8163
====> Test set loss: KLD Loss = 1.1866, NLL Loss = -1.3112 
====> Epoch: 4 Average loss: -3.6000
====> Test set loss: KLD Loss = 1.1380, NLL Loss = -4.9445 
====> Epoch: 5 Average loss: -8.4720
====> Test set loss: KLD Loss = 1.2008, NLL Loss = -10.3660 
====> Epoch: 6 Average loss: -12.5371
====> Test set loss: KLD Loss = 1.2502, NLL Loss = -11.9232 
====> Epoch: 7 Average loss: -14.9045
====> Test set loss: KLD Loss = 1.2856, NLL Loss = -13.9402 
====> Epoch: 8 Average loss: -17.5535
====> Test set loss: KLD Loss = 1.2601, NLL Loss = -17.1098 
====> Epoch: 9 Average loss: -18.7450
====> Test set loss: KLD Loss = 1.2844, NLL Loss = -18.9730 
====> Epoch: 10 Average loss: -19.4608
====> Test set loss: KLD Loss = 1.3105, NLL Loss = -20.6077 
====> Epoch: 11 Averag

====> Epoch: 20 Average loss: -32.9350
====> Test set loss: KLD Loss = 1.3555, NLL Loss = -33.3746 
====> Epoch: 21 Average loss: -33.2625
====> Test set loss: KLD Loss = 1.2863, NLL Loss = -32.8235 
====> Epoch: 22 Average loss: -34.8845
====> Test set loss: KLD Loss = 1.3321, NLL Loss = -36.9072 
====> Epoch: 23 Average loss: -34.9613
====> Test set loss: KLD Loss = 1.3081, NLL Loss = -33.2274 
====> Epoch: 24 Average loss: -35.0146
====> Test set loss: KLD Loss = 1.3327, NLL Loss = -33.3541 
====> Epoch: 25 Average loss: -36.1935
====> Test set loss: KLD Loss = 1.3257, NLL Loss = -36.8693 
====> Epoch: 26 Average loss: -36.2960
====> Test set loss: KLD Loss = 1.2972, NLL Loss = -37.5001 
====> Epoch: 27 Average loss: -37.4017
====> Test set loss: KLD Loss = 1.2922, NLL Loss = -39.2005 
====> Epoch: 28 Average loss: -38.0909
====> Test set loss: KLD Loss = 1.2811, NLL Loss = -34.1815 
====> Epoch: 29 Average loss: -38.2211
====> Test set loss: KLD Loss = 1.2803, NLL Loss = -36.1868 


====> Epoch: 39 Average loss: -40.4139
====> Test set loss: KLD Loss = 1.2250, NLL Loss = -40.2917 
====> Epoch: 40 Average loss: -39.6547
====> Test set loss: KLD Loss = 1.2097, NLL Loss = -40.0745 


## Get Latent Representations and Save the Model for later Use

In [6]:
train_lat = [ model (Variable(torch.tensor(train_data.iloc[idx,:].values)).reshape(1,-1))[-1] for idx in range(len(train_data)) ]
test_lat = [ model (Variable(torch.tensor(test_data.iloc[idx,:].values)).reshape(1,-1))[-1] for idx in range(len(test_data)) ]
train_lat = pd.DataFrame(torch.cat(train_lat).cpu().detach().numpy())
test_lat = pd.DataFrame(torch.cat(test_lat).cpu().detach().numpy())
cols = []
for i in range(train_lat.shape[1]):
    cols.append(str('Z'+str(i+1)))
train_lat.columns = cols
test_lat.columns = cols
train_lat.to_csv('Training_Data_Sets\\latent_50D.csv')
test_lat.to_csv('Test_Data_Sets\\latent_50D.csv')
with open('Models\\50D.pickle', 'wb') as handle:
    pickle.dump(model, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [16]:
array = np.zeros((10000,25))
for i in range(len(array)):
    array [i] = model(Variable(torch.tensor(np.zeros(50))))[-1].cpu().detach().numpy()
array1 = np.zeros((10000,25))
for i in range(len(array)):
    array1 [i] = model(Variable(torch.tensor(np.ones(50))))[-1].cpu().detach().numpy()

In [17]:
np.mean(abs(np.mean(array1, 0)-np.mean(array, 0)))/4

0.12781853826622014