In [3]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.model_selection import train_test_split
import torch.utils.data as Data
import process_data
from sklearn.preprocessing import MaxAbsScaler

In [5]:
my_data=process_data.process('all_data.csv')
X,y=process_data.split_x_y(my_data)
X_s=MaxAbsScaler().fit_transform(X)
X_train,X_test,y_train,y_test=train_test_split(X_s,y,test_size=0.1)

In [6]:
class MyDataset():
 
  def __init__(self,x,y):

    self.x_train=torch.tensor(x,dtype=torch.float32)
    self.y_train=torch.tensor(y,dtype=torch.float32)
 
  def __len__(self):
    return len(self.y_train)
   
  def __getitem__(self,idx):
    return self.x_train[idx],self.y_train[idx]

In [7]:
train_set=MyDataset(X_train,y_train)
test_set=MyDataset(X_test,y_test)

In [8]:
n_samples=len(X_train)
n_features=my_data.shape[1]-1

In [9]:
train_loader=torch.utils.data.DataLoader(train_set,batch_size=32,shuffle=False)
test_loader=torch.utils.data.DataLoader(test_set,batch_size=32,shuffle=False)

In [10]:
class AutoEncoder(nn.Module):
    def __init__(self, input_dim, encoding_dim):
        super(AutoEncoder, self).__init__()
        self.encoder = nn.Linear(input_dim, encoding_dim)
        self.decoder = nn.Linear(encoding_dim, input_dim)

    def forward(self, x):
        encoded = F.relu(self.encoder(x))
        decoded = self.decoder(encoded)
        return decoded

In [11]:
input_dim = n_features
encoding_dim = 1000

model = AutoEncoder(input_dim, encoding_dim)
model = model
optimizer = optim.Adam(model.parameters())
loss_fn = torch.nn.MSELoss()

In [12]:
def train_model(model,loss_fn,data_loader=None,epochs=1,optimizer=None):
    for epoch in range(epochs):
        for batch_idx, (data, _) in enumerate(train_loader):
            data = data.view([-1, n_features])
            optimizer.zero_grad()
            output = model(data)
            loss = loss_fn(output, data)
            loss.backward()
            optimizer.step()
            if batch_idx % 100 == 0:
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    epoch, batch_idx * len(data), len(data_loader.dataset),
                    100. * batch_idx / len(data_loader), loss.data.item()))

In [13]:
train_model(model, loss_fn, data_loader=train_loader, epochs=30, optimizer=optimizer)



In [96]:
# Hyper-parameters
input_dim = n_features
h_dim = 1000
z_dim = 100
num_epochs = 15
learning_rate = 1e-3

# VAE model
class VAE(nn.Module):
    def __init__(self, image_size=input_dim, h_dim=h_dim, z_dim=z_dim):
        super(VAE, self).__init__()
        self.fc1 = nn.Linear(image_size, h_dim)
        self.fc2 = nn.Linear(h_dim, z_dim)
        self.fc3 = nn.Linear(h_dim, z_dim)
        self.fc4 = nn.Linear(z_dim, h_dim)
        self.fc5 = nn.Linear(h_dim, image_size)
        
    def encode(self, x):
        h = F.relu(self.fc1(x))
        return self.fc2(h), self.fc3(h)
    
    def reparameterize(self, mu, log_var):
        std = torch.exp(log_var/2)
        eps = torch.randn_like(std)
        return mu + eps * std

    def decode(self, z):
        print(z)
        h = F.relu(self.fc4(z))
        print(h)
        return torch.sigmoid(self.fc5(h))
    
    def forward(self, x):
        mu, log_var = self.encode(x)
        z = self.reparameterize(mu, log_var)
        x_reconst = self.decode(z)
        return x_reconst, mu, log_var

model = VAE()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [97]:
# Start training
for epoch in range(num_epochs):
    for i, (x, _) in enumerate(train_loader):
        # Forward pass
        x = x.view(-1, input_dim)
        x_reconst, mu, log_var = model(x)
        print(x_reconst)
        break
        # Compute reconstruction loss and kl divergence
        # For KL divergence between Gaussians, see Appendix B in VAE paper or (Doersch, 2016):
        # https://arxiv.org/abs/1606.05908

        reconst_loss = loss_fn(x,x_reconst)
        kl_div = - 0.5 * torch.sum(1 + log_var - mu.pow(2) - log_var.exp())
        
        # Backprop and optimize
        loss = reconst_loss + kl_div
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        if (i+1) % 10 == 0:
            print ("Epoch[{}/{}], Step [{}/{}], Reconst Loss: {:.4f}, KL Div: {:.4f}" 
                   .format(epoch+1, num_epochs, i+1, len(train_loader), reconst_loss.item()/len(x), kl_div.item()/len(x)))

tensor([[ 2.9616e+01, -1.6557e+01, -1.2575e+02,  ...,  6.9948e+01,
         -4.4358e+11, -1.9275e+03],
        [ 5.8495e+01, -4.4476e+02,         inf,  ...,  5.7821e+01,
         -3.2536e+25,  3.0096e+01],
        [-1.1218e+02, -3.6154e+00, -2.0821e+02,  ..., -2.3523e+24,
          2.2601e+20, -1.2753e+02],
        ...,
        [-2.7893e+22, -6.3873e+02,  6.8929e+02,  ...,        -inf,
          1.1645e+02,  6.2630e+02],
        [-1.7975e+01, -1.9729e+02, -2.3129e+02,  ...,  6.8145e+14,
         -1.8031e+16, -2.1522e+01],
        [-3.7166e+01, -5.7598e+01, -3.6521e+15,  ...,  8.4709e+29,
         -1.1724e+34,  7.5912e+01]], grad_fn=<AddBackward0>)
tensor([[2.1793e+32, 0.0000e+00, 0.0000e+00,  ..., 0.0000e+00, 0.0000e+00,
         1.1361e+32],
        [       nan,        nan,        nan,  ...,        nan,        nan,
                nan],
        [       nan,        nan,        nan,  ...,        nan,        nan,
                nan],
        ...,
        [       nan,        nan,        