In [None]:
from data import DATABASE_CSV, PREPARED_TRAIN_DATABASE_CSV, PREPARED_TEST_DATABASE_CSV
import pandas as pd
import numpy as np

# Loading Data

In [None]:
# Load the prepared training matrix from CSV.
# ndmin=2 keeps the result 2-D even when the file holds a single row or a
# single column; without it np.loadtxt squeezes such files to 1-D and any
# later `X_train_prepared.shape[1]` lookup raises IndexError.
X_train_prepared = np.loadtxt(PREPARED_TRAIN_DATABASE_CSV, delimiter=",", ndmin=2)

# Define Vanilla AutoEncoder

In [None]:
from torch import nn, optim
from torch.nn import functional as F
import torch

class FcEncoder(nn.Module):
    """Two-layer fully connected encoder with a ReLU after each layer.

    Layer widths are ``input_size -> int(embedding_size / 2) -> embedding_size``,
    i.e. the intermediate layer is half as wide as the embedding it feeds.
    """

    def __init__(self, input_size, embedding_size):
        """Create the two linear layers.

        Args:
            input_size: Number of features in each input vector.
            embedding_size: Number of features in the produced embedding.
        """
        super().__init__()

        half_width = int(embedding_size / 2)
        # Attribute names and creation order determine the state_dict keys.
        self.fc = nn.Linear(in_features=input_size, out_features=half_width)
        self.h = nn.Linear(in_features=half_width, out_features=embedding_size)

    def forward(self, x):
        """Return the ReLU-activated embedding computed from ``x``."""
        hidden = F.relu(self.fc(x))
        embedding = F.relu(self.h(hidden))
        return embedding


class FcDecoder(nn.Module):
    """Two-layer fully connected decoder.

    Layer widths are ``embedding_size -> int(embedding_size / 2) -> output_size``.
    ReLU is applied to the intermediate layer only; the final layer's output is
    returned without an activation.
    """

    def __init__(self, embedding_size, output_size):
        """Create the two linear layers.

        Args:
            embedding_size: Number of features in each incoming embedding.
            output_size: Number of features in the produced output.
        """
        super().__init__()

        half_width = int(embedding_size / 2)
        # Attribute names and creation order determine the state_dict keys.
        self.h = nn.Linear(in_features=embedding_size, out_features=half_width)
        self.fc = nn.Linear(in_features=half_width, out_features=output_size)

    def forward(self, x):
        """Map an embedding ``x`` to an un-activated output vector."""
        hidden = F.relu(self.h(x))
        output = self.fc(hidden)
        return output

class AE(nn.Module):
    """Vanilla autoencoder built from an ``FcEncoder`` and an ``FcDecoder``.

    The decoder's output width is set to the encoder's input width, so
    ``forward`` returns a tensor with the same feature count as its input.
    """

    def __init__(self, input_size, embedding_size):
        """Build the encoder/decoder pair.

        Args:
            input_size: Number of features in each input vector.
            embedding_size: Number of features in the embedding between
                encoder and decoder.
        """
        super().__init__()

        self.encoder = FcEncoder(input_size=input_size, embedding_size=embedding_size)
        self.decoder = FcDecoder(embedding_size=embedding_size, output_size=input_size)

    def encode(self, x):
        """Run ``x`` through the encoder and return the embedding."""
        embedding = self.encoder(x)
        return embedding

    def decode(self, x):
        """Run the embedding ``x`` through the decoder and return its output."""
        reconstruction = self.decoder(x)
        return reconstruction

    def forward(self, x):
        """Encode ``x`` and immediately decode the result."""
        return self.decode(self.encode(x))


In [None]:
# Seed torch's global RNG before the model is built so that weight
# initialisation (and any later torch-driven randomness, such as DataLoader
# shuffling) is repeatable after a kernel restart.
SEED = 42
torch.manual_seed(SEED)

# Tunable hyper-parameters, named so they are easy to find and change.
EMBEDDING_SIZE = 30
LEARNING_RATE = 1e-3

# One input unit per column of the prepared training matrix.
input_dim = X_train_prepared.shape[1]
model = AE(input_size=input_dim, embedding_size=EMBEDDING_SIZE)

# Adam optimizer over all model parameters.
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

# Mean-squared error between reconstruction and input.
criterion = nn.MSELoss()

In [None]:
epochs = 10

# Cast to float32 once up front rather than re-casting every mini-batch:
# nn.Linear parameters are float32 by default, whereas np.loadtxt produces
# float64 unless told otherwise.
train_tensor = torch.as_tensor(X_train_prepared, dtype=torch.float32)
n_samples = len(train_tensor)

train_loader = torch.utils.data.DataLoader(
    train_tensor, batch_size=128, shuffle=True, num_workers=4, pin_memory=True
)

# Make training mode explicit so the cell stays correct if mode-dependent
# layers (dropout, batch-norm) are ever added to the model.
model.train()

for epoch in range(epochs):
    running_loss = 0.0
    for batch_features in train_loader:
        # PyTorch accumulates gradients across backward passes, so clear them
        # before each step.
        optimizer.zero_grad()

        # Reconstruct the batch and score it against itself.
        outputs = model(batch_features)
        train_loss = criterion(outputs, batch_features)

        # Back-propagate and update the parameters.
        train_loss.backward()
        optimizer.step()

        # criterion returns the batch mean; weight it by the batch size so the
        # (usually smaller) final batch does not skew the epoch average.
        running_loss += train_loss.item() * batch_features.size(0)

    # Per-sample mean reconstruction loss over the whole epoch.
    loss = running_loss / n_samples

    # display the epoch training loss
    print("epoch : {}/{}, loss = {:.6f}".format(epoch + 1, epochs, loss))

In [None]:
# Encode the whole training set without autograd: no gradients are needed for
# inspection, and recording a graph for every row only wastes memory.
with torch.no_grad():
    X_tiny = model.encode(torch.Tensor(X_train_prepared))

# Element-wise comparison of the embeddings of rows 1 and 1000.
# NOTE(review): presumably a sanity check that distinct inputs do not collapse
# to the same embedding; assumes the training set has more than 1000 rows.
X_tiny[1] == X_tiny[1000]

# Save the Encoded Training Data

In [None]:
# np.savetxt converts its input with np.asarray, which raises RuntimeError for
# a tensor that still requires grad. Compute the embeddings with autograd
# disabled and hand NumPy a plain ndarray instead.
with torch.no_grad():
    X_train_embedded = model.encode(torch.Tensor(X_train_prepared)).numpy()

# NOTE(review): "????????" looks like a placeholder or mis-encoded file name
# ('?' is also not a legal file-name character on Windows) — confirm the
# intended output path.
np.savetxt("????????", X_train_embedded, delimiter=",")