In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from sklearn.model_selection import train_test_split
import os
import pandas

In [3]:
data = np.random.rand(1000, 50)
data_tensor = torch.tensor(data, dtype=torch.float32)
train_data, test_data = train_test_split(data, test_size=0.2, random_state=42)
train_data_tensor = torch.tensor(train_data, dtype=torch.float32)
test_data_tensor = torch.tensor(test_data, dtype=torch.float32)

In [4]:
class Autoencoder(nn.Module):
    def __init__(self):
        super(Autoencoder, self).__init__()

        # Encoder
        self.encoder = nn.Sequential(
            nn.Linear(50, 20),
            nn.ReLU(),
            nn.Linear(20, 3)
        )

        # Decoder
        self.decoder = nn.Sequential(
            nn.Linear(3, 20),
            nn.ReLU(),
            nn.Linear(20, 50)
        )

    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x

In [5]:
class VAE(nn.Module):
    def __init__(self, input_dim=50, hidden_dim=20, z_dim=3):
        super(VAE, self).__init__()
        
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, z_dim * 2)
        )
        
        self.decoder = nn.Sequential(
            nn.Linear(z_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, input_dim)
        )
    
    def reparameterize(self, mu, logvar):
        std = torch.exp(0.5 * logvar)
        eps = torch.randn_like(std)
        return mu + eps * std
        
    def forward(self, x):
        h = self.encoder(x)
        mu, logvar = h.chunk(2, dim=1)
        z = self.reparameterize(mu, logvar)
        x_reconstructed = self.decoder(z)
        return x_reconstructed, mu, logvar

In [7]:
def vae_loss(x, x_reconstructed, mu, logvar):
    reconstruction_loss = nn.MSELoss(reduction='sum')(x_reconstructed, x)
    kl_divergence = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
    return reconstruction_loss + kl_divergence

In [9]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = Autoencoder().to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

num_epochs = 100
batch_size = 32

for epoch in range(num_epochs):
    for i in range(0, len(train_data_tensor), batch_size):
        batch = train_data_tensor[i:i+batch_size].to(device)
        output = model(batch)
        loss = criterion(output, batch)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Evaluation
    model.eval()
    with torch.no_grad():
        test_output = model(test_data_tensor.to(device))
        test_loss = criterion(test_output, test_data_tensor.to(device))
    model.train()

    print(f'Epoch [{epoch + 1}/{num_epochs}], Train Loss: {loss.item():.4f}, Test Loss: {test_loss.item():.4f}')

Epoch [1/100], Train Loss: 0.2140, Test Loss: 0.2062
Epoch [2/100], Train Loss: 0.1206, Test Loss: 0.1186
Epoch [3/100], Train Loss: 0.0895, Test Loss: 0.0882
Epoch [4/100], Train Loss: 0.0835, Test Loss: 0.0818
Epoch [5/100], Train Loss: 0.0825, Test Loss: 0.0811
Epoch [6/100], Train Loss: 0.0822, Test Loss: 0.0809
Epoch [7/100], Train Loss: 0.0820, Test Loss: 0.0808
Epoch [8/100], Train Loss: 0.0818, Test Loss: 0.0806
Epoch [9/100], Train Loss: 0.0815, Test Loss: 0.0805
Epoch [10/100], Train Loss: 0.0812, Test Loss: 0.0803
Epoch [11/100], Train Loss: 0.0809, Test Loss: 0.0801
Epoch [12/100], Train Loss: 0.0805, Test Loss: 0.0798
Epoch [13/100], Train Loss: 0.0802, Test Loss: 0.0796
Epoch [14/100], Train Loss: 0.0798, Test Loss: 0.0793
Epoch [15/100], Train Loss: 0.0793, Test Loss: 0.0790
Epoch [16/100], Train Loss: 0.0789, Test Loss: 0.0787
Epoch [17/100], Train Loss: 0.0785, Test Loss: 0.0785
Epoch [18/100], Train Loss: 0.0781, Test Loss: 0.0782
Epoch [19/100], Train Loss: 0.0778, T

In [10]:
#VAE Training
VAE_model = VAE().to(device)
num_epochs = 100
batch_size = 32

for epoch in range(num_epochs):
    for i in range(0, len(train_data_tensor), batch_size):
        batch = train_data_tensor[i:i+batch_size].to(device)
        x_reconstructed, mu, logvar = VAE_model(batch)
        loss = vae_loss(batch, x_reconstructed, mu, logvar)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    
    print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}')

Epoch [1/100], Loss: 595.5386
Epoch [2/100], Loss: 609.1243
Epoch [3/100], Loss: 623.9763
Epoch [4/100], Loss: 619.3960
Epoch [5/100], Loss: 603.5135
Epoch [6/100], Loss: 616.0740
Epoch [7/100], Loss: 627.4708
Epoch [8/100], Loss: 602.8265
Epoch [9/100], Loss: 601.7139
Epoch [10/100], Loss: 587.5834
Epoch [11/100], Loss: 600.5737
Epoch [12/100], Loss: 628.3588
Epoch [13/100], Loss: 596.4462
Epoch [14/100], Loss: 575.5097
Epoch [15/100], Loss: 614.1708
Epoch [16/100], Loss: 567.0222
Epoch [17/100], Loss: 602.0323
Epoch [18/100], Loss: 594.1666
Epoch [19/100], Loss: 619.6693
Epoch [20/100], Loss: 580.0005
Epoch [21/100], Loss: 610.9658
Epoch [22/100], Loss: 645.8005
Epoch [23/100], Loss: 587.4895
Epoch [24/100], Loss: 612.5948
Epoch [25/100], Loss: 607.5726
Epoch [26/100], Loss: 620.2687
Epoch [27/100], Loss: 636.8727
Epoch [28/100], Loss: 602.3323
Epoch [29/100], Loss: 621.6059
Epoch [30/100], Loss: 632.4106
Epoch [31/100], Loss: 659.6248
Epoch [32/100], Loss: 651.7289
Epoch [33/100], L

In [7]:
with torch.no_grad():
    features = model.encoder(data_tensor.to(device)).cpu().numpy()

In [22]:
from sklearn.decomposition import PCA

pca = PCA(n_components=3)
reduced_data_pca = pca.fit_transform(data)

In [23]:
# Reconstruct data using PCA
reconstructed_data_pca = pca.inverse_transform(reduced_data_pca)

# Reconstruct data using AutoEncoder
with torch.no_grad():
    reconstructed_data_autoencoder = model(data_tensor.to(device)).cpu().numpy()

# mse
mse_pca = np.mean((data - reconstructed_data_pca) ** 2)
mse_autoencoder = np.mean((data - reconstructed_data_autoencoder) ** 2)

print(f'MSE for PCA: {mse_pca:.4f}')
print(f'MSE for Autoencoder: {mse_autoencoder:.4f}')

MSE for PCA: 0.0764
MSE for Autoencoder: 0.0773


In [24]:
VAE_model.eval()
with torch.no_grad():
    test_reconstructed, _, _ = VAE_model(test_data_tensor.to(device))
    test_reconstructed = test_reconstructed.cpu().numpy()
    mse = np.mean((test_data - test_reconstructed) ** 2)

print(f'Test MSE: {mse:.4f}')

Test MSE: 0.3665


In [11]:
#Save Model
model_folder = "saved_models"
model_filename = "autoencoder.pth"

# if not exsit
if not os.path.exists(model_folder):
    os.makedirs(model_folder)

model_path = os.path.join(model_folder, model_filename)

# save model
torch.save(model.state_dict(), model_path)