In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

# Build a toy 1-D dataset (100 samples, 1 feature) and mark some entries as
# missing. The NumPy seed is fixed so the samples and the mask are reproducible.
np.random.seed(0)
original_data = np.random.standard_normal(size=(100, 1))

# Mask convention: 1 = observed, 0 = missing, drawn with probabilities
# 0.8 and 0.2 respectively, one flag per data entry.
missing_mask = np.random.choice(
    [0, 1],
    size=original_data.shape,
    p=[0.2, 0.8],
)

# The elementwise product leaves observed entries untouched and turns every
# missing entry into 0.
data_with_missing = np.multiply(original_data, missing_mask)

# Dimensionality of the latent space used by the VAE for data imputation
latent_dim = 2

# Encoder
class Encoder(nn.Module):
    """Map inputs to the parameters of a diagonal Gaussian over the latent code.

    One ReLU hidden layer feeds two parallel linear heads: one produces the
    mean, the other the log-variance.

    Args:
        input_dim: Number of features per input sample (default 1).
        hidden_dim: Width of the hidden layer (default 32).
        latent_size: Dimensionality of the latent code. When ``None`` the
            module-level ``latent_dim`` is read at construction time, so
            ``Encoder()`` builds the same network it always has.
    """

    def __init__(self, input_dim=1, hidden_dim=32, latent_size=None):
        super().__init__()
        if latent_size is None:
            # Fall back to the notebook-wide setting.
            latent_size = latent_dim
        # Layers are created in the same order as before so that a given
        # torch seed yields the same initial weights.
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc_mean = nn.Linear(hidden_dim, latent_size)
        self.fc_log_var = nn.Linear(hidden_dim, latent_size)

    def forward(self, x):
        """Return ``(mean, log_var)`` for the batch ``x``.

        Both outputs have ``latent_size`` features in their last dimension.
        """
        hidden = torch.relu(self.fc1(x))
        mean = self.fc_mean(hidden)
        log_var = self.fc_log_var(hidden)
        return mean, log_var

# Decoder
class Decoder(nn.Module):
    """Map a latent code back to data space through one ReLU hidden layer.

    Args:
        latent_size: Dimensionality of the incoming latent code. When
            ``None`` the module-level ``latent_dim`` is read at construction
            time, so ``Decoder()`` builds the same network it always has.
        hidden_dim: Width of the hidden layer (default 32).
        output_dim: Number of features per reconstructed sample (default 1).
    """

    def __init__(self, latent_size=None, hidden_dim=32, output_dim=1):
        super().__init__()
        if latent_size is None:
            # Fall back to the notebook-wide setting.
            latent_size = latent_dim
        self.fc1 = nn.Linear(latent_size, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Decode the latent batch ``x``; the output is linear (no activation)."""
        hidden = torch.relu(self.fc1(x))
        return self.fc2(hidden)

# VAE
class VAE(nn.Module):
    """Variational autoencoder: encoder -> reparameterized latent -> decoder.

    Args:
        encoder: Optional callable/module returning ``(mean, log_var)`` for a
            batch. Defaults to a freshly constructed ``Encoder()``.
        decoder: Optional callable/module mapping a latent batch to a
            reconstruction. Defaults to a freshly constructed ``Decoder()``.
    """

    def __init__(self, encoder=None, decoder=None):
        super().__init__()
        self.encoder = Encoder() if encoder is None else encoder
        self.decoder = Decoder() if decoder is None else decoder

    def forward(self, x):
        """Return ``(reconstructed, mean, log_var)`` for the batch ``x``.

        In training mode the latent code is sampled with the
        reparameterization trick (``z = mean + eps * std``) so gradients can
        flow through the sampling step. In eval mode (after ``.eval()``) the
        posterior mean is decoded directly, which makes inference
        deterministic.
        """
        mean, log_var = self.encoder(x)
        if self.training:
            std = torch.exp(0.5 * log_var)
            epsilon = torch.randn_like(std)
            z = mean + epsilon * std
        else:
            # No sampling noise at inference time.
            z = mean
        reconstructed = self.decoder(z)
        return reconstructed, mean, log_var

# Define VAE loss function
def vae_loss(reconstructed, x, mean, log_var, mask=None, kl_weight=0.1):
    """Return the VAE objective: summed squared error plus weighted KL term.

    Args:
        reconstructed: Decoder output for the batch.
        x: Target values, same shape as ``reconstructed``.
        mean: Posterior means produced by the encoder.
        log_var: Posterior log-variances produced by the encoder.
        mask: Optional tensor broadcastable to ``x``; non-zero marks an
            observed entry, zero a missing one. Missing entries are excluded
            from the reconstruction term, so whatever placeholder ``x`` holds
            there (0, NaN, ...) neither contributes to the loss nor receives
            gradient. ``None`` (default) scores every entry.
        kl_weight: Multiplier applied to the KL divergence term (default 0.1).

    Returns:
        A scalar tensor: ``sum((reconstructed - x)^2) + kl_weight * KL``.
    """
    if mask is not None:
        observed = mask.to(torch.bool)
        # torch.where (rather than multiplying by the mask) keeps NaN
        # placeholders from leaking into the sum.
        x = torch.where(observed, x, torch.zeros_like(x))
        reconstructed = torch.where(
            observed, reconstructed, torch.zeros_like(reconstructed)
        )
    mse_loss = nn.functional.mse_loss(reconstructed, x, reduction='sum')
    # Closed-form KL(N(mean, exp(log_var)) || N(0, I)), summed over the batch.
    kl_loss = -0.5 * torch.sum(1 + log_var - mean.pow(2) - log_var.exp())
    return mse_loss + kl_weight * kl_loss

# Convert data to PyTorch tensors
# The mask (1 = observed, 0 = missing) travels with the data so that training
# and the final imputation can tell real values apart from the 0 placeholders
# that stand in for missing entries.
data_with_missing = torch.as_tensor(data_with_missing, dtype=torch.float32)
observed_mask = torch.as_tensor(missing_mask, dtype=torch.float32)

# Seed PyTorch too: NumPy's seed does not cover weight initialization,
# batch shuffling, or the latent sampling noise.
torch.manual_seed(0)

# Create DataLoader
batch_size = 32
data_loader = DataLoader(
    TensorDataset(data_with_missing, observed_mask),
    batch_size=batch_size,
    shuffle=True,
)

# Initialize the VAE model and optimizer
vae = VAE()
optimizer = optim.Adam(vae.parameters())

# Training loop
num_epochs = 100
vae.train()
for epoch in range(num_epochs):
    total_loss = 0.0
    for x, observed in data_loader:
        optimizer.zero_grad()
        reconstructed, mean, log_var = vae(x)
        # x is already 0 wherever a value is missing; zeroing the
        # reconstruction at the same positions makes the squared error (and
        # its gradient) vanish there, so the model is only fit to observed
        # entries instead of learning to reproduce the placeholder.
        loss = vae_loss(reconstructed * observed, x, mean, log_var)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    print(f"Epoch [{epoch+1}/{num_epochs}] Loss: {total_loss / len(data_with_missing)}")

# Perform data imputation
# Decode the posterior mean (no sampling noise, no autograd bookkeeping) and
# use the result only where a value is missing; observed entries are kept.
vae.eval()
with torch.no_grad():
    posterior_mean, _ = vae.encoder(data_with_missing)
    reconstruction = vae.decoder(posterior_mean)
imputed_data = torch.where(
    observed_mask.bool(), data_with_missing, reconstruction
).numpy()

# Print some results
print("Original Data:")
print(original_data[:10])
print("Data with Missing Values:")
print(data_with_missing[:10].detach().numpy())
print("Imputed Data:")
print(imputed_data[:10])


Epoch [1/100] Loss: 0.9524822425842285
Epoch [2/100] Loss: 0.9198378562927246
Epoch [3/100] Loss: 0.9081756210327149
Epoch [4/100] Loss: 0.8561338877677918
Epoch [5/100] Loss: 0.8490874099731446
Epoch [6/100] Loss: 0.849484293460846
Epoch [7/100] Loss: 0.8794067668914795
Epoch [8/100] Loss: 0.8170731163024902
Epoch [9/100] Loss: 0.8352644538879395
Epoch [10/100] Loss: 0.8223301029205322
Epoch [11/100] Loss: 0.7549737393856049
Epoch [12/100] Loss: 0.770291531085968
Epoch [13/100] Loss: 0.768078688532114
Epoch [14/100] Loss: 0.7691821533441544
Epoch [15/100] Loss: 0.692395088672638
Epoch [16/100] Loss: 0.7302446037530899
Epoch [17/100] Loss: 0.7131495821475983
Epoch [18/100] Loss: 0.7065990990400315
Epoch [19/100] Loss: 0.6261398273706437
Epoch [20/100] Loss: 0.6398012076318264
Epoch [21/100] Loss: 0.627301561832428
Epoch [22/100] Loss: 0.6232863807678223
Epoch [23/100] Loss: 0.5686908796429634
Epoch [24/100] Loss: 0.5270124864578247
Epoch [25/100] Loss: 0.5405384069681167
Epoch [26/100]