Import files

In [17]:
import os
import numpy as np
import pandas as pd
import torch

def read_file(file_path):
    data = []
    with open(file_path, 'r') as f:
        for line in f:
            mass, intensity = line.strip().split('\t')
            data.append([float(mass), float(intensity)])
    return np.array(data)

directory = r'C:\Users\Marshall\TEST DATA\TEST DATA\DalbergiaLatifolia'
file_paths = [os.path.join(directory, f) for f in os.listdir(directory) if f.endswith('.txt')]
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

all_data = []
for file_path in file_paths:
    data = pd.read_csv(file_path,sep= '\t',header = 1)
    all_data.append(data)

all_data = np.concatenate(all_data, axis=0)

# Normalize the data
means = np.mean(all_data, axis=0)
stds = np.std(all_data, axis=0)
normalized_data = (all_data - means) / stds

data_tensor = torch.from_numpy(normalized_data).float()
data_tensor = data_tensor.to(device)

VAE setup

In [18]:
import torch
import torch.nn as nn
import torch.nn.functional as F

# Define the VAE model
class VAE(nn.Module):
    def __init__(self, input_dim, latent_dim):
        super(VAE, self).__init__()
        self.input_dim = input_dim
        self.latent_dim = latent_dim

        # Encoder
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, 512),
            nn.ReLU(),
            nn.Linear(512, 256),
            nn.ReLU(),
        )
        self.mu = nn.Linear(256, latent_dim)
        self.log_var = nn.Linear(256, latent_dim)

        # Decoder
        self.decoder = nn.Sequential(
            nn.Linear(latent_dim, 256),
            nn.ReLU(),
            nn.Linear(256, 512),
            nn.ReLU(),
            nn.Linear(512, input_dim),
        )

    def encode(self, x):
        h = self.encoder(x)
        mu = self.mu(h)
        log_var = self.log_var(h)
        return mu, log_var

    def reparameterize(self, mu, log_var):
        std = torch.exp(0.5 * log_var)
        eps = torch.randn_like(std)
        z = mu + eps * std
        return z

    def decode(self, z):
        return self.decoder(z)

    def forward(self, x):
        mu, log_var = self.encode(x)
        z = self.reparameterize(mu, log_var)
        x_recon = self.decode(z)
        return x_recon, mu, log_var

Training the VAE

In [19]:
# Instantiate the VAE model
input_dim = data.shape[1]
latent_dim = 16
vae = VAE(input_dim, latent_dim)
vae = vae.to(device)

# Define the loss function
reconstruction_loss = nn.MSELoss(reduction='sum')

def loss_function(x, x_recon, mu, log_var):
    bce = reconstruction_loss(x_recon, x)
    kl_div = -0.5 * torch.sum(1 + log_var - mu.pow(2) - log_var.exp())
    return bce + kl_div

# Define the optimizer
optimizer = torch.optim.Adam(vae.parameters(), lr=0.001)

# Training loop
num_epochs = 100
batch_size = 64

for epoch in range(num_epochs):
    for i in range(0, len(data_tensor), batch_size):
        batch = data_tensor[i:i+batch_size]
        optimizer.zero_grad()

        x_recon, mu, log_var = vae(batch)
        loss = loss_function(batch, x_recon, mu, log_var)

        loss.backward()
        optimizer.step()

    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}")

Epoch [1/100], Loss: 102.6587
Epoch [2/100], Loss: 102.8241
Epoch [3/100], Loss: 102.3620
Epoch [4/100], Loss: 101.8976
Epoch [5/100], Loss: 101.4027
Epoch [6/100], Loss: 100.9417
Epoch [7/100], Loss: 100.5659
Epoch [8/100], Loss: 100.2648
Epoch [9/100], Loss: 100.0893
Epoch [10/100], Loss: 99.8088
Epoch [11/100], Loss: 100.2705
Epoch [12/100], Loss: 101.1461
Epoch [13/100], Loss: 101.2201
Epoch [14/100], Loss: 100.8800
Epoch [15/100], Loss: 101.1369
Epoch [16/100], Loss: 100.4984
Epoch [17/100], Loss: 100.5655
Epoch [18/100], Loss: 100.6574
Epoch [19/100], Loss: 100.5500
Epoch [20/100], Loss: 100.6231
Epoch [21/100], Loss: 100.4506
Epoch [22/100], Loss: 100.4644
Epoch [23/100], Loss: 100.2973
Epoch [24/100], Loss: 100.3886
Epoch [25/100], Loss: 100.1671
Epoch [26/100], Loss: 100.1372
Epoch [27/100], Loss: 100.2425
Epoch [28/100], Loss: 100.2427
Epoch [29/100], Loss: 100.9567
Epoch [30/100], Loss: 100.2307
Epoch [31/100], Loss: 100.1890
Epoch [32/100], Loss: 100.4566
Epoch [33/100], Lo

Create output

In [20]:
# Generate synthetic data
latent_samples = torch.randn(1000, latent_dim).to(device)
synthetic_data = vae.decode(latent_samples)

In [22]:
# Denormalize the synthetic data
synthetic_data = synthetic_data.cpu().detach().numpy()
synthetic_data = (synthetic_data * stds) + means

# Save the synthetic data
np.savetxt('synthetic_data.txt', synthetic_data, delimiter='\t')

# Visualize the synthetic data
import matplotlib.pyplot as plt

plt.figure(figsize=(10, 6))
plt.plot(synthetic_data[:, 0], synthetic_data[:, 1])
plt.xlabel('Mass')
plt.ylabel('Intensity')
plt.title('Synthetic Data')
plt.grid(True)
plt.show()

# Save the model
torch.save(vae.state_dict(), 'vae_model.pth')




AttributeError: 'numpy.ndarray' object has no attribute 'cpu'