In [6]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler

In [11]:
train_csn = pd.read_csv("/hpc/home/js1207/sparkECMO/Adult ECMO RL/train_data_continuous.csv")
train_csn = train_csn.csn.unique()
data = pd.read_csv("non_discritized_states.csv",index_col=0)
train_data = data[data['csn'].isin(train_csn)]

train_data.reset_index(drop=True, inplace=True)
train_data.drop(columns=['csn'], inplace=True)

scaler = StandardScaler()
train_data = scaler.fit_transform(train_data)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_data.drop(columns=['csn'], inplace=True)


In [14]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
import numpy as np
from model import MLPVAE  # Import the model

# Load NumPy Training Data
train_tensor = torch.tensor(train_data[:, :42], dtype=torch.float32)

# Create Dataset & DataLoader
class TabularDataset(data.Dataset):
    def __init__(self, tensor):
        self.tensor = tensor
    def __getitem__(self, idx):
        return self.tensor[idx]
    def __len__(self):
        return self.tensor.shape[0]

dataset = TabularDataset(train_tensor)
dataloader = data.DataLoader(dataset, batch_size=64, shuffle=True)

# Initialize Model & Optimizer
model = MLPVAE(input_dim=42, hidden_dim=64, latent_dim=16)
optimizer = optim.Adam(model.parameters(), lr=5e-4, weight_decay=1e-4)
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=50)

# Early Stopping Parameters
patience = 10  # Stop after 10 epochs of no improvement
best_loss = float("inf")
epochs_no_improve = 0

# VAE Loss Function
def vae_loss(recon_x, x, mu, logvar, beta=1.0):
    recon_loss = nn.MSELoss()(recon_x, x)  # Use MSE
    kld = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
    kld = torch.clamp(kld, min=0.1)  # Prevent KL collapse
    return recon_loss + beta * kld, recon_loss, kld

# Training Loop with Early Stopping
num_epochs = 100
model.train()

for epoch in range(num_epochs):
    total_loss = 0
    total_recon_loss = 0
    total_kld = 0
    num_batches = 0

    # **Cyclical KL Annealing Schedule**
    cycle = 50
    beta = min(1.0, (epoch % cycle) / cycle)

    for batch in dataloader:
        optimizer.zero_grad()
        recon, mu, logvar = model(batch)
        loss, recon_loss, kld = vae_loss(recon, batch, mu, logvar, beta=beta)

        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)  # Gradient Clipping
        optimizer.step()

        total_loss += loss.item()
        total_recon_loss += recon_loss.item()
        total_kld += kld.item()
        num_batches += 1

    avg_loss = total_loss / num_batches
    avg_recon_loss = total_recon_loss / num_batches
    avg_kld = total_kld / num_batches

    scheduler.step()  # Update learning rate

    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {avg_loss:.4f}, Recon: {avg_recon_loss:.4f}, KL: {avg_kld:.4f} (Beta={beta:.2f})")

    # Early Stopping Logic
    if avg_loss < best_loss:
        best_loss = avg_loss
        epochs_no_improve = 0  # Reset counter
        torch.save(model.state_dict(), "best_mlp_vae.pth")  # Save best model
    else:
        epochs_no_improve += 1  # Increment if no improvement

    if epochs_no_improve >= patience:
        print(f"Early stopping triggered at epoch {epoch+1}.")
        break  # Stop training

# Final Model Save
torch.save(model.state_dict(), "mlp_vae.pth")


Epoch 1/100, Loss: 0.9264, Recon: 0.9264, KL: 375.0389 (Beta=0.00)
Epoch 2/100, Loss: 3.2743, Recon: 0.9932, KL: 114.0533 (Beta=0.02)
Epoch 3/100, Loss: 1.1111, Recon: 1.0255, KL: 2.1406 (Beta=0.04)
Epoch 4/100, Loss: 1.0446, Recon: 1.0121, KL: 0.5413 (Beta=0.06)
Epoch 5/100, Loss: 1.0237, Recon: 1.0091, KL: 0.1830 (Beta=0.08)
Epoch 6/100, Loss: 1.0204, Recon: 1.0100, KL: 0.1044 (Beta=0.10)
Epoch 7/100, Loss: 1.0179, Recon: 1.0058, KL: 0.1008 (Beta=0.12)
Epoch 8/100, Loss: 1.0182, Recon: 1.0041, KL: 0.1011 (Beta=0.14)
Epoch 9/100, Loss: 1.0202, Recon: 1.0040, KL: 0.1009 (Beta=0.16)
Epoch 10/100, Loss: 1.0221, Recon: 1.0040, KL: 0.1008 (Beta=0.18)
Epoch 11/100, Loss: 1.0232, Recon: 1.0030, KL: 0.1009 (Beta=0.20)
Early stopping triggered at epoch 11.


In [13]:
torch.save(model, "mlp_vae.pth")

In [61]:
model = torch.load("mlp_vae.pth")
model.eval()  # Set to evaluation mode if not training

import torch

def generate_synthetic_data(model, sample, column_idx, new_value):
    """
    Modifies a specific column in the input and generates a synthetic output.
    
    :param model: Trained MLPVAE model
    :param sample: A single input sample (1D tensor)
    :param column_idx: Index of the column to modify
    :param new_value: New value to assign to the column
    :return: Generated output with modified column
    """
    model.eval()
    
    # Convert to batch format (1 sample)
    sample = sample.clone().unsqueeze(0)  # Shape (1, input_dim)
    
    # Encode to latent space
    with torch.no_grad():
        mu, logvar = model.encode(sample)
        z = model.reparameterize(mu, logvar)

    # Modify the column in latent space (alternative: modify directly in input)
    modified_sample = sample.clone()
    modified_sample[0, column_idx] = new_value  # Change the specified column

    # Re-encode after modification
    with torch.no_grad():
        new_mu, new_logvar = model.encode(modified_sample)
        new_z = model.reparameterize(new_mu, new_logvar)

    # Decode back to see changes
    generated_output = model.decode(new_z)

    return generated_output.squeeze().detach().numpy()  # Convert back to NumPy for easier analysis

# Example usage:
sample_idx = 0  # Pick any row from your dataset
sample_data = train_tensor[sample_idx]  # Original sample

column_to_change = 5  # Example: Modify column 5
new_value = 2.0  # New value to assign

synthetic_output = generate_synthetic_data(model, sample_data, column_to_change, new_value)

print("Modified synthetic output:", synthetic_output)


Modified synthetic output: [-1.1699022   0.42113316  0.46528575  0.38823992  0.3147081  -0.24146605
  1.0364234   0.01297785  0.03269807 -0.16800848 -0.03724345  0.23910508
 -0.01513278  1.3327388  -0.64124554  1.1138294   0.54673445 -1.0044123
 -0.08894157 -0.16174182 -0.3544161  -0.11003835  1.2203125   0.14217015
 -0.36982304 -0.2851519   1.8047109  -0.02481666  0.06410962 -0.6459687
  0.07594274  0.05963591 -0.62476987 -0.12943527  0.35917825 -0.95582557
  0.5780245  -0.17491864 -0.79985195 -0.08835298 -0.7651638  -0.874994
  1.542209  ]


  model = torch.load("mlp_vae.pth")


In [33]:
train_csn

array([   43640203,  2178911314,  2207761225,  4510511039,  6648317299,
        7270121193, 10017307254, 10057670084, 10243791111, 11197349249,
       11367061032, 11598700132, 12128379018, 12156649330, 14719067105,
       15668590152, 15738357227, 16251981187, 17249721021, 19073406214,
       19085650202, 19330760302, 19657017003, 31978091155, 32313439276,
       36950570009, 38450329331, 39299217144, 42008659061, 42989860004,
       43323090183, 43917519266, 43946031211, 44991746336, 45457761150,
       46056470115, 46429729018, 48783979065, 48791437223, 48994390085,
       51299946263, 52278286092, 52461796114, 53029789123, 53045406183,
       53096266192, 53243526208, 53393046226, 53467166235, 53476486236,
       54565637009, 54705307026, 55395459196, 55403120241, 55511607117,
       55953247177, 56063101211, 56258940008, 56831407291, 57284130211,
       57642960169, 58928481104, 60780209038, 60845329119, 60879489278,
       60956019056, 61200959078, 61260430112, 61391379093, 62369

In [28]:
sparkECMO/Adult ECMO RL/train_data_continuous.csn

SyntaxError: invalid syntax (2854671802.py, line 1)