In [3]:
!pip install torch torchvision torchaudio

Collecting torch
  Downloading torch-2.7.0-cp312-cp312-win_amd64.whl.metadata (29 kB)
Collecting torchvision
  Downloading torchvision-0.22.0-cp312-cp312-win_amd64.whl.metadata (6.3 kB)
Collecting torchaudio
  Downloading torchaudio-2.7.0-cp312-cp312-win_amd64.whl.metadata (6.7 kB)
Collecting sympy>=1.13.3 (from torch)
  Downloading sympy-1.14.0-py3-none-any.whl.metadata (12 kB)
Downloading torch-2.7.0-cp312-cp312-win_amd64.whl (212.5 MB)
   ---------------------------------------- 0.0/212.5 MB ? eta -:--:--
   - -------------------------------------- 7.9/212.5 MB 44.2 MB/s eta 0:00:05
   ----- ---------------------------------- 26.7/212.5 MB 70.6 MB/s eta 0:00:03
   -------- ------------------------------- 44.8/212.5 MB 77.1 MB/s eta 0:00:03
   ------------ --------------------------- 64.0/212.5 MB 79.9 MB/s eta 0:00:02
   --------------- ------------------------ 82.6/212.5 MB 82.3 MB/s eta 0:00:02
   ------------------ -------------------- 102.2/212.5 MB 84.8 MB/s eta 0:00:02
   ----

In [27]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from torch import nn
from torch import optim

# Load your saved data
df = pd.read_csv('smote_tomek_data.csv')  
target_col = 'label' 

# Drop label column for unsupervised VAE
X = df.drop(columns=[target_col])

# Normalize features 
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)


import torch
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split

# Train/test split
X_train, X_val = train_test_split(X_scaled, test_size=0.2, random_state=42)
train_tensor = torch.tensor(X_train, dtype=torch.float32)
val_tensor = torch.tensor(X_val, dtype=torch.float32)

train_loader = DataLoader(TensorDataset(train_tensor), batch_size=256, shuffle=True)
val_loader = DataLoader(TensorDataset(val_tensor), batch_size=256)

# Define VAE
class VAE(nn.Module):
    def __init__(self, input_dim, latent_dim=10):
        super(VAE, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU()
        )
        self.fc_mu = nn.Linear(64, latent_dim)
        self.fc_logvar = nn.Linear(64, latent_dim)

        self.decoder = nn.Sequential(
            nn.Linear(latent_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 128),
            nn.ReLU(),
            nn.Linear(128, input_dim),
            nn.Sigmoid()
        )

    def reparameterize(self, mu, logvar):
        std = torch.exp(0.5 * logvar)
        eps = torch.randn_like(std)
        return mu + eps * std

    def forward(self, x):
        h = self.encoder(x)
        mu = self.fc_mu(h)
        logvar = self.fc_logvar(h)
        z = self.reparameterize(mu, logvar)
        return self.decoder(z), mu, logvar

def vae_loss(recon_x, x, mu, logvar):
    BCE = nn.functional.binary_cross_entropy(recon_x, x, reduction='sum')
    KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
    return BCE + KLD

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
vae = VAE(input_dim=X_scaled.shape[1]).to(device)
optimizer = optim.Adam(vae.parameters(), lr=1e-3)
epochs = 10

# Train loop
for epoch in range(epochs):
    vae.train()
    train_loss = 0
    for batch in train_loader:
        data = batch[0].to(device)
        optimizer.zero_grad()
        recon, mu, logvar = vae(data)
        loss = vae_loss(recon, data, mu, logvar)
        loss.backward()
        train_loss += loss.item()
        optimizer.step()

    avg_loss = train_loss / len(train_loader.dataset)
    print(f"Epoch {epoch+1}, Train Loss: {train_loss:.2f}, Avg Loss per sample: {avg_loss:.4f}")

    
torch.save(vae.state_dict(), "smote_tomek_vae_from_csv.pth")


Epoch 1, Train Loss: 77157596.20, Avg Loss per sample: 7.8664
Epoch 2, Train Loss: 76032857.99, Avg Loss per sample: 7.7518
Epoch 3, Train Loss: 75914793.21, Avg Loss per sample: 7.7397
Epoch 4, Train Loss: 75874800.26, Avg Loss per sample: 7.7356
Epoch 5, Train Loss: 75853259.27, Avg Loss per sample: 7.7335
Epoch 6, Train Loss: 75836671.05, Avg Loss per sample: 7.7318
Epoch 7, Train Loss: 75803409.09, Avg Loss per sample: 7.7284
Epoch 8, Train Loss: 75790715.54, Avg Loss per sample: 7.7271
Epoch 9, Train Loss: 75788672.49, Avg Loss per sample: 7.7269
Epoch 10, Train Loss: 75784302.85, Avg Loss per sample: 7.7264


In [28]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from torch import nn
from torch import optim

# Load your saved data
df = pd.read_csv('smote_enn_data.csv')  
target_col = 'label' 

# Drop label column for unsupervised VAE
X = df.drop(columns=[target_col])

# Normalize features 
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)


import torch
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split

# Train/test split
X_train, X_val = train_test_split(X_scaled, test_size=0.2, random_state=42)
train_tensor = torch.tensor(X_train, dtype=torch.float32)
val_tensor = torch.tensor(X_val, dtype=torch.float32)

train_loader = DataLoader(TensorDataset(train_tensor), batch_size=256, shuffle=True)
val_loader = DataLoader(TensorDataset(val_tensor), batch_size=256)

# Define VAE
class VAE(nn.Module):
    def __init__(self, input_dim, latent_dim=10):
        super(VAE, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU()
        )
        self.fc_mu = nn.Linear(64, latent_dim)
        self.fc_logvar = nn.Linear(64, latent_dim)

        self.decoder = nn.Sequential(
            nn.Linear(latent_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 128),
            nn.ReLU(),
            nn.Linear(128, input_dim),
            nn.Sigmoid()
        )

    def reparameterize(self, mu, logvar):
        std = torch.exp(0.5 * logvar)
        eps = torch.randn_like(std)
        return mu + eps * std

    def forward(self, x):
        h = self.encoder(x)
        mu = self.fc_mu(h)
        logvar = self.fc_logvar(h)
        z = self.reparameterize(mu, logvar)
        return self.decoder(z), mu, logvar

def vae_loss(recon_x, x, mu, logvar):
    BCE = nn.functional.binary_cross_entropy(recon_x, x, reduction='sum')
    KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
    return BCE + KLD

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
vae = VAE(input_dim=X_scaled.shape[1]).to(device)
optimizer = optim.Adam(vae.parameters(), lr=1e-3)
epochs = 10

# Train loop
for epoch in range(epochs):
    vae.train()
    train_loss = 0
    for batch in train_loader:
        data = batch[0].to(device)
        optimizer.zero_grad()
        recon, mu, logvar = vae(data)
        loss = vae_loss(recon, data, mu, logvar)
        loss.backward()
        train_loss += loss.item()
        optimizer.step()

    avg_loss = train_loss / len(train_loader.dataset)
    print(f"Epoch {epoch+1}, Train Loss: {train_loss:.2f}, Avg Loss per sample: {avg_loss:.4f}")

    
torch.save(vae.state_dict(), "smote_enn_vae_from_csv.pth")


Epoch 1, Train Loss: 76773331.23, Avg Loss per sample: 7.8868
Epoch 2, Train Loss: 75709175.79, Avg Loss per sample: 7.7775
Epoch 3, Train Loss: 75627797.80, Avg Loss per sample: 7.7692
Epoch 4, Train Loss: 75586339.46, Avg Loss per sample: 7.7649
Epoch 5, Train Loss: 75551604.83, Avg Loss per sample: 7.7613
Epoch 6, Train Loss: 75542041.56, Avg Loss per sample: 7.7603
Epoch 7, Train Loss: 75523536.76, Avg Loss per sample: 7.7584
Epoch 8, Train Loss: 75507089.52, Avg Loss per sample: 7.7568
Epoch 9, Train Loss: 75499620.06, Avg Loss per sample: 7.7560
Epoch 10, Train Loss: 75489985.92, Avg Loss per sample: 7.7550


In [29]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from torch import nn
from torch import optim

# Load your saved data
df = pd.read_csv('smote_data.csv')  
target_col = 'label' 

# Drop label column for unsupervised VAE
X = df.drop(columns=[target_col])

# Normalize features 
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)


import torch
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split

# Train/test split
X_train, X_val = train_test_split(X_scaled, test_size=0.2, random_state=42)
train_tensor = torch.tensor(X_train, dtype=torch.float32)
val_tensor = torch.tensor(X_val, dtype=torch.float32)

train_loader = DataLoader(TensorDataset(train_tensor), batch_size=256, shuffle=True)
val_loader = DataLoader(TensorDataset(val_tensor), batch_size=256)

# Define VAE
class VAE(nn.Module):
    def __init__(self, input_dim, latent_dim=10):
        super(VAE, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU()
        )
        self.fc_mu = nn.Linear(64, latent_dim)
        self.fc_logvar = nn.Linear(64, latent_dim)

        self.decoder = nn.Sequential(
            nn.Linear(latent_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 128),
            nn.ReLU(),
            nn.Linear(128, input_dim),
            nn.Sigmoid()
        )

    def reparameterize(self, mu, logvar):
        std = torch.exp(0.5 * logvar)
        eps = torch.randn_like(std)
        return mu + eps * std

    def forward(self, x):
        h = self.encoder(x)
        mu = self.fc_mu(h)
        logvar = self.fc_logvar(h)
        z = self.reparameterize(mu, logvar)
        return self.decoder(z), mu, logvar

def vae_loss(recon_x, x, mu, logvar):
    BCE = nn.functional.binary_cross_entropy(recon_x, x, reduction='sum')
    KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
    return BCE + KLD

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
vae = VAE(input_dim=X_scaled.shape[1]).to(device)
optimizer = optim.Adam(vae.parameters(), lr=1e-3)
epochs = 10

# Train loop
for epoch in range(epochs):
    vae.train()
    train_loss = 0
    for batch in train_loader:
        data = batch[0].to(device)
        optimizer.zero_grad()
        recon, mu, logvar = vae(data)
        loss = vae_loss(recon, data, mu, logvar)
        loss.backward()
        train_loss += loss.item()
        optimizer.step()

    avg_loss = train_loss / len(train_loader.dataset)
    print(f"Epoch {epoch+1}, Train Loss: {train_loss:.2f}, Avg Loss per sample: {avg_loss:.4f}")

    
torch.save(vae.state_dict(), "smote_vae_from_csv.pth")


Epoch 1, Train Loss: 74954586.96, Avg Loss per sample: 12.9850
Epoch 2, Train Loss: 74389779.37, Avg Loss per sample: 12.8871
Epoch 3, Train Loss: 74360263.74, Avg Loss per sample: 12.8820
Epoch 4, Train Loss: 74337897.49, Avg Loss per sample: 12.8781
Epoch 5, Train Loss: 74335071.33, Avg Loss per sample: 12.8777
Epoch 6, Train Loss: 74325532.29, Avg Loss per sample: 12.8760
Epoch 7, Train Loss: 74325322.45, Avg Loss per sample: 12.8760
Epoch 8, Train Loss: 74315980.35, Avg Loss per sample: 12.8743
Epoch 9, Train Loss: 74311597.01, Avg Loss per sample: 12.8736
Epoch 10, Train Loss: 74306734.12, Avg Loss per sample: 12.8727


In [30]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from torch import nn
from torch import optim

# Load your saved data
df = pd.read_csv('adasyn_data.csv')  
target_col = 'label' 

# Drop label column for unsupervised VAE
X = df.drop(columns=[target_col])

# Normalize features 
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)


import torch
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split

# Train/test split
X_train, X_val = train_test_split(X_scaled, test_size=0.2, random_state=42)
train_tensor = torch.tensor(X_train, dtype=torch.float32)
val_tensor = torch.tensor(X_val, dtype=torch.float32)

train_loader = DataLoader(TensorDataset(train_tensor), batch_size=256, shuffle=True)
val_loader = DataLoader(TensorDataset(val_tensor), batch_size=256)

# Define VAE
class VAE(nn.Module):
    def __init__(self, input_dim, latent_dim=10):
        super(VAE, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU()
        )
        self.fc_mu = nn.Linear(64, latent_dim)
        self.fc_logvar = nn.Linear(64, latent_dim)

        self.decoder = nn.Sequential(
            nn.Linear(latent_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 128),
            nn.ReLU(),
            nn.Linear(128, input_dim),
            nn.Sigmoid()
        )

    def reparameterize(self, mu, logvar):
        std = torch.exp(0.5 * logvar)
        eps = torch.randn_like(std)
        return mu + eps * std

    def forward(self, x):
        h = self.encoder(x)
        mu = self.fc_mu(h)
        logvar = self.fc_logvar(h)
        z = self.reparameterize(mu, logvar)
        return self.decoder(z), mu, logvar

def vae_loss(recon_x, x, mu, logvar):
    BCE = nn.functional.binary_cross_entropy(recon_x, x, reduction='sum')
    KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
    return BCE + KLD

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
vae = VAE(input_dim=X_scaled.shape[1]).to(device)
optimizer = optim.Adam(vae.parameters(), lr=1e-3)
epochs = 10

# Train loop
for epoch in range(epochs):
    vae.train()
    train_loss = 0
    for batch in train_loader:
        data = batch[0].to(device)
        optimizer.zero_grad()
        recon, mu, logvar = vae(data)
        loss = vae_loss(recon, data, mu, logvar)
        loss.backward()
        train_loss += loss.item()
        optimizer.step()

    avg_loss = train_loss / len(train_loader.dataset)
    print(f"Epoch {epoch+1}, Train Loss: {train_loss:.2f}, Avg Loss per sample: {avg_loss:.4f}")

    
torch.save(vae.state_dict(), "adasyn_vae_from_csv.pth")


Epoch 1, Train Loss: 75148178.96, Avg Loss per sample: 7.6582
Epoch 2, Train Loss: 74330879.91, Avg Loss per sample: 7.5749
Epoch 3, Train Loss: 74275958.13, Avg Loss per sample: 7.5693
Epoch 4, Train Loss: 74231801.90, Avg Loss per sample: 7.5648
Epoch 5, Train Loss: 74204151.89, Avg Loss per sample: 7.5620
Epoch 6, Train Loss: 74195444.64, Avg Loss per sample: 7.5611
Epoch 7, Train Loss: 74186570.59, Avg Loss per sample: 7.5602
Epoch 8, Train Loss: 74155659.03, Avg Loss per sample: 7.5571
Epoch 9, Train Loss: 74142781.41, Avg Loss per sample: 7.5558
Epoch 10, Train Loss: 74131805.25, Avg Loss per sample: 7.5546


In [31]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from torch import nn
from torch import optim

# Load your saved data
df = pd.read_csv('undersampled_data.csv')  
target_col = 'label' 

# Drop label column for unsupervised VAE
X = df.drop(columns=[target_col])

# Normalize features 
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)


import torch
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split

# Train/test split
X_train, X_val = train_test_split(X_scaled, test_size=0.2, random_state=42)
train_tensor = torch.tensor(X_train, dtype=torch.float32)
val_tensor = torch.tensor(X_val, dtype=torch.float32)

train_loader = DataLoader(TensorDataset(train_tensor), batch_size=256, shuffle=True)
val_loader = DataLoader(TensorDataset(val_tensor), batch_size=256)

# Define VAE
class VAE(nn.Module):
    def __init__(self, input_dim, latent_dim=10):
        super(VAE, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU()
        )
        self.fc_mu = nn.Linear(64, latent_dim)
        self.fc_logvar = nn.Linear(64, latent_dim)

        self.decoder = nn.Sequential(
            nn.Linear(latent_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 128),
            nn.ReLU(),
            nn.Linear(128, input_dim),
            nn.Sigmoid()
        )

    def reparameterize(self, mu, logvar):
        std = torch.exp(0.5 * logvar)
        eps = torch.randn_like(std)
        return mu + eps * std

    def forward(self, x):
        h = self.encoder(x)
        mu = self.fc_mu(h)
        logvar = self.fc_logvar(h)
        z = self.reparameterize(mu, logvar)
        return self.decoder(z), mu, logvar

def vae_loss(recon_x, x, mu, logvar):
    BCE = nn.functional.binary_cross_entropy(recon_x, x, reduction='sum')
    KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
    return BCE + KLD

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
vae = VAE(input_dim=X_scaled.shape[1]).to(device)
optimizer = optim.Adam(vae.parameters(), lr=1e-3)
epochs = 10

# Train loop
for epoch in range(epochs):
    vae.train()
    train_loss = 0
    for batch in train_loader:
        data = batch[0].to(device)
        optimizer.zero_grad()
        recon, mu, logvar = vae(data)
        loss = vae_loss(recon, data, mu, logvar)
        loss.backward()
        train_loss += loss.item()
        optimizer.step()

    avg_loss = train_loss / len(train_loader.dataset)
    print(f"Epoch {epoch+1}, Train Loss: {train_loss:.2f}, Avg Loss per sample: {avg_loss:.4f}")

    
torch.save(vae.state_dict(), "undersampled_vae_from_csv.pth")


Epoch 1, Train Loss: 500103.05, Avg Loss per sample: 12.7874
Epoch 2, Train Loss: 392542.67, Avg Loss per sample: 10.0371
Epoch 3, Train Loss: 379823.10, Avg Loss per sample: 9.7119
Epoch 4, Train Loss: 362054.72, Avg Loss per sample: 9.2576
Epoch 5, Train Loss: 353372.99, Avg Loss per sample: 9.0356
Epoch 6, Train Loss: 349279.16, Avg Loss per sample: 8.9309
Epoch 7, Train Loss: 346387.58, Avg Loss per sample: 8.8570
Epoch 8, Train Loss: 344754.23, Avg Loss per sample: 8.8152
Epoch 9, Train Loss: 343301.06, Avg Loss per sample: 8.7781
Epoch 10, Train Loss: 341153.73, Avg Loss per sample: 8.7232


In [33]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from torch import nn
from torch import optim

# Load your saved data
df = pd.read_csv('stratified_sample.csv')  
target_col = 'label' 

# Drop label column for unsupervised VAE
X = df.drop(columns=[target_col])

# Normalize features 
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)


import torch
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split

# Train/test split
X_train, X_val = train_test_split(X_scaled, test_size=0.2, random_state=42)
train_tensor = torch.tensor(X_train, dtype=torch.float32)
val_tensor = torch.tensor(X_val, dtype=torch.float32)

train_loader = DataLoader(TensorDataset(train_tensor), batch_size=256, shuffle=True)
val_loader = DataLoader(TensorDataset(val_tensor), batch_size=256)

# Define VAE
class VAE(nn.Module):
    def __init__(self, input_dim, latent_dim=10):
        super(VAE, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU()
        )
        self.fc_mu = nn.Linear(64, latent_dim)
        self.fc_logvar = nn.Linear(64, latent_dim)

        self.decoder = nn.Sequential(
            nn.Linear(latent_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 128),
            nn.ReLU(),
            nn.Linear(128, input_dim),
            nn.Sigmoid()
        )

    def reparameterize(self, mu, logvar):
        std = torch.exp(0.5 * logvar)
        eps = torch.randn_like(std)
        return mu + eps * std

    def forward(self, x):
        h = self.encoder(x)
        mu = self.fc_mu(h)
        logvar = self.fc_logvar(h)
        z = self.reparameterize(mu, logvar)
        return self.decoder(z), mu, logvar

def vae_loss(recon_x, x, mu, logvar):
    BCE = nn.functional.binary_cross_entropy(recon_x, x, reduction='sum')
    KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
    return BCE + KLD

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
vae = VAE(input_dim=X_scaled.shape[1]).to(device)
optimizer = optim.Adam(vae.parameters(), lr=1e-3)
epochs = 10

# Train loop
for epoch in range(epochs):
    vae.train()
    train_loss = 0
    for batch in train_loader:
        data = batch[0].to(device)
        optimizer.zero_grad()
        recon, mu, logvar = vae(data)
        loss = vae_loss(recon, data, mu, logvar)
        loss.backward()
        train_loss += loss.item()
        optimizer.step()

    avg_loss = train_loss / len(train_loader.dataset)
    print(f"Epoch {epoch+1}, Train Loss: {train_loss:.2f}, Avg Loss per sample: {avg_loss:.4f}")

    
torch.save(vae.state_dict(), "stratified_vae_from_csv.pth")


Epoch 1, Train Loss: 2862624.99, Avg Loss per sample: 7.6645
Epoch 2, Train Loss: 2501205.84, Avg Loss per sample: 6.6968
Epoch 3, Train Loss: 2471769.02, Avg Loss per sample: 6.6180
Epoch 4, Train Loss: 2458277.50, Avg Loss per sample: 6.5819
Epoch 5, Train Loss: 2447872.99, Avg Loss per sample: 6.5540
Epoch 6, Train Loss: 2440110.56, Avg Loss per sample: 6.5333
Epoch 7, Train Loss: 2436058.80, Avg Loss per sample: 6.5224
Epoch 8, Train Loss: 2432953.22, Avg Loss per sample: 6.5141
Epoch 9, Train Loss: 2430430.56, Avg Loss per sample: 6.5073
Epoch 10, Train Loss: 2427941.69, Avg Loss per sample: 6.5007


In [32]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from torch import nn
from torch import optim

# Load your saved data
df = pd.read_csv('data_diffusion_data.csv')  
target_col = 'label' 

# Drop label column for unsupervised VAE
X = df.drop(columns=[target_col])

# Normalize features 
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)


import torch
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split

# Train/test split
X_train, X_val = train_test_split(X_scaled, test_size=0.2, random_state=42)
train_tensor = torch.tensor(X_train, dtype=torch.float32)
val_tensor = torch.tensor(X_val, dtype=torch.float32)

train_loader = DataLoader(TensorDataset(train_tensor), batch_size=256, shuffle=True)
val_loader = DataLoader(TensorDataset(val_tensor), batch_size=256)

# Define VAE
class VAE(nn.Module):
    def __init__(self, input_dim, latent_dim=10):
        super(VAE, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU()
        )
        self.fc_mu = nn.Linear(64, latent_dim)
        self.fc_logvar = nn.Linear(64, latent_dim)

        self.decoder = nn.Sequential(
            nn.Linear(latent_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 128),
            nn.ReLU(),
            nn.Linear(128, input_dim),
            nn.Sigmoid()
        )

    def reparameterize(self, mu, logvar):
        std = torch.exp(0.5 * logvar)
        eps = torch.randn_like(std)
        return mu + eps * std

    def forward(self, x):
        h = self.encoder(x)
        mu = self.fc_mu(h)
        logvar = self.fc_logvar(h)
        z = self.reparameterize(mu, logvar)
        return self.decoder(z), mu, logvar

def vae_loss(recon_x, x, mu, logvar):
    BCE = nn.functional.binary_cross_entropy(recon_x, x, reduction='sum')
    KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
    return BCE + KLD

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
vae = VAE(input_dim=X_scaled.shape[1]).to(device)
optimizer = optim.Adam(vae.parameters(), lr=1e-3)
epochs = 10

# Train loop
for epoch in range(epochs):
    vae.train()
    train_loss = 0
    for batch in train_loader:
        data = batch[0].to(device)
        optimizer.zero_grad()
        recon, mu, logvar = vae(data)
        loss = vae_loss(recon, data, mu, logvar)
        loss.backward()
        train_loss += loss.item()
        optimizer.step()

    avg_loss = train_loss / len(train_loader.dataset)
    print(f"Epoch {epoch+1}, Train Loss: {train_loss:.2f}, Avg Loss per sample: {avg_loss:.4f}")

    
torch.save(vae.state_dict(), "data_diffusion_vae_from_csv.pth")


Epoch 1, Train Loss: 135946076.37, Avg Loss per sample: 13.8487
Epoch 2, Train Loss: 135520314.70, Avg Loss per sample: 13.8053
Epoch 3, Train Loss: 135503332.21, Avg Loss per sample: 13.8036
Epoch 4, Train Loss: 135495119.22, Avg Loss per sample: 13.8028
Epoch 5, Train Loss: 135486037.11, Avg Loss per sample: 13.8019
Epoch 6, Train Loss: 135483268.15, Avg Loss per sample: 13.8016
Epoch 7, Train Loss: 135477714.80, Avg Loss per sample: 13.8010
Epoch 8, Train Loss: 135470131.40, Avg Loss per sample: 13.8002
Epoch 9, Train Loss: 135471306.98, Avg Loss per sample: 13.8004
Epoch 10, Train Loss: 135471585.90, Avg Loss per sample: 13.8004
