In [1]:
import copy
import os

import pandas as pd
from google.colab import drive

# Mount Google Drive at /content/drive so the Drive paths used below resolve.
drive.mount('/content/drive')


Mounted at /content/drive


In [2]:
def load_from_gdrive(data_dir, n_leading_cols=4, scale=360):
    """Load every .csv file under the given directories into one scaled DataFrame.

    For each file the first ``n_leading_cols`` columns are dropped, the
    remaining values are divided by ``scale`` and the result is clipped to
    the range [-1, 1]. The per-file frames are then stacked row-wise with a
    fresh 0..N-1 index.

    Args:
        data_dir: A single directory path, or an iterable of directory paths.
        n_leading_cols: Number of leading columns to drop from every file.
        scale: Divisor applied to the remaining columns before clipping.

    Returns:
        One DataFrame holding the rows of all files. Directories are visited
        in the order given; files inside a directory in sorted name order.

    Raises:
        ValueError: If no .csv file is found in any of the directories.
    """
    # A bare path would otherwise be iterated character by character.
    if isinstance(data_dir, (str, os.PathLike)):
        data_dir = [data_dir]
    directories = list(data_dir)

    data_frames = []
    for directory in directories:
        # os.listdir returns entries in arbitrary order; sort so the row order
        # of the combined frame is reproducible between runs.
        for file_name in sorted(os.listdir(directory)):
            if not file_name.endswith('.csv'):
                continue

            df = pd.read_csv(os.path.join(directory, file_name))

            # Drop the leading columns, then scale and clamp to [-1, 1].
            df = df.iloc[:, n_leading_cols:]
            df = df.div(scale)
            df = df.clip(upper=1, lower=-1)

            data_frames.append(df)

    if not data_frames:
        raise ValueError(f'No .csv files found in: {directories}')

    # Concatenate the DataFrames into a single DataFrame
    return pd.concat(data_frames, axis=0, ignore_index=True)

In [3]:
# Directories on the mounted Drive whose .csv files make up the data set.
data_dir = ['/content/drive/MyDrive/28 08 mod/Normal']

# Load, scale and stack every .csv file found in the directories above.
combined_df =  load_from_gdrive(data_dir)


In [6]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import MinMaxScaler
from torch.optim.lr_scheduler import StepLR
from sklearn.model_selection import train_test_split

class VAEAnomalyTabular(nn.Module):
    """Fully-connected variational autoencoder for tabular rows.

    The encoder maps an input row to ``2 * latent_size`` values that are split
    into a mean and a log-variance; the decoder maps a sampled latent vector
    back to ``input_size`` values.

    Args:
        input_size: Number of features per input row.
        latent_size: Width of the latent vector.
        dropout_rate: Dropout probability applied after each hidden activation.
        l1_weight: Multiplier applied by ``l1_regularization``.
    """

    def __init__(self, input_size, latent_size, dropout_rate=0.4, l1_weight=0.000):
        super(VAEAnomalyTabular, self).__init__()
        # Keep the sizes on the instance so forward() does not rely on a
        # module-level variable that happens to share the name.
        self.input_size = input_size
        self.latent_size = latent_size
        self.encoder = self.make_encoder(input_size, latent_size, dropout_rate)
        self.decoder = self.make_decoder(latent_size, input_size, dropout_rate)
        self.l1_weight = l1_weight

    def make_encoder(self, input_size, latent_size, dropout_rate):
        """Build the encoder: input_size -> 512 -> 256 -> 2 * latent_size."""
        return nn.Sequential(
            nn.Linear(input_size, 512),
            nn.LeakyReLU(),
            nn.Dropout(dropout_rate),
            nn.Linear(512, 256),
            nn.LeakyReLU(),
            nn.Dropout(dropout_rate),
            # Twice the latent width: first half is mu, second half is logvar.
            nn.Linear(256, latent_size * 2)
        )

    def make_decoder(self, latent_size, output_size, dropout_rate):
        """Build the decoder: latent_size -> 256 -> 512 -> output_size."""
        return nn.Sequential(
            nn.Linear(latent_size, 256),
            nn.LeakyReLU(),
            nn.Dropout(dropout_rate),
            nn.Linear(256, 512),
            nn.LeakyReLU(),
            nn.Dropout(dropout_rate),
            nn.Linear(512, output_size)
        )

    def reparameterize(self, mu, logvar):
        """Sample ``mu + eps * exp(0.5 * logvar)`` with ``eps`` drawn from N(0, 1)."""
        std = torch.exp(0.5 * logvar)
        eps = torch.randn_like(std)
        return mu + eps * std

    def forward(self, x):
        """Encode ``x``, sample a latent vector and decode it.

        Returns:
            A ``(reconstruction, mu, logvar)`` tuple.
        """
        latent_params = self.encoder(x)
        latent_params = latent_params.view(-1, self.latent_size * 2)
        mu = latent_params[:, :self.latent_size]
        logvar = latent_params[:, self.latent_size:]
        z = self.reparameterize(mu, logvar)
        reconstruction = self.decoder(z)
        return reconstruction, mu, logvar

    def l1_regularization(self):
        """Return ``l1_weight`` times the sum of absolute values of all parameters.

        The sum is built from the parameters themselves, so the result lives
        on the same device as the model and stays differentiable.
        """
        l1_reg = sum(param.abs().sum() for param in self.parameters())
        return self.l1_weight * l1_reg

def train_vae(model, train_loader, optimizer, criterion, device):
    """Run one optimisation pass over ``train_loader``.

    Every batch is moved to ``device``, pushed through ``model`` (which must
    return ``(reconstruction, mu, logvar)``), scored with ``criterion`` and
    used for one optimizer step.

    Returns:
        The sum of the per-batch loss values divided by the number of batches.
    """
    model.train()
    batch_losses = []
    for inputs in train_loader:
        inputs = inputs.to(device)

        # Gradients accumulate by default, so clear them before each batch.
        optimizer.zero_grad()
        recon, mean, log_var = model(inputs)
        batch_loss = criterion(recon, inputs, mean, log_var)
        batch_loss.backward()
        optimizer.step()

        batch_losses.append(batch_loss.item())
    return sum(batch_losses) / len(train_loader)

def test_vae(model, test_loader, criterion, device):
    model.eval()
    test_loss = 0
    with torch.no_grad():
        for batch in test_loader:
            batch = batch.to(device)
            reconstruction, mu, logvar = model(batch)
            loss = criterion(reconstruction, batch, mu, logvar)
            test_loss += loss.item()
    return test_loss / len(test_loader)


# Convert the preprocessed frame into a float32 tensor (rows x features).
tensor_data = torch.tensor(combined_df.values, dtype=torch.float32)

# Split the data into training and testing sets.
# A fixed random_state keeps the held-out rows the same on every run.
train_data, test_data = train_test_split(tensor_data, test_size=0.2, random_state=42)

# Define the VAE model
# Take the feature count from the data itself so the model always matches
# the columns that were actually loaded (previously hard-coded to 59).
input_size = tensor_data.shape[1]  # Number of input variables
latent_size = 64
batch_size = 32

# Create data loaders
# NOTE(review): this loader iterates over the full tensor_data, not
# train_data, so the held-out rows are also seen during this training run.
# Confirm whether that is intended before comparing against test losses.
train_loader = DataLoader(tensor_data, batch_size=batch_size, shuffle=True)
#test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)

vae = VAEAnomalyTabular(input_size, latent_size)

# Define the loss function
def vae_loss(reconstruction, x, mu, logvar):
    """Return the summed squared reconstruction error plus the KL term.

    The KL term is ``-0.5 * sum(1 + logvar - mu**2 - exp(logvar))``. Both
    parts are summed over every element, so the value grows with batch size.
    """
    squared_error = nn.functional.mse_loss(reconstruction, x, reduction='sum')
    kl_terms = 1 + logvar - mu.pow(2) - logvar.exp()
    kl_penalty = -0.5 * torch.sum(kl_terms)
    return squared_error + kl_penalty

# Pick the device first and move the model onto it. train_vae() moves every
# batch to `device`, so a model left on the CPU fails on a GPU runtime.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
vae.to(device)

# Build the optimizer after the move so it is bound to the on-device parameters.
optimizer = optim.SGD(vae.parameters(), lr=0.0001)#, weight_decay=0.001

# Train VAE
epochs = 50
for epoch in range(epochs):
    train_loss = train_vae(vae, train_loader, optimizer, vae_loss, device)
    #test_loss = test_vae(vae, test_loader, vae_loss, device)
    print(f'Epoch {epoch+1}/{epochs}: Train Loss: {train_loss:.4f}')#, Test Loss: {test_loss:.4f}
    #scheduler.step()


Epoch 1/50: Train Loss: 22.0960


KeyboardInterrupt: ignored

In [8]:
import itertools
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import MinMaxScaler
from torch.optim.lr_scheduler import StepLR
from sklearn.model_selection import train_test_split
from google.colab import drive

# Mount Google Drive
#drive.mount('/content/gdrive')

# Define where to store the checkpoints
# (the per-configuration training logs are written to the same directory).
base_dir = "/content/drive/MyDrive/MGR_VAE/"

class VAEAnomalyTabular(nn.Module):
    """Variational autoencoder for tabular rows with configurable hidden layers.

    The encoder maps an input row to ``2 * latent_size`` values that are split
    into a mean and a log-variance; the decoder maps a sampled latent vector
    back to ``input_size`` values.

    Args:
        input_size: Number of features per input row.
        latent_size: Width of the latent vector.
        encoder_neurons: Hidden layer widths of the encoder, in order.
        decoder_neurons: Hidden layer widths of the decoder, in order.
        dropout_rate: Dropout probability applied after each hidden activation.
        l1_weight: Multiplier applied by ``l1_regularization`` (default 0.0).
    """

    def __init__(self, input_size, latent_size, encoder_neurons, decoder_neurons, dropout_rate, l1_weight=0.0):
        super(VAEAnomalyTabular, self).__init__()
        # Keep the sizes on the instance so forward() does not rely on a
        # module-level variable that happens to share the name.
        self.input_size = input_size
        self.latent_size = latent_size
        # l1_regularization() reads this attribute, so it must always exist.
        self.l1_weight = l1_weight
        self.encoder = self.make_encoder(input_size, latent_size, encoder_neurons, dropout_rate)
        self.decoder = self.make_decoder(latent_size, input_size, decoder_neurons, dropout_rate)

    @staticmethod
    def _build_mlp(in_features, hidden_sizes, out_features, dropout_rate):
        """Stack Linear -> LeakyReLU -> Dropout per hidden size, then a final Linear."""
        layers = []
        prev_neurons = in_features
        for neuron in hidden_sizes:
            layers.extend([
                nn.Linear(prev_neurons, neuron),
                nn.LeakyReLU(),
                nn.Dropout(dropout_rate)
            ])
            prev_neurons = neuron
        layers.append(nn.Linear(prev_neurons, out_features))
        return nn.Sequential(*layers)

    def make_encoder(self, input_size, latent_size, neurons, dropout_rate):
        """Build the encoder; its output is twice the latent width (mu and logvar)."""
        return self._build_mlp(input_size, neurons, latent_size * 2, dropout_rate)

    def make_decoder(self, latent_size, output_size, neurons, dropout_rate):
        """Build the decoder from the latent width back to ``output_size``."""
        return self._build_mlp(latent_size, neurons, output_size, dropout_rate)

    def reparameterize(self, mu, logvar):
        """Sample ``mu + eps * exp(0.5 * logvar)`` with ``eps`` drawn from N(0, 1)."""
        std = torch.exp(0.5 * logvar)
        eps = torch.randn_like(std)
        return mu + eps * std

    def forward(self, x):
        """Encode ``x``, sample a latent vector and decode it.

        Returns:
            A ``(reconstruction, mu, logvar)`` tuple.
        """
        latent_params = self.encoder(x)
        latent_params = latent_params.view(-1, self.latent_size * 2)
        mu = latent_params[:, :self.latent_size]
        logvar = latent_params[:, self.latent_size:]
        z = self.reparameterize(mu, logvar)
        reconstruction = self.decoder(z)
        return reconstruction, mu, logvar

    def l1_regularization(self):
        """Return ``l1_weight`` times the sum of absolute values of all parameters.

        The sum is built from the parameters themselves, so the result lives
        on the same device as the model and stays differentiable.
        """
        l1_reg = sum(param.abs().sum() for param in self.parameters())
        return self.l1_weight * l1_reg

def train_vae(model, train_loader, optimizer, criterion, device):
    """Train ``model`` for one pass over ``train_loader``.

    Each batch is moved to ``device`` and the model output
    ``(reconstruction, mu, logvar)`` is scored by ``criterion`` before a
    single optimizer step is taken.

    Returns:
        Total of the per-batch losses divided by ``len(train_loader)``.
    """
    model.train()
    running_total = 0
    for features in train_loader:
        features = features.to(device)
        optimizer.zero_grad()

        recon, latent_mean, latent_logvar = model(features)
        step_loss = criterion(recon, features, latent_mean, latent_logvar)

        step_loss.backward()
        optimizer.step()
        running_total = running_total + step_loss.item()

    n_batches = len(train_loader)
    return running_total / n_batches

def test_vae(model, test_loader, criterion, device):
    model.eval()
    test_loss = 0
    with torch.no_grad():
        for batch in test_loader:
            batch = batch.to(device)
            reconstruction, mu, logvar = model(batch)
            loss = criterion(reconstruction, batch, mu, logvar)
            test_loss += loss.item()
    return test_loss / len(test_loader)

def vae_loss(reconstruction, x, mu, logvar):
    """Summed squared reconstruction error plus the KL term of the latent code.

    The KL term is ``-0.5 * sum(1 + logvar - mu**2 - exp(logvar))``; both parts
    are sums over all elements rather than means.
    """
    per_element_kl = 1 + logvar - mu.pow(2) - logvar.exp()
    kl_divergence = -0.5 * torch.sum(per_element_kl)
    recon_loss = nn.functional.mse_loss(reconstruction, x, reduction='sum')
    return recon_loss + kl_divergence

# Search space: one list of candidate values per hyperparameter.
# Each optimizer entry pairs an optimizer class with its learning rate.
param_grid = dict(
    encoder_neurons=[[1024, 512, 256], [256, 128]],
    decoder_neurons=[[256, 512, 1024], [128, 256]],
    latent_space=[32, 64],
    batch_size=[32, 64],
    dropout_rate=[0.0, 0.2, 0.4],
    optimizer=[
        (optim.Adam, 0.001),
        (optim.Adam, 0.0001),
        (optim.SGD, 0.001),
        (optim.SGD, 0.0001),
    ],
)

# Cartesian product of all candidate lists, in the key order defined above.
grid = list(itertools.product(*param_grid.values()))

input_size = 59
epochs = 100



# Create the output directory up front so the first torch.save() cannot fail
# after a full training run just because the folder is missing.
os.makedirs(base_dir, exist_ok=True)

# The device does not depend on the configuration, so resolve it once.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Early stopping: give up on a configuration after this many consecutive
# epochs without an improvement of the test loss.
patience = 10

for ix, params in enumerate(grid):

    # Unpack parameters
    encoder_neurons, decoder_neurons, latent_size, batch_size, dropout_rate, (Optimizer, lr) = params

    # Check if encoder and decoder neurons match (the decoder must mirror the encoder)
    if encoder_neurons != decoder_neurons[::-1]:
        print(f'Skipping config {ix + 1} due to mismatch between encoder and decoder neurons')
        continue

    print(f'testing -> neurons_{encoder_neurons}_latent_{latent_size}_batch_{batch_size}_opt_{Optimizer.__name__}_lr_{lr}_dropout_{dropout_rate}')

    # Per-configuration bookkeeping
    logs = []
    early_stopping_counter = 0
    best_val_loss = float('inf')
    best_epoch = 0
    best_model_state = None
    best_optimizer_state = None

    vae = VAEAnomalyTabular(input_size, latent_size, encoder_neurons, decoder_neurons, dropout_rate)
    vae.to(device)

    optimizer = Optimizer(vae.parameters(), lr=lr)
    # NOTE(review): scheduler.step() is never called, so the learning rate
    # stays constant. Left unchanged here; confirm whether decay is intended.
    scheduler = StepLR(optimizer, step_size=10, gamma=0.1)

    # Create data loaders with the current batch_size
    train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)

    # Train VAE
    for epoch in range(epochs):
        train_loss = train_vae(vae, train_loader, optimizer, vae_loss, device)
        test_loss = test_vae(vae, test_loader, vae_loss, device)
        print(f'Config {ix + 1}/{len(grid)}, Epoch {epoch + 1}/{epochs}: Train Loss: {train_loss:.4f}, Test Loss: {test_loss:.4f}')

        # Store results before the early-stopping check so the epoch that
        # triggers the stop is part of the log as well.
        logs.append({
            'config_ix': ix,
            'epoch': epoch,
            'train_loss': train_loss,
            'test_loss': test_loss,
            'params': params
        })

        # Early stopping check
        if test_loss < best_val_loss:
            best_val_loss = test_loss
            best_epoch = epoch
            early_stopping_counter = 0
            # state_dict() hands out references to the live tensors, so take
            # a deep copy to freeze the weights of the best epoch.
            best_model_state = copy.deepcopy(vae.state_dict())
            best_optimizer_state = copy.deepcopy(optimizer.state_dict())
        else:
            early_stopping_counter += 1
            if early_stopping_counter >= patience:
                print("Early stopping triggered.")
                break

    if best_model_state is None:
        # No epoch improved on +inf (e.g. the loss was NaN throughout):
        # fall back to the final state so a checkpoint is still written.
        best_model_state = vae.state_dict()
        best_optimizer_state = optimizer.state_dict()
        best_val_loss = test_loss

    # Save checkpoint after each configuration. Weights, loss and epoch all
    # refer to the best epoch, not to whichever epoch happened to run last.
    checkpoint_filename = f'{best_val_loss:.4f}_neurons_{encoder_neurons}_latent_{latent_size}_batch_{batch_size}_opt_{Optimizer.__name__}_lr_{lr}_dropout_{dropout_rate}__epoch_{best_epoch}.pt'
    torch.save({
        'epoch': best_epoch,
        'model_state_dict': best_model_state,
        'optimizer_state_dict': best_optimizer_state,
        'loss': best_val_loss,
        'hyperparameters': params,
    }, os.path.join(base_dir, checkpoint_filename))

    # Save logs to a file. The dropout rate is part of the name because
    # configurations that differ only in dropout would otherwise overwrite
    # each other's log.
    logs_df = pd.DataFrame(logs)
    logs_filename = os.path.join(
        base_dir,
        f'logs_{vae.__class__.__name__}_{encoder_neurons}_{latent_size}_{batch_size}_{Optimizer.__name__}_{lr}_{dropout_rate}.csv'
    )
    logs_df.to_csv(logs_filename, index=False)

    print(f'Saved checkpoint: {checkpoint_filename}')

testing -> neurons_[1024, 512, 256]_latent_32_batch_32_opt_Adam_lr_0.001_dropout_0.0


KeyboardInterrupt: ignored