In [1]:
import os
import pandas as pd
from google.colab import drive

# Mount Google Drive
drive.mount('/content/drive')


Mounted at /content/drive


In [2]:
def load_from_gdrive(data_dir):
  # Initialize an empty list to store the DataFrames
  data_frames = []

  # Loop through each .csv file in the directory
  for dir in data_dir:
    for file_name in os.listdir(dir):
        if file_name.endswith('.csv'):
            file_path = os.path.join(dir, file_name)

            # Load the .csv file into a DataFrame
            df = pd.read_csv(file_path)

            # Remove the first column
            df = df.iloc[:, 4:]

            df  = df.div(360)

            df  = df.clip(upper =1,lower = -1)


            # Append the DataFrame to the list
            data_frames.append(df)

  # Concatenate the DataFrames into a single DataFrame
  combined_df = pd.concat(data_frames, axis=0, ignore_index=True)
  return combined_df

In [3]:
data_dir = ['/content/drive/MyDrive/28 08 mod/Normal']

combined_df =  load_from_gdrive(data_dir)


In [8]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import MinMaxScaler
from torch.optim.lr_scheduler import StepLR
from sklearn.model_selection import train_test_split

class VAEAnomalyConv(nn.Module):
    def __init__(self, input_channels, latent_size, sequence_length=59,  dropout_rate=0.0, l1_weight=0.000):
        super(VAEAnomalyConv, self).__init__()
        self.latent_size = latent_size
        self.l1_weight = l1_weight

        # Encoder layers
        self.encoder = nn.Sequential(
            nn.Conv1d(in_channels=1, out_channels=32, kernel_size=3, stride=1, padding=1),
            nn.LeakyReLU(negative_slope=0.01),
            nn.Dropout(p=dropout_rate),
            nn.Flatten(start_dim=1, end_dim=-1),
            nn.Linear(in_features=1888, out_features=latent_size)
        )

        # Decoder layers
        self.decoder = nn.Sequential(
            nn.Linear(in_features=latent_size, out_features=32*sequence_length),
            nn.LeakyReLU(negative_slope=0.01),
            nn.Dropout(p=dropout_rate),
            nn.Unflatten(1, (32, sequence_length)),  # Adjusted this line
            nn.ConvTranspose1d(in_channels=32, out_channels=1, kernel_size=3, stride=1, padding=1),
        )




    def forward(self, x):
      mu, logvar = self.encode(x)
      #print("Shape after Flatten layer in encoder:", mu.shape)
      z = self.reparameterize(mu, logvar)
      reconstruction = self.decode(z)
      return reconstruction, mu, logvar


    def encode(self, x):
      for i, layer in enumerate(self.encoder):
          x = layer(x)
          #print(f"Shape after layer {i} in encoder:", x.shape)
      mu = x
      logvar = x
      return mu, logvar



    def decode(self, z):
        decoded = self.decoder(z)
        return decoded

    def reparameterize(self, mu, logvar):
        std = torch.exp(0.5 * logvar)
        eps = torch.randn_like(std)
        z = mu + eps * std
        return z

    def l1_regularization(self):
        l1_reg = torch.tensor(0.0)
        for param in self.parameters():
            l1_reg += torch.norm(param, 1)
        return self.l1_weight * l1_reg


def train_vae(model, train_loader, optimizer, criterion, device):
    model.train()
    train_loss = 0
    for batch in train_loader:
        batch = batch.to(device)
        batch = batch.view(batch.shape[0], 1, batch.shape[1])
        optimizer.zero_grad()
        reconstruction, mu, logvar = model(batch)
        loss = criterion(reconstruction, batch, mu, logvar)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
    return train_loss / len(train_loader)

def test_vae(model, test_loader, criterion, device):
    model.eval()
    test_loss = 0
    with torch.no_grad():
        for batch in test_loader:
            batch = batch.to(device)
            batch = batch.view(batch.shape[0], 1, batch.shape[1])
            reconstruction, mu, logvar = model(batch)
            loss = criterion(reconstruction, batch, mu, logvar)
            test_loss += loss.item()
    return test_loss / len(test_loader)


# Split the data into training and testing sets
#train_data, test_data = train_test_split(tensor_data, test_size=0.4)


# Define the VAE model
input_size = 59  # Number of input features
latent_size = 64  # Size of the latent space
batch_size = 32

dropout_rate= 0.0

tens_train_data = torch.tensor(combined_df.values, dtype=torch.float32)
# Create data loaders
train_loader = DataLoader(tens_train_data.unsqueeze(2), batch_size=batch_size, shuffle=True)
#test_loader = DataLoader(test_data.unsqueeze(2), batch_size=batch_size, shuffle=False)


device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
vae = VAEAnomalyConv(input_size, latent_size).to(device)
print(vae)

# Define the loss function
def vae_loss(reconstruction, x, mu, logvar):
    recon_loss = nn.functional.mse_loss(reconstruction, x, reduction='sum')
    kl_divergence = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
    return recon_loss + kl_divergence

# Optimizer
optimizer = optim.Adam(vae.parameters(), lr=0.001)  # Adjust learning rate

epochs = 50
for epoch in range(epochs):
    train_loss = train_vae(vae, train_loader, optimizer, vae_loss, device)
    #test_loss = test_vae(vae, test_loader, vae_loss, device)
    print(f'Epoch {epoch+1}/{epochs}: Train Loss: {train_loss:.4f}')


VAEAnomalyConv(
  (encoder): Sequential(
    (0): Conv1d(1, 32, kernel_size=(3,), stride=(1,), padding=(1,))
    (1): LeakyReLU(negative_slope=0.01)
    (2): Dropout(p=0.0, inplace=False)
    (3): Flatten(start_dim=1, end_dim=-1)
    (4): Linear(in_features=1888, out_features=64, bias=True)
  )
  (decoder): Sequential(
    (0): Linear(in_features=64, out_features=1888, bias=True)
    (1): LeakyReLU(negative_slope=0.01)
    (2): Dropout(p=0.0, inplace=False)
    (3): Unflatten(dim=1, unflattened_size=(32, 59))
    (4): ConvTranspose1d(32, 1, kernel_size=(3,), stride=(1,), padding=(1,))
  )
)


KeyboardInterrupt: ignored

In [11]:
import pandas as pd
from sklearn.model_selection import ParameterGrid
from torch.optim.lr_scheduler import ReduceLROnPlateau

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import MinMaxScaler
from torch.optim.lr_scheduler import StepLR
from sklearn.model_selection import train_test_split

class VAEAnomalyConv(nn.Module):
    def __init__(self, input_channels, latent_size, num_filters, sequence_length=59,  dropout_rate=0.0, l1_weight=0.000):
        super(VAEAnomalyConv, self).__init__()
        self.latent_size = latent_size
        self.l1_weight = l1_weight

        # Encoder layers
        self.encoder = nn.Sequential(
            nn.Conv1d(in_channels=1, out_channels=num_filters, kernel_size=3, stride=1, padding=1),
            nn.LeakyReLU(negative_slope=0.01),
            nn.Dropout(p=dropout_rate),
            nn.Flatten(start_dim=1, end_dim=-1),
            nn.Linear(in_features=num_filters*sequence_length, out_features=latent_size)
        )

        # Decoder layers
        self.decoder = nn.Sequential(
            nn.Linear(in_features=latent_size, out_features=num_filters*sequence_length),
            nn.LeakyReLU(negative_slope=0.01),
            nn.Dropout(p=dropout_rate),
            nn.Unflatten(1, (num_filters, sequence_length)),
            nn.ConvTranspose1d(in_channels=num_filters, out_channels=1, kernel_size=3, stride=1, padding=1),
        )



    def forward(self, x):
      mu, logvar = self.encode(x)
      #print("Shape after Flatten layer in encoder:", mu.shape)
      z = self.reparameterize(mu, logvar)
      reconstruction = self.decode(z)
      return reconstruction, mu, logvar


    def encode(self, x):
      for i, layer in enumerate(self.encoder):
          x = layer(x)
          #print(f"Shape after layer {i} in encoder:", x.shape)
      mu = x
      logvar = x
      return mu, logvar



    def decode(self, z):
        decoded = self.decoder(z)
        return decoded

    def reparameterize(self, mu, logvar):
        std = torch.exp(0.5 * logvar)
        eps = torch.randn_like(std)
        z = mu + eps * std
        return z

    def l1_regularization(self):
        l1_reg = torch.tensor(0.0)
        for param in self.parameters():
            l1_reg += torch.norm(param, 1)
        return self.l1_weight * l1_reg


def train_vae(model, train_loader, optimizer, criterion, device):
    model.train()
    train_loss = 0
    for batch in train_loader:
        batch = batch.to(device)
        batch = batch.view(batch.shape[0], 1, batch.shape[1])
        optimizer.zero_grad()
        reconstruction, mu, logvar = model(batch)
        loss = criterion(reconstruction, batch, mu, logvar)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
    return train_loss / len(train_loader)

def test_vae(model, test_loader, criterion, device):
    model.eval()
    test_loss = 0
    with torch.no_grad():
        for batch in test_loader:
            batch = batch.to(device)
            batch = batch.view(batch.shape[0], 1, batch.shape[1])
            reconstruction, mu, logvar = model(batch)
            loss = criterion(reconstruction, batch, mu, logvar)
            test_loss += loss.item()
    return test_loss / len(test_loader)

# Define the loss function
def vae_loss(reconstruction, x, mu, logvar):
    recon_loss = nn.functional.mse_loss(reconstruction, x, reduction='sum')
    kl_divergence = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
    return recon_loss + kl_divergence

input_size = 59
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Parameter grid
param_grid = {
    'lr': [0.001, 0.01],
    'dropout_rate': [0.0, 0.2],
    'latent_size': [128, 64],
    'num_filters': [32, 64],
}
#change kernel size to 3/7 and padding to 1/3 by hand, leave stride to 1
log_df = pd.DataFrame(columns=['epoch', 'train_loss', 'test_loss', 'parameters'])

# Define the directory to save the model and log
save_dir = '/content/drive/MyDrive/MGR_VAE_CNN/'

from sklearn.model_selection import train_test_split
import torch


tensor_data = torch.tensor(combined_df.values, dtype=torch.float32)
train_data, test_data = train_test_split(tensor_data, test_size=0.4,random_state = 42)
validation_data, test_data = train_test_split(test_data, test_size=0.5,random_state = 42)

# Grid search
for params in ParameterGrid(param_grid):
    print(f'Current parameters: {params}')
    # Update params
    vae = VAEAnomalyConv(input_size, params['latent_size'], params['num_filters'], dropout_rate=params['dropout_rate']).to(device)
    train_loader = DataLoader(train_data.unsqueeze(2), batch_size=32, shuffle=True)
    test_loader = DataLoader(test_data.unsqueeze(2), batch_size=32, shuffle=False)
    optimizer = optim.Adam(vae.parameters(), lr=params['lr'])
    scheduler = ReduceLROnPlateau(optimizer, 'min', patience=5)

    # Train
    for epoch in range(50):
        train_loss = train_vae(vae, train_loader, optimizer, vae_loss, device)
        test_loss = test_vae(vae, test_loader, vae_loss, device)
        scheduler.step(test_loss)

        print(f'Epoch {epoch+1}/{50}: Train Loss: {train_loss:.4f}, Test Loss: {test_loss:.4f}')

        # Early stopping
        if epoch > 5 and log_df['test_loss'].iloc[-5:].mean() < test_loss:
            print("Break")
            break

    # Save model and training log
    model_path = f'{save_dir}model_{test_loss:.4f}_{params}.pt'
    log_path = f'{save_dir}log_{test_loss:.4f}_{params}.csv'
    torch.save(vae.state_dict(), model_path)
    log_df.to_csv(log_path, index=False)

    # Print the names of the saved files
    print(f'Saved model to {model_path}')
    print(f'Saved log to {log_path}')


Current parameters: {'dropout_rate': 0.0, 'latent_size': 128, 'lr': 0.001, 'num_filters': 32}


KeyboardInterrupt: ignored