In [29]:
%reset -f
import torch
import numpy as np
import pandas as pd
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import roc_curve, auc
from torch.utils.data import random_split, DataLoader
from sklearn.model_selection import train_test_split

from customDatasets.audioDataset import AudioDataset
from torchviz import make_dot
import hiddenlayer as hl

In [30]:
# Release cached, currently unused GPU memory held by PyTorch's CUDA allocator.
# The guard makes this cell a no-op on machines without CUDA.
if torch.cuda.is_available():
    torch.cuda.empty_cache()

In [31]:
class Autoencoder(nn.Module):
    """Fully connected autoencoder that works on flattened inputs.

    The encoder maps ``input_size -> 1024 -> 512 -> 512 -> encoding_dim`` and
    the decoder mirrors it back to ``input_size``. Every linear layer is
    followed by ``BatchNorm1d`` and ``LeakyReLU(0.2)``, except the final
    decoder layer, which is a plain linear projection.

    Args:
        input_size: number of features of one flattened sample.
        encoding_dim: width of the bottleneck representation.
    """

    def __init__(self, input_size, encoding_dim):
        super().__init__()
        negative_slope = 0.2

        # Encoder: Linear -> BatchNorm -> LeakyReLU for each consecutive width pair.
        encoder_widths = [input_size, 1024, 512, 512, encoding_dim]
        encoder_layers = []
        for fan_in, fan_out in zip(encoder_widths[:-1], encoder_widths[1:]):
            encoder_layers += [
                nn.Linear(fan_in, fan_out),
                nn.BatchNorm1d(fan_out),
                nn.LeakyReLU(negative_slope),
            ]
        self.encoder = nn.Sequential(*encoder_layers)

        # Decoder: same pattern for the hidden layers, then a bare output projection.
        decoder_widths = [encoding_dim, 512, 512, 1024]
        decoder_layers = []
        for fan_in, fan_out in zip(decoder_widths[:-1], decoder_widths[1:]):
            decoder_layers += [
                nn.Linear(fan_in, fan_out),
                nn.BatchNorm1d(fan_out),
                nn.LeakyReLU(negative_slope),
            ]
        decoder_layers.append(nn.Linear(1024, input_size))
        self.decoder = nn.Sequential(*decoder_layers)

    def forward(self, x):
        """Flatten each sample, encode it and return the reconstruction.

        Returns a tensor of shape ``(batch, input_size)``; the intermediate
        encoding is not returned.
        """
        flat = x.view(x.size(0), -1)
        return self.decoder(self.encoder(flat))

In [32]:
# Scratch cell for choosing the mel-spectrogram parameters: load one training
# clip, convert it to a dB-scaled mel spectrogram and print the shapes.
# NOTE(review): these imports live in the middle of the notebook rather than in
# the first import cell.
from utils.audioUtils import AudioUtil
from torchaudio.transforms import MelSpectrogram, AmplitudeToDB
audio_file = "./data/train/normal_id_00_00000000.wav"

# AudioUtil.open returns a pair that is unpacked below
# (presumably waveform and sample rate — confirm against utils.audioUtils).
aud = AudioUtil.open(audio_file)
sig, sr = aud
print(sig.shape)
# Candidate parameters under test: n_fft=1000, hop_length=501, 128 mel bins.
mel = MelSpectrogram(sr, n_fft=1000, hop_length=501, n_mels=128)
spec = mel(sig)
# Convert to decibels with top_db=80.
ampl = AmplitudeToDB(top_db=80)
spec = ampl(spec)



# Shape of the resulting spectrogram for this single clip.
print(spec.shape)

In [33]:
def train_model(model, train_dl, val_dl, test_dl, criterion, optimizer, device, epochs=5,step_size=5,wandb=None):
    """Train a reconstruction model and evaluate it after every epoch.

    Each epoch runs one optimisation pass over ``train_dl``, computes the mean
    reconstruction loss on ``val_dl``, and scores every sample of ``test_dl``
    with its mean squared reconstruction error. Those scores are turned into a
    ROC AUC with ``pos_label=0`` (samples labelled 0 are the positive class).

    Args:
        model: module whose output is compared with the flattened input batch.
        train_dl: iterable of ``(inputs, labels)`` batches used for training.
        val_dl: iterable of ``(inputs, labels)`` batches used for validation.
        test_dl: iterable of ``(inputs, labels)`` batches used for the ROC AUC;
            this is the only place the labels are read.
        criterion: loss called as ``criterion(outputs, flattened_inputs)``.
        optimizer: optimizer that updates ``model``'s parameters.
        device: device each input batch is moved to.
        epochs: number of epochs to run.
        step_size: the learning rate is multiplied by 0.1 every ``step_size``
            epochs (``StepLR``).
        wandb: optional logger exposing ``log(dict)``; when truthy it receives
            ``train_loss``, ``val_loss`` and ``roc_auc`` once per epoch.

    Returns:
        The lowest per-epoch mean validation loss observed over all epochs.
    """
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=0.1)
    min_val_loss = np.inf
    for epoch in range(epochs):
        # ---- training pass ----
        train_losses = []
        model.train()
        for inputs, _ in train_dl:
            inputs = inputs.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, inputs.view(inputs.size(0), -1))

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            train_losses.append(loss.item())
        lr_scheduler.step()
        train_loss = np.average(train_losses)
        print(f'Epoch[{epoch + 1}/{epochs}], Train loss: {train_loss: .4f}')

        # ---- validation and test passes (no gradients, eval-mode BatchNorm) ----
        val_losses = []
        scores = []
        full_labels = []
        model.eval()
        with torch.no_grad():
            for inputs, _ in val_dl:
                inputs = inputs.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, inputs.view(inputs.size(0), -1))
                val_losses.append(loss.item())
            val_loss = np.average(val_losses)
            print(f'Epoch[{epoch + 1}/{epochs}], Val loss: {val_loss: .4f}')

            for inputs, labels in test_dl:
                inputs = inputs.to(device)
                outputs = model(inputs)
                # Per-sample mean squared reconstruction error = anomaly score.
                mse = torch.sum((outputs - inputs.view(inputs.size(0), -1)) ** 2, dim=1) / outputs.shape[1]
                scores.append(mse.cpu())
                full_labels.append(labels)

        full_labels = torch.cat(full_labels).cpu()
        scores = torch.cat(scores)
        fpr, tpr, _ = roc_curve(full_labels.numpy(), scores.numpy(), pos_label=0)
        roc_auc = auc(fpr, tpr)
        print(roc_auc)
        if wandb:
            wandb.log({"train_loss": train_loss, "val_loss": val_loss, "roc_auc": roc_auc})
        # Plain comparison on purpose: the builtin `min` may be shadowed by a
        # notebook-level variable.
        if val_loss < min_val_loss:
            min_val_loss = val_loss
    # Return only after the last epoch; returning inside the loop would stop
    # training after the first epoch.
    return min_val_loss

In [34]:
def set_seed(seed = 42):
    """Seed NumPy's global RNG, PyTorch's default RNG and the CUDA RNG with ``seed``."""
    # Same order as before: NumPy first, then torch, then torch.cuda.
    for seeder in (np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
        seeder(seed)

# Hyper-parameters and runtime settings for the single, all-machines autoencoder.
# Device preference: CUDA, then Apple MPS, then CPU.
CONFIG = dict(
    seed=42,
    epochs=20,
    num_classes=2,
    learning_rate=0.01,
    train_batch_size=32,
    val_batch_size=16,
    test_batch_size=128,
    criterion=nn.MSELoss(),
    device=torch.device(
        "cuda:0" if torch.cuda.is_available()
        else "mps" if torch.backends.mps.is_available()
        else "cpu"
    ),
)

# Seed the RNGs before the shuffled train/validation split below.
set_seed(CONFIG["seed"])

data_path, data_path_test = "./data/train/", "./data/test/"

meta_train_df = pd.read_csv("./data/train.csv")
meta_test_df = pd.read_csv("./data/test.csv")

train_df = meta_train_df[['filename', 'is_normal', 'machine_id']]

# 80/20 split over row positions, stratified by machine_id.
range_train, range_test = train_test_split(
    range(len(train_df)),
    train_size=0.8,
    test_size=0.2,
    shuffle=True,
    random_state=None,
    stratify=meta_train_df['machine_id'],
)
val_df = train_df.iloc[range_test].reset_index(drop=True)
train_df = train_df.iloc[range_train].reset_index(drop=True)

# Datasets are created in the same order as before: train, validation, test.
train_dataset = AudioDataset(train_df, data_path, in_memory=True, sgram_type="mel")
val_dataset = AudioDataset(val_df, data_path, in_memory=True, sgram_type="mel")
test_df = meta_test_df[['filename', 'is_normal', 'machine_id']]
test_dataset = AudioDataset(test_df, data_path_test, in_memory=True, sgram_type="mel")

# Short aliases used by the loaders.
train_ds, val_ds, test_ds = train_dataset, val_dataset, test_dataset

# Only the training loader shuffles.
train_dl = DataLoader(train_ds, shuffle=True, batch_size=CONFIG['train_batch_size'])
val_dl = DataLoader(val_ds, shuffle=False, batch_size=CONFIG['val_batch_size'])
test_dl = DataLoader(test_ds, shuffle=False, batch_size=CONFIG["test_batch_size"])

In [35]:
# Number of features in one flattened sample. A single batch is fetched once:
# each `next(iter(train_dl))` builds a new iterator and loads a freshly
# shuffled batch, so repeating it three times only to read a shape is wasted work.
input_size = next(iter(train_dl))[0][0].numel()

In [36]:
# Smoke test: push one training batch through a freshly initialised (untrained)
# autoencoder. The torchviz rendering at the bottom is currently commented out.
# NOTE(review): both imports repeat imports from the first cell of the notebook.
import torch
from torchviz import make_dot

# create some sample input data (the inputs of one training batch)
x = next(iter(train_dl))[0]
# generate predictions for the sample data with a throwaway model (encoding_dim=64)
y = Autoencoder(input_size=input_size,encoding_dim=64)(x)

# generate a model architecture visualization
#make_dot(y.mean(),
#         params=dict(Autoencoder(input_size=input_size,encoding_dim=64).named_parameters()),
#         show_attrs=True,
#         show_saved=True).render("MyPyTorchModel_torchviz", format="png")


In [37]:
# Gather every training batch into one tensor (stacked along the batch axis)
# so that dataset-wide statistics can be computed from it.
inputs_cat = torch.cat([batch_inputs for batch_inputs, _batch_labels in train_dl], dim=0)
print(inputs_cat.shape)

In [38]:
# Element-wise mean and standard deviation over all training samples (the
# reduction is over dim 0 only), shared by the three datasets.
mean = inputs_cat.mean(dim=0)
std = inputs_cat.std(dim=0)
print(mean.shape)
print(std.shape)
for dataset in (train_dataset, val_dataset, test_dataset):
    dataset.mean = mean
    dataset.std = std

In [39]:
training=True
# One batch is enough to read the per-sample feature count.
input_size = next(iter(train_dl))[0][0].numel()
emb_space_sizes = [32, 64, 128, 256, 512]
measures = []
# Sweep over the embedding size. train_model returns one scalar per run (its
# best validation loss), so `measures` is a flat list of floats and must not
# be indexed as a (train, val, auc) triple.
if training:
    for emb_space_size in emb_space_sizes:
        model = Autoencoder(encoding_dim=emb_space_size, input_size=input_size)
        model = model.to(CONFIG["device"])
        optimizer = optim.Adam(model.parameters(), lr=CONFIG["learning_rate"])
        measures.append(train_model(model, train_dl, val_dl, test_dl, CONFIG["criterion"], optimizer, CONFIG["device"], epochs=50))
for (measure, emb_space_size) in zip(measures, emb_space_sizes):
    print({"emb_space_size": emb_space_size, "val_loss": measure})

In [None]:
# Take the best embedding size from the sweep and build a fresh model for a
# longer training run. `measures` holds one validation loss per size, so the
# best entry is the smallest one.
if training:
    emb_space_measures=[32, 64, 128, 256, 512]
    best_emb_space_size = emb_space_measures[int(np.argmin(measures))]
    model = Autoencoder(encoding_dim=best_emb_space_size, input_size=input_size)
    model = model.to(CONFIG["device"])
    optimizer = optim.Adam(model.parameters(), lr=CONFIG["learning_rate"])

In [None]:
# Longer training run for the selected model; the returned loss is not kept.
if training:
    train_model(
        model,
        train_dl,
        val_dl,
        test_dl,
        CONFIG["criterion"],
        optimizer,
        CONFIG["device"],
        epochs=CONFIG["epochs"],
    )

# Train a separate autoencoder (AE) for each machine_id

In [None]:
def set_seed(seed = 42):
    """Seed NumPy's global RNG, PyTorch's default RNG and the CUDA RNG with ``seed``."""
    # NOTE(review): this repeats the definition from the earlier configuration cell.
    for seeder in (np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
        seeder(seed)

# Per-machine Adam learning rates, keyed by machine_id.
learning_rate={0:0.001, 2:0.01, 4:0.1}
CONFIG = {
    "seed": 42,
    "epochs": 20,
    "num_classes": 2,
    "learning_rate": learning_rate,
    "train_batch_size": 32,
    "val_batch_size": 16,
    "test_batch_size": 128,
    "criterion": nn.MSELoss(),
    "device":
        torch.device(
            "cuda:0" if torch.cuda.is_available()
            else "mps" if torch.backends.mps.is_available()
            else "cpu"
        )
}

set_seed(CONFIG['seed'])

data_path = "./data/train/"
data_path_test = "./data/test/"


meta_train_df = pd.read_csv("./data/train.csv")
meta_test_df = pd.read_csv("./data/test.csv")

train_df = meta_train_df[['filename', 'is_normal', 'machine_id']]
test_df = meta_test_df[['filename', 'is_normal', 'machine_id']]

# Machines that each get their own autoencoder.
MACHINE_IDS = (0, 2, 4)

# Train/validation frames and datasets, one entry per machine_id. The splits
# and datasets are created in the same order as the former copy-pasted blocks
# (machine 0, 2, 4) so the seeded RNG stream is consumed identically.
train_df_dict, val_df_dict = {}, {}
train_dataset_dict, val_dataset_dict = {}, {}
for machine_id in MACHINE_IDS:
    machine_train_df = train_df[train_df['machine_id'] == machine_id].reset_index(drop=True)
    # `stratify` sees a single machine_id value here; it is kept so the split
    # is produced by the same code path as before.
    [range_train, range_test] = train_test_split(range(len(machine_train_df)), test_size=0.2, train_size=0.8, random_state=None, shuffle=True, stratify=machine_train_df['machine_id'])
    val_df_dict[machine_id] = machine_train_df.iloc[range_test].reset_index(drop=True)
    train_df_dict[machine_id] = machine_train_df.iloc[range_train].reset_index(drop=True)
    train_dataset_dict[machine_id] = AudioDataset(train_df_dict[machine_id], data_path, in_memory=True, sgram_type="mel")
    val_dataset_dict[machine_id] = AudioDataset(val_df_dict[machine_id], data_path, in_memory=True, sgram_type="mel")

# Test frames and datasets, one entry per machine_id.
test_df_dict, test_dataset_dict = {}, {}
for machine_id in MACHINE_IDS:
    test_df_dict[machine_id] = test_df[test_df['machine_id'] == machine_id].reset_index(drop=True)
    test_dataset_dict[machine_id] = AudioDataset(test_df_dict[machine_id], data_path_test, in_memory=True, sgram_type="mel")

# Loaders, one entry per machine_id; only the training loaders shuffle.
train_dl_dict = {m: DataLoader(ds, batch_size=CONFIG['train_batch_size'], shuffle=True) for m, ds in train_dataset_dict.items()}
val_dl_dict = {m: DataLoader(ds, batch_size=CONFIG['val_batch_size'], shuffle=False) for m, ds in val_dataset_dict.items()}
test_dl_dict = {m: DataLoader(ds, batch_size=CONFIG["test_batch_size"], shuffle=False) for m, ds in test_dataset_dict.items()}

# Backward-compatible per-machine names from the former copy-pasted version;
# new code should index the *_dict mappings instead.
train_df_0, train_df_2, train_df_4 = (train_df_dict[m] for m in MACHINE_IDS)
val_df_0, val_df_2, val_df_4 = (val_df_dict[m] for m in MACHINE_IDS)
test_df_0, test_df_2, test_df_4 = (test_df_dict[m] for m in MACHINE_IDS)
train_dataset_0, train_dataset_2, train_dataset_4 = (train_dataset_dict[m] for m in MACHINE_IDS)
val_dataset_0, val_dataset_2, val_dataset_4 = (val_dataset_dict[m] for m in MACHINE_IDS)
test_dataset_0, test_dataset_2, test_dataset_4 = (test_dataset_dict[m] for m in MACHINE_IDS)
train_ds_0, train_ds_2, train_ds_4 = train_dataset_0, train_dataset_2, train_dataset_4
val_ds_0, val_ds_2, val_ds_4 = val_dataset_0, val_dataset_2, val_dataset_4
test_ds_0, test_ds_2, test_ds_4 = test_dataset_0, test_dataset_2, test_dataset_4
train_dl_0, train_dl_2, train_dl_4 = (train_dl_dict[m] for m in MACHINE_IDS)
val_dl_0, val_dl_2, val_dl_4 = (val_dl_dict[m] for m in MACHINE_IDS)
test_dl_0, test_dl_2, test_dl_4 = (test_dl_dict[m] for m in MACHINE_IDS)

In [None]:
# train_ds_0, val_ds_0, test_ds_0

In [None]:
# For every machine, gather all of its training batches into one tensor
# (stacked along the batch axis) for the statistics computed afterwards.
inputs_cat = {}
for key, loader in train_dl_dict.items():
    inputs_cat[key] = torch.cat([batch_inputs for batch_inputs, _batch_labels in loader])
    print(inputs_cat[key].shape)

In [None]:
# Element-wise minimum and maximum over all training samples of each machine
# (the reduction is over dim 0 only), shared by that machine's train,
# validation and test datasets.
# The results get their own names so the builtins `min` and `max` are not
# rebound to tensors for the rest of the notebook.
for key in train_dl_dict.keys():
    feature_min = torch.min(inputs_cat[key], dim=0).values
    feature_max = torch.max(inputs_cat[key], dim=0).values
    print(feature_max.shape)
    print(feature_min.shape)
    for dataset in (train_dataset_dict[key], val_dataset_dict[key], test_dataset_dict[key]):
        dataset.min = feature_min
        dataset.max = feature_max

In [None]:
training=True
# Trained autoencoders, keyed by machine_id.
models_dict={}
if training:
    for key in train_dl_dict.keys():
        # One batch is enough to read the per-sample feature count; fetching
        # it once avoids building three iterators and loading three batches.
        input_size = next(iter(train_dl_dict[key]))[0][0].numel()
        model = Autoencoder(input_size, encoding_dim=128)
        model = model.to(CONFIG["device"])
        # Each machine uses its own learning rate from CONFIG.
        optimizer = optim.Adam(model.parameters(), lr=CONFIG["learning_rate"][key])
        train_model(
            model,
            train_dl_dict[key],
            val_dl_dict[key],
            test_dl_dict[key],
            CONFIG["criterion"],
            optimizer,
            CONFIG["device"],
            epochs=CONFIG["epochs"],
            step_size=10,
        )
        models_dict[key]=model