In [1]:
%reset -f
import random

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import roc_curve, auc
from torch.utils.data import random_split, DataLoader


from customDatasets.audioDataset import AudioDataset


In [2]:
# Release cached GPU memory that PyTorch is holding but not currently using.
# The guard skips the call on machines without CUDA.
if torch.cuda.is_available():
    torch.cuda.empty_cache()

In [3]:
class Autoencoder(nn.Module):
    """Fully connected autoencoder that reconstructs a flattened input.

    The encoder maps ``input_size`` features through one hidden layer down to
    ``encoding_dim`` features; the decoder mirrors it and ends in a sigmoid,
    so reconstructed values fall in the [0, 1] range.

    Args:
        input_size: Number of features per sample after flattening.
        encoding_dim: Width of the bottleneck layer.
        hidden_dim: Width of the hidden layer on each side of the bottleneck.
            Defaults to 512, the value that used to be hard-coded.
    """

    def __init__(self, input_size, encoding_dim, hidden_dim=512):
        super().__init__()
        self.encoder = nn.Sequential(
            nn.Linear(input_size, hidden_dim),
            nn.LeakyReLU(0.2),
            nn.Linear(hidden_dim, encoding_dim),
            nn.LeakyReLU(0.2),
        )

        self.decoder = nn.Sequential(
            nn.Linear(encoding_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, input_size),
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Flatten each sample, encode it and return its reconstruction.

        Args:
            x: Tensor whose first dimension is the batch; all remaining
                dimensions are flattened and must multiply to ``input_size``.

        Returns:
            Tensor of shape ``(batch, input_size)``.
        """
        # reshape (unlike view) also accepts non-contiguous tensors, e.g. the
        # result of a transpose, and is a no-copy view whenever that is possible.
        flat = x.reshape(x.size(0), -1)
        return self.decoder(self.encoder(flat))

In [4]:
# Scratch check used to choose the mel-spectrogram parameters: push one
# training clip through the transform chain and look at the resulting shape.
from utils.audioUtils import AudioUtil
from torchaudio.transforms import MelSpectrogram, AmplitudeToDB
audio_file = "./data/train/normal_id_00_00000000.wav"

# NOTE(review): AudioUtil.open is project code not shown here; its result is
# unpacked below as (signal, sample rate) - confirm against utils/audioUtils.
aud = AudioUtil.open(audio_file)
sig, sr = aud
# Mel spectrogram with an FFT size of 1000, a hop of 501 samples and 128 mel bands.
mel = MelSpectrogram(sr, n_fft=1000, hop_length=501, n_mels=128)
spec = mel(sig)
# Convert the spectrogram to a decibel scale, with top_db set to 80.
ampl = AmplitudeToDB(top_db=80)
spec = ampl(spec)



print(spec.shape)

torch.Size([1, 128, 320])


In [5]:
def train_model(model, train_dl, val_dl, test_dl, criterion, optimizer, device, epochs=5, step_size=5):
    """Train an autoencoder and report train loss, validation loss and test ROC AUC each epoch.

    Every epoch runs one pass over ``train_dl`` with gradient updates, one
    gradient-free pass over ``val_dl`` for the reconstruction loss, and one
    gradient-free pass over ``test_dl`` where the per-sample mean squared
    reconstruction error is used as the score for a ROC curve.

    Args:
        model: Network whose output has shape ``(batch, features)`` matching
            the flattened input.
        train_dl: Iterable of ``(inputs, labels)`` batches used for optimisation.
        val_dl: Iterable of ``(inputs, labels)`` batches used for validation loss.
        test_dl: Iterable of ``(inputs, labels)`` batches used for the ROC AUC.
            Samples labelled ``0`` are treated as the positive class
            (``pos_label=0``), so a higher reconstruction error counts towards
            label 0.
        criterion: Loss called as ``criterion(outputs, flattened_inputs)``.
        optimizer: Optimiser already bound to ``model``'s parameters.
        device: Device the input batches are moved to.
        epochs: Number of passes over ``train_dl``.
        step_size: Number of epochs between learning-rate decays (factor 0.1).

    Returns:
        dict with the keys ``"train_loss"``, ``"val_loss"`` and ``"roc_auc"``,
        each holding one float per epoch. The same values are also printed.
    """
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=0.1)
    history = {"train_loss": [], "val_loss": [], "roc_auc": []}

    for epoch in range(epochs):
        # --- optimisation pass -------------------------------------------
        # The mode is set once per phase instead of once per batch.
        model.train()
        train_losses = []
        for inputs, _ in train_dl:
            # Labels are not needed to train an autoencoder, so they stay put.
            inputs = inputs.to(device)
            outputs = model(inputs)
            # reshape also handles non-contiguous batches, where view raises.
            loss = criterion(outputs, inputs.reshape(inputs.size(0), -1))

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            train_losses.append(loss.item())
        lr_scheduler.step()
        print(f'Epoch[{epoch + 1}/{epochs}], Train loss: {np.average(train_losses): .4f}')
        history["train_loss"].append(float(np.average(train_losses)))

        # --- evaluation passes (no gradients) ----------------------------
        model.eval()
        val_losses = []
        scores = []
        full_labels = []
        with torch.no_grad():
            for inputs, _ in val_dl:
                inputs = inputs.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, inputs.reshape(inputs.size(0), -1))
                val_losses.append(loss.item())
            print(f'Epoch[{epoch + 1}/{epochs}], Val loss: {np.average(val_losses): .4f}')
            history["val_loss"].append(float(np.average(val_losses)))

            for inputs, labels in test_dl:
                inputs = inputs.to(device)
                outputs = model(inputs)
                targets = inputs.reshape(inputs.size(0), -1)
                # Per-sample mean squared reconstruction error.
                mse = torch.sum((outputs - targets) ** 2, dim=1) / outputs.shape[1]
                scores.append(mse)
                full_labels.append(labels)

        # scikit-learn expects array-likes on the host, so hand it numpy arrays.
        y_true = torch.cat(full_labels).cpu().numpy()
        y_score = torch.cat(scores).cpu().numpy()
        fpr, tpr, _ = roc_curve(y_true, y_score, pos_label=0)
        roc_auc = auc(fpr, tpr)
        print(roc_auc)
        history["roc_auc"].append(float(roc_auc))

    return history

In [6]:
def set_seed(seed = 42):
    """Seed every random number generator this notebook relies on.

    Covers Python's ``random`` module, NumPy's legacy global generator and
    PyTorch on the CPU and on all CUDA devices.

    Args:
        seed: Integer seed applied to each generator.
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # manual_seed_all seeds every GPU rather than only the current one, and
    # is silently ignored when CUDA is not available.
    torch.cuda.manual_seed_all(seed)

# Resolve the compute device once: CUDA if present, then MPS, otherwise CPU.
if torch.cuda.is_available():
    device_name = "cuda:0"
elif torch.backends.mps.is_available():
    device_name = "mps"
else:
    device_name = "cpu"

# Settings for the experiment that trains one autoencoder on all machines.
CONFIG = {
    "seed": 42,
    "epochs": 20,
    "num_classes": 2,
    "learning_rate": 0.01,
    "train_batch_size": 32,
    "val_batch_size": 16,
    "test_batch_size": 128,
    "criterion": nn.MSELoss(),
    "device": torch.device(device_name),
}

# Seed before anything random happens; random_split below draws from the
# global torch generator.
set_seed(CONFIG['seed'])

data_path = "./data/train/"
data_path_test = "./data/test/"

meta_train_df = pd.read_csv("./data/train.csv")
meta_test_df = pd.read_csv("./data/test.csv")

# Only these metadata columns are handed to the datasets.
dataset_columns = ['filename', 'is_normal', 'machine_id']

train_df = meta_train_df[dataset_columns]
train_dataset = AudioDataset(train_df, data_path, in_memory=True, sgram_type="mel")
test_df = meta_test_df[dataset_columns]
test_dataset = AudioDataset(test_df, data_path_test, in_memory=True, sgram_type="mel")

# 80 / 20 train / validation split; the validation part takes the remainder
# so the two sizes always add up to the dataset length.
num_items = len(train_dataset)
num_train = int(0.8 * num_items)
num_val = num_items - num_train

train_ds, val_ds = random_split(train_dataset, [num_train, num_val])
test_ds = test_dataset

# Only the training loader shuffles; validation and test keep dataset order.
train_dl = DataLoader(train_ds, shuffle=True, batch_size=CONFIG['train_batch_size'])
val_dl = DataLoader(val_ds, shuffle=False, batch_size=CONFIG['val_batch_size'])
test_dl = DataLoader(test_ds, shuffle=False, batch_size=CONFIG["test_batch_size"])

In [7]:
# Fetch one batch a single time to learn the flattened size of a sample.
# Previously next(iter(train_dl)) was evaluated three times, building three
# separate shuffled batches just to read one shape.
# NOTE(review): creating fewer loader iterators likely changes how much of the
# global RNG is consumed before the model is initialised, so initial weights
# can differ from earlier runs with the same seed.
sample_batch, _ = next(iter(train_dl))
input_size = sample_batch[0].numel()  # product of all non-batch dimensions
model = Autoencoder(input_size, encoding_dim=128)
model = model.to(CONFIG["device"])
optimizer = optim.Adam(model.parameters(), lr=CONFIG["learning_rate"])

In [8]:
# Gather the inputs of every training batch into one tensor so that
# dataset-wide statistics can be computed from it.
train_batches = [batch_inputs for batch_inputs, _ in train_dl]
inputs_cat = torch.cat(train_batches)
print(inputs_cat.shape)

torch.Size([1896, 1, 320, 128])


In [9]:
# Element-wise minimum and maximum over the sample axis (dim 0) of the
# batch_size x channel x time x frequency tensor, i.e. one value per
# (channel, time, frequency) position.
# The results are stored under train_min / train_max rather than min / max so
# the Python builtins stay usable in the rest of the notebook.
train_min = torch.min(inputs_cat, dim=0).values
train_max = torch.max(inputs_cat, dim=0).values
print(train_max.shape)
print(train_min.shape)
# Both datasets receive the statistics of the training split.
# NOTE(review): presumably AudioDataset uses .min / .max to scale its samples -
# confirm in customDatasets/audioDataset.
train_dataset.min = train_min
train_dataset.max = train_max
test_dataset.min = train_min
test_dataset.max = train_max

torch.Size([1, 320, 128])
torch.Size([1, 320, 128])


In [10]:
# Switch for the single-autoencoder experiment; while False the call below is skipped.
training = False
if training:
    train_model(
        model,
        train_dl,
        val_dl,
        test_dl,
        criterion=CONFIG["criterion"],
        optimizer=optimizer,
        device=CONFIG["device"],
        epochs=CONFIG["epochs"],
    )

# Train different AE for each machine_id

In [24]:
def set_seed(seed = 42):
    """Seed every random number generator this notebook relies on.

    Covers Python's ``random`` module, NumPy's legacy global generator and
    PyTorch on the CPU and on all CUDA devices.

    Args:
        seed: Integer seed applied to each generator.
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # manual_seed_all seeds every GPU rather than only the current one, and
    # is silently ignored when CUDA is not available.
    torch.cuda.manual_seed_all(seed)

# Machines that each get their own autoencoder, data split and learning rate.
MACHINE_IDS = (0, 2, 4)
learning_rate = {0: 0.001, 2: 0.01, 4: 0.1}
CONFIG = {
    "seed": 42,
    "epochs": 20,
    "num_classes": 2,
    "learning_rate": learning_rate,
    "train_batch_size": 32,
    "val_batch_size": 16,
    "test_batch_size": 128,
    "criterion": nn.MSELoss(),
    "device":
        torch.device(
            "cuda:0" if torch.cuda.is_available()
            else "mps" if torch.backends.mps.is_available()
            else "cpu"
        )
}

set_seed(CONFIG['seed'])

data_path = "./data/train/"
data_path_test = "./data/test/"


meta_train_df = pd.read_csv("./data/train.csv")
meta_test_df = pd.read_csv("./data/test.csv")

train_df = meta_train_df[['filename', 'is_normal', 'machine_id']]
test_df = meta_test_df[['filename', 'is_normal', 'machine_id']]


def _machine_dataset(df, machine_id, root):
    """Build the mel-spectrogram AudioDataset for the rows of one machine."""
    machine_df = df[df['machine_id'] == machine_id].reset_index(drop=True)
    return AudioDataset(machine_df, root, in_memory=True, sgram_type="mel")


def _split_train_val(dataset, train_fraction=0.8):
    """Randomly split ``dataset`` into train and validation subsets (80 / 20 by default)."""
    num_train = int(train_fraction * len(dataset))
    return random_split(dataset, [num_train, len(dataset) - num_train])


# The three copy-pasted per-machine sections are replaced by loops. The order
# of operations is unchanged: all training datasets, then all test datasets,
# then the random splits in machine order, so the splits drawn from the seeded
# global generator stay the same.
train_dataset_dict = {m: _machine_dataset(train_df, m, data_path) for m in MACHINE_IDS}
test_dataset_dict = {m: _machine_dataset(test_df, m, data_path_test) for m in MACHINE_IDS}

# Per-machine objects are reached through these dicts, e.g. train_dl_dict[0].
train_dl_dict = {}
val_dl_dict = {}
test_dl_dict = {}
for machine_id in MACHINE_IDS:
    train_subset, val_subset = _split_train_val(train_dataset_dict[machine_id])
    train_dl_dict[machine_id] = DataLoader(
        train_subset, batch_size=CONFIG['train_batch_size'], shuffle=True)
    val_dl_dict[machine_id] = DataLoader(
        val_subset, batch_size=CONFIG['val_batch_size'], shuffle=False)
    test_dl_dict[machine_id] = DataLoader(
        test_dataset_dict[machine_id], batch_size=CONFIG["test_batch_size"], shuffle=False)

In [25]:
# train_ds_0, val_ds_0, test_ds_0

In [26]:
# For each machine, stack the inputs of all its training batches into one
# tensor so that per-machine statistics can be computed from it.
inputs_cat = {}
for machine_key, loader in train_dl_dict.items():
    stacked = torch.cat([batch_inputs for batch_inputs, _ in loader])
    inputs_cat[machine_key] = stacked
    print(stacked.shape)

torch.Size([774, 1, 320, 128])
torch.Size([774, 1, 320, 128])
torch.Size([347, 1, 320, 128])


In [27]:
# For every machine, take the element-wise minimum and maximum over the sample
# axis (dim 0) of its batch_size x channel x time x frequency tensor, i.e. one
# value per (channel, time, frequency) position.
# The results are stored under train_min / train_max rather than min / max so
# the Python builtins stay usable in the rest of the notebook.
for key in train_dl_dict.keys():
    train_min = torch.min(inputs_cat[key], dim=0).values
    train_max = torch.max(inputs_cat[key], dim=0).values
    print(train_max.shape)
    print(train_min.shape)
    # Train and test datasets of a machine share that machine's training statistics.
    # NOTE(review): presumably AudioDataset uses .min / .max to scale its
    # samples - confirm in customDatasets/audioDataset.
    train_dataset_dict[key].min = train_min
    train_dataset_dict[key].max = train_max
    test_dataset_dict[key].min = train_min
    test_dataset_dict[key].max = train_max

torch.Size([1, 320, 128])
torch.Size([1, 320, 128])
torch.Size([1, 320, 128])
torch.Size([1, 320, 128])
torch.Size([1, 320, 128])
torch.Size([1, 320, 128])


In [28]:
# Train one autoencoder per machine and keep the fitted models by machine id.
training=True
models_dict={}
if training:
    for key in train_dl_dict.keys():
        # Fetch one batch a single time to learn the flattened sample size;
        # previously next(iter(...)) was evaluated three times per machine.
        sample_batch, _ = next(iter(train_dl_dict[key]))
        input_size = sample_batch[0].numel()  # product of all non-batch dimensions
        model = Autoencoder(input_size, encoding_dim=128)
        model = model.to(CONFIG["device"])
        # Each machine has its own learning rate in CONFIG["learning_rate"].
        optimizer = optim.Adam(model.parameters(), lr=CONFIG["learning_rate"][key])
        train_model(
            model,
            train_dl_dict[key],
            val_dl_dict[key],
            test_dl_dict[key],
            CONFIG["criterion"],
            optimizer,
            CONFIG["device"],
            epochs=CONFIG["epochs"],
            step_size=10,  # decay the learning rate by 10x every 10 epochs
        )
        models_dict[key]=model

Epoch[1/20], Train loss:  0.0321
Epoch[1/20], Val loss:  0.0224
0.8522191011235956
Epoch[2/20], Train loss:  0.0229
Epoch[2/20], Val loss:  0.0200
0.8562921348314606
Epoch[3/20], Train loss:  0.0197
Epoch[3/20], Val loss:  0.0182
0.888876404494382
Epoch[4/20], Train loss:  0.0186
Epoch[4/20], Val loss:  0.0175
0.8892134831460675
Epoch[5/20], Train loss:  0.0171
Epoch[5/20], Val loss:  0.0168
0.9003932584269663
Epoch[6/20], Train loss:  0.0167
Epoch[6/20], Val loss:  0.0168
0.8949157303370787
Epoch[7/20], Train loss:  0.0164
Epoch[7/20], Val loss:  0.0158
0.9194662921348316
Epoch[8/20], Train loss:  0.0155
Epoch[8/20], Val loss:  0.0153
0.9253089887640451
Epoch[9/20], Train loss:  0.0158
Epoch[9/20], Val loss:  0.0157
0.9151404494382023
Epoch[10/20], Train loss:  0.0154
Epoch[10/20], Val loss:  0.0158
0.949185393258427
Epoch[11/20], Train loss:  0.0150
Epoch[11/20], Val loss:  0.0150
0.9041011235955057
Epoch[12/20], Train loss:  0.0150
Epoch[12/20], Val loss:  0.0147
0.9112921348314607
