In [52]:
%reset -f
import torch
import numpy as np
import pandas as pd
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import roc_curve, auc
from torch.utils.data import random_split, DataLoader
from sklearn.model_selection import train_test_split
import wandb


from customDatasets.audioDataset import AudioDataset


In [53]:
# Release cached GPU memory that PyTorch's allocator is holding but not using.
# Only attempted when a CUDA device is available.
if torch.cuda.is_available():
    torch.cuda.empty_cache()

In [54]:
class Autoencoder(nn.Module):
    """Fully connected autoencoder operating on flattened inputs.

    The encoder narrows ``input_size -> 1024 -> 512 -> encoding_dim`` and the
    decoder widens ``encoding_dim -> 1024 -> 512 -> input_size``. Every linear
    layer except the last one is followed by batch normalisation and a
    LeakyReLU with negative slope 0.2; the output layer is purely linear.

    Args:
        input_size: Number of features of one flattened sample.
        encoding_dim: Width of the bottleneck representation.
    """

    def __init__(self, input_size, encoding_dim):
        super().__init__()

        def dense_block(n_in, n_out):
            # Linear -> BatchNorm -> LeakyReLU: the unit repeated through the network.
            return [nn.Linear(n_in, n_out), nn.BatchNorm1d(n_out), nn.LeakyReLU(0.2)]

        # Blocks are instantiated in network order (encoder first, input to
        # bottleneck) so the weight initialisation consumes the RNG in a fixed
        # sequence and the Sequential indices stay 0..8 / 0..6.
        encoder_layers = (
            dense_block(input_size, 1024)
            + dense_block(1024, 512)
            + dense_block(512, encoding_dim)
        )
        self.encoder = nn.Sequential(*encoder_layers)

        decoder_layers = (
            dense_block(encoding_dim, 1024)
            + dense_block(1024, 512)
            + [nn.Linear(512, input_size)]  # no normalisation/activation on the output
        )
        self.decoder = nn.Sequential(*decoder_layers)

    def forward(self, x):
        """Flatten all non-batch axes of ``x`` and return its reconstruction."""
        flat = x.view(x.size(0), -1)
        return self.decoder(self.encoder(flat))

In [55]:
# test for deciding the mels parameters
# Loads a single training clip, converts it to a dB-scaled mel spectrogram and
# prints the resulting shape so the spectrogram parameters can be chosen.
from utils.audioUtils import AudioUtil
from torchaudio import transforms
import torch
audio_file = "./data/train/normal_id_00_00000000.wav"

# AudioUtil.open returns a pair that unpacks into (signal, sample rate).
aud = AudioUtil.open(audio_file)
sig, sr = aud
# Mel spectrogram with 128 mel bands, FFT size 1000 and hop length 501.
mel = transforms.MelSpectrogram(sr, n_fft=1000, hop_length=501, n_mels=128)
spec = mel(sig)
# Convert amplitudes to decibels, with top_db=80.
ampl = transforms.AmplitudeToDB(top_db=80)
spec = ampl(spec)


print(spec.shape)


torch.Size([1, 128, 320])


In [56]:
def train_model(model, train_dl, val_dl, test_dl, criterion, optimizer, device, wandb=None, epochs=5,step_size=5):
    """Train a reconstruction model and evaluate it after every epoch.

    Each epoch runs one optimisation pass over ``train_dl``, computes the mean
    reconstruction loss on ``val_dl`` and scores ``test_dl`` with a ROC AUC
    based on the per-sample reconstruction error.

    Args:
        model: Module whose forward pass returns a flattened reconstruction
            of its input, shaped ``(batch, features)``.
        train_dl: Iterable of ``(inputs, labels)`` batches; labels are ignored.
        val_dl: Iterable of ``(inputs, labels)`` batches whose inputs carry a
            segment axis at dim 1. Batch and segment axes are merged so every
            segment is reconstructed on its own; labels are ignored.
        test_dl: Iterable of ``(inputs, labels)`` batches with the same
            segment layout as ``val_dl``; labels feed the ROC computation.
        criterion: Loss called as ``criterion(reconstruction, target)``.
        optimizer: Optimizer bound to ``model``'s parameters.
        device: Device every input batch is moved to, in all three phases.
        wandb: Optional logger exposing ``log(dict)``; skipped when falsy.
        epochs: Number of epochs to run.
        step_size: Period (in epochs) of the ``StepLR`` schedule, which
            halves the learning rate (``gamma=0.5``).

    Returns:
        The lowest mean validation loss observed over all epochs.
    """
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=0.5)
    best_val_loss = np.inf

    for epoch in range(epochs):
        # ---- training pass -------------------------------------------------
        model.train()
        train_losses = []
        for inputs, _ in train_dl:
            inputs = inputs.to(device)
            outputs = model(inputs)
            # The target is the input itself, flattened like the model output.
            loss = criterion(outputs, inputs.view(inputs.size(0), -1))

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            train_losses.append(loss.item())
        lr_scheduler.step()
        train_loss = np.average(train_losses)
        print(f'Epoch[{epoch + 1}/{epochs}], Train loss: {train_loss: .4f}')

        # ---- validation pass -----------------------------------------------
        # Everything below is evaluation only, so switch mode once per epoch.
        model.eval()
        val_losses = []
        with torch.no_grad():
            for inputs, _ in val_dl:
                inputs = inputs.to(device)
                # Merge batch and segment axes: each segment is one sample.
                inputs = inputs.view(inputs.shape[0] * inputs.shape[1], -1)
                outputs = model(inputs)
                val_losses.append(criterion(outputs, inputs).item())
        val_loss = np.average(val_losses)
        print(f'Epoch[{epoch + 1}/{epochs}], Val loss: {val_loss: .4f}')
        if val_loss < best_val_loss:
            best_val_loss = val_loss

        # ---- test scoring --------------------------------------------------
        full_scores = []
        full_labels = []
        with torch.no_grad():
            for inputs, labels in test_dl:
                # Use the `device` argument rather than a notebook global so
                # the function works with whatever device the caller selected.
                inputs = inputs.to(device)
                segment_errors = []
                # Take the segment count from the batch instead of assuming 10.
                for idx in range(inputs.shape[1]):
                    segment = inputs[:, idx]
                    outputs = model(segment)
                    target = segment.reshape(segment.size(0), -1)
                    # Mean squared reconstruction error of this segment, per sample.
                    segment_errors.append(
                        torch.mean((outputs - target) ** 2, dim=1, keepdim=True)
                    )

                # A sample's score is the error of its worst-reconstructed segment.
                scores = torch.cat(segment_errors, dim=1).max(dim=1).values
                full_scores.append(scores.cpu())
                full_labels.append(labels.cpu())

        full_labels = torch.cat(full_labels)
        full_scores = torch.cat(full_scores)
        # pos_label=0: samples labelled 0 are the class expected to score higher.
        fpr, tpr, _ = roc_curve(full_labels.numpy(), full_scores.numpy(), pos_label=0)
        roc_auc = auc(fpr, tpr)
        print(roc_auc)
        if wandb:
            wandb.log({"roc_auc test": roc_auc, "val_loss": val_loss, "train_loss": train_loss})
    return best_val_loss

In [57]:
def set_seed(seed = 42):
    """Seed the random number generators used for a run.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (CPU and every CUDA device), so any code drawing from one of them
    produces the same sequence after this call.

    Args:
        seed: Integer seed applied to every generator.
    """
    import random  # local import: the notebook's import cell does not provide it

    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seeds all CUDA devices rather than only the current one; it is silently
    # ignored when CUDA is not available.
    torch.cuda.manual_seed_all(seed)

# Pick the compute device once: CUDA first, then Apple's MPS backend, else CPU.
if torch.cuda.is_available():
    device_name = "cuda:0"
elif torch.backends.mps.is_available():
    device_name = "mps"
else:
    device_name = "cpu"

# Run-wide settings; the whole dict is also uploaded to wandb later on.
CONFIG = {
    "seed": 42,
    "epochs": 500,
    "num_classes": 2,
    "learning_rate": 0.001,
    "train_batch_size": 32,
    "val_batch_size": 16,
    "test_batch_size": 128,
    "criterion": nn.MSELoss(),
    "device": torch.device(device_name),
}

# Seed the generators before any data splitting or model creation happens.
set_seed(CONFIG['seed'])

# Directories holding the audio clips referenced by the metadata CSV files.
data_path = "./data/train/"
data_path_test = "./data/test/"


# Metadata tables; the code below uses the filename, is_normal and machine_id columns.
meta_train_df = pd.read_csv("./data/train.csv")
meta_test_df = pd.read_csv("./data/test.csv")

train_df = meta_train_df[['filename', 'is_normal', 'machine_id']]
# 80/20 train/validation split over row positions, stratified by machine_id.
# random_state=None makes scikit-learn draw from NumPy's global RNG.
range_train, range_test = train_test_split(range(len(train_df)), test_size=0.2, train_size=0.8, random_state=None, shuffle=True, stratify=meta_train_df['machine_id'])
# val_df must be taken before train_df is overwritten with its own subset.
val_df = train_df.iloc[range_test].reset_index(drop=True)
train_df = train_df.iloc[range_train].reset_index(drop=True)
# Training set: augmentation enabled and split_sgram=True.
train_dataset = AudioDataset(train_df, data_path,in_memory=True, sgram_type="mel", augment=True, split_sgram=True)
# Validation and test sets: no augmentation, created with test_mode=True.
val_dataset = AudioDataset(val_df, data_path,in_memory=True, sgram_type="mel", augment=False, test_mode=True)
test_df = meta_test_df[['filename', 'is_normal', 'machine_id']]
test_dataset = AudioDataset(test_df, data_path_test, in_memory=True, sgram_type="mel", augment=False, test_mode=True)

# Aliases for the same dataset objects (no copies are made).
train_ds = train_dataset
val_ds = val_dataset
test_ds = test_dataset

# Only the training loader shuffles; evaluation loaders keep dataset order.
train_dl = DataLoader(train_ds, batch_size=CONFIG['train_batch_size'], shuffle=True)
val_dl = DataLoader(val_ds, batch_size=CONFIG['val_batch_size'], shuffle=False)
test_dl = DataLoader(test_ds, batch_size=CONFIG["test_batch_size"], shuffle=False)

In [58]:
# Sanity check: pull one batch from the training loader and inspect its shape.
inputs, labels = next(iter(train_dl))

print(inputs.shape)

torch.Size([32, 1, 32, 128])


In [59]:
# Number of features of one flattened sample (product of all non-batch axes).
# A single batch is fetched: every next(iter(train_dl)) call builds a fresh
# iterator and loads another shuffled batch, so it is done only once.
sample_batch = next(iter(train_dl))[0]
input_size = sample_batch[0].numel()
model = Autoencoder(input_size, encoding_dim=128)
model = model.to(CONFIG["device"])
optimizer = optim.Adam(model.parameters(), lr=CONFIG["learning_rate"], weight_decay=1e-5)

In [60]:
# compute metrics
# Gather every training batch with the dataset temporarily switched to test
# mode, then stack them so statistics can be computed over all samples.
collected_batches = []
train_dataset.test_mode = True
try:
    for inputs, labels in train_dl:
        collected_batches.append(inputs)
finally:
    # Always hand the dataset back in training mode, even if loading failed.
    train_dataset.test_mode = False
inputs_cat = torch.cat(collected_batches)
# Merge the first two axes so each entry along dim 0 is one 2-D sample.
inputs_cat = inputs_cat.view(-1, inputs_cat.shape[2], inputs_cat.shape[3])
print(inputs_cat.shape)

torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size

In [61]:
# Compute the mean and std over all collected samples (dim 0), leaving one
# value per remaining position; the original layout note for the data is
# batch_size x channel x time x frequency.
mean = inputs_cat.mean(dim=0)
std = inputs_cat.std(dim=0)
for stat_tensor in (mean, std):
    print(stat_tensor.shape)

# Every split is given the statistics computed from the training data.
for split_dataset in (train_dataset, val_dataset, test_dataset):
    split_dataset.mean = mean
    split_dataset.std = std

torch.Size([32, 128])
torch.Size([32, 128])


In [62]:
# Authenticate with Weights & Biases before any run is started.
wandb.login()



True

In [63]:
# start a new wandb run to track this script
wandb.init(
    # set the wandb project where this run will be logged
    project="Challenge_2_AETimeFrames",
)
# save all the parameters from the CONFIG dict
# (the loss module and the device show up as strings in the printed config)
wandb.config.update(CONFIG)
print(wandb.config)

VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
roc_auc test,█▇▇▇▇▇▇▇▇▇▇▇▇▅▇▃▃▂▅▇▁▄▆▅▅▁▆▄▄▅▃▄▄▅▄▄▄▆▄▅
train_loss,█████████████▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_loss,▇████████████▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
roc_auc test,0.74243
train_loss,0.00807
val_loss,0.00788


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011113357544440481, max=1.0…

{'seed': 42, 'epochs': 500, 'num_classes': 2, 'learning_rate': 0.001, 'train_batch_size': 32, 'val_batch_size': 16, 'test_batch_size': 128, 'criterion': 'MSELoss()', 'device': 'cuda:0'}


In [64]:
training=True
# Number of features of one flattened sample. One batch is enough: each
# next(iter(train_dl)) call would build a new iterator and load another batch.
input_size = next(iter(train_dl))[0][0].numel()
# Best validation loss reached by each candidate, in sweep order.
measures = []
# testing emb space size
if training:
    for emb_space_size in [32, 64, 128, 256, 512]:
        # Fresh model and optimizer per candidate so runs do not share weights.
        model = Autoencoder(encoding_dim=emb_space_size, input_size=input_size)
        model = model.to(CONFIG["device"])
        optimizer = optim.Adam(model.parameters(), lr=CONFIG["learning_rate"])
        measures.append(train_model(model, train_dl, val_dl, test_dl, CONFIG["criterion"], optimizer, CONFIG["device"], epochs=50))

Epoch[1/50], Train loss:  0.6746
Epoch[1/50], Val loss:  0.6003
0.7428963795255931
Epoch[2/50], Train loss:  0.5692
Epoch[2/50], Val loss:  0.5675
0.7365334997919267
Epoch[3/50], Train loss:  0.5441
Epoch[3/50], Val loss:  0.5391
0.774173949230129
Epoch[4/50], Train loss:  0.5303
Epoch[4/50], Val loss:  0.5257
0.7635289221806075
Epoch[5/50], Train loss:  0.5164
Epoch[5/50], Val loss:  0.5053
0.7769912609238451
Epoch[6/50], Train loss:  0.5071
Epoch[6/50], Val loss:  0.4937
0.7730836454431961
Epoch[7/50], Train loss:  0.4946
Epoch[7/50], Val loss:  0.4853
0.7694506866416979
Epoch[8/50], Train loss:  0.4953
Epoch[8/50], Val loss:  0.4869
0.7779026217228464
Epoch[9/50], Train loss:  0.4941
Epoch[9/50], Val loss:  0.4874
0.7673574698293801
Epoch[10/50], Train loss:  0.4871
Epoch[10/50], Val loss:  0.4897
0.7904577611319183
Epoch[11/50], Train loss:  0.4819
Epoch[11/50], Val loss:  0.4732
0.7790262172284645
Epoch[12/50], Train loss:  0.4775
Epoch[12/50], Val loss:  0.4702
0.7735289221806076

In [65]:
# Report the sweep. `measures` holds the value returned by train_model for each
# candidate, i.e. its best validation loss, so it is labelled as such.
# Nothing is printed when the sweep produced no results.
if training and measures:
    emb_spaces=[32, 64, 128, 256, 512]
    for emb_space_size, measure in zip(emb_spaces, measures):
        print(f"Emb space size: {emb_space_size}, Best val loss: {measure}")
    print(f"Best emb space size: {emb_spaces[np.argmin(measures)]}")

Emb space size: 32, Train loss: 0.45140895048777263
Emb space size: 64, Train loss: 0.4439425359169642
Emb space size: 128, Train loss: 0.44581543107827504
Emb space size: 256, Train loss: 0.444801339507103
Emb space size: 512, Train loss: 0.44873113334178927
Best emb space size: 64


In [66]:
# take the best one and train it for more epochs
if training:
    emb_space_measures = [32, 64, 128, 256, 512]
    # Position of the candidate with the lowest recorded measure.
    best_idx = np.argmin(measures)
    model = Autoencoder(
        input_size=input_size,
        encoding_dim=emb_space_measures[best_idx],
    ).to(CONFIG["device"])
    optimizer = optim.Adam(model.parameters(), lr=CONFIG["learning_rate"])

In [67]:
training = True
if training:
    # Long run of the selected model, logged to wandb; step_size=50 is the
    # period of the learning-rate schedule inside train_model.
    train_model(
        model,
        train_dl,
        val_dl,
        test_dl,
        criterion=CONFIG["criterion"],
        optimizer=optimizer,
        device=CONFIG["device"],
        wandb=wandb,
        epochs=CONFIG["epochs"],
        step_size=50,
    )

Epoch[1/500], Train loss:  0.6842
Epoch[1/500], Val loss:  0.6048
0.7135830212234707
Epoch[2/500], Train loss:  0.5814
Epoch[2/500], Val loss:  0.5660
0.7516853932584271
Epoch[3/500], Train loss:  0.5628
Epoch[3/500], Val loss:  0.5384
0.763017062005826
Epoch[4/500], Train loss:  0.5421
Epoch[4/500], Val loss:  0.5310
0.7494298793175198
Epoch[5/500], Train loss:  0.5227
Epoch[5/500], Val loss:  0.5070
0.7659300873907617
Epoch[6/500], Train loss:  0.5153
Epoch[6/500], Val loss:  0.5094
0.7873325010403662
Epoch[7/500], Train loss:  0.5099
Epoch[7/500], Val loss:  0.4939
0.7687723678734915
Epoch[8/500], Train loss:  0.5014
Epoch[8/500], Val loss:  0.5528
0.7629588014981273
Epoch[9/500], Train loss:  0.4966
Epoch[9/500], Val loss:  0.4867
0.7868456096545984
Epoch[10/500], Train loss:  0.4931
Epoch[10/500], Val loss:  0.4875
0.774889721181856
Epoch[11/500], Train loss:  0.4860
Epoch[11/500], Val loss:  0.4859
0.792544735746983
Epoch[12/500], Train loss:  0.4783
Epoch[12/500], Val loss:  0.4