In [7]:
%reset -f
import torch
import numpy as np
import pandas as pd
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import roc_curve, auc
from torch.utils.data import random_split, DataLoader
from sklearn.model_selection import train_test_split
import wandb


from customDatasets.audioDataset import AudioDataset


In [8]:
# Free GPU memory: when CUDA is available, ask PyTorch to release the cached
# blocks it is holding but not currently using. On CPU-only machines this cell
# does nothing.
if torch.cuda.is_available():
    torch.cuda.empty_cache()

In [9]:
class Autoencoder(nn.Module):
    """Fully connected autoencoder over flattened inputs.

    The encoder maps ``input_size -> 1024 -> 512 -> encoding_dim`` and the
    decoder maps ``encoding_dim -> 1024 -> 512 -> input_size``. Every linear
    layer is followed by ``BatchNorm1d`` and ``LeakyReLU(0.2)``, except the
    last decoder layer, which is left linear.

    Args:
        input_size: Number of features per sample once flattened.
        encoding_dim: Width of the bottleneck layer.
    """

    def __init__(self, input_size, encoding_dim):
        super().__init__()

        # Encoder: every (fan_in, fan_out) pair becomes Linear -> BN -> LeakyReLU.
        encoder_widths = (input_size, 1024, 512, encoding_dim)
        encoder_layers = []
        for fan_in, fan_out in zip(encoder_widths[:-1], encoder_widths[1:]):
            encoder_layers.append(nn.Linear(fan_in, fan_out))
            encoder_layers.append(nn.BatchNorm1d(fan_out))
            encoder_layers.append(nn.LeakyReLU(0.2))
        self.encoder = nn.Sequential(*encoder_layers)

        # Decoder: two normalised hidden blocks, then a plain linear projection
        # back to the input width.
        decoder_layers = []
        for fan_in, fan_out in ((encoding_dim, 1024), (1024, 512)):
            decoder_layers.append(nn.Linear(fan_in, fan_out))
            decoder_layers.append(nn.BatchNorm1d(fan_out))
            decoder_layers.append(nn.LeakyReLU(0.2))
        decoder_layers.append(nn.Linear(512, input_size))
        self.decoder = nn.Sequential(*decoder_layers)

    def forward(self, x):
        """Flatten ``x`` to ``(batch, -1)`` and return its reconstruction."""
        flat = x.view(x.size(0), -1)
        return self.decoder(self.encoder(flat))

In [10]:
# Probe for choosing the mel-spectrogram parameters: load one training clip,
# turn it into a dB-scaled mel spectrogram and print the resulting shape.
from utils.audioUtils import AudioUtil
from torchaudio import transforms
import torch

audio_file = "./data/train/normal_id_00_00000000.wav"
aud = AudioUtil.open(audio_file)
sig, sr = aud

# Build both transforms up front, then apply them back to back.
mel = transforms.MelSpectrogram(sr, n_fft=1000, hop_length=501, n_mels=128)
ampl = transforms.AmplitudeToDB(top_db=80)
spec = ampl(mel(sig))

print(spec.shape)


torch.Size([1, 128, 320])


In [11]:
def train_model(model, train_dl, val_dl, test_dl, criterion, optimizer, device, wandb=None, epochs=5,step_size=5):
    """Train a reconstruction model and evaluate it after every epoch.

    Each epoch runs three phases:

    1. Training: every batch from ``train_dl`` is reconstructed and
       ``criterion`` is minimised against the input flattened to
       ``(batch, -1)``.
    2. Validation: every batch from ``val_dl`` is reshaped to
       ``(shape[0] * shape[1], -1)`` before being reconstructed, so its first
       two axes are merged into one batch axis.
    3. Test: batches from ``test_dl`` are indexed along axis 1. Each slice
       ``inputs[:, idx]`` is reconstructed, and its per-sample mean squared
       error is computed. A sample's anomaly score is the maximum error over
       its slices. ROC AUC is computed from these scores with label ``0`` as
       the positive class.

    Args:
        model: Module returning a flattened reconstruction of its input.
        train_dl: Iterable of ``(inputs, labels)`` training batches.
        val_dl: Iterable of ``(inputs, labels)`` validation batches.
        test_dl: Iterable of ``(inputs, labels)`` test batches. Inputs must be
            4-D, since they are indexed as ``inputs[:, idx, :, :]``.
        criterion: Reconstruction loss, called as ``criterion(output, target)``.
        optimizer: Optimizer bound to ``model``'s parameters.
        device: Device every input batch is moved to.
        wandb: Optional logger exposing ``log(dict)``; skipped when falsy.
        epochs: Number of epochs to run.
        step_size: Epoch period of the ``StepLR`` scheduler (gamma 0.5).

    Returns:
        The lowest average validation loss seen over all epochs
        (``np.inf`` when ``epochs`` is 0).
    """
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=0.5)
    best_val_loss = np.inf
    for epoch in range(epochs):
        # ---- training ----
        model.train()
        train_losses = []
        for inputs, _ in train_dl:
            inputs = inputs.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, inputs.view(inputs.size(0), -1))

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            train_losses.append(loss.item())
        lr_scheduler.step()
        train_loss = np.average(train_losses)
        print(f'Epoch[{epoch + 1}/{epochs}], Train loss: {train_loss: .4f}')

        # ---- validation and test share eval mode and no_grad ----
        model.eval()
        val_losses = []
        full_scores = []
        full_labels = []
        with torch.no_grad():
            for inputs, _ in val_dl:
                inputs = inputs.to(device)
                # Merge the first two axes so every slice is its own sample.
                inputs = inputs.view(inputs.shape[0] * inputs.shape[1], -1)
                outputs = model(inputs)
                val_losses.append(criterion(outputs, inputs).item())
            val_loss = np.average(val_losses)
            print(f'Epoch[{epoch + 1}/{epochs}], Val loss: {val_loss: .4f}')
            if val_loss < best_val_loss:
                best_val_loss = val_loss

            for inputs, labels in test_dl:
                # Use the `device` argument, not a notebook-level global.
                inputs = inputs.to(device)
                segment_errors = []
                # Take the slice count from the batch instead of assuming 10.
                for idx in range(inputs.shape[1]):
                    segment = inputs[:, idx, :, :]
                    outputs = model(segment)
                    target = segment.reshape(segment.size(0), -1)
                    mse = torch.sum((outputs - target) ** 2, dim=1, keepdim=True) / outputs.shape[1]
                    segment_errors.append(mse)

                # A sample is scored by its worst-reconstructed slice.
                scores = torch.max(torch.cat(segment_errors, dim=1), dim=1).values
                full_scores.append(scores)
                full_labels.append(labels)

        full_labels = torch.cat(full_labels)
        full_scores = torch.cat(full_scores)
        # Label 0 is the positive class: a higher reconstruction error should
        # rank a sample as more likely to carry label 0.
        fpr, tpr, _ = roc_curve(full_labels.cpu().numpy(), full_scores.cpu().numpy(), pos_label=0)
        roc_auc = auc(fpr, tpr)
        print(roc_auc)
        if wandb:
            wandb.log({"roc_auc test": roc_auc, "val_loss": val_loss, "train_loss": train_loss})
    return best_val_loss

In [12]:
def set_seed(seed = 42):
    """Seed every random number generator this notebook may draw from.

    Seeds Python's ``random`` module, NumPy's global generator, PyTorch's CPU
    generator and the generators of all CUDA devices.

    Args:
        seed: Integer seed applied to every generator.
    """
    # Local import: the notebook's import cell does not import `random`.
    import random

    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Covers every CUDA device, not only the current one; safe without CUDA.
    torch.cuda.manual_seed_all(seed)

# Run-wide hyperparameters; the whole dict is later uploaded to wandb.
CONFIG = {
    "seed": 42,
    "epochs": 500,
    "num_classes": 2,
    "learning_rate": 0.001,
    "train_batch_size": 32,
    "val_batch_size": 16,
    "test_batch_size": 128,
    "criterion": nn.MSELoss(),
    # Prefer CUDA, then Apple MPS, then fall back to CPU.
    "device":
        torch.device(
            "cuda:0" if torch.cuda.is_available()
            else "mps" if torch.backends.mps.is_available()
            else "cpu"
        )
}

set_seed(CONFIG['seed'])

data_path = "./data/train/"
data_path_test = "./data/test/"


meta_train_df = pd.read_csv("./data/train.csv")
meta_test_df = pd.read_csv("./data/test.csv")

train_df = meta_train_df[['filename', 'is_normal', 'machine_id']]
# 80/20 train/validation split over row positions, stratified by machine id.
# The seed is passed explicitly so the split does not depend on how much of
# the global NumPy RNG was consumed before this line.
range_train, range_test = train_test_split(
    range(len(train_df)),
    test_size=0.2,
    train_size=0.8,
    random_state=CONFIG["seed"],
    shuffle=True,
    stratify=train_df['machine_id'],
)
val_df = train_df.iloc[range_test].reset_index(drop=True)
train_df = train_df.iloc[range_train].reset_index(drop=True)
# Only the training dataset is augmented and built with split_sgram=True;
# validation and test datasets are built with test_mode=True.
train_dataset = AudioDataset(train_df, data_path,in_memory=True, sgram_type="mel", augment=True, split_sgram=True)
val_dataset = AudioDataset(val_df, data_path,in_memory=True, sgram_type="mel", augment=False, test_mode=True)
test_df = meta_test_df[['filename', 'is_normal', 'machine_id']]
test_dataset = AudioDataset(test_df, data_path_test, in_memory=True, sgram_type="mel", augment=False, test_mode=True)

train_ds = train_dataset
val_ds = val_dataset
test_ds = test_dataset

# Only the training loader shuffles; evaluation order stays fixed.
train_dl = DataLoader(train_ds, batch_size=CONFIG['train_batch_size'], shuffle=True)
val_dl = DataLoader(val_ds, batch_size=CONFIG['val_batch_size'], shuffle=False)
test_dl = DataLoader(test_ds, batch_size=CONFIG["test_batch_size"], shuffle=False)

In [13]:
# Sanity check: draw one batch from the training loader and print its shape.
# The recorded output below is [32, 1, 32, 128] for the default training mode.
inputs, labels = next(iter(train_dl))

print(inputs.shape)

torch.Size([32, 1, 32, 128])


In [14]:
# Flattened feature count of one training example (product of every non-batch
# dimension). A single batch is drawn once rather than loading three shuffled
# batches just to read one shape.
sample_batch, _ = next(iter(train_dl))
input_size = sample_batch[0].numel()
model = Autoencoder(input_size, encoding_dim=128)
model = model.to(CONFIG["device"])
optimizer = optim.Adam(model.parameters(), lr=CONFIG["learning_rate"], weight_decay=1e-5)

In [15]:
# compute metrics
# Collect every training batch with the dataset temporarily in test mode. The
# recorded output shows batches of shape [32, 10, 32, 128] in this mode.
inputs_cat = []
train_dataset.test_mode = True
try:
    for inputs, labels in train_dl:
        print(inputs.shape)
        inputs_cat.append(inputs)
finally:
    # Always put the dataset back in training mode, even if loading fails.
    train_dataset.test_mode = False
inputs_cat = torch.cat(inputs_cat)
# Merge the first two axes so each slice becomes its own row.
inputs_cat = inputs_cat.view(-1, inputs_cat.shape[2], inputs_cat.shape[3])
print(inputs_cat.shape)

torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size

In [16]:
# Per-position mean and standard deviation over all collected slices: reducing
# over dim 0 leaves one value per (time, frequency) cell.
mean = inputs_cat.mean(dim=0)
std = inputs_cat.std(dim=0)
print(mean.shape)
print(std.shape)
# Give the same training-set statistics to all three datasets.
for dataset in (train_dataset, val_dataset, test_dataset):
    dataset.mean = mean
    dataset.std = std

torch.Size([32, 128])
torch.Size([32, 128])


In [17]:
# compute metrics
# Second pass over the training data, run after mean/std were assigned to the
# datasets (presumably the batches now reflect them; confirm in AudioDataset).
# The dataset is temporarily switched to test mode while collecting.
inputs_cat = []
train_dataset.test_mode = True
try:
    for inputs, labels in train_dl:
        print(inputs.shape)
        inputs_cat.append(inputs)
finally:
    # Always put the dataset back in training mode, even if loading fails.
    train_dataset.test_mode = False
inputs_cat = torch.cat(inputs_cat)
# Merge the first two axes so each slice becomes its own row.
inputs_cat = inputs_cat.view(-1, inputs_cat.shape[2], inputs_cat.shape[3])
print(inputs_cat.shape)

torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size([32, 10, 32, 128])
torch.Size

In [18]:
# Per-position minimum and maximum over all collected slices: reducing over
# dim 0 leaves one value per (time, frequency) cell.
# The results are named min_vals / max_vals so the builtins `min` and `max`
# stay usable in the rest of the notebook.
min_vals = torch.min(inputs_cat, dim=0).values
max_vals = torch.max(inputs_cat, dim=0).values
print(max_vals.shape)
print(min_vals.shape)
# Give the same training-set bounds to all three datasets.
for dataset in (train_dataset, val_dataset, test_dataset):
    dataset.min = min_vals
    dataset.max = max_vals

torch.Size([32, 128])
torch.Size([32, 128])


In [19]:
# Authenticate this session with Weights & Biases. The recorded output shows an
# already logged-in account and a return value of True.
wandb.login()

[34m[1mwandb[0m: Currently logged in as: [33mmicheleferrero9[0m ([33mai-ml-monitor[0m). Use [1m`wandb login --relogin`[0m to force relogin


True

In [21]:
# Open a new wandb run in the project that tracks this notebook.
wandb.init(project="Challenge_2_AETimeFrames")

# Attach every CONFIG entry to the run, then echo what was stored.
wandb.config.update(CONFIG)
print(wandb.config)

{'seed': 42, 'epochs': 500, 'num_classes': 2, 'learning_rate': 0.001, 'train_batch_size': 32, 'val_batch_size': 16, 'test_batch_size': 128, 'criterion': 'MSELoss()', 'device': 'cuda:0'}


In [None]:
training=True
# Flattened feature count of one training example, read from a single batch.
sample_batch, _ = next(iter(train_dl))
input_size = sample_batch[0].numel()
# Bottleneck widths to compare; each is trained from scratch for 100 epochs.
emb_space_sizes = [32, 64, 128, 256, 512]
measures = []
# testing emb space size
if training:
    for emb_space_size in emb_space_sizes:
        model = Autoencoder(encoding_dim=emb_space_size, input_size=input_size)
        model = model.to(CONFIG["device"])
        optimizer = optim.Adam(model.parameters(), lr=CONFIG["learning_rate"])
        # train_model returns one scalar: the best validation loss of the run.
        measures.append(train_model(model, train_dl, val_dl, test_dl, CONFIG["criterion"], optimizer, CONFIG["device"], epochs=100))
# One summary line per width. `measures` holds scalars, so there is no train
# loss or ROC AUC entry to index into.
for (measure, emb_space_size) in zip(measures, emb_space_sizes):
    print({"emb_space_size": emb_space_size, "val_loss": measure})

Epoch[1/100], Train loss:  0.0407
Epoch[1/100], Val loss:  0.0119
0.763491468997087
Epoch[2/100], Train loss:  0.0120
Epoch[2/100], Val loss:  0.0168
0.648635039533916
Epoch[3/100], Train loss:  0.0121
Epoch[3/100], Val loss:  0.0109
0.7445443196004995
Epoch[4/100], Train loss:  0.0111
Epoch[4/100], Val loss:  0.0106
0.7336662505201831
Epoch[5/100], Train loss:  0.0110
Epoch[5/100], Val loss:  0.0100
0.7836745734498545


In [None]:
# take the best one and train it for more epochs
if training:
    emb_space_measures=[32, 64, 128, 256, 512]
    # `measures` holds one best validation loss per width, so the best width is
    # the one with the smallest value.
    best_emb_space_size = emb_space_measures[int(np.argmin(measures))]
    model = Autoencoder(encoding_dim=best_emb_space_size, input_size=input_size)
    model = model.to(CONFIG["device"])
    # The new model needs its own optimizer; reusing the sweep's optimizer would
    # keep updating the previous model's parameters.
    optimizer = optim.Adam(model.parameters(), lr=CONFIG["learning_rate"])

In [None]:
# Long final run: CONFIG["epochs"] epochs, learning rate halved every 50
# epochs, metrics logged to wandb.
training = True
if training:
    train_model(
        model,
        train_dl,
        val_dl,
        test_dl,
        CONFIG["criterion"],
        optimizer,
        CONFIG["device"],
        wandb=wandb,
        epochs=CONFIG["epochs"],
        step_size=50,
    )