In [1]:
%reset -f
import torch
import numpy as np
import pandas as pd
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import roc_curve, auc
from torch.utils.data import random_split, DataLoader
from sklearn.model_selection import train_test_split

from customDatasets.audioDataset import AudioDataset
from torchviz import make_dot
import hiddenlayer as hl

In [2]:
# Release cached, currently unused CUDA allocator memory (e.g. left over from an
# earlier run in this kernel). Skipped entirely when no CUDA device is available.
if torch.cuda.is_available():
    torch.cuda.empty_cache()

In [None]:
def set_seed(seed = 42):
    """Seed every random number generator this notebook can draw from.

    Args:
        seed: Integer seed applied to Python's ``random`` module, NumPy's
            legacy global generator, and PyTorch (CPU and all CUDA devices).
    """
    # Local import: the notebook's import cell does not bring in ``random``.
    import random

    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # ``manual_seed_all`` covers every visible GPU rather than only the current
    # one, and is silently ignored when CUDA is unavailable.
    torch.cuda.manual_seed_all(seed)

# Pick the compute backend in order of preference: first CUDA GPU, then Apple
# MPS, then CPU.
if torch.cuda.is_available():
    _device_name = "cuda:0"
elif torch.backends.mps.is_available():
    _device_name = "mps"
else:
    _device_name = "cpu"

# Run-wide settings read by the cells below.
CONFIG = dict(
    seed=42,
    epochs=20,
    num_classes=2,
    # NOTE(review): no cell visible in this notebook reads "learning_rate";
    # the optimizer cell hard-codes its own value — confirm which is intended.
    learning_rate=0.01,
    train_batch_size=32,
    val_batch_size=32,
    test_batch_size=128,
    criterion=nn.MSELoss(),   # loss between the model output and the clean input
    noise=torch.randn_like,   # callable producing standard-normal noise shaped like its argument
    device=torch.device(_device_name),
)

print(CONFIG["device"])
set_seed(CONFIG['seed'])

# Locations of the audio files for the train and test splits.
data_path = "./data/train/"
data_path_test = "./data/test/"

# Per-file metadata tables.
meta_train_df = pd.read_csv("./data/train.csv")
meta_test_df = pd.read_csv("./data/test.csv")

train_df = meta_train_df[['filename', 'is_normal', 'machine_id']]

# 80/20 train/validation split over row positions, stratified by machine id.
# The seed is passed explicitly so the split does not depend on the state of
# NumPy's global generator at the moment this cell happens to run.
range_train, range_test = train_test_split(
    range(len(train_df)),
    test_size=0.2,
    train_size=0.8,
    random_state=CONFIG['seed'],
    shuffle=True,
    stratify=meta_train_df['machine_id'],
)
val_df = train_df.iloc[range_test].reset_index(drop=True)
train_df = train_df.iloc[range_train].reset_index(drop=True)

# Training data is augmented and split (split_sgram=True); validation and test
# data are loaded in test_mode without augmentation.
train_dataset = AudioDataset(train_df, data_path, sgram_type="mel", augment=True, split_sgram=True, in_memory=True)
val_dataset = AudioDataset(val_df, data_path, sgram_type="mel", augment=False, test_mode=True, in_memory=True)
test_df = meta_test_df[['filename', 'is_normal', 'machine_id']]
test_dataset = AudioDataset(test_df, data_path_test, sgram_type="mel", augment=False, test_mode=True, in_memory=True)

# Short aliases kept for cells that refer to the datasets by these names.
train_ds = train_dataset
val_ds = val_dataset
test_ds = test_dataset

train_dl = DataLoader(train_ds, batch_size=CONFIG['train_batch_size'], shuffle=True)
val_dl = DataLoader(val_ds, batch_size=CONFIG['val_batch_size'], shuffle=False)
test_dl = DataLoader(test_ds, batch_size=CONFIG["test_batch_size"], shuffle=False)

# Draw ONE batch to read the per-sample shape; building a fresh shuffled iterator
# for every dimension would load (and augment) three separate batches.
sample_batch = next(iter(train_dl))[0]
input_size = sample_batch.shape[1] * sample_batch.shape[2] * sample_batch.shape[3]
print(input_size)

cuda:0


In [None]:
# Stack the inputs of every batch yielded by the training loader into a single
# tensor along dim 0 (labels are discarded).
# NOTE(review): train_dl wraps a dataset built with augment=True, so these are
# presumably augmented samples — confirm that is what the statistics should use.
inputs_cat = torch.cat([batch_inputs for batch_inputs, _ in train_dl], dim=0)
print(inputs_cat.shape)

In [None]:
# Mean and standard deviation taken over dim 0 (the sample axis) of the stacked
# training inputs, i.e. one value per remaining position
# (presumably channel x time x frequency — TODO confirm axis order).
mean = inputs_cat.mean(dim=0)
std = inputs_cat.std(dim=0)
print(mean.shape)
print(std.shape)

# Attach the training-set statistics to all three datasets.
# presumably AudioDataset uses them to normalise samples — verify in customDatasets.audioDataset
for _dataset in (train_dataset, val_dataset, test_dataset):
    _dataset.mean = mean
    _dataset.std = std

In [None]:
torch.cuda.empty_cache()
from diffusers import UNet2DModel


def _print_cuda_allocated():
    """Print the memory currently allocated by tensors on CUDA device 0, in GB."""
    allocated_gb = torch.cuda.memory_allocated(0) / 1024 / 1024 / 1024
    print("torch.cuda.memory_allocated: %fGB" % allocated_gb)


_print_cuda_allocated()

# Encoder path: regular ResNet downsampling blocks, with spatial self-attention
# in the fifth block only.
_down_blocks = (
    "DownBlock2D",
    "DownBlock2D",
    "DownBlock2D",
    "DownBlock2D",
    "AttnDownBlock2D",
    "DownBlock2D",
)
# Decoder path: regular ResNet upsampling blocks, with spatial self-attention
# in the second block only.
_up_blocks = (
    "UpBlock2D",
    "AttnUpBlock2D",
    "UpBlock2D",
    "UpBlock2D",
    "UpBlock2D",
    "UpBlock2D",
)

model = UNet2DModel(
    in_channels=1,
    out_channels=1,
    # Target image resolution, read from dims 1 and 2 of the training mean tensor.
    sample_size=(mean.shape[1], mean.shape[2]),
    # Number of output channels for each UNet block.
    block_out_channels=(128, 128, 256, 256, 512, 512),
    down_block_types=_down_blocks,
    up_block_types=_up_blocks,
)
model.to(CONFIG['device'])
_print_cuda_allocated()

In [None]:
# Adam over all model parameters.
# NOTE(review): the step size is hard-coded here and CONFIG["learning_rate"]
# (0.01) is not used — confirm which value is intended before unifying them.
optimizer = optim.Adam(params=model.parameters(), lr=1e-4)

In [None]:
# Denoising-reconstruction training. Every sample is corrupted with Gaussian
# noise scaled by a random level t drawn uniformly from [0, 1); the UNet,
# conditioned on t, is trained with CONFIG['criterion'] to reproduce the clean
# input. After each epoch the reconstruction error on test_dl serves as an
# anomaly score and is summarised as ROC AUC.


def _add_scaled_noise(clean, t):
    """Return ``clean`` plus noise from CONFIG['noise'], scaled per sample.

    Args:
        clean: Tensor whose first dimension indexes samples.
        t: 1-D tensor holding one noise scale per sample.
    """
    # Reshape t to (batch, 1, ..., 1) so it broadcasts over all non-batch dims.
    scale = t.view(-1, *([1] * (clean.dim() - 1)))
    return clean + scale * CONFIG['noise'](clean)


def _train_one_epoch(epoch):
    """Run one optimisation pass over ``train_dl``, logging the loss every 10 steps."""
    model.train()
    for step, (x, _) in enumerate(train_dl):
        x = x.to(CONFIG['device'])
        t = torch.rand(x.shape[0], device=CONFIG['device'])
        denoised_x = model(_add_scaled_noise(x, t), t)
        loss = CONFIG['criterion'](denoised_x.sample, x)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if step % 10 == 0:
            print(f"Epoch {epoch}, iter {step}, loss: {loss.item()}")


def _score_loader(loader):
    """Compute one anomaly score per item of ``loader``.

    Each batch is treated as (batch, segments, height, width). Every segment is
    noised, reconstructed, and scored by its squared reconstruction error; the
    item's score is the maximum over its segments.

    Returns:
        (labels, scores): two CPU tensors concatenated over all batches.
    """
    model.eval()
    collected_labels = []
    collected_scores = []
    with torch.no_grad():
        for inputs, labels in loader:
            # Insert a channel axis: (batch, segments, 1, height, width).
            inputs = inputs.unsqueeze(2).to(CONFIG['device'])
            segment_scores = []
            for idx in range(inputs.shape[1]):
                segment = inputs[:, idx]
                t = torch.rand(segment.shape[0], device=CONFIG['device'])
                # Noise only the segment being scored rather than every segment.
                recon = model(_add_scaled_noise(segment, t), t).sample
                # Sum of squared errors divided by the channel count (not the
                # element count); the constant factor does not affect ROC ranking.
                sq_err = (recon - segment).flatten(1).pow(2).sum(dim=1, keepdim=True) / recon.shape[1]
                segment_scores.append(sq_err)
            per_item = torch.cat(segment_scores, dim=1).max(dim=1).values
            collected_scores.append(per_item.cpu())
            collected_labels.append(labels.cpu())
    return torch.cat(collected_labels), torch.cat(collected_scores)


# Build the scheduler once, outside the epoch loop, and step it once per epoch.
# Re-creating it every epoch without calling step() never changes the learning rate.
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=4, gamma=0.1)

for epoch in range(CONFIG['epochs']):
    _train_one_epoch(epoch)
    lr_scheduler.step()

    # NOTE(review): progress is monitored on test_dl while val_dl is unused;
    # consider monitoring on val_dl and keeping test_dl for the final report.
    full_labels, full_scores = _score_loader(test_dl)
    # Label 0 is the positive class, so a higher score should indicate label 0
    # (presumably is_normal == 0 marks anomalous clips — confirm).
    fpr, tpr, _ = roc_curve(full_labels.numpy(), full_scores.numpy(), pos_label=0)
    roc_auc = auc(fpr, tpr)
    print(roc_auc)
        