In [1]:
import torch
from torch import nn
from torch.optim import Adam
import torchaudio
import torchvision
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from skimage.util import img_as_ubyte
import pandas as pd
import os
import glob
import numpy as np
import librosa
import matplotlib.pyplot as plt
import sys
import io
from sklearn.metrics import roc_auc_score

In [2]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device

device(type='cpu')

In [3]:
from google.colab import drive
drive.mount('/content/drive/')

Mounted at /content/drive/


In [4]:
class MimiiDataset(Dataset):
    def __init__(self,audio_dir, n_fft = 1024, win_length = 1024,
                 hop_length = 512,power = 2,n_mels = 128,pad_mode = 'reflect',
                 sr = 16000,center = True,norm = None):
      
        super(MimiiDataset, self).__init__()
        self.audio_dir = audio_dir
        self.n_mels = n_mels
        self.n_fft = n_fft
        self.win_length = win_length
        self.hop_length = hop_length
        self.power = power
        self.pad_mode = pad_mode
        self.sr = sr
        self.center = center
        self.norm = norm

    def get_files(self):
       return self.train_files, self.test_files
    
    def get_data(self,device, id):
        
        self.train_files, self.train_labels = self._train_file_list(device, id)
        self.test_files, self.test_labels = self._test_file_list(device, id)
        
        self.train_data = self.get_audios(self.train_files)
        self.test_data = self.get_audios(self.test_files)
        
        return self.train_data, self.test_data, self.train_labels, self.test_labels
    
    def _train_file_list(self, device, id):
        query = os.path.abspath(
            f"{self.audio_dir}/{device}/train/normal_id_0{id}*.wav"
        )
        train_normal_files = sorted(glob.glob(query))
        train_normal_labels = np.zeros(len(train_normal_files))
        
        query = os.path.abspath(
                f"{self.audio_dir}/{device}/train/anomaly_id_0{id}*.wav"
            )
        train_anomaly_files = sorted(glob.glob(query))
        train_anomaly_labels = np.ones(len(train_anomaly_files))
        
        train_file_list = np.concatenate((train_normal_files, train_anomaly_files), axis=0)
        train_labels = np.concatenate((train_normal_labels, train_anomaly_labels), axis=0)
        
        return train_file_list, train_labels
    
    def _test_file_list(self, device, id):     
        query = os.path.abspath(
            f"{self.audio_dir}/{device}/test/normal_id_0{id}*.wav"
            )
        test_normal_files = sorted(glob.glob(query))
        test_normal_labels = np.zeros(len(test_normal_files))
        
        query = os.path.abspath(
            f"{self.audio_dir}/{device}/test/anomaly_id_0{id}*.wav"
            )
        test_anomaly_files = sorted(glob.glob(query))
        test_anomaly_labels = np.ones(len(test_anomaly_files))
        
        test_file_list = np.concatenate((test_normal_files, 
                                          test_anomaly_files), axis=0)
        test_labels = np.concatenate((test_normal_labels,
                                      test_anomaly_labels), axis=0)
          
        return test_file_list, test_labels

    def normalize(self,tensor):
        tensor_minusmean = tensor - tensor.mean()
        return tensor_minusmean/np.absolute(tensor_minusmean).max()

    def make0min(self,tensornd):
        tensor = tensornd.numpy()
        res = np.where(tensor == 0, 1E-19 , tensor)
        return torch.from_numpy(res)

    def spectrogrameToImage(self,specgram):
        # specgram = torchaudio.transforms.MelSpectrogram(n_fft=1024, win_length=1024, 
        #                                                 hop_length=512, power=2, 
        #                                                 normalized=True, n_mels=128)(waveform )
        specgram= self.make0min(specgram)
        specgram = specgram.log2()[0,:,:].numpy()
        
        tr2image = transforms.Compose([transforms.ToPILImage()])

        specgram= self.normalize(specgram)
        # specgram = img_as_ubyte(specgram)
        specgramImage = tr2image(specgram)
        return specgramImage

    def get_logmelspectrogram(self, waveform):
        melspec = librosa.feature.melspectrogram(
          n_fft=self.n_fft, win_length=self.win_length, hop_length=self.hop_length,
          power=self.power,n_mels=self.n_mels,pad_mode=self.pad_mode,sr=self.sr,
          center=self.center,norm=self.norm,htk=True,
          y = waveform.numpy()
        )

        logmelspec = librosa.power_to_db(melspec)

        return logmelspec

    def get_melspectrogram(self,waveform):
        melspec = librosa.feature.melspectrogram(
            n_fft=self.n_fft, win_length=self.win_length, hop_length=self.hop_length,
            power=self.power,n_mels=self.n_mels,pad_mode=self.pad_mode,sr=self.sr,
            center=self.center,norm=self.norm,htk=True,
            y = waveform.numpy()
        )

        return melspec
    
    def get_mfcc(self,waveform):
        mfcc = librosa.feature.mfcc(    
            n_fft=self.n_fft, win_length=self.win_length, 
            hop_length=self.hop_length,pad_mode=self.pad_mode,sr=self.sr,
            center=self.center,norm=self.norm,n_mfcc=40,
            y = waveform.numpy()
        )

        return mfcc

    def get_chroma_stft(self,waveform):
        stft = librosa.feature.chroma_stft(
            n_fft=self.n_fft, win_length=self.win_length, 
            hop_length=self.hop_length,pad_mode=self.pad_mode,sr=self.sr,
            center=self.center,norm=self.norm,n_chroma=12,
            y=waveform.numpy()
        )

        return stft

    def get_spectral_contrast(self,waveform):
        spec_contrast = librosa.feature.spectral_contrast(    
            n_fft=self.n_fft, win_length=self.win_length,center=self.center,
            hop_length=self.hop_length,pad_mode=self.pad_mode,sr=self.sr,
            y = waveform.numpy()
        )

        return spec_contrast
    
    def get_tonnetz(self,waveform):
        harmonic = librosa.effects.harmonic(waveform.numpy())
        tonnetz = librosa.feature.tonnetz(y=harmonic,sr=self.sr)

        return tonnetz

    def get_audios(self, file_list):
        data = []
        for i in range(len(file_list)):
          y, sr = torchaudio.load(file_list[i])  
          data.append(y)

        return data
    def _derive_data(self, file_list):
        train_data = []
        test_data = []
        train_mode = True
        for file_list in [self.train_files, self.test_files]:
          tr2tensor = transforms.Compose([transforms.PILToTensor()])
          data = []
          for j in range(len(file_list)):
            y, sr = torchaudio.load(file_list[j])  
            spec = self.get_melspectrogram(y)
            spec = self.spectrogrameToImage(spec)
            spec = spec.convert('RGB')
            vectors = tr2tensor(spec)
            if train_mode:     
              train_data.append(vectors)
            else:
              test_data.append(vectors)
            
          train_mode = False
                
        return data

In [5]:
dataset = MimiiDataset('/content/drive/MyDrive/mimii')

In [51]:
y_train, y_test = torch.load('/content/drive/MyDrive/labels/y_tr_fan6.pt'), torch.load('/content/drive/MyDrive/labels/y_ts_fan6.pt')

In [43]:
train_mf, test_mf = torch.load('/content/drive/MyDrive/mixed_features/train_mf_fan6.pt'), torch.load('/content/drive/MyDrive/mixed_features/test_mf_fan6.pt')

In [44]:
train_data = DataLoader(train_mf, batch_size=32, shuffle = True)
test_data = DataLoader(test_mf, batch_size = 32, shuffle= False)

In [25]:
class UNet_FC(nn.Module):

  def __init__(self, in_features):
    super().__init__()
    
    self.bn = nn.BatchNorm1d(128)
    self.relu = nn.ReLU()

    self.fc0 = nn.Linear(in_features=in_features,out_features=in_features)

    # encoder
    self.fc1 = nn.Linear(in_features=in_features, out_features=128)
    self.fc2 = nn.Linear(in_features=128,out_features=128)
    self.fc3 = nn.Linear(in_features=128, out_features=128)
    self.fc4 = nn.Linear(in_features=128, out_features=128)
    self.fc5 = nn.Linear(in_features=128, out_features=8)

    # decoder
    self.fc6 = nn.Linear(in_features=8, out_features=128)
    self.fc7 = nn.Linear(in_features=128*2, out_features=128)
    self.fc8 = nn.Linear(in_features=128*2, out_features=128)
    self.fc9 = nn.Linear(in_features=128*2, out_features=128)

    self.out = nn.Linear(in_features=128*2, out_features=in_features)

  def encoder(self, x):
    input = self.fc0(x)

    x1 = self.relu(self.bn(self.fc1(input)))
    x2 = self.relu(self.bn(self.fc2(x1)))
    x3 = self.relu(self.bn(self.fc3(x2)))
    x4 = self.relu(self.bn(self.fc4(x3)))
    x5 = self.relu(self.fc5(x4))

    return [x5, x4, x3, x2, x1]

  def decoder(self, x):
    x6 = self.relu(self.fc6(x[0]))
    con1 = torch.cat((x6,x[1]), 1) 
    x7 = self.relu(self.bn(self.fc7(con1)))
    con2 = torch.cat((x7,x[2]), 1)
    x8 = self.relu(self.bn(self.fc8(con2)))
    con3 = torch.cat((x8,x[3]), 1)
    x9 = self.relu(self.bn(self.fc9(con3)))
    con4 = torch.cat((x9,x[4]), 1)

    x10 = self.out(con4)

    return x10
  
  def forward(self, x):
    # encoded = self.encoder(x)

    # decoded = self.decoder(encoded)
    input = self.fc0(x)

    x1 = self.relu(self.bn(self.fc1(input)))
    x2 = self.relu(self.bn(self.fc2(x1)))
    x3 = self.relu(self.bn(self.fc3(x2)))
    x4 = self.relu(self.bn(self.fc4(x3)))
    x5 = self.relu(self.fc5(x4))

    xy = [x5, x4, x3, x2, x1]

    x6 = self.relu(self.fc6(xy[0]))
    con1 = torch.cat((x6,xy[1]), 1) 
    x7 = self.relu(self.bn(self.fc7(con1)))
    con2 = torch.cat((x7,xy[2]), 1)
    x8 = self.relu(self.bn(self.fc8(con2)))
    con3 = torch.cat((x8,xy[3]), 1)
    x9 = self.relu(self.bn(self.fc9(con3)))
    con4 = torch.cat((x9,xy[4]), 1)

    x10 = self.out(con4)

    # return decoded
    return x10

In [45]:
def train(model, optimizer, criterion, data_tr, data_val, scheduler = None,
          epochs = 3000, device = 'cpu'):
    # X_val, Y_val = next(iter(data_val))
    losses = []
    prev_avg_loss = 100000
    for epoch in range(epochs):
        train_avg_loss = 0
        test_avg_loss = 0
        # model.train()  # train mode
        for batch in data_tr:
          # data to device
          batch = batch.to(device)
          # set parameter gradients to zero
          optimizer.zero_grad()
          # forward
          # print(Y_batch.shape)
          predictions = model(batch)
          loss = criterion(predictions, batch)
          loss.backward() # backward-pass
          optimizer.step()  # update weights
          # calculate loss to show the user
          if scheduler:
            scheduler.step(loss)
          train_avg_loss += loss / len(data_tr)

        # model.eval()
        for batch in data_val:
          with torch.no_grad():
            preds = model(batch.to(device)).cpu()
            loss = criterion(preds,batch)
            test_avg_loss += loss / len(data_val)
                    
        losses.append(train_avg_loss.item())
        if (epoch+1)%100 == 0:
          print("{}/{} train_loss: {} test_loss:{}".format(epoch+1, epochs, train_avg_loss, test_avg_loss))
        # if test_avg_loss < 70:
        #   break
    return losses

In [48]:
unet = UNet_FC(in_features=193).to(device)
optimizer = Adam(params = unet.parameters(), lr = 10e-3)
# optimizer = Adam(params = unet.parameters())
criterion = nn.MSELoss()
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.5, 
                                                       min_lr=10e-4, mode = 'min',
                                                       patience = 30)

In [49]:
losses= train(model = unet, optimizer = optimizer, criterion=criterion, data_tr=train_data,
               data_val = test_data, scheduler = scheduler, device = device)

100/3000 train_loss: 393.33758544921875 test_loss:866.8572998046875
200/3000 train_loss: 230.05113220214844 test_loss:644.7105712890625
300/3000 train_loss: 154.05084228515625 test_loss:548.1653442382812
400/3000 train_loss: 129.13450622558594 test_loss:497.3250732421875
500/3000 train_loss: 94.10118865966797 test_loss:440.3699035644531
600/3000 train_loss: 83.0447769165039 test_loss:436.1976318359375
700/3000 train_loss: 72.84921264648438 test_loss:426.2709655761719
800/3000 train_loss: 69.89957427978516 test_loss:422.68292236328125
900/3000 train_loss: 56.385101318359375 test_loss:395.4942626953125
1000/3000 train_loss: 53.32418441772461 test_loss:391.0612487792969
1100/3000 train_loss: 56.231163024902344 test_loss:382.98199462890625
1200/3000 train_loss: 47.891075134277344 test_loss:369.5806579589844
1300/3000 train_loss: 46.3026008605957 test_loss:392.6584777832031
1400/3000 train_loss: 41.783287048339844 test_loss:379.3235168457031
1500/3000 train_loss: 45.735923767089844 test_los

In [52]:
avg_loss = 0
# unet.eval()
# unet.train()
preds = []
i = 0
test_anomaly_losses = []
test_normal_losses = []
test_losses = []
# test_real = y_test_fan0.tolist()
# y_test_fan0 = y_test_fan0.tolist()
for batch in test_data:
  with torch.no_grad():
    # unet.train()
    predictions = unet(batch.to(device)).cpu()
    preds.append(predictions)
  loss = criterion(predictions, batch.cpu())
  for j in range(len(predictions)):
    if int(y_test[i]) == 1:
      test_anomaly_losses.append(float(criterion(predictions[j], batch[j])))
    else:
      test_normal_losses.append(float(criterion(predictions[j], batch[j])))
    i += 1
    test_losses.append(criterion(predictions[j], batch[j]))
  # print(loss)
  # print(loss)
  avg_loss += loss / len(test_data)
# avg_loss

avg_loss

tensor(355.1450)

In [53]:
sum(test_anomaly_losses)/len(test_anomaly_losses) , sum(test_normal_losses)/len(test_normal_losses)

(429.3116823867417, 39.278732738494874)

In [54]:
# torch.save(unet, "unet_fan2_2")

In [55]:
results = []
vals = np.arange(50, 650, 0.1).tolist()
for threshold in vals:
  preds = []
  for j in range(len(test_losses)):
    if test_losses[j] > threshold:
      preds.append(1)
    else:
      preds.append(0)
  
  results.append(roc_auc_score(y_test,preds))

print(max(results))

0.9794598337950139
