In [1]:
import torch
from torch import nn
from torch.optim import Adam
import torchaudio
import torchvision
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from skimage.util import img_as_ubyte
import pandas as pd
import os
import glob
import numpy as np
import librosa
import matplotlib.pyplot as plt
import sys
import io
from sklearn.metrics import roc_auc_score

In [3]:
# Colab-only step: mount Google Drive at /content/drive/ so that paths under it can be read and written.
from google.colab import drive
drive.mount('/content/drive/')

Mounted at /content/drive/


# Example: generating the mixed feature vectors for a single machine

In [4]:
class MimiiDataset(Dataset):
    """Locate, load and featurise machine-sound recordings stored as WAV files.

    Recordings are looked up with the glob pattern
    ``<audio_dir>/<device>/<train|test>/<normal|anomaly>_id_0<id>*.wav``.
    Files matching ``normal`` are labelled 0 and files matching ``anomaly``
    are labelled 1.

    The class derives from ``torch.utils.data.Dataset`` but does not define
    ``__len__`` / ``__getitem__``; it is used as a loader and feature helper.

    Args:
        audio_dir: Root directory containing one sub-directory per device.
        n_fft, win_length, hop_length, pad_mode, center: STFT settings
            forwarded to the librosa feature extractors.
        power: Forwarded as ``power=`` to ``librosa.feature.melspectrogram``.
        n_mels: Forwarded as ``n_mels=`` to ``librosa.feature.melspectrogram``.
        sr: Sampling rate reported to librosa. Audio is not resampled when it
            is loaded, so this should match the rate of the files.
        norm: Forwarded unchanged as ``norm=`` to the mel, MFCC and chroma
            extractors.
    """

    def __init__(self, audio_dir, n_fft=1024, win_length=1024,
                 hop_length=512, power=2, n_mels=128, pad_mode='reflect',
                 sr=16000, center=True, norm=None):
        super().__init__()
        self.audio_dir = audio_dir
        self.n_mels = n_mels
        self.n_fft = n_fft
        self.win_length = win_length
        self.hop_length = hop_length
        self.power = power
        self.pad_mode = pad_mode
        self.sr = sr
        self.center = center
        self.norm = norm

    def get_files(self):
        """Return ``(train_files, test_files)`` found by the last ``get_data`` call.

        Raises:
            AttributeError: if ``get_data`` has not been called yet.
        """
        return self.train_files, self.test_files

    def get_data(self, device, id):
        """Find and decode every train/test recording of one machine.

        The file lists, labels and decoded waveforms are also stored on the
        instance (``train_files``, ``train_labels``, ``train_data`` and the
        ``test_*`` counterparts).

        Args:
            device: Name of the device sub-directory.
            id: Machine id; it is inserted after ``id_0`` in the file pattern.

        Returns:
            ``(train_data, test_data, train_labels, test_labels)`` where the
            data entries are lists of waveforms and the labels are float
            arrays of 0 (normal) / 1 (anomaly).
        """
        self.train_files, self.train_labels = self._train_file_list(device, id)
        self.test_files, self.test_labels = self._test_file_list(device, id)

        self.train_data = self.get_audios(self.train_files)
        self.test_data = self.get_audios(self.test_files)

        return self.train_data, self.test_data, self.train_labels, self.test_labels

    def _file_list(self, device, id, split):
        """Return ``(files, labels)`` for one split (``"train"`` or ``"test"``).

        Normal files come first (label 0), anomaly files second (label 1);
        each group is sorted by path.
        """
        def find(condition):
            query = os.path.abspath(
                f"{self.audio_dir}/{device}/{split}/{condition}_id_0{id}*.wav"
            )
            return sorted(glob.glob(query))

        normal_files = find("normal")
        anomaly_files = find("anomaly")

        # Join the Python lists before converting: concatenating a string
        # array with an empty (float) array would depend on number-to-string
        # dtype promotion whenever one of the two groups has no files.
        files = np.array(normal_files + anomaly_files)
        labels = np.concatenate(
            (np.zeros(len(normal_files)), np.ones(len(anomaly_files))), axis=0
        )
        return files, labels

    def _train_file_list(self, device, id):
        """Return ``(files, labels)`` for the ``train`` directory of a machine."""
        return self._file_list(device, id, "train")

    def _test_file_list(self, device, id):
        """Return ``(files, labels)`` for the ``test`` directory of a machine."""
        return self._file_list(device, id, "test")

    def normalize(self, tensor):
        """Centre ``tensor`` on its mean and scale it by its peak absolute value.

        A constant input has a peak of 0 after centring; the centred (all
        zero) values are returned in that case instead of dividing by zero.
        """
        tensor_minusmean = tensor - tensor.mean()
        peak = np.absolute(tensor_minusmean).max()
        if peak == 0:
            return tensor_minusmean
        return tensor_minusmean / peak

    def make0min(self, tensornd):
        """Replace exact zeros by 1e-19 so that a later logarithm stays finite.

        Args:
            tensornd: A ``torch.Tensor`` or anything ``np.asarray`` accepts
                (for example the array returned by ``get_melspectrogram``).

        Returns:
            A ``torch.Tensor`` holding the patched values.
        """
        if isinstance(tensornd, torch.Tensor):
            values = tensornd.numpy()
        else:
            values = np.asarray(tensornd)
        res = np.where(values == 0, 1E-19, values)
        return torch.from_numpy(res)

    def spectrogrameToImage(self, specgram):
        """Turn a spectrogram into a PIL image of its normalised log2 values.

        Only index 0 of the leading axis is used, so ``specgram`` must have
        three axes.
        NOTE(review): the leading axis is presumably the audio channel - confirm.
        NOTE(review): the array handed to ``ToPILImage`` is a float array in
        [-1, 1]; confirm that the resulting image (and a later
        ``convert('RGB')``) is scaled as intended.
        """
        specgram = self.make0min(specgram)
        specgram = specgram.log2()[0, :, :].numpy()
        specgram = self.normalize(specgram)
        return transforms.ToPILImage()(specgram)

    def _stft_kwargs(self):
        """Keyword arguments shared by every STFT-based librosa extractor."""
        return dict(
            n_fft=self.n_fft, win_length=self.win_length,
            hop_length=self.hop_length, pad_mode=self.pad_mode,
            sr=self.sr, center=self.center,
        )

    def get_logmelspectrogram(self, waveform):
        """Return the mel spectrogram of ``waveform`` converted with ``power_to_db``."""
        return librosa.power_to_db(self.get_melspectrogram(waveform))

    def get_melspectrogram(self, waveform):
        """Return the mel spectrogram (HTK mel scale) of a waveform tensor."""
        return librosa.feature.melspectrogram(
            y=waveform.numpy(), power=self.power, n_mels=self.n_mels,
            norm=self.norm, htk=True, **self._stft_kwargs()
        )

    def get_mfcc(self, waveform):
        """Return 40 MFCCs of a waveform tensor.

        NOTE(review): ``self.norm`` is forwarded as ``norm=`` here exactly as
        for the mel spectrogram; confirm that ``librosa.feature.mfcc`` gives
        the argument the meaning you expect.
        """
        return librosa.feature.mfcc(
            y=waveform.numpy(), norm=self.norm, n_mfcc=40, **self._stft_kwargs()
        )

    def get_chroma_stft(self, waveform):
        """Return a 12-bin chromagram of a waveform tensor."""
        return librosa.feature.chroma_stft(
            y=waveform.numpy(), norm=self.norm, n_chroma=12, **self._stft_kwargs()
        )

    def get_spectral_contrast(self, waveform):
        """Return the spectral contrast of a waveform tensor."""
        return librosa.feature.spectral_contrast(
            y=waveform.numpy(), **self._stft_kwargs()
        )

    def get_tonnetz(self, waveform):
        """Return tonnetz features computed on the harmonic part of a waveform tensor."""
        harmonic = librosa.effects.harmonic(waveform.numpy())
        return librosa.feature.tonnetz(y=harmonic, sr=self.sr)

    def get_audios(self, file_list):
        """Decode every file in ``file_list`` with ``torchaudio.load``.

        The sampling rate returned by torchaudio is discarded; only the
        waveform tensors are returned, in the order of ``file_list``.
        """
        return [torchaudio.load(path)[0] for path in file_list]

    def _derive_data(self, file_list):
        """Convert every file in ``file_list`` to an RGB spectrogram-image tensor.

        Each file is decoded, turned into a mel spectrogram, rendered with
        ``spectrogrameToImage``, converted to RGB and passed through
        ``PILToTensor``.

        Previously this method ignored ``file_list`` (the loop variable
        shadowed it), discarded the tensors it built and always returned an
        empty list.

        Returns:
            A list with one image tensor per entry of ``file_list``.
        """
        to_tensor = transforms.PILToTensor()
        data = []
        for path in file_list:
            waveform, _ = torchaudio.load(path)
            image = self.spectrogrameToImage(self.get_melspectrogram(waveform))
            data.append(to_tensor(image.convert('RGB')))
        return data

In [6]:
# Loader/feature extractor rooted at the audio directory on the mounted Drive; constructed with the class's default feature settings.
dataset = MimiiDataset('/content/drive/MyDrive/mimii')

In [7]:
def mean_mfccs(wave_list):
  """Return one averaged MFCC vector per waveform in ``wave_list``.

  Features come from the module-level ``dataset``. For each waveform the
  first entry along the leading axis of the MFCC output is taken and
  averaged over axis 1.
  NOTE(review): presumably entry 0 is the first audio channel and axis 1
  is time - confirm.
  """
  return [np.mean(dataset.get_mfcc(signal)[0], axis=1) for signal in wave_list]

def mean_stfts(wave_list):
  """Return one averaged chroma-STFT vector per waveform in ``wave_list``.

  Features come from the module-level ``dataset``. For each waveform the
  first entry along the leading axis of the chroma output is taken and
  averaged over axis 1.
  NOTE(review): presumably entry 0 is the first audio channel and axis 1
  is time - confirm.
  """
  return [np.mean(dataset.get_chroma_stft(signal)[0], axis=1) for signal in wave_list]

def mean_melspecs(wave_list):
  """Return one averaged mel-spectrogram vector per waveform in ``wave_list``.

  Features come from the module-level ``dataset``. For each waveform the
  first entry along the leading axis of the mel spectrogram is taken and
  averaged over axis 1.
  NOTE(review): presumably entry 0 is the first audio channel and axis 1
  is time - confirm.
  """
  return [np.mean(dataset.get_melspectrogram(signal)[0], axis=1) for signal in wave_list]

def mean_spec_contrasts(wave_list):
  """Return one averaged spectral-contrast vector per waveform in ``wave_list``.

  Features come from the module-level ``dataset``. For each waveform the
  first entry along the leading axis of the spectral-contrast output is
  taken and averaged over axis 1.
  NOTE(review): presumably entry 0 is the first audio channel and axis 1
  is time - confirm.
  """
  return [np.mean(dataset.get_spectral_contrast(signal)[0], axis=1) for signal in wave_list]
  
def mean_tonnetzs(wave_list):
  """Return one averaged tonnetz vector per waveform in ``wave_list``.

  Features come from the module-level ``dataset``. For each waveform the
  first entry along the leading axis of the tonnetz output is taken and
  averaged over axis 1.
  NOTE(review): presumably entry 0 is the first audio channel and axis 1
  is time - confirm.
  """
  return [np.mean(dataset.get_tonnetz(signal)[0], axis=1) for signal in wave_list]

In [8]:
# Load the waveforms and 0/1 labels for device 'ToyConveyor', machine id 1.
# Despite the df_ prefix, get_data returns lists of waveforms, not DataFrames.
df_train1, df_test1, y_train1, y_test1 = dataset.get_data('ToyConveyor', 1)

In [9]:
# One averaged feature vector per recording, computed for the train split first and the test split second.
train_melspecs, test_melspecs = [mean_melspecs(waves) for waves in (df_train1, df_test1)]
train_mfccs, test_mfccs = [mean_mfccs(waves) for waves in (df_train1, df_test1)]
train_stfts, test_stfts = [mean_stfts(waves) for waves in (df_train1, df_test1)]
train_spec_contrasts, test_spec_contrasts = [mean_spec_contrasts(waves) for waves in (df_train1, df_test1)]
train_tonnetzs, test_tonnetzs = [mean_tonnetzs(waves) for waves in (df_train1, df_test1)]

In [12]:
def make_mixed_f(melspecs,mfccs,stfts,spec_contrasts,tonnetzs):
  """Concatenate the five per-recording feature vectors into one float tensor.

  Row ``k`` of the result is ``melspecs[k]``, ``mfccs[k]``, ``stfts[k]``,
  ``spec_contrasts[k]`` and ``tonnetzs[k]`` joined in that order. The number
  of rows is ``len(melspecs)``; the other sequences are indexed with the
  same positions.

  Returns:
    A ``torch.FloatTensor`` with one row per recording.
  """
  rows = [
      np.concatenate(
          (melspecs[k], mfccs[k], stfts[k], spec_contrasts[k], tonnetzs[k])
      ).tolist()
      for k in range(len(melspecs))
  ]
  return torch.FloatTensor(rows)

In [13]:
# Assemble the mixed feature matrices (one row per recording) for the train and test splits.
train_mixed_f = make_mixed_f(
    melspecs=train_melspecs,
    mfccs=train_mfccs,
    stfts=train_stfts,
    spec_contrasts=train_spec_contrasts,
    tonnetzs=train_tonnetzs,
)
test_mixed_f = make_mixed_f(
    melspecs=test_melspecs,
    mfccs=test_mfccs,
    stfts=test_stfts,
    spec_contrasts=test_spec_contrasts,
    tonnetzs=test_tonnetzs,
)

In [14]:
# Persist the train/test mixed-feature tensors to Google Drive.
# NOTE(review): the file names start with `y_` although feature tensors (not labels) are written,
# and y_train1 / y_test1 are not saved in the cells shown - confirm this is intended.
torch.save(train_mixed_f, '/content/drive/MyDrive/mixed_features/y_tr_toyconveyor1.pt')
torch.save(test_mixed_f, '/content/drive/MyDrive/mixed_features/y_ts_toyconveyor1.pt')