# Dataset MIMII

Read a good description of the dataset here:

https://github.com/BA-HanseML/NF_Prj_MIMII_Dataset/blob/master/doc/about_the_dataset.md 


A showroom of the recordings is available here:
https://ba-hanseml.github.io/MIMII_show_room/showroom.html

https://github.com/BA-HanseML/NF_Prj_MIMII_Dataset/blob/master/NF_Prj_MIMII_presentation_short.pdf

## Machine parts
The dataset contains audio recordings of 4 machine types. Each machine type has both normal and abnormal recordings.

- pump
- valve
- rail slider
- fan

## Dataset Structure
https://github.com/BA-HanseML/NF_Prj_MIMII_Dataset/blob/master/dataset/dataset_struct.md 


In [1]:
import os, sys
import glob
import torch
import torchvision.transforms as T
from torch.utils.data import Dataset, DataLoader
import librosa
import PIL as Image
import numpy as np
import matplotlib.pyplot as plt

# General config

In [2]:
# Please edit as needed. This is the root directory of the audio data
# (the directory name indicates .wav recordings, not PNG images).
base_data_path = "./data/wav_data/"

Anudeep, some thoughts on the dataloader:
- If we consider a supervised method, the dataloader should read only the normal data and store [spectrogram, label]. The label can be one of [0, 1, 2, 3] (pump, valve, fan, slider).
- If we consider a one-class unsupervised method, we should train only on the normal data of a particular machine, i.e.
class MIMII(Dataset):
    def __init__(self, data_paths, machine):
and the label can be [1, 0], i.e. normal or abnormal.


In [3]:
from tqdm import trange
from collections import defaultdict

class MIMII(Dataset):
    """Map-style dataset of mel-spectrogram images computed from ``.wav`` files.

    Every ``.wav`` file found for the requested machine types is converted,
    at construction time, into one uint8 log-mel image per audio channel.
    Each item is the tuple ``(spectrograms, label, sampling_rate)``, where
    ``label`` is the position of the machine type in the ``machine`` list.
    """

    # Not used by any method of this class; kept as a class attribute for
    # code that reads ``MIMII.transform``.
    transform = T.Compose([T.ToPILImage(), T.ToTensor()])

    def __init__(self, base_path, machine, snr=None):
        """Find the recordings and pre-compute their spectrograms.

        Args:
            base_path: Root directory of the ``.wav`` data.
            machine: List of at least two machine-type names, e.g.
                ``["fan", "pump"]``. The index of a name in this list is
                used as its integer label.
            snr: Optional sub-directory of ``base_path`` (e.g. ``"6_dB"``)
                to restrict the search to. ``None`` searches ``base_path``
                directly.

        Raises:
            TypeError: If ``machine`` is not a list.
            ValueError: If ``machine`` holds fewer than two names.
            FileNotFoundError: If no ``.wav`` file is found.
        """
        # Parameters for conversion to MEL spectrogram
        self.n_mels = 64
        self.frames = 5
        self.n_fft = 2048
        self.hop_length = 512
        self.power = 2.0
        self.base_path = base_path
        self.snr = snr

        # Explicit exceptions instead of ``assert``: asserts are stripped
        # when Python runs with -O, which would silently skip validation.
        if not isinstance(machine, list):
            raise TypeError("machine must be a list of machine-type names")
        if len(machine) <= 1:
            raise ValueError("machine must contain at least two machine types")

        self.label_map = {name: label for label, name in enumerate(machine)}
        print("label_map:", self.label_map)

        # Parallel lists: entry i of each list describes the same audio file.
        self.spectrograms = []
        self.labels = []
        self.sampling_rates = []

        # NOTE(review): assumes the layout
        # <base_path>/[<snr>/]<machine>/**/*.wav and loads every file found
        # there (no normal/abnormal filtering) - confirm against the local
        # copy of the dataset.
        root = base_path if snr is None else os.path.join(base_path, snr)
        for name, label in self.label_map.items():
            pattern = os.path.join(root, name, "**", "*.wav")
            # Sorted so that the sample order is reproducible between runs.
            for wav_file_path in sorted(glob.glob(pattern, recursive=True)):
                images, sampling_rate = self.convert_to_spectrogram(wav_file_path)
                self.spectrograms.append(images)
                self.labels.append(label)
                self.sampling_rates.append(sampling_rate)

        if not self.spectrograms:
            raise FileNotFoundError(
                f"no .wav files found under {root!r} for machines {machine!r}"
            )

    def __getitem__(self, index):
        """Return ``(spectrograms, label, sampling_rate)`` tensors for one file.

        ``spectrograms`` holds one image per audio channel of the file, and
        ``label`` is a one-element tensor with the machine-type index.
        """
        spectrograms = torch.from_numpy(self.spectrograms[index])
        label = torch.from_numpy(np.array([self.labels[index]]))
        sampling_rate = torch.from_numpy(self.sampling_rates[index])
        return spectrograms, label, sampling_rate

    def __len__(self):
        """Return the number of audio files that were loaded."""
        return len(self.spectrograms)

    def convert_to_spectrogram(self, wav_file_path):
        """Convert one ``.wav`` file into per-channel log-mel images.

        Args:
            wav_file_path: Path of the audio file to convert.

        Returns:
            A pair ``(images, sampling_rate)``: ``images`` is a uint8 array
            with one 2-D image per audio channel stacked along axis 0, and
            ``sampling_rate`` is a 0-d array with the file's native rate.
        """
        signal, sampling_rate = self.load_sound_file(wav_file_path)
        # A single-channel file loads as a 1-D array; promote it to
        # (1, samples) so the loop below iterates channels, not samples.
        signal = np.atleast_2d(signal)

        images = []
        for channel_signal in signal:
            # ``y`` is passed by keyword: recent librosa releases no longer
            # accept the audio signal as a positional argument.
            mel = librosa.feature.melspectrogram(
                y=channel_signal,
                sr=sampling_rate,
                n_fft=self.n_fft,
                hop_length=self.hop_length,
                n_mels=self.n_mels,
                power=self.power,
            )
            db_mel = librosa.power_to_db(mel, ref=np.max)
            img = self.scale_minmax(db_mel, 0, 255).astype(np.uint8)
            # Flip the mel axis, then invert the grey levels.
            img = 255 - np.flip(img, axis=0)
            images.append(img)

        return np.stack(images), np.array(sampling_rate)

    def load_sound_file(self, wav_name, mono=False, channel=0):
        """Load an audio file at its native sampling rate.

        Args:
            wav_name: Path of the audio file.
            mono: Forwarded to ``librosa.load``; ``False`` keeps all channels.
            channel: Unused; accepted for backward compatibility.

        Returns:
            A pair ``(signal, sampling_rate)``.
        """
        multi_channel_data, sampling_rate = librosa.load(wav_name, sr=None, mono=mono)
        signal = np.array(multi_channel_data)

        return signal, sampling_rate

    def scale_minmax(self, X, _min=0.0, _max=1.0):
        """Linearly rescale an array to the range ``[_min, _max]``.

        Args:
            X: Numpy array to scale.
            _min: Lower bound of the target range (default: 0.0).
            _max: Upper bound of the target range (default: 1.0).

        Returns:
            A float array with the shape of ``X``. A constant input maps
            every element to ``_min`` instead of dividing by zero.
        """
        lowest = X.min()
        value_range = X.max() - lowest
        if value_range == 0:
            return np.full_like(X, _min, dtype=float)

        X_std = (X - lowest) / value_range
        return X_std * (_max - _min) + _min
        

In [4]:
# Build the dataset for the four machine types listed below.
# NOTE(review): snr="6_dB" presumably selects the 6 dB noise-level recordings - confirm.
dataset = MIMII(base_data_path, snr="6_dB", machine=['fan', 'pump', 'slider', 'valve'])

label_map: {'fan': 0, 'pump': 1, 'slider': 2, 'valve': 3}


In [5]:
# Serve the dataset in shuffled batches of 10 samples using 2 worker processes.
dataloader = DataLoader(dataset=dataset, batch_size=10, shuffle=True, num_workers=2)
# Iterator over the loader; next(iterable) fetches one batch.
iterable = iter(dataloader)