In [3]:
import os, sys
import glob
import torch
import torchvision.transforms as T
from torch.utils.data import Dataset, DataLoader
import librosa
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
from PIL import Image

In [79]:
def convert_to_spectrogram(wav_file_path):
    signal, sampling_rate = load_sound_file(wav_file_path)

    #Mel spectrogram calculation
    db_mels = []
    for channel in range(signal.shape[0]):
        mel = librosa.feature.melspectrogram(signal[channel], sr=sampling_rate, n_fft=self.n_fft, hop_length=self.hop_length,\
                                         n_mels=self.n_mels)
        db_mel = librosa.power_to_db(mel, ref=np.max)
        db_mels.append(db_mel)

    return np.array(db_mels), np.array(sampling_rate)
        
    
def load_sound_file(wav_name, mono=False, channel=0):
    multi_channel_data, sampling_rate = librosa.load(wav_name, sr=None, mono=mono)
    signal = np.array(multi_channel_data)

    return signal, sampling_rate

def scale_minmax(X, min=0.0, max=1.0):
    """
    Minmax scaler for a numpy array
    
    PARAMS
    ======
        X (numpy array) - array to scale
        min (float) - minimum value of the scaling range (default: 0.0)
        max (float) - maximum value of the scaling range (default: 1.0)
    """
    X_std = (X - X.min()) / (X.max() - X.min())
    X_scaled = X_std * (max - min) + min
    return X_scaled

def generate_spectrograms(list_files, output_dir, n_mels=64, n_fft=2048, hop_length=512):
    """
    Generate spectrograms pictures from a list of WAV files. Each sound
    file in WAV format is processed to generate a spectrogram that will 
    be saved as a PNG file.
    
    PARAMS
    ======
        list_files (list) - list of WAV files to process
        output_dir (string) - root directory to save the spectrogram to
        n_mels (integer) - number of Mel buckets (default: 64)
        n_fft (integer) - length of the windowed signal to compute the short Fourier transform on
        hop_length (integer) - window increment when computing STFT
        
    RETURNS
    =======
        files (list) - list of spectrogram files (PNG format)
    """
    files = []
    
    # Loops through all files:
    for index in tqdm(range(len(list_files)), desc=f'Building spectrograms for {output_dir}'):
        
        # Building file name for the spectrogram PNG picture:
        file = list_files[index]
        path_components = file.split('/')
        
        # machine_id = id_00, id_02...
        # sound_type = normal or abnormal
        # wav_file is the name of the original sound file without the .wav extension
        machine_id, sound_type = path_components[-3], path_components[-2]
        machine = path_components[-5]
        wav_file = path_components[-1].split('.')[0]
        filename =  machine + '-' + sound_type + '-' + machine_id + '-' + wav_file + '.png'
        
        # Example: train/normal/normal-id_02-00000259.png:
        filename = os.path.join(output_dir, sound_type, filename)
        #print(filename)
        if not os.path.exists(filename):
            FRAME_SIZE = 2048
            HOP_SIZE = 512
            
            # Loading sound file and generate Mel spectrogram:
            signal, sr = librosa.load(file)#load_sound_file(file)
            #print(signal.shape)
            
            spec = librosa.stft(signal, n_fft=FRAME_SIZE, hop_length=HOP_SIZE)
            spec = spec.astype(np.float16)
            #print(spec.dtype)
            #print("Memory size of numpy array in bytes:",spec.size * spec.itemsize)
            np.savez_compressed(filename[:-4] + "-ch" + str(0), spec)
            #print(spec.shape, sr)
            
#             for ctr in range(signal.shape[0]):
#                 mels = librosa.feature.melspectrogram(y=signal[ctr], sr=sr, n_mels=n_mels, n_fft=n_fft, hop_length=hop_length)
#                 mels = librosa.power_to_db(mels, ref=np.max)

#                 # Preprocess the image: min-max, putting 
#                 # low frequency at bottom and inverting to 
#                 # match higher energy with black pixels:
#                 img = scale_minmax(mels, 0, 255).astype(np.uint8)
#                 img = np.flip(img, axis=0)
#                 img = 255 - img
#                 img = Image.fromarray(img)

#                 # Saving the picture generated to disk:
#                 img.save(filename[:-4] + "-ch" + str(ctr+1) + ".png" )

#         files.append(filename)
        
    return files

In [74]:
#Please edit as needed. This path should have the following directory structure
#    base_data_path -> {0,6,neg6}_dB_{fan,pump,valve,slider}/{fan,pump,valve,slider}/*.wav
base_data_path = "/home/ubuntu/anudeep/machine_sound/"

def get_data_path(SNR, machine, ID):
    return glob.glob(base_data_path + \
                     "{}_{}/*/id_{}/*/*".format(SNR, machine, ID))

# Direct paths to audio files
paths = get_data_path('0_dB', 'fan', '00')
paths = paths[0:2]
paths.sort()

In [81]:
paths = glob.glob(base_data_path + "*/*/*/*/*")

In [None]:
a = generate_spectrograms(paths, "/home/ubuntu/anudeep/machine_sound/image_data_2/")

  spec = spec.astype(np.float16)
Building spectrograms for /home/ubuntu/anudeep/machine_sound/image_data_2/:   2%|▏         | 1148/54057 [05:09<3:50:25,  3.83it/s]

In [None]:
paths = glob.glob(base_data_path + "*/*/*/*/*")
len(paths)

In [None]:
paths = glob.glob(base_data_path + "*/*/*/*/*")
len(paths)

In [8]:
imgs = glob.glob('/home/ubuntu/anudeep/machine_sound/image_data/normal/*')

In [10]:
imgs.sort()

In [14]:
unique_dBs = []

for k in imgs:
    _t = k.split('/')[-1].split('-')[0]
    if _t not in unique_dBs:
        unique_dBs.append(_t)

In [15]:
unique_dBs

['0_dB_fan',
 '0_dB_pump',
 '0_dB_slider',
 '0_dB_valve',
 '6_dB_fan',
 '6_dB_pump',
 '6_dB_slider',
 '6_dB_valve',
 'neg6_dB_fan',
 'neg6_dB_pump',
 'neg6_dB_slider',
 'neg6_dB_valve']

In [65]:
a = np.load('./abnormal/0_dB_fan-abnormal-id_00-00000030-ch0.npy')

In [67]:
a.shape, a.dtype

((), dtype('float32'))