In [1]:
# =====================================================================
# Import
# =====================================================================

# Import internal modules
import os.path
from typing import List, Set, Dict, TypedDict, Tuple, Optional

# Import 3rd party modules
import pandas as pd
import numpy as np
import librosa
import librosa.display
import matplotlib.pyplot as plt

In [2]:
def get_sound_files(folder: str) -> pd.DataFrame:
    """
    Function to get all sound files within a folder.
    Param: `folder` name includes background noise level and machine type (e.g. -6_db_fan)    
    Return: a DataFrame  
    """
    # Create empty lists
    sound_list: List[str] = []  # will contain all sound filenames
    sound_path_list: List[str] = []  # will contain the sound's filepaths
    noise_db_list: List[int] = [] # will contain the background noise level in db of each sound
    machine_type_list: List[str] = [] # will contain the machine type of each sound
    model_id_list: List[str] = []  # will contain the machine product id of each sound
    target_list: List[str] = [] # will contain the target value (normal=0 or anormal=1) for each sound

    # Get the relative path of the directory that contains all the sound files
    folder_path = os.path.join("assets", "sounds", folder)

    # Get all the filenames within the directory
    for path, dirs, files in os.walk(folder_path):
        for filename in files:
            # Search only filenames with the extension ".wav"
            if filename.lower().endswith(".wav"):

                # Get the filename
                sound_list.append(filename)

                # Get the filepath
                sound_path = os.path.join(path, filename)
                sound_path_list.append(sound_path)
                
                # Split filepath to retrieve the information
                path_splitted = sound_path.split("/")
                
                # Get the background noise in db
                noise_db = int(path_splitted[2].split("_")[0])
                noise_db_list.append(noise_db)
                
                # Get the machine type
                machine_type = path_splitted[2].split("_")[2]
                machine_type_list.append(machine_type)
                
                # Get the model id
                model_id = path_splitted[3].split("_")[1]
                model_id_list.append(model_id)
                
                # Get target variable (normal or anormal)
                target = path_splitted[4]
                target_list.append(target)   
    
    # Create list with the data
    data: List[float, int, str] = list(zip(noise_db_list, machine_type_list, model_id_list, sound_list, sound_path_list, target_list))
    
    # Create list with column names
    cols: List[str] = ["noise_db", "machine_type", "model_id", "sound", "sound_path", "target"]
    
    # Return a DataFrame from the lists
    return pd.DataFrame(data=data, columns=cols)

def get_all_sounds(db_list: List[int], machine_type_list: List[str]) -> pd.DataFrame:
    """
    Function to get all sound files for specified lists of background noise and machine type.
    Param: * `db_list` is a list of background noise level (i.e. -6, 0, 6)
           * `machine_type_list` is a list of machine type (i.e. fan, pump, valve, slider) 
    Return: a DataFrame  
    """
    df_list = []
    for db in db_list:
        for machine_type in machine_type_list:
            df = get_sound_files(f"{db}_db_{machine_type}")
            df_list.append(df)
            
    return pd.concat(df_list, axis=0)

In [3]:
def get_audio_features(sound_path: str) -> List[float]:
    """
    Function to extract audio features from the sound file
    Param: `sound_path` is the filepath of the sound
    Return: a list of audio features aggregated by their average values
    """
    # Read audio file
    y, sr = librosa.load(sound_path)
    
    # Get spectogram
    spect: np.ndarray = np.abs(librosa.stft(y, hop_length=512))
    spect_mean: np.float = np.mean(spect)
    
    # Get mel spectogram
    mel_spect: np.ndarray = librosa.feature.melspectrogram(y=y, sr=sr, n_fft=2048, hop_length=1024)
    mel_spect_mean: np.float = np.mean(mel_spect)

    # Get chromagram
    chroma: np.ndarray = librosa.feature.chroma_stft(S=spect, sr=sr)
    chroma_mean: np.float = np.mean(chroma)

    # Get constant-Q chromagram
    chroma_cq: np.ndarray = librosa.feature.chroma_cqt(y=y, sr=sr)
    chroma_cq_mean: np.float = np.mean(chroma_cq)
    
    # Get chromagram cens
    chroma_cens: np.ndarray = librosa.feature.chroma_cens(y=y, sr=sr)
    chroma_cens_mean: np.float = np.mean(chroma_cens)
    
    # Get mfcc
    mfcc: np.ndarray = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40)
    mfcc_mean: np.float = np.mean(mfcc)

    # Get rms
    S: np.ndarray
    phase: np.ndarray
    S, phase = librosa.magphase(librosa.stft(y))
    rms: np.ndarray = librosa.feature.rms(S=S)
    rms_mean: np.float = np.mean(rms)
    
    # Get spectral centroid
    cent: np.ndarray = librosa.feature.spectral_centroid(y=y, sr=sr)
    cent_mean: np.float = np.mean(cent)

    # Get spectral bandwidth
    spec_bw: np.ndarray = librosa.feature.spectral_bandwidth(y=y, sr=sr)
    spec_bw_mean: np.float = np.mean(spec_bw)

    # Get spectral contrast
    contrast: np.ndarray = librosa.feature.spectral_contrast(S=S, sr=sr)
    contrast_mean: np.float = np.mean(contrast)

    # Get spectral flatness
    flatness: np.ndarray = librosa.feature.spectral_flatness(y=y)
    flatness_mean: np.float = np.mean(flatness)
    
    # Get roll-off frequency
    roll_off: np.ndarray = librosa.feature.spectral_rolloff(y=y, sr=sr, roll_percent=0.95)
    roll_off_mean: np.float = np.mean(roll_off)

    # tonal centroid features (tonnetz)
    tonnetz: np.ndarray = librosa.feature.tonnetz(y=y, sr=sr)
    tonnetz_mean: np.float = np.mean(tonnetz)

    # zero-crossing rate
    zero_crossing_rate: np.ndarray = librosa.feature.zero_crossing_rate(y)
    zero_crossing_rate_mean: np.float = np.mean(zero_crossing_rate)
    
    # Return a list of audio features aggregated by their average values
    return [spect_mean, mel_spect_mean,
            chroma_mean, chroma_cq_mean, chroma_cens_mean,
            mfcc_mean, rms_mean, cent_mean, 
            spec_bw_mean, contrast_mean, flatness_mean,
            roll_off_mean, tonnetz_mean, zero_crossing_rate_mean]

In [4]:
df = get_all_sounds([-6], ["slider"]).iloc[:3]
df.head()

Unnamed: 0,noise_db,machine_type,model_id,sound,sound_path,target
0,-6,slider,0,00000059.wav,assets/sounds/-6_db_slider/id_00/abnormal/0000...,abnormal
1,-6,slider,0,00000071.wav,assets/sounds/-6_db_slider/id_00/abnormal/0000...,abnormal
2,-6,slider,0,00000065.wav,assets/sounds/-6_db_slider/id_00/abnormal/0000...,abnormal


In [5]:
# Create list with column names
audio_features_cols: List[str] = ["spectogram", "mel_spectogram",
                   "chromagram", "chromagram_cq", "chromagram_cens",
                   "mfcc", "rms", "spectral_centroid", 
                   "spectral_bandwidth", "spectral_contrast", "spectral_flatness",
                   "roll_off_frequency", "tonnetz", "zero_crossing_rate"]

df[audio_features_cols] = df[["sound_path"]].apply(lambda x: pd.Series(get_audio_features(x.sound_path)), axis=1)

In [6]:
df.head()

Unnamed: 0,noise_db,machine_type,model_id,sound,sound_path,target,spectogram,mel_spectogram,chromagram,chromagram_cq,chromagram_cens,mfcc,rms,spectral_centroid,spectral_bandwidth,spectral_contrast,spectral_flatness,roll_off_frequency,tonnetz,zero_crossing_rate
0,-6,slider,0,00000059.wav,assets/sounds/-6_db_slider/id_00/abnormal/0000...,abnormal,0.103656,0.028626,0.508862,0.514267,0.264862,-5.768414,0.006748,1746.776097,1955.708303,24.889785,0.000356,6312.725788,-0.014067,0.077769
1,-6,slider,0,00000071.wav,assets/sounds/-6_db_slider/id_00/abnormal/0000...,abnormal,0.102113,0.025405,0.592927,0.509345,0.265619,-5.547102,0.00639,1762.671976,1924.965304,24.371674,0.000404,6249.724931,-0.02664,0.080915
2,-6,slider,0,00000065.wav,assets/sounds/-6_db_slider/id_00/abnormal/0000...,abnormal,0.103404,0.029101,0.615363,0.51919,0.26435,-5.850099,0.006801,1797.617686,2018.689316,23.995691,0.000357,6408.051429,-0.015122,0.074452
