In [61]:
# =====================================================================
# Import
# =====================================================================

# Import internal modules
import os.path
from typing import List, Set, Dict, TypedDict, Tuple, Optional

# Import 3rd party modules
import pandas as pd
import numpy as np
import librosa
import librosa.display
import matplotlib.pyplot as plt

In [62]:
def get_sound_files(folder: str) -> pd.DataFrame:
    """
    Function to get all sound files within a folder.
    Param: `folder` name includes background noise level and machine type (e.g. -6_db_fan)    
    Return: a DataFrame  
    """
    # Create empty lists
    sound_list: List[str] = []  # will contain all sound filenames
    sound_path_list: List[str] = []  # will contain the sound's filepaths
    noise_db_list: List[int] = [] # will contain the background noise level in db of each sound
    machine_type_list: List[str] = [] # will contain the machine type of each sound
    model_id_list: List[str] = []  # will contain the machine product id of each sound
    target_list: List[str] = [] # will contain the target value (normal=0 or anormal=1) for each sound

    # Get the relative path of the directory that contains all the sound files
    folder_path = os.path.join("assets", "sounds", folder)

    # Get all the filenames within the directory
    for path, dirs, files in os.walk(folder_path):
        for filename in files:
            # Search only filenames with the extension ".wav"
            if filename.lower().endswith(".wav"):

                # Get the filename
                sound_list.append(filename)

                # Get the filepath
                sound_path = os.path.join(path, filename)
                sound_path_list.append(sound_path)
                
                # Split filepath to retrieve the information
                path_splitted = sound_path.split("/")
                
                # Get the background noise in db
                noise_db = int(path_splitted[2].split("_")[0])
                noise_db_list.append(noise_db)
                
                # Get the machine type
                machine_type = path_splitted[2].split("_")[2]
                machine_type_list.append(machine_type)
                
                # Get the model id
                model_id = path_splitted[3].split("_")[1]
                model_id_list.append(model_id)
                
                # Get target variable (normal or anormal)
                target = path_splitted[4]
                target_list.append(target)   
    
    # Create list with the data
    data: List[float, int, str] = list(zip(noise_db_list, machine_type_list, model_id_list, sound_list, sound_path_list, target_list))
    
    # Create list with column names
    cols: List[str] = ["noise_db", "machine_type", "model_id", "sound", "sound_path", "target"]
    
    # Return a DataFrame from the lists
    return pd.DataFrame(data=data, columns=cols)

def get_all_sounds(db_list: List[int], machine_type_list: List[str]) -> pd.DataFrame:
    """
    Function to get all sound files for specified lists of background noise and machine type.
    Param: * `db_list` is a list of background noise level (i.e. -6, 0, 6)
           * `machine_type_list` is a list of machine type (i.e. fan, pump, valve, slider) 
    Return: a DataFrame  
    """
    df_list = []
    for db in db_list:
        for machine_type in machine_type_list:
            df = get_sound_files(f"{db}_dB_{machine_type}")
            df_list.append(df)
            
    return pd.concat(df_list, axis=0)

In [158]:
def get_audio_features(sound_path: str) -> List[float]:
    """
    Function to extract audio features from the sound file
    Param: `sound_path` is the filepath of the sound
    Return: a list of audio features aggregated by their average values
    """
    # Read audio file
    y, sr = librosa.load(sound_path)

    # short-time Fourier Transform
    stft = librosa.stft(y)

    # Get spectogram
    spect: np.ndarray = np.abs(stft)
    spect_mean: np.float = np.mean(spect)
    spect_min: np.float = np.min(spect)
    spect_max: np.float = np.max(spect)
    spect_std: np.float = np.std(spect)
    
    # Get mel spectogram
    mel_spect: np.ndarray = librosa.feature.melspectrogram(y=y, sr=sr, n_fft=2048)
    mel_spect_mean: np.float = np.mean(mel_spect)
    mel_spect_min: np.float = np.min(mel_spect)
    mel_spect_max: np.float = np.max(mel_spect)
    mel_spect_std: np.float = np.std(mel_spect)

    # Get chromagram
    chroma: np.ndarray = librosa.feature.chroma_stft(S=spect, sr=sr)
    chroma_mean: np.float = np.mean(chroma)
    chroma_min: np.float = np.min(chroma)
    chroma_max: np.float = np.max(chroma)
    chroma_std: np.float = np.std(chroma)

    # Get constant-Q chromagram
    chroma_cq: np.ndarray = librosa.feature.chroma_cqt(y=y, sr=sr)
    chroma_cq_mean: np.float = np.mean(chroma_cq)
    chroma_cq_min: np.float = np.min(chroma_cq)
    chroma_cq_max: np.float = np.max(chroma_cq)
    chroma_cq_std: np.float = np.std(chroma_cq)
    
    # Get chromagram cens
    chroma_cens: np.ndarray = librosa.feature.chroma_cens(y=y, sr=sr)
    chroma_cens_mean: np.float = np.mean(chroma_cens)
    chroma_cens_min: np.float = np.min(chroma_cens)
    chroma_cens_max: np.float = np.max(chroma_cens)
    chroma_cens_std: np.float = np.std(chroma_cens)

    # Get mfcc
    mfcc: np.ndarray = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40)
    mfcc_mean: np.float = np.mean(mfcc)
    mfcc_min: np.float = np.min(mfcc)
    mfcc_max: np.float = np.max(mfcc)
    mfcc_std: np.float = np.std(mfcc)

    # Get rms
    S: np.ndarray
    phase: np.ndarray
    S, phase = librosa.magphase(librosa.stft(y))
    rms: np.ndarray = librosa.feature.rms(S=S)
    rms_mean: np.float = np.mean(rms)
    rms_min: np.float = np.min(rms)
    rms_max: np.float = np.max(rms)
    rms_std: np.float = np.std(rms)
    
    # Get spectral centroid
    cent: np.ndarray = librosa.feature.spectral_centroid(y=y, sr=sr)
    cent_mean: np.float = np.mean(cent)
    cent_min: np.float = np.min(cent)
    cent_max: np.float = np.max(cent)
    cent_std: np.float = np.std(cent)
        
    # Get spectral bandwidth
    spec_bw: np.ndarray = librosa.feature.spectral_bandwidth(y=y, sr=sr)
    spec_bw_mean: np.float = np.mean(spec_bw)
    spec_bw_min: np.float = np.min(spec_bw)
    spec_bw_max: np.float = np.max(spec_bw)
    spec_bw_std: np.float = np.std(spec_bw)
        
    # Get spectral contrast
    contrast: np.ndarray = librosa.feature.spectral_contrast(S=S, sr=sr)
    contrast_mean: np.float = np.mean(contrast)
    contrast_min: np.float = np.min(contrast)
    contrast_max: np.float = np.max(contrast)
    contrast_std: np.float = np.std(contrast)

    # Get spectral flatness
    flatness: np.ndarray = librosa.feature.spectral_flatness(y=y)
    flatness_mean: np.float = np.mean(flatness)
    flatness_min: np.float = np.min(flatness)
    flatness_max: np.float = np.max(flatness)
    flatness_std: np.float = np.std(flatness)
    
    # Get roll-off frequency
    roll_off: np.ndarray = librosa.feature.spectral_rolloff(y=y, sr=sr, roll_percent=0.95)
    roll_off_mean: np.float = np.mean(roll_off)
    roll_off_min: np.float = np.min(roll_off)
    roll_off_max: np.float = np.max(roll_off)
    roll_off_std: np.float = np.std(roll_off)

    # tonal centroid features (tonnetz)
    tonnetz: np.ndarray = librosa.feature.tonnetz(y=y, sr=sr)
    tonnetz_mean: np.float = np.mean(tonnetz)
    tonnetz_min: np.float = np.min(tonnetz)
    tonnetz_max: np.float = np.max(tonnetz)
    tonnetz_std: np.float = np.std(tonnetz)
        
    # zero-crossing rate
    zero_crossing_rate: np.ndarray = librosa.feature.zero_crossing_rate(y)
    zero_crossing_rate_mean: np.float = np.mean(zero_crossing_rate)
    zero_crossing_rate_min: np.float = np.min(zero_crossing_rate)
    zero_crossing_rate_max: np.float = np.max(zero_crossing_rate)
    zero_crossing_rate_std: np.float = np.std(zero_crossing_rate)
    
    # d_harmonic, d_percussive
    d_harmonic, d_percussive = librosa.decompose.hpss(stft)
    d_harmonic_mean: np.float = np.mean(d_harmonic)
    d_harmonic_min: np.float = np.min(d_harmonic)
    d_harmonic_max: np.float = np.max(d_harmonic)    
    d_harmonic_std: np.float = np.std(d_harmonic)
        
    d_percussive_mean: np.float = np.mean(d_percussive)
    d_percussive_min: np.float = np.min(d_percussive)
    d_percussive_max: np.float = np.max(d_percussive)    
    d_percussive_std: np.float = np.std(d_percussive)  
    
    
    # Return a list of audio features aggregated by their average values
    return [spect_mean, spect_min, spect_max, spect_std,
            mel_spect_mean, mel_spect_min, mel_spect_max, mel_spect_std,
            chroma_mean, chroma_min, chroma_max, chroma_std,
            chroma_cq_mean, chroma_cq_min, chroma_cq_max, chroma_cq_std,
            chroma_cens_mean, chroma_cens_min, chroma_cens_max, chroma_cens_std,
            mfcc_mean, mfcc_min, mfcc_max, mfcc_std,
            rms_mean, rms_min, rms_max, rms_std,
            cent_mean, cent_min, cent_max, cent_std,
            spec_bw_mean, spec_bw_min, spec_bw_max, spec_bw_std,
            contrast_mean, contrast_min, contrast_max, contrast_std,
            flatness_mean, flatness_min, flatness_max, flatness_std,
            roll_off_mean, roll_off_min, roll_off_max, roll_off_std,
            tonnetz_mean, tonnetz_min, tonnetz_max, tonnetz_std,
            zero_crossing_rate_mean, zero_crossing_rate_min, zero_crossing_rate_max, zero_crossing_rate_std,
            d_harmonic_mean, d_harmonic_min, d_harmonic_max, d_harmonic_std,
            d_percussive_mean, d_percussive_min, d_percussive_max, d_percussive_std
           ]

In [159]:
df_test = get_all_sounds([-6, 0, 6], ["fan", "valve", "pump", "slider"])
# df_test = get_all_sounds([-6], ["fan"])
df.head()

Unnamed: 0,noise_db,machine_type,model_id,sound,sound_path,target
0,-6,fan,0,00000059.wav,assets/sounds/-6_dB_fan/id_00/abnormal/0000005...,abnormal
1,-6,fan,0,00000071.wav,assets/sounds/-6_dB_fan/id_00/abnormal/0000007...,abnormal
2,-6,fan,0,00000065.wav,assets/sounds/-6_dB_fan/id_00/abnormal/0000006...,abnormal
3,-6,fan,0,00000273.wav,assets/sounds/-6_dB_fan/id_00/abnormal/0000027...,abnormal
4,-6,fan,0,00000267.wav,assets/sounds/-6_dB_fan/id_00/abnormal/0000026...,abnormal


In [161]:
df_test.shape

(54057, 6)

In [171]:
df_test_sample = df_test.sample(frac=0.030)

In [172]:
df_test_sample.shape

(1622, 6)

In [165]:
df_test_sample.head()

Unnamed: 0,noise_db,machine_type,model_id,sound,sound_path,target
2152,0,slider,4,00000155.wav,assets/sounds/0_dB_slider/id_04/abnormal/00000...,abnormal
3610,0,slider,2,00000597.wav,assets/sounds/0_dB_slider/id_02/normal/0000059...,normal
3955,0,valve,2,00000427.wav,assets/sounds/0_dB_valve/id_02/normal/00000427...,normal
1118,-6,slider,0,00000341.wav,assets/sounds/-6_dB_slider/id_00/normal/000003...,normal
1913,-6,slider,6,00000469.wav,assets/sounds/-6_dB_slider/id_06/normal/000004...,normal


In [145]:
df_test.head()

Unnamed: 0,noise_db,machine_type,model_id,sound,sound_path,target
0,-6,fan,0,00000059.wav,assets/sounds/-6_dB_fan/id_00/abnormal/0000005...,abnormal
1,-6,fan,0,00000071.wav,assets/sounds/-6_dB_fan/id_00/abnormal/0000007...,abnormal
2,-6,fan,0,00000065.wav,assets/sounds/-6_dB_fan/id_00/abnormal/0000006...,abnormal
3,-6,fan,0,00000273.wav,assets/sounds/-6_dB_fan/id_00/abnormal/0000027...,abnormal
4,-6,fan,0,00000267.wav,assets/sounds/-6_dB_fan/id_00/abnormal/0000026...,abnormal


In [150]:
import time

In [173]:
# Create list with column names
start = time.time()
# Code
audio_features_cols: List[str] = ["spect_mean", "spect_min", "spect_max", "spect_std",
            "mel_spect_mean", "mel_spect_min", "mel_spect_max", "mel_spect_std",
            "chroma_mean", "chroma_min", "chroma_max", "chroma_std",
            "chroma_cq_mean", "chroma_cq_min", "chroma_cq_max", "chroma_cq_std",
            "chroma_cens_mean", "chroma_cens_min", "chroma_cens_max", "chroma_cens_std",
            "mfcc_mean", "mfcc_min", "mfcc_max", "mfcc_std",
            "rms_mean", "rms_min", "rms_max", "rms_std",
            "cent_mean", "cent_min", "cent_max", "cent_std",
            "spec_bw_mean", "spec_bw_min", "spec_bw_max", "spec_bw_std",
            "contrast_mean", "contrast_min", "contrast_max", "contrast_std",
            "flatness_mean", "flatness_min", "flatness_max", "flatness_std",
            "roll_off_mean", "roll_off_min", "roll_off_max", "roll_off_std",
            "tonnetz_mean", "tonnetz_min", "tonnetz_max", "tonnetz_std",
            "zero_crossing_rate_mean", "zero_crossing_rate_min", "zero_crossing_rate_max", "zero_crossing_rate_std",
            "d_harmonic_mean", "d_harmonic_min", "d_harmonic_max", "d_harmonic_std",
            "d_percussive_mean", "d_percussive_min", "d_percussive_max", "d_percussive_std"
           ]

df_test_sample[audio_features_cols] = df_test_sample[["sound_path"]].apply(lambda x: pd.Series(get_audio_features(x.sound_path)), axis=1)

end = time.time()
print(end - start)


2088.652580022812


In [177]:
df_test_sample.to_csv("test_sample_csv", index=False)

In [None]:
df_test

In [154]:
df_test.head()

Unnamed: 0,noise_db,machine_type,model_id,sound,sound_path,target,spect_mean,spect_min,spect_max,spect_std,...,zero_crossing_rate_max,zero_crossing_rate_std,d_harmonic_mean,d_harmonic_min,d_harmonic_max,d_harmonic_std,d_percussive_mean,d_percussive_min,d_percussive_max,d_percussive_std
0,-6,fan,0,00000059.wav,assets/sounds/-6_dB_fan/id_00/abnormal/0000005...,abnormal,0.087980+0.000000j,6.923417e-12+0.000000e+00j,8.509282+0.000000j,0.324739+0.000000j,...,0.077148+0.000000j,0.008583+0.000000j,0.000017+0.000006j,-8.145981-1.055241j,7.884060-1.348021j,0.296380+0.000000j,-0.000017-0.000007j,-2.492729+0.000000j,2.080021+0.000000j,0.081029+0.000000j
1,-6,fan,0,00000071.wav,assets/sounds/-6_dB_fan/id_00/abnormal/0000007...,abnormal,0.100928+0.000000j,2.516883e-10+0.000000e+00j,9.722576+0.000000j,0.347875+0.000000j,...,0.061035+0.000000j,0.006300+0.000000j,0.000014-0.000016j,-9.119480-2.299127j,9.128906-0.800953j,0.279787+0.000000j,-0.000014+0.000016j,-2.056131+0.805898j,2.128010+0.007904j,0.125176+0.000000j
2,-6,fan,0,00000065.wav,assets/sounds/-6_dB_fan/id_00/abnormal/0000006...,abnormal,0.089889+0.000000j,1.997541e-10+0.000000e+00j,9.564067+0.000000j,0.333833+0.000000j,...,0.077148+0.000000j,0.008145+0.000000j,-0.000052-0.000033j,-8.458093-2.363396j,8.495137-1.337206j,0.305922+0.000000j,0.000051+0.000034j,-2.657297+1.209692j,1.850809-0.670836j,0.082564+0.000000j
3,-6,fan,0,00000273.wav,assets/sounds/-6_dB_fan/id_00/abnormal/0000027...,abnormal,0.090242+0.000000j,2.936516e-11+0.000000e+00j,6.608639+0.000000j,0.276757+0.000000j,...,0.060059+0.000000j,0.005807+0.000000j,-0.000036-0.000019j,-5.723155+1.621211j,5.065529-2.003252j,0.196627+0.000000j,0.000037+0.000018j,-1.919492-0.459299j,1.993472-0.029171j,0.118090+0.000000j
4,-6,fan,0,00000267.wav,assets/sounds/-6_dB_fan/id_00/abnormal/0000026...,abnormal,0.087292+0.000000j,3.599187e-11+0.000000e+00j,7.771596+0.000000j,0.259563+0.000000j,...,0.083984+0.000000j,0.007461+0.000000j,-0.000028-0.000029j,-7.386082-1.169338j,6.961869+0.017125j,0.220806+0.000000j,0.000028+0.000029j,-1.619270-0.154349j,1.597498+0.738019j,0.085296+0.000000j


In [73]:
# Create list with column names
audio_features_cols: List[str] = ["spectogram", "mel_spectogram",
                   "chromagram", "chromagram_cq", "chromagram_cens",
                   "mfcc", "rms", "spectral_centroid", 
                   "spectral_bandwidth", "spectral_contrast", "spectral_flatness",
                   "roll_off_frequency", "tonnetz", "zero_crossing_rate"]

# Create empty columns
for col in audio_features_cols:
    df_test[col] = np.nan

for idx, sound in enumerate(df_test.sound_path.values):
    feature_values_list = get_audio_features(sound)
#     print(feature_values_list)
    for i in range(len(feature_values_list)):
#         print(idx, str(audio_features_cols[i]), feature_values_list[i])
        df_test[audio_features_cols[i]].iloc[idx] = feature_values_list[i]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_test[col] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value, self.name)


In [None]:
# Create list with column names
audio_features_cols: List[str] = ["spectogram", "mel_spectogram",
                   "chromagram", "chromagram_cq", "chromagram_cens",
                   "mfcc", "rms", "spectral_centroid", 
                   "spectral_bandwidth", "spectral_contrast", "spectral_flatness",
                   "roll_off_frequency", "tonnetz", "zero_crossing_rate"]

# Create empty columns
for col in audio_features_cols:
    df_test[col] = np.nan

for idx, sound in enumerate(df_test.sound_path.values):
    feature_values_list = get_audio_features(sound)
#     print(feature_values_list)
    for i in range(len(feature_values_list)):
#         print(idx, str(audio_features_cols[i]), feature_values_list[i])
        df_test[audio_features_cols[i]].iloc[idx] = feature_values_list[i]

In [78]:
# Create list with column names
audio_features_cols: List[str] = ["spectogram", "mel_spectogram",
                   "chromagram", "chromagram_cq", "chromagram_cens",
                   "mfcc", "rms", "spectral_centroid", 
                   "spectral_bandwidth", "spectral_contrast", "spectral_flatness",
                   "roll_off_frequency", "tonnetz", "zero_crossing_rate"]

# Create empty columns
for col in audio_features_cols:
    df_test[col] = np.nan
df_test.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_test[col] = np.nan


Unnamed: 0,noise_db,machine_type,model_id,sound,sound_path,target,spectogram,mel_spectogram,chromagram,chromagram_cq,chromagram_cens,mfcc,rms,spectral_centroid,spectral_bandwidth,spectral_contrast,spectral_flatness,roll_off_frequency,tonnetz,zero_crossing_rate
0,-6,fan,0,00000059.wav,assets/sounds/-6_dB_fan/id_00/abnormal/0000005...,abnormal,,,,,,,,,,,,,,
1,-6,fan,0,00000071.wav,assets/sounds/-6_dB_fan/id_00/abnormal/0000007...,abnormal,,,,,,,,,,,,,,
2,-6,fan,0,00000065.wav,assets/sounds/-6_dB_fan/id_00/abnormal/0000006...,abnormal,,,,,,,,,,,,,,
3,-6,fan,0,00000273.wav,assets/sounds/-6_dB_fan/id_00/abnormal/0000027...,abnormal,,,,,,,,,,,,,,
4,-6,fan,0,00000267.wav,assets/sounds/-6_dB_fan/id_00/abnormal/0000026...,abnormal,,,,,,,,,,,,,,


In [79]:
audio_features_list_all_sounds.append(df_test.sound_path.values)

In [92]:
audio_features_list_all_sounds = []
for sound_path in df_test.sound_path.values:
    audio_features_list = get_audio_features(sound_path)
    audio_features_list.append(sound_path)
    audio_features_list_all_sounds.append(audio_features_list)
audio_features_list_all_sounds

[[0.08797987,
  0.03412381,
  0.51950985,
  0.3487876,
  0.24360700893714782,
  -4.0447407,
  0.007393384808553433,
  1180.264030852038,
  1579.0006992822268,
  24.305225494964127,
  0.00013324826,
  5154.828999601218,
  0.04250314218185303,
  0.049592834614269145,
  'assets/sounds/-6_dB_fan/id_00/abnormal/00000059.wav'],
 [0.100927845,
  0.039812844,
  0.75366986,
  0.38985088,
  0.2539249235509849,
  -3.4927447,
  0.007938459534366313,
  1036.5130803953173,
  1375.484191023654,
  21.28200050194736,
  0.00011218998,
  4208.891873459252,
  0.050503582874733076,
  0.03764976979408353,
  'assets/sounds/-6_dB_fan/id_00/abnormal/00000071.wav'],
 [0.08988863,
  0.03654531,
  0.53370386,
  0.35151482,
  0.24256478440084428,
  -3.9055371,
  0.007584500401487915,
  1271.4472060240762,
  1735.2508219906167,
  24.272329736071338,
  0.00014936722,
  5743.070144830336,
  0.0437188022909946,
  0.04943649398201856,
  'assets/sounds/-6_dB_fan/id_00/abnormal/00000065.wav'],
 [0.0902425,
  0.025801158,

In [123]:
audio_features_list_all_sounds = []
for sound_path in df_test.sound_path.values:
    audio_features_list = get_audio_features(sound_path)
#     audio_features_list.append(sound_path)
    audio_features_list_all_sounds.append(audio_features_list)

# Create list with column names
audio_features_cols: List[str] = ["spectogram", "mel_spectogram",
                   "chromagram", "chromagram_cq", "chromagram_cens",
                   "mfcc", "rms", "spectral_centroid", 
                   "spectral_bandwidth", "spectral_contrast", "spectral_flatness",
                   "roll_off_frequency", "tonnetz", "zero_crossing_rate"]
df_audio_features = pd.DataFrame(data=audio_features_list_all_sounds,columns=audio_features_cols)

df_all = pd.concat([df_test, df_audio_features], axis=1)

In [124]:
df_all.head()

Unnamed: 0,noise_db,machine_type,model_id,sound,sound_path,target,spectogram,mel_spectogram,chromagram,chromagram_cq,chromagram_cens,mfcc,rms,spectral_centroid,spectral_bandwidth,spectral_contrast,spectral_flatness,roll_off_frequency,tonnetz,zero_crossing_rate
0,-6,fan,0,00000059.wav,assets/sounds/-6_dB_fan/id_00/abnormal/0000005...,abnormal,0.08798,0.034124,0.51951,0.348788,0.243607,-4.044741,0.007393,1180.264031,1579.000699,24.305225,0.000133,5154.829,0.042503,0.049593
1,-6,fan,0,00000071.wav,assets/sounds/-6_dB_fan/id_00/abnormal/0000007...,abnormal,0.100928,0.039813,0.75367,0.389851,0.253925,-3.492745,0.007938,1036.51308,1375.484191,21.282001,0.000112,4208.891873,0.050504,0.03765
2,-6,fan,0,00000065.wav,assets/sounds/-6_dB_fan/id_00/abnormal/0000006...,abnormal,0.089889,0.036545,0.533704,0.351515,0.242565,-3.905537,0.007585,1271.447206,1735.250822,24.27233,0.000149,5743.070145,0.043719,0.049436
3,-6,fan,0,00000273.wav,assets/sounds/-6_dB_fan/id_00/abnormal/0000027...,abnormal,0.090242,0.025801,0.730573,0.508074,0.269959,-3.973263,0.006392,1050.479779,1347.219254,21.13812,0.000141,4106.271922,-0.022377,0.044327
4,-6,fan,0,00000267.wav,assets/sounds/-6_dB_fan/id_00/abnormal/0000026...,abnormal,0.087292,0.022661,0.583526,0.484912,0.270946,-4.673796,0.005993,1414.325444,1763.148493,23.825591,0.000246,5714.992047,-0.038684,0.066287


In [125]:
df_all.shape

(5550, 20)

In [95]:
df_audio_features.sound_path[1]

'assets/sounds/-6_dB_fan/id_00/abnormal/00000071.wav'

In [110]:
df_test.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 6 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   noise_db      5 non-null      int64 
 1   machine_type  5 non-null      object
 2   model_id      5 non-null      object
 3   sound         5 non-null      object
 4   sound_path    5 non-null      object
 5   target        5 non-null      object
dtypes: int64(1), object(5)
memory usage: 368.0+ bytes


In [109]:
df_audio_features.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 15 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   spectogram          5 non-null      float64
 1   mel_spectogram      5 non-null      float64
 2   chromagram          5 non-null      float64
 3   chromagram_cq       5 non-null      float64
 4   chromagram_cens     5 non-null      float64
 5   mfcc                5 non-null      float64
 6   rms                 5 non-null      float64
 7   spectral_centroid   5 non-null      float64
 8   spectral_bandwidth  5 non-null      float64
 9   spectral_contrast   5 non-null      float64
 10  spectral_flatness   5 non-null      float64
 11  roll_off_frequency  5 non-null      float64
 12  tonnetz             5 non-null      float64
 13  zero_crossing_rate  5 non-null      float64
 14  sound_path          5 non-null      object 
dtypes: float64(14), object(1)
memory usage: 728.0+ bytes


In [115]:
df_all = pd.concat([df_test, df_audio_features], axis=1)

In [116]:
df_all

Unnamed: 0,noise_db,machine_type,model_id,sound,sound_path,target,spectogram,mel_spectogram,chromagram,chromagram_cq,...,mfcc,rms,spectral_centroid,spectral_bandwidth,spectral_contrast,spectral_flatness,roll_off_frequency,tonnetz,zero_crossing_rate,sound_path.1
0,-6,fan,0,00000059.wav,assets/sounds/-6_dB_fan/id_00/abnormal/0000005...,abnormal,0.08798,0.034124,0.51951,0.348788,...,-4.044741,0.007393,1180.264031,1579.000699,24.305225,0.000133,5154.829,0.042503,0.049593,assets/sounds/-6_dB_fan/id_00/abnormal/0000005...
1,-6,fan,0,00000071.wav,assets/sounds/-6_dB_fan/id_00/abnormal/0000007...,abnormal,0.100928,0.039813,0.75367,0.389851,...,-3.492745,0.007938,1036.51308,1375.484191,21.282001,0.000112,4208.891873,0.050504,0.03765,assets/sounds/-6_dB_fan/id_00/abnormal/0000007...
2,-6,fan,0,00000065.wav,assets/sounds/-6_dB_fan/id_00/abnormal/0000006...,abnormal,0.089889,0.036545,0.533704,0.351515,...,-3.905537,0.007585,1271.447206,1735.250822,24.27233,0.000149,5743.070145,0.043719,0.049436,assets/sounds/-6_dB_fan/id_00/abnormal/0000006...
3,-6,fan,0,00000273.wav,assets/sounds/-6_dB_fan/id_00/abnormal/0000027...,abnormal,0.090242,0.025801,0.730573,0.508074,...,-3.973263,0.006392,1050.479779,1347.219254,21.13812,0.000141,4106.271922,-0.022377,0.044327,assets/sounds/-6_dB_fan/id_00/abnormal/0000027...
4,-6,fan,0,00000267.wav,assets/sounds/-6_dB_fan/id_00/abnormal/0000026...,abnormal,0.087292,0.022661,0.583526,0.484912,...,-4.673796,0.005993,1414.325444,1763.148493,23.825591,0.000246,5714.992047,-0.038684,0.066287,assets/sounds/-6_dB_fan/id_00/abnormal/0000026...


In [96]:
get_audio_features("assets/sounds/-6_dB_fan/id_00/abnormal/00000071.wav")

[0.100927845,
 0.039812844,
 0.75366986,
 0.38985088,
 0.2539249235509849,
 -3.4927447,
 0.007938459534366313,
 1036.5130803953173,
 1375.484191023654,
 21.28200050194736,
 0.00011218998,
 4208.891873459252,
 0.050503582874733076,
 0.03764976979408353]

In [74]:
df_test.head()

Unnamed: 0,noise_db,machine_type,model_id,sound,sound_path,target,spectogram,mel_spectogram,chromagram,chromagram_cq,chromagram_cens,mfcc,rms,spectral_centroid,spectral_bandwidth,spectral_contrast,spectral_flatness,roll_off_frequency,tonnetz,zero_crossing_rate
0,-6,fan,0,00000059.wav,assets/sounds/-6_dB_fan/id_00/abnormal/0000005...,abnormal,0.08798,0.034124,0.51951,0.348788,0.243607,-4.044741,0.007393,1180.264031,1579.000699,24.305225,0.000133,5154.829,0.042503,0.049593
1,-6,fan,0,00000071.wav,assets/sounds/-6_dB_fan/id_00/abnormal/0000007...,abnormal,0.100928,0.039813,0.75367,0.389851,0.253925,-3.492745,0.007938,1036.51308,1375.484191,21.282001,0.000112,4208.891873,0.050504,0.03765
2,-6,fan,0,00000065.wav,assets/sounds/-6_dB_fan/id_00/abnormal/0000006...,abnormal,0.089889,0.036545,0.533704,0.351515,0.242565,-3.905537,0.007585,1271.447206,1735.250822,24.27233,0.000149,5743.070145,0.043719,0.049436
3,-6,fan,0,00000273.wav,assets/sounds/-6_dB_fan/id_00/abnormal/0000027...,abnormal,0.090242,0.025801,0.730573,0.508074,0.269959,-3.973263,0.006392,1050.479779,1347.219254,21.13812,0.000141,4106.271922,-0.022377,0.044327
4,-6,fan,0,00000267.wav,assets/sounds/-6_dB_fan/id_00/abnormal/0000026...,abnormal,0.087292,0.022661,0.583526,0.484912,0.270946,-4.673796,0.005993,1414.325444,1763.148493,23.825591,0.000246,5714.992047,-0.038684,0.066287


In [39]:
get_audio_features(df_test.sound_path.iloc[2])

[0.08988863,
 0.03654531,
 0.53370386,
 0.35151482,
 0.24256478440084428,
 -3.9055371,
 0.007584500401487915,
 1271.4472060240762,
 1735.2508219906167,
 24.272329736071338,
 0.00014936722,
 5743.070144830336,
 0.0437188022909946,
 0.04943649398201856]

In [9]:
df.head()

Unnamed: 0,noise_db,machine_type,model_id,sound,sound_path,target,spectogram,mel_spectogram,chromagram,chromagram_cq,chromagram_cens,mfcc,rms,spectral_centroid,spectral_bandwidth,spectral_contrast,spectral_flatness,roll_off_frequency,tonnetz,zero_crossing_rate
0,-6,slider,0,00000059.wav,assets/sounds/-6_db_slider/id_00/abnormal/0000...,abnormal,0.103656,0.028626,0.508862,0.514267,0.264862,-5.768414,0.006748,1746.776097,1955.708303,24.889785,0.000356,6312.725788,-0.014067,0.077769
1,-6,slider,0,00000071.wav,assets/sounds/-6_db_slider/id_00/abnormal/0000...,abnormal,0.102113,0.025405,0.592927,0.509345,0.265619,-5.547102,0.00639,1762.671976,1924.965304,24.371674,0.000404,6249.724931,-0.02664,0.080915
2,-6,slider,0,00000065.wav,assets/sounds/-6_db_slider/id_00/abnormal/0000...,abnormal,0.103404,0.029101,0.615363,0.51919,0.26435,-5.850099,0.006801,1797.617686,2018.689316,23.995691,0.000357,6408.051429,-0.015122,0.074452
3,-6,slider,0,00000273.wav,assets/sounds/-6_db_slider/id_00/abnormal/0000...,abnormal,0.120839,0.029576,0.674066,0.608943,0.279228,-4.68586,0.006914,1693.40478,1843.512061,21.976415,0.000446,5747.141969,-0.006995,0.082713
4,-6,slider,0,00000267.wav,assets/sounds/-6_db_slider/id_00/abnormal/0000...,abnormal,0.114762,0.032621,0.658467,0.62444,0.276571,-4.298293,0.007232,1503.216934,1768.753095,21.985056,0.000396,5490.966797,-0.000406,0.065476


In [10]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 20 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   noise_db            100 non-null    int64  
 1   machine_type        100 non-null    object 
 2   model_id            100 non-null    object 
 3   sound               100 non-null    object 
 4   sound_path          100 non-null    object 
 5   target              100 non-null    object 
 6   spectogram          100 non-null    float64
 7   mel_spectogram      100 non-null    float64
 8   chromagram          100 non-null    float64
 9   chromagram_cq       100 non-null    float64
 10  chromagram_cens     100 non-null    float64
 11  mfcc                100 non-null    float64
 12  rms                 100 non-null    float64
 13  spectral_centroid   100 non-null    float64
 14  spectral_bandwidth  100 non-null    float64
 15  spectral_contrast   100 non-null    float64
 16  spectral_

In [11]:
df.isnull().any()

noise_db              False
machine_type          False
model_id              False
sound                 False
sound_path            False
target                False
spectogram            False
mel_spectogram        False
chromagram            False
chromagram_cq         False
chromagram_cens       False
mfcc                  False
rms                   False
spectral_centroid     False
spectral_bandwidth    False
spectral_contrast     False
spectral_flatness     False
roll_off_frequency    False
tonnetz               False
zero_crossing_rate    False
dtype: bool

In [None]:
# Get the relative path of the directory that contains all the sound files
folder_path = os.path.join("assets", "sounds", "6_dB_test")
sound_path_list = []

# Get all the filenames within the directory
for path, dirs, files in os.walk(folder_path):
    for filename in files:
        # Search only filenames with the extension ".wav"
        if filename.lower().endswith(".wav"):

            # Get the filename
            sound_list.append(filename)

            # Get the filepath
            sound_path = os.path.join(path, filename)
            sound_path_list.append(sound_path)

            # Split filepath to retrieve the information
            path_splitted = sound_path.split("/")

            # Get the background noise in db
            noise_db = int(path_splitted[2].split("_")[0])
            noise_db_list.append(noise_db)

            # Get the machine type
            machine_type = path_splitted[2].split("_")[2]
            machine_type_list.append(machine_type)

            # Get the model id
            model_id = path_splitted[3].split("_")[1]
            model_id_list.append(model_id)

            # Get target variable (normal aor anormal)
            target = path_splitted[4]
            target_list.append(target)
print()

In [126]:
df_all.to_csv("-6_dB_fan_csv", index=False)

In [128]:
df_all.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
noise_db,5550.0,-6.0,0.0,-6.0,-6.0,-6.0,-6.0,-6.0
spectogram,5550.0,0.095662,0.012191,0.051801,0.087735,0.095607,0.103148,0.135741
mel_spectogram,5550.0,0.023828,0.005022,0.00605,0.021274,0.024094,0.025627,0.054556
chromagram,5550.0,0.696104,0.117973,0.36668,0.591525,0.741141,0.806373,0.881168
chromagram_cq,5550.0,0.596425,0.081941,0.221833,0.54689,0.60683,0.654831,0.777581
chromagram_cens,5550.0,0.273416,0.007121,0.189166,0.271032,0.27482,0.27767,0.286754
mfcc,5550.0,-4.778437,0.882483,-8.020124,-5.553728,-4.409894,-4.136958,-2.680073
rms,5550.0,0.006165,0.000584,0.003287,0.005858,0.006197,0.006413,0.009236
spectral_centroid,5550.0,1348.625165,295.305422,614.600016,1133.415694,1334.580598,1489.537732,2938.424171
spectral_bandwidth,5550.0,1577.98597,185.43111,1036.590126,1424.885814,1589.900545,1710.724472,2323.834034


In [129]:
df_all.isnull().any()

noise_db              False
machine_type          False
model_id              False
sound                 False
sound_path            False
target                False
spectogram            False
mel_spectogram        False
chromagram            False
chromagram_cq         False
chromagram_cens       False
mfcc                  False
rms                   False
spectral_centroid     False
spectral_bandwidth    False
spectral_contrast     False
spectral_flatness     False
roll_off_frequency    False
tonnetz               False
zero_crossing_rate    False
dtype: bool

In [133]:
df_all.iloc[100]

noise_db                                                             -6
machine_type                                                        fan
model_id                                                             00
sound                                                      00000198.wav
sound_path            assets/sounds/-6_dB_fan/id_00/abnormal/0000019...
target                                                         abnormal
spectogram                                                     0.089376
mel_spectogram                                                 0.021159
chromagram                                                     0.791502
chromagram_cq                                                  0.468329
chromagram_cens                                                0.264135
mfcc                                                             -4.083
rms                                                            0.005409
spectral_centroid                                           1360