In [61]:
# =====================================================================
# Import
# =====================================================================

# Import internal modules
import os.path
from typing import List, Set, Dict, TypedDict, Tuple, Optional

# Import 3rd party modules
import pandas as pd
import numpy as np
import librosa
import librosa.display
import matplotlib.pyplot as plt

In [62]:
def get_sound_files(folder: str) -> pd.DataFrame:
    """
    Function to get all sound files within a folder.
    Param: `folder` name includes background noise level and machine type (e.g. -6_db_fan)    
    Return: a DataFrame  
    """
    # Create empty lists
    sound_list: List[str] = []  # will contain all sound filenames
    sound_path_list: List[str] = []  # will contain the sound's filepaths
    noise_db_list: List[int] = [] # will contain the background noise level in db of each sound
    machine_type_list: List[str] = [] # will contain the machine type of each sound
    model_id_list: List[str] = []  # will contain the machine product id of each sound
    target_list: List[str] = [] # will contain the target value (normal=0 or anormal=1) for each sound

    # Get the relative path of the directory that contains all the sound files
    folder_path = os.path.join("assets", "sounds", folder)

    # Get all the filenames within the directory
    for path, dirs, files in os.walk(folder_path):
        for filename in files:
            # Search only filenames with the extension ".wav"
            if filename.lower().endswith(".wav"):

                # Get the filename
                sound_list.append(filename)

                # Get the filepath
                sound_path = os.path.join(path, filename)
                sound_path_list.append(sound_path)
                
                # Split filepath to retrieve the information
                path_splitted = sound_path.split("/")
                
                # Get the background noise in db
                noise_db = int(path_splitted[2].split("_")[0])
                noise_db_list.append(noise_db)
                
                # Get the machine type
                machine_type = path_splitted[2].split("_")[2]
                machine_type_list.append(machine_type)
                
                # Get the model id
                model_id = path_splitted[3].split("_")[1]
                model_id_list.append(model_id)
                
                # Get target variable (normal or anormal)
                target = path_splitted[4]
                target_list.append(target)   
    
    # Create list with the data
    data: List[float, int, str] = list(zip(noise_db_list, machine_type_list, model_id_list, sound_list, sound_path_list, target_list))
    
    # Create list with column names
    cols: List[str] = ["noise_db", "machine_type", "model_id", "sound", "sound_path", "target"]
    
    # Return a DataFrame from the lists
    return pd.DataFrame(data=data, columns=cols)

def get_all_sounds(db_list: List[int], machine_type_list: List[str]) -> pd.DataFrame:
    """
    Function to get all sound files for specified lists of background noise and machine type.
    Param: * `db_list` is a list of background noise level (i.e. -6, 0, 6)
           * `machine_type_list` is a list of machine type (i.e. fan, pump, valve, slider) 
    Return: a DataFrame  
    """
    df_list = []
    for db in db_list:
        for machine_type in machine_type_list:
            df = get_sound_files(f"{db}_dB_{machine_type}")
            df_list.append(df)
            
    return pd.concat(df_list, axis=0)

In [205]:
def get_audio_features(sound_path: str) -> List[float]:
    """
    Function to extract audio features from the sound file
    Param: `sound_path` is the filepath of the sound
    Return: a list of audio features aggregated by their average values
    """
    # Read audio file
    y, sr = librosa.load(sound_path)

    # short-time Fourier Transform
    stft = librosa.stft(y)

    # Get spectogram
    spect: np.ndarray = np.abs(stft)
    spect_mean: np.float = np.mean(spect)
    spect_min: np.float = np.min(spect)
    spect_max: np.float = np.max(spect)
    spect_std: np.float = np.std(spect)
    
    # Get mel spectogram
    mel_spect: np.ndarray = librosa.feature.melspectrogram(y=y, sr=sr, n_fft=2048)
    mel_spect_mean: np.float = np.mean(mel_spect)
    mel_spect_min: np.float = np.min(mel_spect)
    mel_spect_max: np.float = np.max(mel_spect)
    mel_spect_std: np.float = np.std(mel_spect)

    # Get chromagram
    chroma: np.ndarray = librosa.feature.chroma_stft(S=spect, sr=sr)
    chroma_mean: np.float = np.mean(chroma)
    chroma_min: np.float = np.min(chroma)
    chroma_max: np.float = np.max(chroma)
    chroma_std: np.float = np.std(chroma)

    # Get constant-Q chromagram
    chroma_cq: np.ndarray = librosa.feature.chroma_cqt(y=y, sr=sr)
    chroma_cq_mean: np.float = np.mean(chroma_cq)
    chroma_cq_min: np.float = np.min(chroma_cq)
    chroma_cq_max: np.float = np.max(chroma_cq)
    chroma_cq_std: np.float = np.std(chroma_cq)
    
    # Get chromagram cens
    chroma_cens: np.ndarray = librosa.feature.chroma_cens(y=y, sr=sr)
    chroma_cens_mean: np.float = np.mean(chroma_cens)
    chroma_cens_min: np.float = np.min(chroma_cens)
    chroma_cens_max: np.float = np.max(chroma_cens)
    chroma_cens_std: np.float = np.std(chroma_cens)

    # Get mfcc
    mfcc: np.ndarray = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40)
    mfcc_mean: np.float = np.mean(mfcc)
    mfcc_min: np.float = np.min(mfcc)
    mfcc_max: np.float = np.max(mfcc)
    mfcc_std: np.float = np.std(mfcc)

    # Get rms
    S: np.ndarray
    phase: np.ndarray
    S, phase = librosa.magphase(stft)
    rms: np.ndarray = librosa.feature.rms(S=S)
    rms_mean: np.float = np.mean(rms)
    rms_min: np.float = np.min(rms)
    rms_max: np.float = np.max(rms)
    rms_std: np.float = np.std(rms)
    
    # Get spectral centroid
    cent: np.ndarray = librosa.feature.spectral_centroid(y=y, sr=sr)
    cent_mean: np.float = np.mean(cent)
    cent_min: np.float = np.min(cent)
    cent_max: np.float = np.max(cent)
    cent_std: np.float = np.std(cent)
        
    # Get spectral bandwidth
    spec_bw: np.ndarray = librosa.feature.spectral_bandwidth(y=y, sr=sr)
    spec_bw_mean: np.float = np.mean(spec_bw)
    spec_bw_min: np.float = np.min(spec_bw)
    spec_bw_max: np.float = np.max(spec_bw)
    spec_bw_std: np.float = np.std(spec_bw)
        
    # Get spectral contrast
    contrast: np.ndarray = librosa.feature.spectral_contrast(S=S, sr=sr)
    contrast_mean: np.float = np.mean(contrast)
    contrast_min: np.float = np.min(contrast)
    contrast_max: np.float = np.max(contrast)
    contrast_std: np.float = np.std(contrast)

    # Get spectral flatness
    flatness: np.ndarray = librosa.feature.spectral_flatness(y=y)
    flatness_mean: np.float = np.mean(flatness)
    flatness_min: np.float = np.min(flatness)
    flatness_max: np.float = np.max(flatness)
    flatness_std: np.float = np.std(flatness)
    
    # Get roll-off frequency
    roll_off: np.ndarray = librosa.feature.spectral_rolloff(y=y, sr=sr, roll_percent=0.95)
    roll_off_mean: np.float = np.mean(roll_off)
    roll_off_min: np.float = np.min(roll_off)
    roll_off_max: np.float = np.max(roll_off)
    roll_off_std: np.float = np.std(roll_off)

    # tonal centroid features (tonnetz)
    tonnetz: np.ndarray = librosa.feature.tonnetz(y=y, sr=sr)
    tonnetz_mean: np.float = np.mean(tonnetz)
    tonnetz_min: np.float = np.min(tonnetz)
    tonnetz_max: np.float = np.max(tonnetz)
    tonnetz_std: np.float = np.std(tonnetz)
        
    # zero-crossing rate
    zero_crossing_rate: np.ndarray = librosa.feature.zero_crossing_rate(y)
    zero_crossing_rate_mean: np.float = np.mean(zero_crossing_rate)
    zero_crossing_rate_min: np.float = np.min(zero_crossing_rate)
    zero_crossing_rate_max: np.float = np.max(zero_crossing_rate)
    zero_crossing_rate_std: np.float = np.std(zero_crossing_rate)
    
    # d_harmonic, d_percussive
    d_harmonic, d_percussive = librosa.decompose.hpss(stft)
    d_harmonic_abs = np.abs(d_harmonic)
    d_percussive_abs = np.abs(d_percussive)
    
    d_harmonic_mean: np.float = np.mean(d_harmonic_abs)
    d_harmonic_min: np.float = np.min(d_harmonic_abs)
    d_harmonic_max: np.float = np.max(d_harmonic_abs)    
    d_harmonic_std: np.float = np.std(d_harmonic_abs)
        
    d_percussive_mean: np.float = np.mean(d_percussive_abs)
    d_percussive_min: np.float = np.min(d_percussive_abs)
    d_percussive_max: np.float = np.max(d_percussive_abs)    
    d_percussive_std: np.float = np.std(d_percussive_abs)  
    
    
    # Return a list of audio features aggregated by their average values
    return [spect_mean, spect_min, spect_max, spect_std,
            mel_spect_mean, mel_spect_min, mel_spect_max, mel_spect_std,
            chroma_mean, chroma_min, chroma_max, chroma_std,
            chroma_cq_mean, chroma_cq_min, chroma_cq_max, chroma_cq_std,
            chroma_cens_mean, chroma_cens_min, chroma_cens_max, chroma_cens_std,
            mfcc_mean, mfcc_min, mfcc_max, mfcc_std,
            rms_mean, rms_min, rms_max, rms_std,
            cent_mean, cent_min, cent_max, cent_std,
            spec_bw_mean, spec_bw_min, spec_bw_max, spec_bw_std,
            contrast_mean, contrast_min, contrast_max, contrast_std,
            flatness_mean, flatness_min, flatness_max, flatness_std,
            roll_off_mean, roll_off_min, roll_off_max, roll_off_std,
            tonnetz_mean, tonnetz_min, tonnetz_max, tonnetz_std,
            zero_crossing_rate_mean, zero_crossing_rate_min, zero_crossing_rate_max, zero_crossing_rate_std,
            d_harmonic_mean, d_harmonic_min, d_harmonic_max, d_harmonic_std,
            d_percussive_mean, d_percussive_min, d_percussive_max, d_percussive_std
           ]

In [206]:
df_test = get_all_sounds([-6, 0, 6], ["fan", "valve", "pump", "slider"])
# df_test = get_all_sounds([-6], ["fan"])
df.head()

Unnamed: 0,noise_db,machine_type,model_id,sound,sound_path,target
0,-6,fan,0,00000059.wav,assets/sounds/-6_dB_fan/id_00/abnormal/0000005...,abnormal
1,-6,fan,0,00000071.wav,assets/sounds/-6_dB_fan/id_00/abnormal/0000007...,abnormal
2,-6,fan,0,00000065.wav,assets/sounds/-6_dB_fan/id_00/abnormal/0000006...,abnormal
3,-6,fan,0,00000273.wav,assets/sounds/-6_dB_fan/id_00/abnormal/0000027...,abnormal
4,-6,fan,0,00000267.wav,assets/sounds/-6_dB_fan/id_00/abnormal/0000026...,abnormal


In [161]:
df_test.shape

(54057, 6)

In [214]:
df_test_sample = df_test.sample(frac=0.05)

In [216]:
df_test_sample.shape

(2703, 6)

In [201]:
df_test_sample.head()

Unnamed: 0,noise_db,machine_type,model_id,sound,sound_path,target
1118,0,pump,0,00000685.wav,assets/sounds/0_dB_pump/id_00/normal/00000685.wav,normal
574,6,slider,0,00000855.wav,assets/sounds/6_dB_slider/id_00/normal/0000085...,normal
1456,-6,fan,6,00000271.wav,assets/sounds/-6_dB_fan/id_06/abnormal/0000027...,abnormal
550,-6,fan,0,00000842.wav,assets/sounds/-6_dB_fan/id_00/normal/00000842.wav,normal
1709,0,slider,6,00000160.wav,assets/sounds/0_dB_slider/id_06/normal/0000016...,normal


In [145]:
df_test.head()

Unnamed: 0,noise_db,machine_type,model_id,sound,sound_path,target
0,-6,fan,0,00000059.wav,assets/sounds/-6_dB_fan/id_00/abnormal/0000005...,abnormal
1,-6,fan,0,00000071.wav,assets/sounds/-6_dB_fan/id_00/abnormal/0000007...,abnormal
2,-6,fan,0,00000065.wav,assets/sounds/-6_dB_fan/id_00/abnormal/0000006...,abnormal
3,-6,fan,0,00000273.wav,assets/sounds/-6_dB_fan/id_00/abnormal/0000027...,abnormal
4,-6,fan,0,00000267.wav,assets/sounds/-6_dB_fan/id_00/abnormal/0000026...,abnormal


In [150]:
import time

In [217]:
# Create list with column names
start = time.time()
# Code
audio_features_cols: List[str] = ["spect_mean", "spect_min", "spect_max", "spect_std",
            "mel_spect_mean", "mel_spect_min", "mel_spect_max", "mel_spect_std",
            "chroma_mean", "chroma_min", "chroma_max", "chroma_std",
            "chroma_cq_mean", "chroma_cq_min", "chroma_cq_max", "chroma_cq_std",
            "chroma_cens_mean", "chroma_cens_min", "chroma_cens_max", "chroma_cens_std",
            "mfcc_mean", "mfcc_min", "mfcc_max", "mfcc_std",
            "rms_mean", "rms_min", "rms_max", "rms_std",
            "cent_mean", "cent_min", "cent_max", "cent_std",
            "spec_bw_mean", "spec_bw_min", "spec_bw_max", "spec_bw_std",
            "contrast_mean", "contrast_min", "contrast_max", "contrast_std",
            "flatness_mean", "flatness_min", "flatness_max", "flatness_std",
            "roll_off_mean", "roll_off_min", "roll_off_max", "roll_off_std",
            "tonnetz_mean", "tonnetz_min", "tonnetz_max", "tonnetz_std",
            "zero_crossing_rate_mean", "zero_crossing_rate_min", "zero_crossing_rate_max", "zero_crossing_rate_std",
            "d_harmonic_mean", "d_harmonic_min", "d_harmonic_max", "d_harmonic_std",
            "d_percussive_mean", "d_percussive_min", "d_percussive_max", "d_percussive_std"
           ]

df_test_sample[audio_features_cols] = df_test_sample[["sound_path"]].apply(lambda x: pd.Series(get_audio_features(x.sound_path)), axis=1)

end = time.time()
print(end - start)


4529.137727975845


In [218]:
4529.137727975845/60

75.48562879959742

In [219]:
df_test_sample.head()

Unnamed: 0,noise_db,machine_type,model_id,sound,sound_path,target,spect_mean,spect_min,spect_max,spect_std,...,zero_crossing_rate_max,zero_crossing_rate_std,d_harmonic_mean,d_harmonic_min,d_harmonic_max,d_harmonic_std,d_percussive_mean,d_percussive_min,d_percussive_max,d_percussive_std
1938,-6,slider,6,00000325.wav,assets/sounds/-6_dB_slider/id_06/normal/000003...,normal,0.086186,1.850374e-10,8.282417,0.239661,...,0.183105,0.019458,0.051047,3.093022e-13,8.259317,0.1965,0.035139,8.307444e-11,3.125634,0.079335
773,0,pump,0,00000814.wav,assets/sounds/0_dB_pump/id_00/normal/00000814.wav,normal,0.064292,9.486077e-11,4.943036,0.152599,...,0.103516,0.011921,0.033057,1.974081e-13,2.383167,0.098131,0.031234,3.159423e-11,2.559869,0.068802
1047,6,valve,0,00000928.wav,assets/sounds/6_dB_valve/id_00/normal/00000928...,normal,0.028409,6.772467e-11,1.246882,0.074859,...,0.242188,0.035117,0.012415,7.563213e-14,0.95309,0.039711,0.015994,4.018324e-11,1.243116,0.048474
2212,-6,pump,6,00000043.wav,assets/sounds/-6_dB_pump/id_06/normal/00000043...,normal,0.09999,1.298539e-10,4.971423,0.284035,...,0.07373,0.005807,0.055119,5.787144e-14,4.255203,0.183855,0.044872,4.671323e-11,2.199901,0.11922
3750,-6,pump,2,00000420.wav,assets/sounds/-6_dB_pump/id_02/normal/00000420...,normal,0.094911,6.309869e-11,4.607907,0.261325,...,0.075684,0.006278,0.052051,9.006482e-14,4.094114,0.171043,0.04286,4.86516e-11,2.621902,0.111749


In [221]:
df_test_sample.to_csv("test_sample_csv", index=False)

In [220]:
df_test_sample.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2703 entries, 1938 to 2407
Data columns (total 70 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   noise_db                 2703 non-null   int64  
 1   machine_type             2703 non-null   object 
 2   model_id                 2703 non-null   object 
 3   sound                    2703 non-null   object 
 4   sound_path               2703 non-null   object 
 5   target                   2703 non-null   object 
 6   spect_mean               2703 non-null   float64
 7   spect_min                2703 non-null   float64
 8   spect_max                2703 non-null   float64
 9   spect_std                2703 non-null   float64
 10  mel_spect_mean           2703 non-null   float64
 11  mel_spect_min            2703 non-null   float64
 12  mel_spect_max            2703 non-null   float64
 13  mel_spect_std            2703 non-null   float64
 14  chroma_mean          

In [183]:
# Correlation matrix
df_test_sample.corr().style.background_gradient(cmap="coolwarm")

  result[rl.indexer] = arr
  smin = np.nanmin(s.to_numpy()) if vmin is None else vmin
  smax = np.nanmax(s.to_numpy()) if vmax is None else vmax


Unnamed: 0,noise_db,spect_mean,spect_min,spect_max,spect_std,mel_spect_mean,mel_spect_min,mel_spect_max,mel_spect_std,chroma_mean,chroma_min,chroma_max,chroma_std,chroma_cq_mean,chroma_cq_min,chroma_cq_max,chroma_cq_std,chroma_cens_mean,chroma_cens_min,chroma_cens_max,chroma_cens_std,mfcc_mean,mfcc_min,mfcc_max,mfcc_std,rms_mean,rms_min,rms_max,rms_std,cent_mean,cent_min,cent_max,cent_std,spec_bw_mean,spec_bw_min,spec_bw_max,spec_bw_std,contrast_mean,contrast_min,contrast_max,contrast_std,flatness_mean,flatness_min,flatness_max,flatness_std,roll_off_mean,roll_off_min,roll_off_max,roll_off_std,tonnetz_mean,tonnetz_min,tonnetz_max,tonnetz_std,zero_crossing_rate_mean,zero_crossing_rate_min,zero_crossing_rate_max,zero_crossing_rate_std,d_harmonic_mean,d_harmonic_min,d_harmonic_max,d_harmonic_std,d_percussive_mean,d_percussive_min,d_percussive_max,d_percussive_std
noise_db,1.0,-0.818569,-0.431014,-0.58725,-0.834063,-0.803411,-0.807058,-0.419357,-0.646603,-0.100932,-0.188702,,0.161623,-0.126195,-0.17272,,0.146718,-0.168653,-0.083626,0.187944,0.14462,-0.698731,-0.726201,-0.103367,0.721104,-0.850686,-0.823365,-0.611875,-0.275783,0.121663,0.032975,0.135478,0.161746,0.090165,-0.018686,0.079653,0.094469,0.143892,0.023206,0.083067,0.022627,0.300045,0.221761,0.077169,0.114566,0.078606,-0.01499,0.063512,0.075797,0.033834,-0.112366,0.162074,0.194026,0.092697,0.032,0.178565,0.188704,-0.015857,0.574682,-0.580293,-0.736126,0.02185,0.291437,-0.276986,-0.787049
spect_mean,-0.818569,1.0,0.421453,0.540186,0.840153,0.832478,0.851866,0.437004,0.639876,0.180408,0.30977,,-0.281154,0.15865,0.280612,,-0.277116,0.197179,0.285173,-0.193831,-0.21662,0.627126,0.875331,-0.070386,-0.92152,0.878026,0.853955,0.59152,0.281585,0.149413,0.240427,-0.213423,-0.29559,0.064697,0.195268,-0.18955,-0.243075,-0.141547,-0.056899,-0.145768,0.171054,-0.068281,0.034744,0.11374,0.057277,0.08792,0.194967,-0.138158,-0.227127,-0.046977,0.147595,-0.192555,-0.215005,0.179205,0.234185,-0.149361,-0.238887,0.019474,-0.503643,0.512983,0.73246,-0.023602,-0.285539,0.26804,0.84558
spect_min,-0.431014,0.421453,1.0,0.323137,0.463731,0.454237,0.460247,0.239602,0.386257,0.035998,0.0885,,-0.072733,-0.011328,0.037919,,-0.045718,0.0024,0.000451,-0.032792,-0.000614,0.361684,0.376458,0.040273,-0.363886,0.4705,0.456805,0.305358,0.118365,-0.095276,-0.023522,-0.162402,-0.162742,-0.078709,0.012531,-0.11191,-0.117234,-0.047195,0.021082,-0.133168,-0.065693,-0.2008,-0.129265,-0.065374,-0.096933,-0.070168,0.007951,-0.106328,-0.098645,0.015564,0.044931,-0.030587,-0.023734,-0.082696,-0.043426,-0.1572,-0.156764,0.026641,-0.333106,0.33063,0.430533,-0.028335,-0.160435,0.140576,0.384294
spect_max,-0.58725,0.540186,0.323137,1.0,0.752114,0.743028,0.556064,0.887408,0.879471,-0.035165,-0.069586,,0.035062,-0.155993,-0.200314,,0.205743,-0.139334,-0.151942,0.179525,0.142611,0.519781,0.394396,0.012342,-0.493612,0.714758,0.538724,0.824152,0.689843,-0.210824,-0.234175,-0.176784,-0.10179,-0.093958,-0.107174,-0.039295,-0.015542,-0.021218,0.048829,-0.126712,-0.163541,-0.373673,-0.431421,-0.150754,-0.200435,-0.106422,-0.124093,-0.071724,0.00841,0.124558,-0.089312,0.207096,0.167481,-0.274462,-0.250001,-0.2491,-0.155945,0.043904,-0.887417,0.890863,0.731095,-0.046486,-0.563644,0.565112,0.57792
spect_std,-0.834063,0.840153,0.463731,0.752114,1.0,0.985449,0.901077,0.572038,0.902033,-0.000321,0.129474,,-0.085994,-0.132713,-0.033304,,0.03657,-0.101532,-0.027389,0.058874,0.09894,0.711139,0.695626,0.160738,-0.704427,0.995277,0.908188,0.69724,0.410056,-0.268016,-0.155048,-0.373411,-0.311607,-0.239672,-0.102115,-0.272974,-0.199713,-0.079645,0.114852,-0.327137,-0.256472,-0.476853,-0.354663,-0.195468,-0.2692,-0.227758,-0.112344,-0.27152,-0.1434,-0.018695,-0.020961,-0.015659,0.050705,-0.265009,-0.174989,-0.405154,-0.341026,0.060999,-0.767309,0.777283,0.942291,-0.064627,-0.346174,0.336462,0.799238
mel_spect_mean,-0.803411,0.832478,0.454237,0.743028,0.985449,1.0,0.90591,0.583939,0.910908,0.02797,0.142565,,-0.10646,-0.101577,-0.010447,,0.012016,-0.073871,-0.010558,0.035001,0.069642,0.683805,0.667354,0.166945,-0.67738,0.978957,0.890869,0.715617,0.436053,-0.25791,-0.154524,-0.341729,-0.282418,-0.226767,-0.10351,-0.249846,-0.179821,-0.106836,0.096184,-0.303627,-0.233485,-0.437817,-0.331048,-0.183311,-0.247484,-0.215242,-0.111777,-0.247972,-0.125982,-0.006847,0.0066,-0.024024,0.022563,-0.255872,-0.16948,-0.37128,-0.309256,0.068161,-0.746621,0.755624,0.914304,-0.071707,-0.370173,0.358665,0.815583
mel_spect_min,-0.807058,0.851866,0.460247,0.556064,0.901077,0.90591,1.0,0.402783,0.72814,0.020811,0.190119,,-0.146979,-0.052285,0.101446,,-0.106652,-0.026811,0.083643,-0.058139,0.01871,0.634024,0.787496,0.081756,-0.723092,0.92225,0.965845,0.54733,0.122946,-0.161122,0.025702,-0.363415,-0.380916,-0.172444,0.046017,-0.303222,-0.308058,-0.094298,0.092921,-0.31124,-0.131326,-0.344667,-0.181705,-0.117047,-0.181772,-0.140501,0.043508,-0.275565,-0.26989,-0.076633,0.063931,-0.15662,-0.061585,-0.123691,-0.014438,-0.353981,-0.369507,0.017284,-0.568801,0.575801,0.831409,-0.021106,-0.227508,0.21344,0.760871
mel_spect_max,-0.419357,0.437004,0.239602,0.887408,0.572038,0.583939,0.402783,1.0,0.750918,0.014383,-0.039062,,-0.011971,-0.019539,-0.086536,,0.072644,-0.01788,-0.051689,0.063768,0.014116,0.36771,0.3124,-0.008922,-0.406126,0.535746,0.381414,0.817246,0.67914,-0.1088,-0.158478,-0.066913,-0.028939,-0.034985,-0.087381,0.023815,0.032587,-0.049513,0.027519,-0.041289,-0.07311,-0.215327,-0.295591,-0.061787,-0.091281,-0.045046,-0.098652,0.001776,0.042906,0.075827,-0.037021,0.104169,0.054353,-0.149244,-0.135695,-0.115857,-0.070432,0.02152,-0.702822,0.707527,0.52314,-0.024238,-0.63359,0.640934,0.512089
mel_spect_std,-0.646603,0.639876,0.386257,0.879471,0.902033,0.910908,0.72814,0.750918,1.0,-0.070263,-0.002206,,0.019403,-0.258438,-0.213007,,0.216322,-0.253334,-0.163934,0.22763,0.243525,0.57971,0.468931,0.102753,-0.527255,0.868336,0.709032,0.765249,0.608625,-0.270805,-0.23899,-0.2832,-0.185654,-0.189501,-0.144832,-0.162763,-0.090094,-0.000579,0.139342,-0.253224,-0.280233,-0.445573,-0.404275,-0.2023,-0.258935,-0.196717,-0.163151,-0.179469,-0.041476,0.080421,-0.11116,0.182493,0.236707,-0.319749,-0.266845,-0.347174,-0.235725,0.078655,-0.882049,0.886495,0.894073,-0.080369,-0.429006,0.42488,0.642318
chroma_mean,-0.100932,0.180408,0.035998,-0.035165,-0.000321,0.02797,0.020811,0.014383,-0.070263,1.0,0.848205,,-0.927767,0.447176,0.440469,,-0.449454,0.44387,0.41442,-0.299538,-0.467608,0.384396,0.086371,0.307741,-0.090703,0.019768,0.029972,0.095542,0.049129,-0.075065,-0.157832,0.081261,0.122183,-0.184032,-0.251134,0.060139,0.196033,-0.824231,-0.406176,0.035533,0.122147,0.140628,0.082469,0.089764,0.111581,-0.244157,-0.291513,0.017058,0.254609,0.326133,0.541729,-0.110851,-0.492691,-0.017463,-0.068301,0.096451,0.149533,-0.0243,0.181123,-0.17725,-0.260411,0.020361,-0.150386,0.165012,0.417029


In [187]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

In [None]:
df_test_sample.apply()

In [195]:
print(0.081604+0.000000j.real)
print(0.081604+0.000000j.imag)
print(0.081604+0.000000j.real + 0.081604+0.000000j.real)

0.081604
0.081604
0.163208


In [191]:
# Select data
## select numeric columns
cols = df_test_sample.select_dtypes(include="number").columns.tolist()

## drop noise_db and model_id columns
# cols.remove("noise_db")
# cols.remove("model_id")

# select features and target
X = df_test_sample[cols]
y = df_test_sample.target

# Split into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state=42, stratify=y)

# Create classifier and fit it to the training set
# random_classifier = RandomForestClassifier(n_estimators=100, max_depth=10, random_state=5, criterion = 'entropy')
random_classifier = RandomForestClassifier(n_estimators=100, random_state=5, criterion = 'entropy')
random_classifier.fit(X_train, y_train)

# Compute model score
random_classifier.score(X_test, y_test)

# Compute model score
print("model score: ", random_classifier.score(X_test, y_test))

# Evaluate model
print(classification_report(y_test, random_classifier.predict(X_test)))

ValueError: Complex data not supported
      noise_db          spect_mean                   spect_min  \
2089        -6  0.081604+0.000000j  7.126549e-11+0.000000e+00j   
659          0  0.059857+0.000000j  5.390232e-11+0.000000e+00j   
3571         0  0.046802+0.000000j  1.659626e-11+0.000000e+00j   
1143        -6  0.070292+0.000000j  1.743206e-10+0.000000e+00j   
1933         0  0.056599+0.000000j  1.039749e-10+0.000000e+00j   
...        ...                 ...                         ...   
58           6  0.055893+0.000000j  1.583214e-12+0.000000e+00j   
3061         0  0.052901+0.000000j  7.196674e-11+0.000000e+00j   
5221        -6  0.109528+0.000000j  2.487300e-11+0.000000e+00j   
2857        -6  0.106693+0.000000j  6.941863e-11+0.000000e+00j   
3805         0  0.064577+0.000000j  4.479243e-12+0.000000e+00j   

                spect_max           spect_std      mel_spect_mean  \
2089  11.803023+0.000000j  0.263863+0.000000j  0.022785+0.000000j   
659    2.479441+0.000000j  0.135329+0.000000j  0.006421+0.000000j   
3571   5.095831+0.000000j  0.132824+0.000000j  0.005585+0.000000j   
1143   5.835920+0.000000j  0.258383+0.000000j  0.021745+0.000000j   
1933   3.455087+0.000000j  0.184644+0.000000j  0.011390+0.000000j   
...                   ...                 ...                 ...   
58     6.959498+0.000000j  0.187515+0.000000j  0.010971+0.000000j   
3061   2.652793+0.000000j  0.094038+0.000000j  0.002741+0.000000j   
5221   7.561564+0.000000j  0.225898+0.000000j  0.018132+0.000000j   
2857   8.413273+0.000000j  0.232345+0.000000j  0.017742+0.000000j   
3805   3.441570+0.000000j  0.136172+0.000000j  0.005730+0.000000j   

                   mel_spect_min       mel_spect_max       mel_spect_std  \
2089  6.420037e-17+0.000000e+00j  6.910113+0.000000j  0.128214+0.000000j   
659   3.175889e-17+0.000000e+00j  0.293584+0.000000j  0.016591+0.000000j   
3571  2.478964e-17+0.000000e+00j  1.065357+0.000000j  0.025722+0.000000j   
1143  9.130419e-17+0.000000e+00j  2.164843+0.000000j  0.091968+0.000000j   
1933  4.065848e-17+0.000000e+00j  0.612413+0.000000j  0.035148+0.000000j   
...                          ...                 ...                 ...   
58    5.811580e-18+0.000000e+00j  2.069100+0.000000j  0.084539+0.000000j   
3061  5.378071e-18+0.000000e+00j  0.527031+0.000000j  0.010458+0.000000j   
5221  1.085001e-16+0.000000e+00j  2.886674+0.000000j  0.059678+0.000000j   
2857  1.081828e-16+0.000000e+00j  4.049874+0.000000j  0.073802+0.000000j   
3805  3.509027e-17+0.000000e+00j  0.475705+0.000000j  0.016950+0.000000j   

             chroma_mean  ...  zero_crossing_rate_max  zero_crossing_rate_std  \
2089  0.816283+0.000000j  ...      0.108398+0.000000j      0.013084+0.000000j   
659   0.814680+0.000000j  ...      0.123047+0.000000j      0.011751+0.000000j   
3571  0.502028+0.000000j  ...      0.192871+0.000000j      0.027765+0.000000j   
1143  0.728500+0.000000j  ...      0.146484+0.000000j      0.016720+0.000000j   
1933  0.725061+0.000000j  ...      0.057617+0.000000j      0.004934+0.000000j   
...                  ...  ...                     ...                     ...   
58    0.443376+0.000000j  ...      0.150391+0.000000j      0.029924+0.000000j   
3061  0.834701+0.000000j  ...      0.322754+0.000000j      0.083647+0.000000j   
5221  0.680066+0.000000j  ...      0.132812+0.000000j      0.009238+0.000000j   
2857  0.626196+0.000000j  ...      0.166504+0.000000j      0.020464+0.000000j   
3805  0.566989+0.000000j  ...      0.170898+0.000000j      0.010242+0.000000j   

         d_harmonic_mean      d_harmonic_min      d_harmonic_max  \
2089 -0.000014+0.000037j -8.531141+0.151785j  6.484267-1.267645j   
659  -0.000012-0.000001j -1.484308-1.059371j  1.684317-0.206739j   
3571 -0.000014-0.000002j -3.514175+2.156438j  3.183606+0.041102j   
1143  0.000016-0.000036j -4.787023-0.224968j  4.817464+0.779367j   
1933  0.000012+0.000025j -3.135149+0.105760j  2.861496+0.541014j   
...                  ...                 ...                 ...   
58   -0.000016-0.000014j -5.636914+1.217741j  5.940217-0.134271j   
3061  0.000003+0.000011j -1.041576+0.216147j  1.022812-0.110501j   
5221 -0.000008-0.000011j -6.276908+3.876319j  4.701975-2.102709j   
2857  0.000007-0.000012j -5.573461+1.512027j  6.964580-4.262686j   
3805  0.000011+0.000023j -3.183712-0.203141j  3.275067-0.633275j   

          d_harmonic_std   d_percussive_mean    d_percussive_min  \
2089  0.186793+0.000000j  0.000017-0.000036j -2.433637+0.305716j   
659   0.087042+0.000000j  0.000010+0.000002j -1.115007+0.708265j   
3571  0.109294+0.000000j  0.000014+0.000003j -1.075981+0.250809j   
1143  0.191232+0.000000j -0.000015+0.000036j -2.726436+0.542733j   
1933  0.145985+0.000000j -0.000012-0.000024j -0.983688-0.343581j   
...                  ...                 ...                 ...   
58    0.179283+0.000000j  0.000016+0.000014j -2.187891+0.000000j   
3061  0.053472+0.000000j -0.000005-0.000010j -2.115894-0.373354j   
5221  0.187434+0.000000j  0.000008+0.000011j -1.031698+0.117612j   
2857  0.209960+0.000000j -0.000007+0.000012j -1.335813-0.820756j   
3805  0.113439+0.000000j -0.000010-0.000023j -1.556014+0.435141j   

        d_percussive_max    d_percussive_std  
2089  2.304126-0.010769j  0.113944+0.000000j  
659   0.981916-0.219702j  0.069201+0.000000j  
3571  1.127088+0.114869j  0.062705+0.000000j  
1143  2.167995-0.721575j  0.097879+0.000000j  
1933  1.017351+0.311972j  0.066819+0.000000j  
...                  ...                 ...  
58    2.064788-0.655933j  0.049365+0.000000j  
3061  2.262675+0.186584j  0.064815+0.000000j  
5221  1.059790-0.000000j  0.091390+0.000000j  
2857  1.337479-0.104693j  0.078018+0.000000j  
3805  1.439874-0.172479j  0.060330+0.000000j  

[1297 rows x 65 columns]


In [None]:
importances = random_forest.feature_importances_
std = np.std([tree.feature_importances_ for tree in random_forest.estimators_],
             axis=0)
indices = np.argsort(importances)[::-1]

# Print the feature ranking
print("Feature ranking:")

for f in range(X_train.shape[1]):
    print("%d. feature %d (%f)" % (f + 1, indices[f], importances[indices[f]]))

# Plot the impurity-based feature importances of the forest
plt.figure()
plt.title("Feature importances")
plt.bar(range(X_train.shape[1]), importances[indices],
        color="r", yerr=std[indices], align="center")
plt.xticks(range(X_train.shape[1]), indices)
plt.xlim([-1, X_train.shape[1]])
plt.show()

In [73]:
# Create list with column names
audio_features_cols: List[str] = ["spectogram", "mel_spectogram",
                   "chromagram", "chromagram_cq", "chromagram_cens",
                   "mfcc", "rms", "spectral_centroid", 
                   "spectral_bandwidth", "spectral_contrast", "spectral_flatness",
                   "roll_off_frequency", "tonnetz", "zero_crossing_rate"]

# Create empty columns
for col in audio_features_cols:
    df_test[col] = np.nan

for idx, sound in enumerate(df_test.sound_path.values):
    feature_values_list = get_audio_features(sound)
#     print(feature_values_list)
    for i in range(len(feature_values_list)):
#         print(idx, str(audio_features_cols[i]), feature_values_list[i])
        df_test[audio_features_cols[i]].iloc[idx] = feature_values_list[i]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_test[col] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value, self.name)


In [None]:
# Create list with column names
audio_features_cols: List[str] = ["spectogram", "mel_spectogram",
                   "chromagram", "chromagram_cq", "chromagram_cens",
                   "mfcc", "rms", "spectral_centroid", 
                   "spectral_bandwidth", "spectral_contrast", "spectral_flatness",
                   "roll_off_frequency", "tonnetz", "zero_crossing_rate"]

# Create empty columns
for col in audio_features_cols:
    df_test[col] = np.nan

for idx, sound in enumerate(df_test.sound_path.values):
    feature_values_list = get_audio_features(sound)
#     print(feature_values_list)
    for i in range(len(feature_values_list)):
#         print(idx, str(audio_features_cols[i]), feature_values_list[i])
        df_test[audio_features_cols[i]].iloc[idx] = feature_values_list[i]

In [78]:
# Create list with column names
audio_features_cols: List[str] = ["spectogram", "mel_spectogram",
                   "chromagram", "chromagram_cq", "chromagram_cens",
                   "mfcc", "rms", "spectral_centroid", 
                   "spectral_bandwidth", "spectral_contrast", "spectral_flatness",
                   "roll_off_frequency", "tonnetz", "zero_crossing_rate"]

# Create empty columns
for col in audio_features_cols:
    df_test[col] = np.nan
df_test.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_test[col] = np.nan


Unnamed: 0,noise_db,machine_type,model_id,sound,sound_path,target,spectogram,mel_spectogram,chromagram,chromagram_cq,chromagram_cens,mfcc,rms,spectral_centroid,spectral_bandwidth,spectral_contrast,spectral_flatness,roll_off_frequency,tonnetz,zero_crossing_rate
0,-6,fan,0,00000059.wav,assets/sounds/-6_dB_fan/id_00/abnormal/0000005...,abnormal,,,,,,,,,,,,,,
1,-6,fan,0,00000071.wav,assets/sounds/-6_dB_fan/id_00/abnormal/0000007...,abnormal,,,,,,,,,,,,,,
2,-6,fan,0,00000065.wav,assets/sounds/-6_dB_fan/id_00/abnormal/0000006...,abnormal,,,,,,,,,,,,,,
3,-6,fan,0,00000273.wav,assets/sounds/-6_dB_fan/id_00/abnormal/0000027...,abnormal,,,,,,,,,,,,,,
4,-6,fan,0,00000267.wav,assets/sounds/-6_dB_fan/id_00/abnormal/0000026...,abnormal,,,,,,,,,,,,,,


In [79]:
audio_features_list_all_sounds.append(df_test.sound_path.values)

In [92]:
audio_features_list_all_sounds = []
for sound_path in df_test.sound_path.values:
    audio_features_list = get_audio_features(sound_path)
    audio_features_list.append(sound_path)
    audio_features_list_all_sounds.append(audio_features_list)
audio_features_list_all_sounds

[[0.08797987,
  0.03412381,
  0.51950985,
  0.3487876,
  0.24360700893714782,
  -4.0447407,
  0.007393384808553433,
  1180.264030852038,
  1579.0006992822268,
  24.305225494964127,
  0.00013324826,
  5154.828999601218,
  0.04250314218185303,
  0.049592834614269145,
  'assets/sounds/-6_dB_fan/id_00/abnormal/00000059.wav'],
 [0.100927845,
  0.039812844,
  0.75366986,
  0.38985088,
  0.2539249235509849,
  -3.4927447,
  0.007938459534366313,
  1036.5130803953173,
  1375.484191023654,
  21.28200050194736,
  0.00011218998,
  4208.891873459252,
  0.050503582874733076,
  0.03764976979408353,
  'assets/sounds/-6_dB_fan/id_00/abnormal/00000071.wav'],
 [0.08988863,
  0.03654531,
  0.53370386,
  0.35151482,
  0.24256478440084428,
  -3.9055371,
  0.007584500401487915,
  1271.4472060240762,
  1735.2508219906167,
  24.272329736071338,
  0.00014936722,
  5743.070144830336,
  0.0437188022909946,
  0.04943649398201856,
  'assets/sounds/-6_dB_fan/id_00/abnormal/00000065.wav'],
 [0.0902425,
  0.025801158,

In [123]:
audio_features_list_all_sounds = []
for sound_path in df_test.sound_path.values:
    audio_features_list = get_audio_features(sound_path)
#     audio_features_list.append(sound_path)
    audio_features_list_all_sounds.append(audio_features_list)

# Create list with column names
audio_features_cols: List[str] = ["spectogram", "mel_spectogram",
                   "chromagram", "chromagram_cq", "chromagram_cens",
                   "mfcc", "rms", "spectral_centroid", 
                   "spectral_bandwidth", "spectral_contrast", "spectral_flatness",
                   "roll_off_frequency", "tonnetz", "zero_crossing_rate"]
df_audio_features = pd.DataFrame(data=audio_features_list_all_sounds,columns=audio_features_cols)

df_all = pd.concat([df_test, df_audio_features], axis=1)

In [124]:
df_all.head()

Unnamed: 0,noise_db,machine_type,model_id,sound,sound_path,target,spectogram,mel_spectogram,chromagram,chromagram_cq,chromagram_cens,mfcc,rms,spectral_centroid,spectral_bandwidth,spectral_contrast,spectral_flatness,roll_off_frequency,tonnetz,zero_crossing_rate
0,-6,fan,0,00000059.wav,assets/sounds/-6_dB_fan/id_00/abnormal/0000005...,abnormal,0.08798,0.034124,0.51951,0.348788,0.243607,-4.044741,0.007393,1180.264031,1579.000699,24.305225,0.000133,5154.829,0.042503,0.049593
1,-6,fan,0,00000071.wav,assets/sounds/-6_dB_fan/id_00/abnormal/0000007...,abnormal,0.100928,0.039813,0.75367,0.389851,0.253925,-3.492745,0.007938,1036.51308,1375.484191,21.282001,0.000112,4208.891873,0.050504,0.03765
2,-6,fan,0,00000065.wav,assets/sounds/-6_dB_fan/id_00/abnormal/0000006...,abnormal,0.089889,0.036545,0.533704,0.351515,0.242565,-3.905537,0.007585,1271.447206,1735.250822,24.27233,0.000149,5743.070145,0.043719,0.049436
3,-6,fan,0,00000273.wav,assets/sounds/-6_dB_fan/id_00/abnormal/0000027...,abnormal,0.090242,0.025801,0.730573,0.508074,0.269959,-3.973263,0.006392,1050.479779,1347.219254,21.13812,0.000141,4106.271922,-0.022377,0.044327
4,-6,fan,0,00000267.wav,assets/sounds/-6_dB_fan/id_00/abnormal/0000026...,abnormal,0.087292,0.022661,0.583526,0.484912,0.270946,-4.673796,0.005993,1414.325444,1763.148493,23.825591,0.000246,5714.992047,-0.038684,0.066287


In [125]:
df_all.shape

(5550, 20)

In [95]:
df_audio_features.sound_path[1]

'assets/sounds/-6_dB_fan/id_00/abnormal/00000071.wav'

In [110]:
df_test.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 6 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   noise_db      5 non-null      int64 
 1   machine_type  5 non-null      object
 2   model_id      5 non-null      object
 3   sound         5 non-null      object
 4   sound_path    5 non-null      object
 5   target        5 non-null      object
dtypes: int64(1), object(5)
memory usage: 368.0+ bytes


In [109]:
df_audio_features.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 15 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   spectogram          5 non-null      float64
 1   mel_spectogram      5 non-null      float64
 2   chromagram          5 non-null      float64
 3   chromagram_cq       5 non-null      float64
 4   chromagram_cens     5 non-null      float64
 5   mfcc                5 non-null      float64
 6   rms                 5 non-null      float64
 7   spectral_centroid   5 non-null      float64
 8   spectral_bandwidth  5 non-null      float64
 9   spectral_contrast   5 non-null      float64
 10  spectral_flatness   5 non-null      float64
 11  roll_off_frequency  5 non-null      float64
 12  tonnetz             5 non-null      float64
 13  zero_crossing_rate  5 non-null      float64
 14  sound_path          5 non-null      object 
dtypes: float64(14), object(1)
memory usage: 728.0+ bytes


In [115]:
df_all = pd.concat([df_test, df_audio_features], axis=1)

In [116]:
df_all

Unnamed: 0,noise_db,machine_type,model_id,sound,sound_path,target,spectogram,mel_spectogram,chromagram,chromagram_cq,...,mfcc,rms,spectral_centroid,spectral_bandwidth,spectral_contrast,spectral_flatness,roll_off_frequency,tonnetz,zero_crossing_rate,sound_path.1
0,-6,fan,0,00000059.wav,assets/sounds/-6_dB_fan/id_00/abnormal/0000005...,abnormal,0.08798,0.034124,0.51951,0.348788,...,-4.044741,0.007393,1180.264031,1579.000699,24.305225,0.000133,5154.829,0.042503,0.049593,assets/sounds/-6_dB_fan/id_00/abnormal/0000005...
1,-6,fan,0,00000071.wav,assets/sounds/-6_dB_fan/id_00/abnormal/0000007...,abnormal,0.100928,0.039813,0.75367,0.389851,...,-3.492745,0.007938,1036.51308,1375.484191,21.282001,0.000112,4208.891873,0.050504,0.03765,assets/sounds/-6_dB_fan/id_00/abnormal/0000007...
2,-6,fan,0,00000065.wav,assets/sounds/-6_dB_fan/id_00/abnormal/0000006...,abnormal,0.089889,0.036545,0.533704,0.351515,...,-3.905537,0.007585,1271.447206,1735.250822,24.27233,0.000149,5743.070145,0.043719,0.049436,assets/sounds/-6_dB_fan/id_00/abnormal/0000006...
3,-6,fan,0,00000273.wav,assets/sounds/-6_dB_fan/id_00/abnormal/0000027...,abnormal,0.090242,0.025801,0.730573,0.508074,...,-3.973263,0.006392,1050.479779,1347.219254,21.13812,0.000141,4106.271922,-0.022377,0.044327,assets/sounds/-6_dB_fan/id_00/abnormal/0000027...
4,-6,fan,0,00000267.wav,assets/sounds/-6_dB_fan/id_00/abnormal/0000026...,abnormal,0.087292,0.022661,0.583526,0.484912,...,-4.673796,0.005993,1414.325444,1763.148493,23.825591,0.000246,5714.992047,-0.038684,0.066287,assets/sounds/-6_dB_fan/id_00/abnormal/0000026...


In [96]:
get_audio_features("assets/sounds/-6_dB_fan/id_00/abnormal/00000071.wav")

[0.100927845,
 0.039812844,
 0.75366986,
 0.38985088,
 0.2539249235509849,
 -3.4927447,
 0.007938459534366313,
 1036.5130803953173,
 1375.484191023654,
 21.28200050194736,
 0.00011218998,
 4208.891873459252,
 0.050503582874733076,
 0.03764976979408353]

In [74]:
df_test.head()

Unnamed: 0,noise_db,machine_type,model_id,sound,sound_path,target,spectogram,mel_spectogram,chromagram,chromagram_cq,chromagram_cens,mfcc,rms,spectral_centroid,spectral_bandwidth,spectral_contrast,spectral_flatness,roll_off_frequency,tonnetz,zero_crossing_rate
0,-6,fan,0,00000059.wav,assets/sounds/-6_dB_fan/id_00/abnormal/0000005...,abnormal,0.08798,0.034124,0.51951,0.348788,0.243607,-4.044741,0.007393,1180.264031,1579.000699,24.305225,0.000133,5154.829,0.042503,0.049593
1,-6,fan,0,00000071.wav,assets/sounds/-6_dB_fan/id_00/abnormal/0000007...,abnormal,0.100928,0.039813,0.75367,0.389851,0.253925,-3.492745,0.007938,1036.51308,1375.484191,21.282001,0.000112,4208.891873,0.050504,0.03765
2,-6,fan,0,00000065.wav,assets/sounds/-6_dB_fan/id_00/abnormal/0000006...,abnormal,0.089889,0.036545,0.533704,0.351515,0.242565,-3.905537,0.007585,1271.447206,1735.250822,24.27233,0.000149,5743.070145,0.043719,0.049436
3,-6,fan,0,00000273.wav,assets/sounds/-6_dB_fan/id_00/abnormal/0000027...,abnormal,0.090242,0.025801,0.730573,0.508074,0.269959,-3.973263,0.006392,1050.479779,1347.219254,21.13812,0.000141,4106.271922,-0.022377,0.044327
4,-6,fan,0,00000267.wav,assets/sounds/-6_dB_fan/id_00/abnormal/0000026...,abnormal,0.087292,0.022661,0.583526,0.484912,0.270946,-4.673796,0.005993,1414.325444,1763.148493,23.825591,0.000246,5714.992047,-0.038684,0.066287


In [39]:
get_audio_features(df_test.sound_path.iloc[2])

[0.08988863,
 0.03654531,
 0.53370386,
 0.35151482,
 0.24256478440084428,
 -3.9055371,
 0.007584500401487915,
 1271.4472060240762,
 1735.2508219906167,
 24.272329736071338,
 0.00014936722,
 5743.070144830336,
 0.0437188022909946,
 0.04943649398201856]

In [9]:
df.head()

Unnamed: 0,noise_db,machine_type,model_id,sound,sound_path,target,spectogram,mel_spectogram,chromagram,chromagram_cq,chromagram_cens,mfcc,rms,spectral_centroid,spectral_bandwidth,spectral_contrast,spectral_flatness,roll_off_frequency,tonnetz,zero_crossing_rate
0,-6,slider,0,00000059.wav,assets/sounds/-6_db_slider/id_00/abnormal/0000...,abnormal,0.103656,0.028626,0.508862,0.514267,0.264862,-5.768414,0.006748,1746.776097,1955.708303,24.889785,0.000356,6312.725788,-0.014067,0.077769
1,-6,slider,0,00000071.wav,assets/sounds/-6_db_slider/id_00/abnormal/0000...,abnormal,0.102113,0.025405,0.592927,0.509345,0.265619,-5.547102,0.00639,1762.671976,1924.965304,24.371674,0.000404,6249.724931,-0.02664,0.080915
2,-6,slider,0,00000065.wav,assets/sounds/-6_db_slider/id_00/abnormal/0000...,abnormal,0.103404,0.029101,0.615363,0.51919,0.26435,-5.850099,0.006801,1797.617686,2018.689316,23.995691,0.000357,6408.051429,-0.015122,0.074452
3,-6,slider,0,00000273.wav,assets/sounds/-6_db_slider/id_00/abnormal/0000...,abnormal,0.120839,0.029576,0.674066,0.608943,0.279228,-4.68586,0.006914,1693.40478,1843.512061,21.976415,0.000446,5747.141969,-0.006995,0.082713
4,-6,slider,0,00000267.wav,assets/sounds/-6_db_slider/id_00/abnormal/0000...,abnormal,0.114762,0.032621,0.658467,0.62444,0.276571,-4.298293,0.007232,1503.216934,1768.753095,21.985056,0.000396,5490.966797,-0.000406,0.065476


In [10]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 20 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   noise_db            100 non-null    int64  
 1   machine_type        100 non-null    object 
 2   model_id            100 non-null    object 
 3   sound               100 non-null    object 
 4   sound_path          100 non-null    object 
 5   target              100 non-null    object 
 6   spectogram          100 non-null    float64
 7   mel_spectogram      100 non-null    float64
 8   chromagram          100 non-null    float64
 9   chromagram_cq       100 non-null    float64
 10  chromagram_cens     100 non-null    float64
 11  mfcc                100 non-null    float64
 12  rms                 100 non-null    float64
 13  spectral_centroid   100 non-null    float64
 14  spectral_bandwidth  100 non-null    float64
 15  spectral_contrast   100 non-null    float64
 16  spectral_

In [11]:
df.isnull().any()

noise_db              False
machine_type          False
model_id              False
sound                 False
sound_path            False
target                False
spectogram            False
mel_spectogram        False
chromagram            False
chromagram_cq         False
chromagram_cens       False
mfcc                  False
rms                   False
spectral_centroid     False
spectral_bandwidth    False
spectral_contrast     False
spectral_flatness     False
roll_off_frequency    False
tonnetz               False
zero_crossing_rate    False
dtype: bool

In [None]:
# Get the relative path of the directory that contains all the sound files
folder_path = os.path.join("assets", "sounds", "6_dB_test")
sound_path_list = []

# Get all the filenames within the directory
for path, dirs, files in os.walk(folder_path):
    for filename in files:
        # Search only filenames with the extension ".wav"
        if filename.lower().endswith(".wav"):

            # Get the filename
            sound_list.append(filename)

            # Get the filepath
            sound_path = os.path.join(path, filename)
            sound_path_list.append(sound_path)

            # Split filepath to retrieve the information
            path_splitted = sound_path.split("/")

            # Get the background noise in db
            noise_db = int(path_splitted[2].split("_")[0])
            noise_db_list.append(noise_db)

            # Get the machine type
            machine_type = path_splitted[2].split("_")[2]
            machine_type_list.append(machine_type)

            # Get the model id
            model_id = path_splitted[3].split("_")[1]
            model_id_list.append(model_id)

            # Get target variable (normal aor anormal)
            target = path_splitted[4]
            target_list.append(target)
print()

In [126]:
df_all.to_csv("-6_dB_fan_csv", index=False)

In [128]:
df_all.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
noise_db,5550.0,-6.0,0.0,-6.0,-6.0,-6.0,-6.0,-6.0
spectogram,5550.0,0.095662,0.012191,0.051801,0.087735,0.095607,0.103148,0.135741
mel_spectogram,5550.0,0.023828,0.005022,0.00605,0.021274,0.024094,0.025627,0.054556
chromagram,5550.0,0.696104,0.117973,0.36668,0.591525,0.741141,0.806373,0.881168
chromagram_cq,5550.0,0.596425,0.081941,0.221833,0.54689,0.60683,0.654831,0.777581
chromagram_cens,5550.0,0.273416,0.007121,0.189166,0.271032,0.27482,0.27767,0.286754
mfcc,5550.0,-4.778437,0.882483,-8.020124,-5.553728,-4.409894,-4.136958,-2.680073
rms,5550.0,0.006165,0.000584,0.003287,0.005858,0.006197,0.006413,0.009236
spectral_centroid,5550.0,1348.625165,295.305422,614.600016,1133.415694,1334.580598,1489.537732,2938.424171
spectral_bandwidth,5550.0,1577.98597,185.43111,1036.590126,1424.885814,1589.900545,1710.724472,2323.834034


In [129]:
df_all.isnull().any()

noise_db              False
machine_type          False
model_id              False
sound                 False
sound_path            False
target                False
spectogram            False
mel_spectogram        False
chromagram            False
chromagram_cq         False
chromagram_cens       False
mfcc                  False
rms                   False
spectral_centroid     False
spectral_bandwidth    False
spectral_contrast     False
spectral_flatness     False
roll_off_frequency    False
tonnetz               False
zero_crossing_rate    False
dtype: bool

In [133]:
df_all.iloc[100]

noise_db                                                             -6
machine_type                                                        fan
model_id                                                             00
sound                                                      00000198.wav
sound_path            assets/sounds/-6_dB_fan/id_00/abnormal/0000019...
target                                                         abnormal
spectogram                                                     0.089376
mel_spectogram                                                 0.021159
chromagram                                                     0.791502
chromagram_cq                                                  0.468329
chromagram_cens                                                0.264135
mfcc                                                             -4.083
rms                                                            0.005409
spectral_centroid                                           1360