In [43]:
# import and init constants 

import librosa
import os
import json

# Root folder of the dataset; prepare_dataset treats each sub-folder as one label.
DATASET_PATH = "Data"
# Built with os.path.join rather than a "Data\S..." literal: "\S" is an invalid
# escape sequence (a warning on recent Python versions) and a hard-coded backslash
# only forms a valid path on Windows.
JSON_PATH = os.path.join(DATASET_PATH, "Speech_Commands_Data_Set.json")
# Number of samples kept per clip: 1 second at 22050 Hz, the rate librosa.load
# resamples to when no explicit `sr` is given.
SAMPLES_TO_CONSIDER = 22050

In [44]:
def prepare_dataset(dataset_path, json_path, n_mfcc=13, hop_length=512, n_fft=2048):
    """Extract MFCCs from every audio clip under ``dataset_path`` and dump them to JSON.

    Every directory found below ``dataset_path`` is treated as one label: its
    name is appended to ``"mappings"`` and its position in that list is the
    integer label given to the clips it contains. Clips shorter than
    ``SAMPLES_TO_CONSIDER`` samples are skipped; longer ones are truncated to
    exactly that many samples before feature extraction.

    Args:
        dataset_path: Root directory of the dataset.
        json_path: Destination file for the resulting JSON document.
        n_mfcc: Number of MFCC coefficients to extract per frame.
        hop_length: Hop length (in samples) passed to ``librosa.feature.mfcc``.
        n_fft: FFT window size (in samples) passed to ``librosa.feature.mfcc``.

    Returns:
        None. The result is written to ``json_path`` as a dict with the keys
        ``"mappings"``, ``"labels"``, ``"MFCCs"`` and ``"files"``; the last
        three are parallel lists with one entry per kept clip.
    """
    # data dictionary
    data = {
        "mappings": [],
        "labels": [],
        "MFCCs": [],
        "files": []
    }

    for dirpath, dirnames, filenames in os.walk(dataset_path):
        # os.walk yields directories in an unspecified order; sorting in place
        # makes the traversal (and therefore the label numbering) reproducible.
        dirnames.sort()

        # ensure we are not at the dataset root level (compare by value, not identity)
        if dirpath == dataset_path:
            continue

        # update mappings; basename works with any path separator, unlike split("\\")
        label = os.path.basename(dirpath)
        # The label is the index of this directory in "mappings", so labels and
        # mappings stay aligned regardless of how os.walk enumerates directories.
        label_index = len(data["mappings"])
        data["mappings"].append(label)
        print("\nProcessing: '{}'".format(label))

        for f in sorted(filenames):
            # get file path
            file_path = os.path.join(dirpath, f)

            # load audio file
            signal, sr = librosa.load(file_path)

            # skip clips shorter than the required length
            if len(signal) < SAMPLES_TO_CONSIDER:
                continue

            # enforce a fixed length so every clip yields the same number of frames
            signal = signal[:SAMPLES_TO_CONSIDER]

            # extract MFCCs, passing the actual sampling rate instead of relying
            # on the default of librosa.feature.mfcc matching the loader's rate
            MFCCs = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=n_mfcc,
                                         hop_length=hop_length, n_fft=n_fft)

            # store the data (MFCCs transposed so that frames come first)
            data["labels"].append(label_index)
            data["files"].append(file_path)
            data["MFCCs"].append(MFCCs.T.tolist())
            print(f"{file_path}: {label_index}")

    with open(json_path, "w") as fp:
        json.dump(data, fp, indent=4)

In [45]:
# Build the MFCC dataset from DATASET_PATH and write it to JSON_PATH,
# then print a completion banner once the whole walk has finished.
prepare_dataset(dataset_path=DATASET_PATH, json_path=JSON_PATH)
print("\n\n-------------------------------------\n\nProcess Complete!")


Processing: 'bed'
Data\bed\00176480_nohash_0.wav: 0
Data\bed\004ae714_nohash_0.wav: 0
Data\bed\00f0204f_nohash_0.wav: 0
Data\bed\00f0204f_nohash_1.wav: 0
Data\bed\012c8314_nohash_0.wav: 0
Data\bed\012c8314_nohash_1.wav: 0
Data\bed\0132a06d_nohash_0.wav: 0
Data\bed\0135f3f2_nohash_0.wav: 0
Data\bed\0137b3f4_nohash_0.wav: 0
Data\bed\01648c51_nohash_0.wav: 0
Data\bed\01648c51_nohash_1.wav: 0
Data\bed\016e2c6d_nohash_0.wav: 0
Data\bed\01b4757a_nohash_0.wav: 0
Data\bed\01bcfc0c_nohash_0.wav: 0
Data\bed\026290a7_nohash_0.wav: 0
Data\bed\02746d24_nohash_0.wav: 0
Data\bed\035de8fe_nohash_0.wav: 0
Data\bed\0362539c_nohash_0.wav: 0
Data\bed\037c445a_nohash_0.wav: 0
Data\bed\0397ecda_nohash_0.wav: 0
Data\bed\03c96658_nohash_0.wav: 0
Data\bed\042186b8_nohash_0.wav: 0
Data\bed\042ea76c_nohash_0.wav: 0
Data\bed\0447d7c1_nohash_0.wav: 0
Data\bed\0474c92a_nohash_1.wav: 0
Data\bed\05b2db80_nohash_0.wav: 0
Data\bed\05d18852_nohash_0.wav: 0
Data\bed\06076b6b_nohash_0.wav: 0
Data\bed\060cd039_nohash_0.wa