In [12]:
import os
import librosa
import math
import json

# Root folder holding the audio files, and the JSON file the features go to.
DATASET_PATH = "wavfiles"
JSON_PATH = "data2.json"

# Sampling rate (samples per second) used when loading each clip.
SAMPLE_RATE = 22050

# TODO: derive the duration per file instead of hard-coding it; a fixed
# length does not match our recordings.
DURATION = 30  # seconds

# Number of samples in one track of DURATION seconds at SAMPLE_RATE.
SAMPLES_PER_TRACK = DURATION * SAMPLE_RATE

def save_mfcc(dataset_path, json_path, n_mfcc=13, n_fft=2048, hop_length=512, num_segments=5):
    """Extract MFCCs for every audio file under ``dataset_path`` and dump them to JSON.

    Every directory below ``dataset_path`` is treated as one class: its name
    is appended to ``"mapping"`` and its index in that list is the label of
    every segment taken from the files it contains. Each file is loaded at
    ``SAMPLE_RATE``; the first ``SAMPLES_PER_TRACK`` samples are cut into
    ``num_segments`` equal slices, and a slice is stored only when its MFCC
    matrix has the expected number of frames.

    Args:
        dataset_path: Root directory of the dataset (``str`` or path-like).
        json_path: Destination file for the JSON output.
        n_mfcc: Number of MFCC coefficients per frame.
        n_fft: FFT window size passed to ``librosa.feature.mfcc``.
        hop_length: Hop length passed to ``librosa.feature.mfcc``.
        num_segments: Number of slices each track is divided into.

    Returns:
        None. The result is written to ``json_path`` as a JSON object with
        the keys ``"mapping"``, ``"mfcc"`` and ``"labels"``.
    """
    data = {
        "mapping": [],  # class names; list index == label
        "mfcc": [],  # training inputs
        "labels": [],  # target outputs
    }

    num_samples_per_segment = int(SAMPLES_PER_TRACK / num_segments)
    # Round up so a partially filled last hop still counts as a frame (1.2 -> 2).
    expected_num_mfcc_vectors_per_segment = math.ceil(num_samples_per_segment / hop_length)

    # os.walk yields the root as a plain string; normalise the argument the
    # same way so the root is recognised by value, also for path-like input.
    root = os.fspath(dataset_path)

    for dirpath, dirnames, filenames in os.walk(dataset_path):
        # Sorting in place fixes the traversal order, so label indices do not
        # depend on the order the file system happens to list entries in.
        dirnames.sort()
        filenames.sort()
        print('\n dirpath: {},\n dirnames: {},\n filenames: {}'.format(dirpath,dirnames,filenames))

        # The root level only contains the class folders.
        if dirpath == root:
            continue

        # basename honours the platform separator ("/" and "\\" on Windows).
        semantic_label = os.path.basename(dirpath)
        data["mapping"].append(semantic_label)
        label_index = len(data["mapping"]) - 1
        print("\n Processing {}".format(semantic_label))

        for f in filenames:
            file_path = os.path.join(dirpath, f)
            signal, sr = librosa.load(file_path, sr=SAMPLE_RATE)

            for s in range(num_segments):
                start_sample = num_samples_per_segment * s
                # Clip is shorter than the assumed duration: nothing left to
                # slice, so do not hand empty audio to the MFCC routine.
                if start_sample >= len(signal):
                    break
                finish_sample = start_sample + num_samples_per_segment

                mfcc = librosa.feature.mfcc(y=signal[start_sample:finish_sample],
                                            sr=sr,
                                            n_fft=n_fft,
                                            n_mfcc=n_mfcc,
                                            hop_length=hop_length
                                           )

                # Transpose so that len(mfcc) is the number of frames.
                mfcc = mfcc.T
                # Keep only full-length segments so all inputs share a shape.
                if len(mfcc) == expected_num_mfcc_vectors_per_segment:
                    data["mfcc"].append(mfcc.tolist())
                    data["labels"].append(label_index)
                    print("{}, segment:{}".format(file_path, s))

    with open(json_path, "w") as fp:
        json.dump(data, fp, indent=4)

if __name__ == "__main__":
    # Script entry point: extract features for the configured dataset,
    # cutting every track into 10 segments.
    save_mfcc(dataset_path=DATASET_PATH, json_path=JSON_PATH, num_segments=10)


 dirpath: wavfiles,
 dirnames: ['clacky', 'not_clacky'],
 filenames: []

 dirpath: wavfiles\clacky,
 dirnames: [],
 filenames: ['7V Prototype with lubed H1 Switches Typing Sounds ASMR.wav', 'Alexelcapo_s Keycult Commission.wav', 'Black Polycarbonate Think6.5 with lubed Cherry MX Browns Typing Sounds ASMR.wav', 'CYBERBOARD Prototype with lubed NovelKeys Silk Blacks Typing Sounds ASMR.wav', 'Duo-S with lubed Lilacs Typing Sounds ASMR.wav', 'E6.5 with lubed Gateron Yellow housings with NovelKeys Cream stems Typing Sounds ASMR.wav', 'FLX-VIRGO with lubed NovelKeys Creams Typing Sounds ASMR.wav', 'HEX.4B with Gateron Lion Typing Sounds Test ASMR.wav', 'Imaqtpie_s Purple Polycarbonate HBCP with Kailh Box Browns Typing Sounds ASMR.wav', 'KFE CE with lubed Gateron Black Inks V2 Typing Sounds ASMR.wav', 'KFE with lubed Cherry MX Blacks Typing Sounds ASMR.wav', 'LilyPichu_s Rose Gold Kyuu with lubed Nolives Typing Sounds ASMR.wav', 'Lyn Whale R2 with lubed Retooled Cherry MX Blacks Typing Sound

wavfiles\clacky\Imaqtpie_s Purple Polycarbonate HBCP with Kailh Box Browns Typing Sounds ASMR.wav, segment:0
wavfiles\clacky\Imaqtpie_s Purple Polycarbonate HBCP with Kailh Box Browns Typing Sounds ASMR.wav, segment:1
wavfiles\clacky\Imaqtpie_s Purple Polycarbonate HBCP with Kailh Box Browns Typing Sounds ASMR.wav, segment:2
wavfiles\clacky\Imaqtpie_s Purple Polycarbonate HBCP with Kailh Box Browns Typing Sounds ASMR.wav, segment:3
wavfiles\clacky\Imaqtpie_s Purple Polycarbonate HBCP with Kailh Box Browns Typing Sounds ASMR.wav, segment:4
wavfiles\clacky\Imaqtpie_s Purple Polycarbonate HBCP with Kailh Box Browns Typing Sounds ASMR.wav, segment:5
wavfiles\clacky\Imaqtpie_s Purple Polycarbonate HBCP with Kailh Box Browns Typing Sounds ASMR.wav, segment:6
wavfiles\clacky\Imaqtpie_s Purple Polycarbonate HBCP with Kailh Box Browns Typing Sounds ASMR.wav, segment:7
wavfiles\clacky\Imaqtpie_s Purple Polycarbonate HBCP with Kailh Box Browns Typing Sounds ASMR.wav, segment:8
wavfiles\clacky\Ima

wavfiles\clacky\Mode SixtyFive with lubed Cherry MX Blacks Typing Sounds ASMR.wav, segment:0
wavfiles\clacky\Mode SixtyFive with lubed Cherry MX Blacks Typing Sounds ASMR.wav, segment:1
wavfiles\clacky\Mode SixtyFive with lubed Cherry MX Blacks Typing Sounds ASMR.wav, segment:2
wavfiles\clacky\Mode SixtyFive with lubed Cherry MX Blacks Typing Sounds ASMR.wav, segment:3
wavfiles\clacky\Mode SixtyFive with lubed Cherry MX Blacks Typing Sounds ASMR.wav, segment:4
wavfiles\clacky\Mode SixtyFive with lubed Cherry MX Blacks Typing Sounds ASMR.wav, segment:5
wavfiles\clacky\Mode SixtyFive with lubed Cherry MX Blacks Typing Sounds ASMR.wav, segment:6
wavfiles\clacky\Mode SixtyFive with lubed Cherry MX Blacks Typing Sounds ASMR.wav, segment:7
wavfiles\clacky\Mode SixtyFive with lubed Cherry MX Blacks Typing Sounds ASMR.wav, segment:8
wavfiles\clacky\Mode SixtyFive with lubed Cherry MX Blacks Typing Sounds ASMR.wav, segment:9
wavfiles\clacky\monokei Kei with lubed Pinokos Typing Sounds ASMR.wav,

wavfiles\not_clacky\Protozoa Studios P.02 prototype with lubed C³ Tangerines Typing Sounds ASMR.wav, segment:0
wavfiles\not_clacky\Protozoa Studios P.02 prototype with lubed C³ Tangerines Typing Sounds ASMR.wav, segment:1
wavfiles\not_clacky\Protozoa Studios P.02 prototype with lubed C³ Tangerines Typing Sounds ASMR.wav, segment:2
wavfiles\not_clacky\Protozoa Studios P.02 prototype with lubed C³ Tangerines Typing Sounds ASMR.wav, segment:3
wavfiles\not_clacky\Protozoa Studios P.02 prototype with lubed C³ Tangerines Typing Sounds ASMR.wav, segment:4
wavfiles\not_clacky\Protozoa Studios P.02 prototype with lubed C³ Tangerines Typing Sounds ASMR.wav, segment:5
wavfiles\not_clacky\Protozoa Studios P.02 prototype with lubed C³ Tangerines Typing Sounds ASMR.wav, segment:6
wavfiles\not_clacky\Protozoa Studios P.02 prototype with lubed C³ Tangerines Typing Sounds ASMR.wav, segment:7
wavfiles\not_clacky\Protozoa Studios P.02 prototype with lubed C³ Tangerines Typing Sounds ASMR.wav, segment:8
w