In [10]:
import os
import librosa
import math
import json
# Input folder and output file handed to save_mfcc by the script entry point.
DATASET_PATH = "voice_data"
JSON_PATH = "data_for_record.json"

# Sampling rate (Hz) requested from librosa when loading each clip.
SAMPLE_RATE = 22050
# Nominal clip length, in seconds.
DURATION = 30
# Nominal number of samples in one full-length clip.
SAMPLES_PER_TRACK = DURATION * SAMPLE_RATE

def save_mfcc(dataset_path, json_path, n_mfcc=13, n_fft=2048, hop_length=512, num_segments=5):
    """Extract MFCCs from every audio file under ``dataset_path`` and dump them to JSON.

    Each sub-directory of ``dataset_path`` is treated as one class: its folder
    name is appended to ``"mapping"`` and the files inside it are labelled with
    that entry's index. Every file is cut into ``num_segments`` consecutive
    segments; a segment is stored only when it yields the expected number of
    MFCC frames, so short or truncated clips contribute fewer segments.

    Args:
        dataset_path: Root directory to walk (string or path-like).
        json_path: Destination file for the resulting JSON document.
        n_mfcc: Number of MFCC coefficients requested per frame.
        n_fft: FFT window size passed to librosa.
        hop_length: Hop between successive frames, in samples.
        num_segments: Number of segments each clip is divided into.

    The JSON document has three keys: ``"mapping"`` (folder names),
    ``"labels"`` (one index into ``"mapping"`` per stored segment) and
    ``"mfcc"`` (one frame-major nested list per stored segment).
    """
    data = {
        "mapping": [],
        "labels": [],
        "mfcc": [],
    }

    num_samples_per_segment = int(SAMPLES_PER_TRACK / num_segments)
    # Round up so a partially filled last hop still counts (1.2 -> 2).
    expected_num_mfcc_vectors_per_segment = math.ceil(num_samples_per_segment / hop_length)

    # os.walk yields plain strings, so normalise a path-like argument first.
    root = os.fspath(dataset_path)

    for dirpath, _dirnames, filenames in os.walk(root):
        # Files sitting directly in the root have no class folder; compare by
        # value rather than identity, which only worked by accident.
        if dirpath == root:
            continue

        # The folder name is the semantic label, e.g. genre/blues -> "blues".
        dirpath_components = os.path.split(dirpath)
        print("")
        print(dirpath_components)
        semantic_label = dirpath_components[-1]
        data["mapping"].append(semantic_label)
        # Tie the numeric label to the mapping entry itself instead of the
        # walk counter, so the two can never drift apart.
        label = len(data["mapping"]) - 1
        print("\n Processing {}".format(semantic_label))

        for f in filenames:
            file_path = os.path.join(dirpath, f)
            signal, sr = librosa.load(file_path, sr=SAMPLE_RATE)

            for s in range(num_segments):
                start_sample = num_samples_per_segment * s
                finish_sample = start_sample + num_samples_per_segment

                # The audio must be passed as ``y=``: librosa >= 0.10 accepts
                # keyword arguments only here, and older releases accept the
                # keyword form as well.
                mfcc = librosa.feature.mfcc(
                    y=signal[start_sample:finish_sample],
                    sr=sr,
                    n_fft=n_fft,
                    hop_length=hop_length,
                    n_mfcc=n_mfcc,
                )
                # Transpose so the first axis indexes frames.
                mfcc = mfcc.T

                # Keep only full-length segments so every stored entry has
                # the same number of frames.
                if len(mfcc) == expected_num_mfcc_vectors_per_segment:
                    data["mfcc"].append(mfcc.tolist())
                    data["labels"].append(label)
                    print("{}, segment:{}".format(file_path, s + 1))

    with open(json_path, "w") as fp:
        json.dump(data, fp, indent=4)
            
            
if __name__ == "__main__":
    # Script entry point: process the default dataset, two segments per clip.
    save_mfcc(dataset_path=DATASET_PATH, json_path=JSON_PATH, num_segments=2)
                


('voice_data', 'original')

 Processing original
voice_data\original\blues.00000.wav, segment:1
voice_data\original\blues.00000.wav, segment:2
voice_data\original\blues.00001.wav, segment:1
voice_data\original\blues.00001.wav, segment:2
voice_data\original\blues.00002.wav, segment:1
voice_data\original\blues.00002.wav, segment:2
voice_data\original\blues.00003.wav, segment:1
voice_data\original\blues.00003.wav, segment:2
voice_data\original\blues.00004.wav, segment:1
voice_data\original\blues.00004.wav, segment:2
voice_data\original\blues.00005.wav, segment:1
voice_data\original\blues.00005.wav, segment:2
voice_data\original\blues.00006.wav, segment:1
voice_data\original\blues.00006.wav, segment:2
voice_data\original\blues.00007.wav, segment:1
voice_data\original\blues.00007.wav, segment:2
voice_data\original\blues.00008.wav, segment:1
voice_data\original\blues.00008.wav, segment:2
voice_data\original\blues.00009.wav, segment:1
voice_data\original\blues.00009.wav, segment:2
voice_data

voice_data\recoarded\disco.00009.wav, segment:1
voice_data\recoarded\disco.00009.wav, segment:2
voice_data\recoarded\ghor gari1.wav, segment:1
voice_data\recoarded\ghor gari1.wav, segment:2
voice_data\recoarded\hip-hop.00000.wav, segment:1
voice_data\recoarded\hip-hop.00000.wav, segment:2
voice_data\recoarded\hip-hop.00001.wav, segment:1
voice_data\recoarded\hip-hop.00002.wav, segment:1
voice_data\recoarded\hip-hop.00002.wav, segment:2
voice_data\recoarded\hip-hop.00003.wav, segment:1
voice_data\recoarded\hip-hop.00003.wav, segment:2
voice_data\recoarded\hip-hop.00004.wav, segment:1
voice_data\recoarded\hip-hop.00004.wav, segment:2
voice_data\recoarded\hip-hop.00005.wav, segment:1
voice_data\recoarded\hip-hop.00006.wav, segment:1
voice_data\recoarded\hip-hop.00006.wav, segment:2
voice_data\recoarded\hip-hop.00007.wav, segment:1
voice_data\recoarded\hip-hop.00008.wav, segment:1
voice_data\recoarded\hip-hop.00008.wav, segment:2
voice_data\recoarded\hip-hop.00009.wav, segment:1
voice_data