In [13]:
import os
import librosa

'''
librosa allows users to perform audio data preprocessing, feature extraction, visualization, analysis, and 
even advanced techniques like music genre classification and audio source separation.
'''

import matplotlib.pyplot as plt
import numpy as np
import math
import json



DATASET_PATH = "Data1/genres_original"
JSON_PATH = "data.json"

"""
For audio signals, the human hearing range is typically considered to be between 20 Hz and 20,000 Hz (20 kHz). 
Therefore, a sample rate of 22050 Hz (slightly above the upper limit of human hearing) allows for the accurate representation of audio signals
up to approximately 11 kHz, which covers the audible range comprehensively.

Why Not Higher?

While higher sample rates can capture more detail, they also require more storage space and computational resources.
Sampling at rates significantly higher than 22050 Hz doesn't add much value for most audio processing tasks because it's beyond what humans
can hear.
"""



SAMPLE_RATE = 22050
DURATION = 30 # 30 seconds
SAMPLES_PER_TRACK = SAMPLE_RATE * DURATION

# n_fft --> no. of fast fourier transform
# n_mfcc --> no. of mel frequency cepstral co-efficients.
"""
Human Auditory System Simulation: The human auditory system does not process sound linearly across all frequencies. 
                                  It is more sensitive to changes in lower frequencies compared to higher ones.

Computational Efficiency: Compared to other feature extraction techniques, such as spectrograms or wavelet transforms,
                          MFCC computation is efficient.
"""

def save_mfcc(dataset_path, json_path, n_mfcc=13, n_fft=2048, hop_length=512, no_of_segments=5):
 
    # dictionary to store data.
    data = {
        "mapping": [],
        "mfcc": [], # training data
        "labels": []
    }

    no_of_samples_per_segment = int(SAMPLES_PER_TRACK / no_of_segments)
    expected_num_mfcc_vectors_per_segment = math.ceil(no_of_samples_per_segment / hop_length)

    
    # loop through all the genres
    for i, (dirpath, dirnames, filenames) in enumerate(os.walk(dataset_path)):

        # make sure that we are not at the bottom level
        if dirpath is not dataset_path:
            # save the label
            # here always we have to take the last label
            dirpath_components = dirpath.split("/") # genres/blues => ["genre", "blues"]
            semantic_label = dirpath_components[-1]
            data["mapping"].append(semantic_label)
            print("\nProcessing {}".format(semantic_label))

            # process files for a specific genre
            for f in filenames:

                # load audio files
                file_path = os.path.join(dirpath, f)
                signal, sr = librosa.load(file_path, sr=SAMPLE_RATE)

                # process segments extracting mfcc and storing data
                for s in range(no_of_segments):
                    start_sample = no_of_samples_per_segment * s # s is current segment
                    finish_sample = start_sample + no_of_samples_per_segment

                    

                    mfcc = librosa.feature.mfcc(y=signal[start_sample:finish_sample],
                                                sr = sr,
                                                n_fft = n_fft,
                                                n_mfcc = n_mfcc,
                                                hop_length = hop_length)
                    mfcc = mfcc.T


                    # store mfcc for segment if it has expected length
                    if len(mfcc) == expected_num_mfcc_vectors_per_segment:
                        data["mfcc"].append(mfcc.tolist())
                        data["labels"].append(i-1)
                        print("{}, segment:{}".format(file_path, s))

    with open(json_path, "w") as fp:
        json.dump(data, fp, indent=4)


if __name__ == "__main__":
    save_mfcc(DATASET_PATH, JSON_PATH, no_of_segments=10)


Processing genres_original\.ipynb_checkpoints

Processing genres_original\blues
Data1/genres_original\blues\blues.00000.wav, segment:0
Data1/genres_original\blues\blues.00000.wav, segment:1
Data1/genres_original\blues\blues.00000.wav, segment:2
Data1/genres_original\blues\blues.00000.wav, segment:3
Data1/genres_original\blues\blues.00000.wav, segment:4
Data1/genres_original\blues\blues.00000.wav, segment:5
Data1/genres_original\blues\blues.00000.wav, segment:6
Data1/genres_original\blues\blues.00000.wav, segment:7
Data1/genres_original\blues\blues.00000.wav, segment:8
Data1/genres_original\blues\blues.00000.wav, segment:9
Data1/genres_original\blues\blues.00001.wav, segment:0
Data1/genres_original\blues\blues.00001.wav, segment:1
Data1/genres_original\blues\blues.00001.wav, segment:2
Data1/genres_original\blues\blues.00001.wav, segment:3
Data1/genres_original\blues\blues.00001.wav, segment:4
Data1/genres_original\blues\blues.00001.wav, segment:5
Data1/genres_original\blues\blues.00001