In [121]:
import glob
import json
import os

import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
from scipy.signal import butter, filtfilt, freqz

In [3]:
def butter_lowpass(cutoff, fs, order=5):
    """Design a digital Butterworth low-pass filter.

    Parameters
    ----------
    cutoff : float
        Cutoff frequency, in the same unit as ``fs``.
    fs : float
        Sampling rate of the signal the filter will be applied to.
    order : int, optional
        Filter order passed to ``scipy.signal.butter`` (default 5).

    Returns
    -------
    tuple
        The ``(b, a)`` coefficient pair returned by ``scipy.signal.butter``.
    """
    # butter() expects the cutoff as a fraction of the Nyquist frequency.
    nyquist = 0.5 * fs
    numerator, denominator = butter(
        order,
        cutoff / nyquist,
        btype='low',
        analog=False,
    )
    return numerator, denominator

def butter_lowpass_filter(data, cutoff, fs, order=5):
    """Low-pass filter ``data`` with a Butterworth filter.

    The coefficients come from ``butter_lowpass(cutoff, fs, order)`` and are
    applied with ``scipy.signal.filtfilt``.

    Parameters
    ----------
    data : array_like
        Signal to filter.
    cutoff : float
        Cutoff frequency, in the same unit as ``fs``.
    fs : float
        Sampling rate of ``data``.
    order : int, optional
        Order of the Butterworth design (default 5).

    Returns
    -------
    numpy.ndarray
        The filtered signal.
    """
    numerator, denominator = butter_lowpass(cutoff, fs, order)
    filtered = filtfilt(numerator, denominator, data)
    return np.array(filtered)

In [4]:
def plot_audio(data, fs=22050):
    """Plot an audio signal's amplitude against time in seconds.

    Parameters
    ----------
    data : sequence of float
        Audio samples to draw.
    fs : int or float, optional
        Sampling rate of ``data`` in Hz. Defaults to 22050, which is the
        rate ``librosa.load`` resamples to when no ``sr`` is given. Pass the
        real rate if the audio was loaded differently.

    Notes
    -----
    The sampling rate is an explicit argument so the function does not
    depend on a notebook-level ``fs`` variable having been defined.
    """
    n = len(data)           # total number of samples
    # Sample i is taken at i / fs seconds, so the last sample sits at
    # (n - 1) / fs rather than at the full clip duration n / fs.
    t = np.arange(n) / fs

    plt.figure(figsize=(10, 4))
    plt.plot(t, data, linewidth=.2)
    plt.xlabel("Time (s)")
    plt.ylabel("Amplitude")
    plt.show()

In [105]:
def get_MFCC_per_folder(folder_name, cutoff=400, order=6, top_db=20, n_mfcc=12):
    """Compute MFCC features for every ``.wav`` file directly inside a folder.

    Each file is loaded with ``librosa.load``, low-pass filtered with
    ``butter_lowpass_filter``, trimmed with ``librosa.effects.trim`` and then
    passed to ``librosa.feature.mfcc``.

    Parameters
    ----------
    folder_name : str
        Folder containing the ``.wav`` files. A trailing path separator is
        optional.
    cutoff : float, optional
        Low-pass cutoff frequency in Hz (default 400).
    order : int, optional
        Order of the Butterworth low-pass filter (default 6).
    top_db : float, optional
        Threshold forwarded to ``librosa.effects.trim`` (default 20).
    n_mfcc : int, optional
        Number of MFCCs requested from ``librosa.feature.mfcc`` (default 12).

    Returns
    -------
    list
        One entry per ``.wav`` file, in sorted file-name order. Each entry is
        the MFCC array of that file converted with ``.tolist()``, so the
        result can be serialised to JSON. The list is empty when the folder
        holds no ``.wav`` files.
    """
    # os.path.join makes the pattern correct with or without a trailing
    # separator on folder_name. glob returns matches in arbitrary order, so
    # sort them to keep the dataset order reproducible between runs.
    wav_file_list = sorted(glob.glob(os.path.join(folder_name, "*.wav")))

    mfcc_dataset = []
    for wav_file in wav_file_list:
        sound, sampling_rate = librosa.load(wav_file)

        # Low-pass filter the sound by removing all frequencies
        # above the cutoff value (default=400 Hz)
        low_passed_sound = butter_lowpass_filter(sound, cutoff, sampling_rate, order)

        # Trim the silent parts of the speech signal
        low_passed_trimmed_sound, _ = librosa.effects.trim(low_passed_sound, top_db=top_db)

        # Calculate the MFCC values of the low-pass filtered and trimmed speech signal
        mfccs = librosa.feature.mfcc(
            y=low_passed_trimmed_sound, sr=sampling_rate, n_mfcc=n_mfcc
        )
        mfcc_dataset.append(mfccs.tolist())
    return mfcc_dataset

In [140]:
def save_MFCC_as_json(MFCC_data, filename):
    """Write ``MFCC_data`` to ``filename`` as UTF-8 encoded, indented JSON.

    Parameters
    ----------
    MFCC_data : JSON-serialisable object
        Data to store.
    filename : str
        Path of the file to create or overwrite.

    Returns
    -------
    None
    """
    with open(filename, mode='w', encoding='utf-8') as handle:
        # ensure_ascii=False writes non-ASCII characters verbatim.
        json.dump(MFCC_data, handle, indent=4, ensure_ascii=False)

def load_MFCC_from_json(filename):
    """Read a UTF-8 encoded JSON file and return its decoded content.

    Parameters
    ----------
    filename : str
        Path of the JSON file to read.

    Returns
    -------
    object
        Whatever ``json.load`` decodes from the file.
    """
    with open(filename, mode='r', encoding='utf-8') as handle:
        return json.load(handle)

In [152]:
# IMPORTANT: if you downloaded the data in a different audio format (for example .mp3),
# convert every audio file you want to use into a .wav file first.

paths = ["Downsampled/dummy_clips/train_wav_clips/",
         "Downsampled/dummy_clips/test_wav_clips/",
         "Downsampled/dummy_clips/validate_wav_clips/"]

# Variables needed for the filter
order = 6
cutoff = 400            # desired cutoff frequency of the filter, Hz

# Pass the filter settings explicitly so that editing the variables above
# actually changes the filter instead of silently using the function defaults.
MFCC_train_set = get_MFCC_per_folder(paths[0], cutoff=cutoff, order=order)
MFCC_test_set = get_MFCC_per_folder(paths[1], cutoff=cutoff, order=order)
MFCC_validate_set = get_MFCC_per_folder(paths[2], cutoff=cutoff, order=order)

# save data in json files
save_MFCC_as_json(MFCC_train_set, 'MFCC_train.json')
save_MFCC_as_json(MFCC_test_set, 'MFCC_test.json')
save_MFCC_as_json(MFCC_validate_set, 'MFCC_validate.json')