In [0]:
"""
Created on Sun Mar 22 02:09 2020
@author: Héctor & Pati
@subject: DLAS
"""

In [0]:
# Needed libraries imported
import os
import ast
import argparse
import librosa

from pathlib import Path
from scipy.io import savemat
from librosa.feature import mfcc

In [0]:
# MFCCs extraction from raw audio
def compute_mfccs(audio_signal, sample_rate, n_mfcc=40, n_fft=2048, hop_length=None,
                  power=2.0, fmin=20, fmax=20000, n_mels=256):

    '''
        Computes the MFCCs of a raw audio signal with librosa.feature.mfcc.

        :params:
        audio_signal, numpy array, raw audio samples (e.g. as returned by librosa.load)
        sample_rate, number, sampling rate of audio_signal in Hz
        n_mfcc, int, number of MFCCs to compute (40 by default, normally 13 are enough)
        n_fft, int, length of the FFT window
        hop_length, int or None, number of samples between successive frames;
            None means n_fft/4 (512 for the default n_fft)
        power, float, exponent for the magnitude melspectrogram (2.0 for power)
        fmin, number, lowest frequency (Hz) of the Mel filter bank
        fmax, number or None, highest frequency (Hz) of the Mel filter bank;
            None or any value above sample_rate/2 is replaced by sample_rate/2
        n_mels, int, number of Mel bands to generate

        :returns:
        mfcc_features, numpy array, exactly what librosa.feature.mfcc returns
    '''

    if hop_length is None:
        hop_length = int(n_fft / 4)

    # Nothing above the Nyquist frequency exists in the sampled signal. Asking
    # for Mel bands up there (e.g. fmax=20000 with 22050 Hz audio) only yields
    # empty filters that distort the cepstral coefficients, so fmax is clamped.
    nyquist = sample_rate / 2
    if fmax is None or fmax > nyquist:
        fmax = nyquist

    audio_features = mfcc(y=audio_signal, sr=sample_rate, n_mfcc=n_mfcc, n_fft=n_fft,
                          hop_length=hop_length, power=power, fmin=fmin, fmax=fmax,
                          n_mels=n_mels)

    return audio_features

In [0]:
# Program to extract features from audio
def _extract_split(input_dir, output_dir, label):
    """Compute and save the MFCCs of every audio file found directly in input_dir.

    :params:
    input_dir, Path, folder that holds the raw audio files of one split
    output_dir, str, existing folder where one .mat file per audio is written
    label, str, split name used in the progress messages ("Train" or "Test")
    """
    # sorted() makes the processing order deterministic across runs.
    for audio in sorted(input_dir.iterdir()):
        # Sub-directories cannot be decoded as audio; skip them.
        if not audio.is_file():
            continue

        print("[INFO] " + label + " audio path:")
        print(audio) # Example: spoken-language-identification/train/train/de_f_0809fd0642232f8c85b0b3d545dc2b5a.fragment16.noise6.flac

        # Audio identifier = file name without its extension. Path.stem drops
        # only the last suffix, so both "x.fragment1.flac" and
        # "x.fragment16.noise6.flac" lose ".flac" and nothing else.
        ids = audio.stem
        print(ids) # Example: de_f_0809fd0642232f8c85b0b3d545dc2b5a.fragment16.noise6

        # Load the audio file together with the sample rate librosa used for it.
        signal, sample_rate = librosa.load(audio)
        # Call to the function that computes the MFCCs
        mfccs = compute_mfccs(signal, sample_rate)

        outputFile = os.path.join(output_dir, ids + ".mat")
        print(outputFile) # Example: mfcc/train/de_f_0809fd0642232f8c85b0b3d545dc2b5a.fragment16.noise6.mat
        savemat(outputFile, {"mfccs": mfccs}) # We save data as a MATLAB file


def process(args):
    """Extract MFCC features from the train and test audio folders.

    Reads every audio file in <inputPath>/train/train and <inputPath>/test/test
    and writes one MATLAB file per audio, holding its MFCCs under the key
    "mfccs", into mfcc/train and mfcc/test (relative to the current working
    directory). The output folders are created when missing.

    :params:
    args, dict, parsed arguments; args["inputPath"] is the dataset root folder

    :raises:
    FileNotFoundError (from Path.iterdir) when an input folder does not exist
    """
    dataset_dir = "mfcc"
    input_root = Path(args["inputPath"])
    splits = (("train", "Train"), ("test", "Test"))

    # Folders for MFCCs created up front, before any audio is processed
    output_dirs = {}
    for split, _ in splits:
        output_dirs[split] = os.path.join(dataset_dir, split)
        os.makedirs(output_dirs[split], exist_ok=True)

    # The dataset nests each split twice: <root>/train/train and <root>/test/test
    for split, label in splits:
        _extract_split(input_root / split / split, output_dirs[split], label)

In [0]:
# Construct and parse input arguments
ap = argparse.ArgumentParser()
ap.add_argument('-path', '--inputPath', default = 'spoken-language-identification/', type = str,
                help = 'path to the input directory, where input files are stored.')
# parse_known_args is used instead of parse_args because a notebook kernel is
# started with its own command-line arguments (e.g. "-f <connection file>"),
# which parse_args would reject by exiting. Unknown arguments are ignored.
known_args, _unknown_args = ap.parse_known_args()
args = vars(known_args)

# Call to function that performs MFCC extraction from the audios path specified
process(args)