In [36]:
# Libraries
from os import listdir
from os.path import isdir, join
import librosa
import random
import numpy as np
import matplotlib.pyplot as plt
import python_speech_features as psf
import subprocess
import time

In [37]:
# Dataset
dataset_path = '../data_speech_commands_v0.02'
all_targets = [name for name in listdir(dataset_path) if isdir(join(dataset_path, name))]
all_targets.remove('_background_noise_')
print(all_targets)

['right', 'eight', 'cat', 'tree', 'backward', 'learn', 'bed', 'happy', 'go', 'dog', 'no', 'wow', 'follow', 'nine', 'left', 'stop', 'three', 'sheila', 'one', 'bird', 'zero', 'seven', 'up', 'visual', 'marvin', 'two', 'house', 'down', 'six', 'yes', 'on', 'five', 'forward', 'off', 'four']


In [38]:
# Settings
target_list = all_targets
feature_sets_file = 'all_targets_mfcc_sets.npz'
perc_keep_samples = 1 # 1.0 is keep all samples
val_ratio = 0.1
test_ratio = 0.1
sample_rate = 8000
num_mfcc = 16
len_mfcc = 16

In [39]:
# Create list of filenames along with ground truth vector (y)
filenames = []
y = []
for index, target in enumerate(target_list):
    filenames.append(listdir(join(dataset_path, target)))
    y.append(np.ones(len(filenames[index])) * index)

In [40]:
# Flatten filename and y vectors
filenames = [item for sublist in filenames for item in sublist]
y = [item for sublist in y for item in sublist]

In [41]:
# Associate filenames with true output and shuffle
filenames_y = list(zip(filenames, y))
random.shuffle(filenames_y)
filenames, y = zip(*filenames_y)

In [42]:
# Only keep the specified number of samples (shorter extraction/training)
print(f"len file names : {len(filenames)}")
filenames = filenames[:int(len(filenames) * perc_keep_samples)]
print(f"len file names : {len(filenames)}")

len file names : 105829
len file names : 105829


In [43]:
# Calculate validation and test set sizes
val_set_size = int(len(filenames) * val_ratio)
test_set_size = int(len(filenames) * test_ratio)

In [44]:
# Break dataset apart into train, validation, and test sets
filenames_val = filenames[:val_set_size]
filenames_test = filenames[val_set_size:(val_set_size + test_set_size)]
filenames_train = filenames[(val_set_size + test_set_size):]

# Break y apart into train, validation, and test sets
y_orig_val = y[:val_set_size]
y_orig_test = y[val_set_size:(val_set_size + test_set_size)]
y_orig_train = y[(val_set_size + test_set_size):]

In [45]:
# Function: Create MFCC from given path
def calc_mfcc(path):
    
    # Load wavefile
    signal, fs = librosa.load(path, sr=sample_rate)
    
    # Create MFCCs from sound clip
    mfccs = psf.base.mfcc(signal, 
                          samplerate=fs,
                          winlen=0.256,
                          winstep=0.050,
                          numcep=num_mfcc,
                          nfilt=26,
                          nfft=2048,
                          preemph=0.0,
                          ceplifter=0,
                          appendEnergy=False,
                          winfunc=np.hanning)
    return mfccs.transpose()

def calc_mfcc_from_C(path):
    
    #Load wavefile
    signal, fs = librosa.load(path, sr=sample_rate)
    signal=signal*(2**15)
    function=signal.astype(np.int16)
    #Stock the Wav data in the file time_series.dat
    np.savetxt('./time_series_1s.dat', function, fmt='%d', delimiter='\n') 
    
    # Call the C++ function to compute the MFCC from the time series
    subprocess.run(["./compute_mfcc"], shell=True)

    # Load the MFCC from the file mfcc_from_time_series.dat
    mfcc_from_time_series = np.genfromtxt('mfcc_from_time_series_1s.dat',delimiter='\n',dtype=None)
    mfcc_from_time_series = mfcc_from_time_series.reshape(16,16)
    
    return mfcc_from_time_series #.transpose()

# Function: Create MFCCs, keeping only ones of desired length
def extract_features(in_files, in_y):
    prob_cnt = 0
    out_x = []
    out_y = []
    
    start_time = time.time()
    total_files = len(in_files)
    processed_files = 0

    for index, filename in enumerate(in_files):
    
        # Create path from given filename and target item
        path = join(dataset_path, target_list[int(in_y[index])], 
                    filename)
        
        # Check to make sure we're reading a .wav file
        if not path.endswith('.wav'):
            continue

        # Create MFCCs
        # mfccs = calc_mfcc(path)
        mfccs = calc_mfcc_from_C(path) # <- Use this function for C++ implementation

        processed_files += 1
        progress = processed_files / total_files * 100
        elapsed_time = time.time() - start_time
        average_time_per_file = elapsed_time / processed_files
        remaining_files = total_files - processed_files
        estimated_remaining_time = average_time_per_file * remaining_files

        hours = int(estimated_remaining_time / 3600)
        minutes = int((estimated_remaining_time % 3600) / 60)

        print('Processing: {:.2f}%, Time remaining: {}h{}min'.format(progress, hours, minutes), end='\r')


        # Only keep MFCCs with given length
        if mfccs.shape[1] == len_mfcc:
            out_x.append(mfccs)
            out_y.append(in_y[index])
        else:
            # print('Dropped:', index, mfccs.shape)
            prob_cnt += 1
            
    return out_x, out_y, prob_cnt

In [46]:
# Create train, validation, and test sets
x_train, y_train, prob = extract_features(filenames_train, 
                                          y_orig_train)
print('Removed percentage:', prob / len(y_orig_train))
x_val, y_val, prob = extract_features(filenames_val, y_orig_val)
print('Removed percentage:', prob / len(y_orig_val))
x_test, y_test, prob = extract_features(filenames_test, y_orig_test)
print('Removed percentage:', prob / len(y_orig_test))

Removed percentage: 0.0me remaining: 0h0min
Removed percentage: 0.0me remaining: 0h0min
Removed percentage: 0.0me remaining: 0h0min


In [47]:
# Save features and truth vector (y) sets to disk
np.savez(feature_sets_file, 
         x_train=x_train, 
         y_train=y_train, 
         x_val=x_val, 
         y_val=y_val, 
         x_test=x_test, 
         y_test=y_test)

In [48]:
# TEST: Load features
feature_sets = np.load(feature_sets_file)
feature_sets.files

len(feature_sets['x_train'])
print(feature_sets['y_val'])


[29. 31. 18. ... 33. 29. 33.]
