In [1]:
import numpy as np
import os
import os.path as path
from tqdm import tqdm_notebook as tqdm
import librosa
from sklearn.utils import shuffle
import tensorflow as tf
# from tensorflow.keras.utils import to_categorical

In [2]:
# Root folder that holds the file lists read by the cells below.
base_path = 'data'
# Folder the wav files are loaded from and the .npz archives are written to.
data_path = os.path.join(base_path, 'data_speech_commands_v0.02')
# Additional folder under the data root (not referenced by the cells shown here).
strange_path = os.path.join(base_path, 'strange')

In [3]:
# Target vocabulary: the ten digits followed by six other words.
# A word's position in this list is its integer label.
class_list = [
    'zero', 'one', 'two', 'three', 'four',
    'five', 'six', 'seven', 'eight', 'nine',
    'bed', 'bird', 'cat', 'dog', 'house', 'tree',
]

# Lookup from word to its label index.
class_dict = dict(zip(class_list, range(len(class_list))))

In [4]:
def load_wavs(filenames, sr=16000):
    """Load wav files from ``data_path`` and stack them into one array.

    Parameters
    ----------
    filenames : iterable of str
        File paths relative to the module-level ``data_path``.
    sr : int, optional
        Sampling rate handed to ``librosa.load``. Defaults to 16000, the
        value that used to be hard-coded here.

    Returns
    -------
    numpy.ndarray
        One row per file, in the order of ``filenames``. For an empty
        ``filenames`` the result is an empty 1-D array.

    Raises
    ------
    ValueError
        If the loaded clips do not all contain the same number of samples.
    """
    filenames = list(filenames)
    x_data = [librosa.load(path.join(data_path, filename), sr=sr)[0]
              for filename in tqdm(filenames)]

    # Stacking clips of different lengths either fails with an opaque NumPy
    # error or (older NumPy) silently yields an object array, which breaks the
    # later reshape/save. Fail early and name the offending file instead.
    # If variable-length clips must be supported, pad/trim them first
    # (e.g. with librosa.util.fix_length).
    if x_data:
        expected_length = x_data[0].shape[0]
        for filename, wav in zip(filenames, x_data):
            if wav.shape[0] != expected_length:
                raise ValueError(
                    'All wav files must have the same number of samples: '
                    '{!r} has {} but {!r} has {}'.format(
                        filenames[0], expected_length, filename, wav.shape[0]))

    return np.asarray(x_data)
    
### If you run out of memory, use this preallocating version instead
#     wav, _ = librosa.load(filenames[0])
#     wavs = np.zeros( (len(filenames), wav.shape[0]) )
#     for i, filename in enumerate(filenames):
#         wavs[i] = librosa.load(filename)[0][:]
#     return wavs

def make_x_data(filenames):
    """Load the given wav files and append a trailing axis of size 1."""
    waveforms = load_wavs(filenames)
    # Same data, with one extra axis at the end.
    return waveforms.reshape(waveforms.shape + (1,))
    
def extract_class_from_filename(filename):
    """Return the name of the directory that directly contains ``filename``.

    For example ``'zero/abc.wav'`` gives ``'zero'``. A path without a
    directory part gives an empty string.

    ``os.path`` is used instead of splitting on ``'/'`` so that the platform's
    own path separators are honoured as well.
    """
    return os.path.basename(os.path.dirname(filename))

def make_y_data(filenames, y_dict):
    """Map each file to the ``y_dict`` value of its parent directory name.

    Returns the labels as a NumPy array in the order of ``filenames``.
    """
    labels = []
    for name in filenames:
        class_name = extract_class_from_filename(name)
        labels.append(y_dict[class_name])
    return np.asarray(labels)

def make_xy_data(filenames, y_dict):
    """Build the ``(x_data, y_data)`` pair for the given files.

    The inputs come from ``make_x_data`` and the labels from ``make_y_data``;
    both follow the order of ``filenames``.
    """
    # Inputs are built first, then labels, as two independent passes.
    inputs = make_x_data(filenames)
    labels = make_y_data(filenames, y_dict)
    return inputs, labels

In [5]:
# Read the list of training files (one path per line).
train_text_filename = path.join(base_path, 'wav_train_16words_ok.txt')
with open(train_text_filename, 'r', encoding='utf-8') as f:
    train_filename_list = f.read().splitlines()

# Fixed seed: re-running the notebook writes the samples in the same order.
train_filename_list = shuffle(train_filename_list, random_state=42)
x_train, y_train = make_xy_data(train_filename_list, class_dict)
# (word, index) pairs saved next to the data so labels can be decoded later.
lookup_table = np.asarray(list(class_dict.items()))

np.savez_compressed(path.join(data_path, 'wav_train_data.npz'),
                    x_train=x_train, y_train=y_train, table=lookup_table)

# Release the arrays before the next split is loaded.
del x_train, y_train

HBox(children=(IntProgress(value=0, max=36805), HTML(value='')))




In [6]:
# Read the list of validation files (one path per line).
validation_text_filename = path.join(base_path, 'wav_validation_16words_ok.txt')
with open(validation_text_filename, 'r', encoding='utf-8') as f:
    val_filename_list = f.read().splitlines()

# Fixed seed: re-running the notebook writes the samples in the same order.
val_filename_list = shuffle(val_filename_list, random_state=42)
x_val, y_val = make_xy_data(val_filename_list, class_dict)
# (word, index) pairs saved next to the data so labels can be decoded later.
lookup_table = np.asarray(list(class_dict.items()))

np.savez_compressed(path.join(data_path, 'wav_validation_data.npz'),
                    x_val=x_val, y_val=y_val, table=lookup_table)

# Release the arrays before the next split is loaded.
del x_val, y_val

HBox(children=(IntProgress(value=0, max=4293), HTML(value='')))




In [7]:
# Read the list of test files (one path per line).
test_text_filename = path.join(base_path, 'wav_test_16words_ok.txt')
with open(test_text_filename, 'r', encoding='utf-8') as f:
    test_filename_list = f.read().splitlines()

# Fixed seed: re-running the notebook writes the samples in the same order.
test_filename_list = shuffle(test_filename_list, random_state=42)
x_test, y_test = make_xy_data(test_filename_list, class_dict)
# (word, index) pairs saved next to the data so labels can be decoded later.
lookup_table = np.asarray(list(class_dict.items()))

np.savez_compressed(path.join(data_path, 'wav_test_data.npz'),
                    x_test=x_test, y_test=y_test, table=lookup_table)

# Release the arrays once they have been written to disk.
del x_test, y_test

HBox(children=(IntProgress(value=0, max=4815), HTML(value='')))




In [8]:
# Reload the saved training archive and display the shapes of its two arrays.
data = np.load(path.join(data_path, 'wav_train_data.npz'))
tuple(data[key].shape for key in ('x_train', 'y_train'))

((36805, 16000, 1), (36805,))

In [9]:
# Reload the saved validation archive and display the shapes of its two arrays.
data = np.load(path.join(data_path, 'wav_validation_data.npz'))
tuple(data[key].shape for key in ('x_val', 'y_val'))

((4293, 16000, 1), (4293,))

In [10]:
# Reload the saved test archive and display the shapes of its two arrays.
data = np.load(path.join(data_path, 'wav_test_data.npz'))
tuple(data[key].shape for key in ('x_test', 'y_test'))

((4815, 16000, 1), (4815,))