In [1]:
import numpy as np
import os
import os.path as path
from tqdm import tqdm_notebook as tqdm
import cv2
from sklearn.utils import shuffle
import tensorflow as tf
from tensorflow.keras.utils import to_categorical

In [2]:
# Root folder that holds the file lists and the dataset folders.
base_path = 'data'

# Dataset folder: images are read from here and the .npz archives are written here.
data_path = os.path.join(base_path, 'data_speech_commands_v0.02')

# Extra folder under the root; not referenced by the visible cells.
strange_path = os.path.join(base_path, 'strange')

In [3]:
# Words used as classes; the position in this list is the integer label.
class_list = [
    'zero', 'one', 'two', 'three', 'four',
    'five', 'six', 'seven', 'eight', 'nine',
    'bed', 'bird', 'tree', 'cat', 'house',
    'dog',
]

# Map each class name to its index in class_list.
class_dict = dict(zip(class_list, range(len(class_list))))

In [6]:
def make_x_data(filenames):
    """Load images as grayscale and return their flattened pixels as one array.

    Parameters
    ----------
    filenames : iterable of str
        Image paths relative to the module-level ``data_path``.

    Returns
    -------
    numpy.ndarray
        Result of ``np.asarray`` over one flattened float32 vector per file,
        in the order of ``filenames``.

    Raises
    ------
    OSError
        If ``cv2.imread`` cannot read a file (it returns ``None`` instead of
        raising, e.g. for a missing or undecodable image).
    """
    def load_and_flatten(filename):
        image = cv2.imread(filename, cv2.IMREAD_GRAYSCALE)
        # imread signals failure with None; fail here with the offending path
        # rather than with an AttributeError on ``None.flatten``.
        if image is None:
            raise OSError('cv2.imread could not read image: {}'.format(filename))
        return image.flatten().astype(np.float32)

    x_data = np.asarray([load_and_flatten(path.join(data_path, filename))
                         for filename in filenames])
    return x_data
    
def extract_class_from_filename(filename):
    """Return the name of the directory that directly contains ``filename``.

    Parameters
    ----------
    filename : str
        A path such as ``'<class_name>/<file>'``.

    Returns
    -------
    str
        The last component of the file's parent directory, or ``''`` when the
        path has no parent directory.
    """
    # Let os.path pick the last component: dirname() accepts every separator
    # the platform allows, while splitting on path.sep alone would miss the
    # alternate separator (e.g. '/' on Windows).
    return path.basename(path.dirname(filename))

# def make_y_data(filenames, y_dict):
#     return np.asarray([to_categorical(y_dict[extract_class_from_filename(filename)], len(class_list)) 
#                        for filename in filenames])

def make_y_data(filenames, y_dict):
    """Look up the label of every file and return the labels as an array.

    Parameters
    ----------
    filenames : iterable of str
        Paths whose parent directory name is the class name.
    y_dict : mapping
        Maps a class name to its label value.

    Returns
    -------
    numpy.ndarray
        ``np.asarray`` of the looked-up labels, in the order of ``filenames``.

    Raises
    ------
    KeyError
        If a file's class name is not a key of ``y_dict``.
    """
    labels = []
    for filename in filenames:
        class_name = extract_class_from_filename(filename)
        labels.append(y_dict[class_name])
    return np.asarray(labels)

def make_xy_data(filenames, y_dict):
    """Build the image array and the label array for one list of files.

    Prints a short progress message before each of the two steps.

    Parameters
    ----------
    filenames : iterable of str
        Image paths, passed unchanged to ``make_x_data`` and ``make_y_data``.
    y_dict : mapping
        Class-name-to-label mapping, passed to ``make_y_data``.

    Returns
    -------
    tuple
        ``(x_data, y_data)`` as returned by ``make_x_data`` and ``make_y_data``.
    """
    print('make x data...')
    features = make_x_data(filenames)
    print('make y data...')
    labels = make_y_data(filenames, y_dict)
    return features, labels

In [7]:
# Read the training file list: one image path per line.
train_text_filename = path.join(base_path, 'train_16words_png_ok.txt') 
with open(train_text_filename, 'r', encoding='utf-8') as f:
    train_filename_list = f.read().splitlines()

# Seed the shuffle so the saved sample order is the same on every re-run.
train_filename_list = shuffle(train_filename_list, random_state=42)
x_train, y_train = make_xy_data(train_filename_list, class_dict)
# (class name, index) pairs saved next to the data. NumPy stores a mixed
# str/int list as a string array, so the indices come back as strings.
lookup_table = np.asarray(list(class_dict.items()))

np.savez_compressed(path.join(data_path, 'train_data.npz'),
                     x_train=x_train, y_train=y_train, table=lookup_table)

# Free the arrays now that they are on disk.
del x_train, y_train

make x data...
make y data...


In [9]:
# Read the validation file list: one image path per line.
validation_text_filename = path.join(base_path, 'validation_16words_png_ok.txt') 
with open(validation_text_filename, 'r', encoding='utf-8') as f:
    val_filename_list = f.read().splitlines()

# Seed the shuffle so the saved sample order is the same on every re-run.
val_filename_list = shuffle(val_filename_list, random_state=42)
x_val, y_val = make_xy_data(val_filename_list, class_dict)
# (class name, index) pairs saved next to the data. NumPy stores a mixed
# str/int list as a string array, so the indices come back as strings.
lookup_table = np.asarray(list(class_dict.items()))

np.savez_compressed(path.join(data_path, 'validation_data.npz'), 
                     x_val=x_val, y_val=y_val, table=lookup_table)

# Free the arrays now that they are on disk.
del x_val, y_val

make x data...
make y data...


In [10]:
# Read the test file list: one image path per line.
test_text_filename = path.join(base_path, 'test_16words_png_ok.txt') 
with open(test_text_filename, 'r', encoding='utf-8') as f:
    test_filename_list = f.read().splitlines()

# Seed the shuffle so the saved sample order is the same on every re-run.
test_filename_list = shuffle(test_filename_list, random_state=42)
x_test, y_test = make_xy_data(test_filename_list, class_dict)
# (class name, index) pairs saved next to the data. NumPy stores a mixed
# str/int list as a string array, so the indices come back as strings.
lookup_table = np.asarray(list(class_dict.items()))

np.savez_compressed(path.join(data_path, 'test_data.npz'), 
                     x_test=x_test, y_test=y_test, table=lookup_table)

# Free the arrays now that they are on disk.
del x_test, y_test

make x data...
make y data...


In [11]:
# Sanity check: reopen the saved training archive and show both array shapes.
data = np.load(path.join(data_path, 'train_data.npz'))
tuple(data[key].shape for key in ('x_train', 'y_train'))

((36805, 25443), (36805,))

In [12]:
# Sanity check: reopen the saved validation archive and show both array shapes.
data = np.load(path.join(data_path, 'validation_data.npz'))
tuple(data[key].shape for key in ('x_val', 'y_val'))

((4293, 25443), (4293,))

In [13]:
# Sanity check: reopen the saved test archive and show both array shapes.
data = np.load(path.join(data_path, 'test_data.npz'))
tuple(data[key].shape for key in ('x_test', 'y_test'))

((4815, 25443), (4815,))