# 3D Conv Speaker Recognition

Prototype phase of the model implementation.

## Import Dependencies

In [2]:
import numpy as np

# Library for hdf5
import h5py as h5

# Visualization tools from IPython
from IPython.display import SVG
from IPython.display import Audio


# Useful Magics
% reload_ext autoreload
% autoreload 2

## Data preprocessing


In [None]:
from lib.input_feature import AudioDataset,Compose,CMVN,Feature_Cube,ToOutput
from keras.utils import to_categorical

In [None]:
# Arguments handed to AudioDataset (see lib.input_feature for their meaning).
files_path = './dataset_path'
audio_dir = '/'

# Transforms are passed to Compose in this order: CMVN, Feature_Cube, ToOutput.
dataset_origin = AudioDataset(
    files_path=files_path,
    audio_dir=audio_dir,
    transform=Compose([
        CMVN(),
        Feature_Cube(cube_shape=(20, 80, 40), augmentation=True),
        ToOutput(),
    ]),
)

In [None]:
# Number of samples in the dataset; reused below to size the HDF5 datasets
# and to compute the number of batches.
length_of_dataset = len(dataset_origin)
print(length_of_dataset)

In [3]:
# Basic configuration shared by the cells below.
# Path of the HDF5 file that holds the extracted features and labels.
final_file = 'dataset.h5'
# Width of the one-hot label rows and of the final softmax layer.
num_classes = 695

In [None]:
import os

# Shapes of the two resizable datasets. They start empty along axis 0 and may
# grow up to the matching max shape as batches are appended.
basic_features_dataset_shape = (0, 20, 80, 40, 1)
max_features_dataset_shape = (length_of_dataset+1024, 20, 80, 40, 1)
basic_labels_dataset_shape = (0, num_classes)
max_labels_dataset_shape = (length_of_dataset+1024, num_classes)

# Mode 'w-' refuses to touch an existing file, so ask the user before
# switching to 'w' (truncate). An empty answer counts as "yes".
h5_mode = 'w-'
if os.path.exists(final_file):
    override = input("File exists, override?(Y/N)").lower()
    override = override == 'y' or override == ''
    if override:
        h5_mode = 'w'
    else:
        h5_mode = None
        print('aborted for not overriding.')

h5_file = None
if h5_mode is not None:
    try:
        # The context manager closes the file on success and on failure.
        with h5.File(final_file, h5_mode) as h5_file:
            # Create Dataset for images
            features_dataset = h5_file.create_dataset(
                'features',
                basic_features_dataset_shape,
                dtype=np.float64,  # explicit width; the np.float alias was removed from NumPy
                chunks=True,
                maxshape=max_features_dataset_shape,
                compression=None)

            labels_dataset = h5_file.create_dataset(
                'labels',
                basic_labels_dataset_shape,
                dtype=np.int64,  # explicit width; the np.int alias was removed from NumPy
                chunks=True,
                maxshape=max_labels_dataset_shape,
                compression=None)
    except OSError as e:
        # IOError is an alias of OSError on Python 3, so one handler covers both.
        print("EXCEPTION: %s." % (e))
        print("File may be not accessiable!")
        print("Saving H5 File(%s) Failed in path %s" % (
            os.path.basename(final_file),
            os.path.dirname(os.path.abspath(final_file))))
        # Re-raise so later cells do not run against a missing or partial file.
        raise

In [None]:
# Saving dataset to h5 for further using.
def extracting_features(dataset_origin, index, batch_size):
    """Collect one batch of consecutive samples as NumPy arrays.

    Args:
        dataset_origin: Indexable dataset; ``dataset_origin[i]`` must return a
            ``(features, label)`` pair.
        index: Zero-based batch number.
        batch_size: Number of consecutive samples that make up one batch.

    Returns:
        ``(train_data, train_label)``: the stacked features with axis 1 moved
        to the last position, and the array of the corresponding labels.
    """
    # The offset must scale with batch_size; a fixed stride of 32 made
    # batches overlap whenever a different batch size was requested.
    cur = index * batch_size
    batch_features = [dataset_origin[idx] for idx in range(cur, cur + batch_size)]
    train_data, train_label = zip(*batch_features)
    train_data, train_label = np.array(train_data), np.array(train_label)
    # (batch, a, b, c, d) -> (batch, b, c, d, a)
    train_data = np.transpose(train_data, axes=(0, 2, 3, 4, 1))
    return train_data, train_label

def get_label_index(vocab_by_index, index_by_vocab, label):
    """Return the integer index of ``label``, registering it when unseen.

    Args:
        vocab_by_index: List of labels; position ``i`` holds the label with index ``i``.
        index_by_vocab: Dict mapping a label to its index.
        label: Label to look up.

    Returns:
        The existing index, or the newly assigned one (the previous length of
        ``vocab_by_index``). Both containers are updated in place when a new
        label is registered.
    """
    existing = index_by_vocab.get(label, -1)
    if existing > -1:
        return existing

    # Unseen label: the next free index is the current vocabulary size.
    new_index = len(vocab_by_index)
    vocab_by_index.append(label)
    index_by_vocab[label] = new_index
    return new_index
    
    
def save_to_h5(h5_file,features,labels,index,batch_size):
    """Write one batch into the 'features' and 'labels' datasets of ``h5_file``.

    Args:
        h5_file: Open, writable mapping (an h5py file) holding resizable
            'features' and 'labels' datasets.
        features: Batch of feature arrays; its length must equal ``batch_size``.
        labels: Batch of label rows; its length must equal ``batch_size``.
        index: Zero-based batch number; rows
            ``index*batch_size .. (index+1)*batch_size - 1`` are written.
        batch_size: Number of samples per batch.

    Raises:
        AssertionError: If ``features`` or ``labels`` does not hold exactly
            ``batch_size`` items.
    """
    assert len(features)==batch_size,"Length of features and batch_size is not equal, can't perform saving operation"
    assert len(labels)==batch_size, "Length of labels and batch_size is not equal, can't perform saving operation"
    features_set = h5_file['features']
    labels_set = h5_file['labels']
    cur = index*batch_size
    end = cur + batch_size

    # Resize only the sample axis. The per-sample dimensions come from the
    # datasets themselves, so this function does not rely on shape constants
    # defined in another notebook cell.
    features_set.resize(end, axis=0)
    labels_set.resize(end, axis=0)

    features_set[cur:end] = features
    labels_set[cur:end] = labels
    
def combine_process(dataset,h5_file,index,batch_size,vocab_by_index, index_by_vocab):
    """Extract batch ``index``, one-hot encode its labels and store it in ``h5_file``.

    Labels are mapped to integer indices through ``get_label_index`` (which
    extends ``vocab_by_index`` / ``index_by_vocab`` in place), expanded to
    one-hot rows of width ``num_classes`` and written together with the
    features by ``save_to_h5``.
    """
    batch_features, raw_labels = extracting_features(dataset, index, batch_size)

    label_ids = []
    for raw_label in raw_labels:
        label_ids.append(get_label_index(vocab_by_index, index_by_vocab, raw_label))

    one_hot_labels = to_categorical(label_ids, num_classes=num_classes)
    save_to_h5(h5_file, batch_features, one_hot_labels, index, batch_size)
    
    
def processing_dataset(dataset,h5_file,length,batch_size=32):
    """Convert the dataset batch by batch and append every batch to ``h5_file``.

    Args:
        dataset: Indexable dataset of ``(features, label)`` samples.
        h5_file: Open, writable HDF5 file with 'features' and 'labels' datasets.
        length: Number of samples in ``dataset``.
        batch_size: Number of samples written per step.

    Returns:
        ``(vocab_by_index, index_by_vocab)``: the list of distinct labels in
        order of first appearance, and the dict mapping each label to its index.

    Only full batches are written. Samples past the last full batch are
    skipped, and the number of skipped samples is printed.
    """
    # Distinct Label Set
    vocab_by_index = []
    index_by_vocab = {}
    num_of_batches = length // batch_size
    print('Num of Batches: %d'%num_of_batches)

    # Make the dropped remainder visible instead of tracking it in an unused flag.
    leftover = length - num_of_batches * batch_size
    if leftover > 0:
        print('WARNING: %d trailing samples do not fill a batch and are skipped.' % leftover)

    for index in range(num_of_batches):
        print("##############################")
        print("############ %d/%d #############"%(index,num_of_batches))
        combine_process(dataset,h5_file,index,batch_size,vocab_by_index, index_by_vocab)
        print("##############################")
    return (vocab_by_index,index_by_vocab)
    
    # Batch Reading and Saving to H5

In [None]:
# TEST CASES
# processing_dataset(dataset,length_of_dataset,h5_file)
# extracting_features(dataset, 100, 32)
# vocab_by_index, index_by_vocab = ['01001','01234'], {'01001':0,'01234':1}
# print(get_label_index(vocab_by_index, index_by_vocab, '0000'))
# print(vocab_by_index, index_by_vocab)

In [None]:
# Do processing to h5 file
# The context manager closes the file even if processing fails part-way, and
# never touches a stale handle left over from an earlier cell.
with h5.File(final_file, 'a') as h5_file:
    vocab = processing_dataset(dataset_origin, h5_file, length_of_dataset, 512)

In [None]:
# Sanity check: show the features dataset and the last stored label row.
with h5.File(final_file, 'r') as h5_file:
    print(h5_file['features'])
    stored_labels = h5_file['labels']
    # Index the last row from the dataset's own length rather than a fixed
    # sample number, so the check works for any dataset size.
    if stored_labels.shape[0]:
        print(stored_labels[stored_labels.shape[0] - 1])

## Define Model

In [4]:
# Model API Related
from keras.models import Model
from keras.optimizers import Adam,SGD

# Layers
from keras.layers import Input,Conv3D,PReLU,MaxPool3D,Flatten,Dense,Activation

# vis tool
from keras.utils.vis_utils import model_to_dot


Using TensorFlow backend.


In [9]:
# Define Model
def model(input_shape, num_class):
    """Build the 3D-convolutional classifier and its feature extractor.

    Args:
        input_shape: Shape of one input sample, without the batch axis.
        num_class: Number of units in the final softmax layer.

    Returns:
        ``(model, feature_model)``: the full network ending in the softmax
        layer, and a second network with the same input and layers that stops
        at the 128-unit "fc" layer.
    """
    inputs = Input(shape=input_shape, name="input-layer")

    # One row per stage:
    # (last-axis kernel of the first conv,
    #  middle-axis kernel of the second conv,
    #  middle-axis stride of the second conv,
    #  whether a max-pool over the last axis closes the stage)
    stage_specs = (
        (5, 9, 2, True),
        (4, 8, 2, True),
        (3, 7, 1, False),
        (3, 7, 1, False),
    )

    X = inputs
    for stage, (k_last, k_mid, s_mid, pooled) in enumerate(stage_specs, start=1):
        X = Conv3D(filters=16, kernel_size=(3, 1, k_last), strides=(1, 1, 1),
                   name="conv%d-1" % stage)(X)
        X = PReLU(name="activation%d-1" % stage)(X)
        X = Conv3D(filters=16, kernel_size=(3, k_mid, 1), strides=(1, s_mid, 1),
                   name="conv%d-2" % stage)(X)
        X = PReLU(name="activation%d-2" % stage)(X)
        if pooled:
            X = MaxPool3D(pool_size=(1, 1, 2), strides=(1, 1, 2), padding="valid",
                          name="pool-%d" % stage)(X)

    # Flatten, then the fully connected embedding layer
    X = Flatten()(X)
    embedding = Dense(units=128, name="fc", activation='relu')(X)
    feature_model = Model(inputs=inputs, outputs=embedding)

    # Final softmax over the classes
    probabilities = Dense(units=num_class, activation='softmax', name="ac_softmax")(embedding)
    classifier = Model(inputs=inputs, outputs=probabilities)

    return classifier, feature_model

In [10]:
# NOTE: this rebinds the name `model` from the builder function to the built
# network, so the cell that defines `model` must be re-run before this cell
# can be executed a second time.
model, feature_model = model(input_shape=(20, 80, 40, 1), num_class=num_classes)

opt = Adam(lr=0.01, beta_1=0.9, beta_2=0.999, decay=1e-6)
# opt = SGD(lr=0.01)
# opt = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=["accuracy"])

In [11]:
# Optional graph rendering of the network (disabled):
# SVG(model_to_dot(model).create(prog='dot', format='svg'))
# Print the layer table of the full classifier, then of the feature extractor.
model.summary()
print("#### Feature Model Below ####")
feature_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input-layer (InputLayer)     (None, 20, 80, 40, 1)     0         
_________________________________________________________________
conv1-1 (Conv3D)             (None, 18, 80, 36, 16)    256       
_________________________________________________________________
activation1-1 (PReLU)        (None, 18, 80, 36, 16)    829440    
_________________________________________________________________
conv1-2 (Conv3D)             (None, 16, 36, 36, 16)    6928      
_________________________________________________________________
activation1-2 (PReLU)        (None, 16, 36, 36, 16)    331776    
_________________________________________________________________
pool-1 (MaxPooling3D)        (None, 16, 36, 18, 16)    0         
_________________________________________________________________
conv2-1 (Conv3D)             (None, 14, 36, 15, 16)    3088      
__________

# Training


In [None]:
test_final_name='dataset_test.h5'
# Both files are opened in one `with` statement, so each file that was opened
# is closed again, even if the second open or the training itself fails.
with h5.File(final_file, 'r') as h5_file, h5.File(test_final_name, 'r') as h5_file_test:
    train_data, train_label = h5_file['features'], h5_file['labels']
    test_data, test_label = h5_file_test['features'], h5_file_test['labels']
    # The datasets are passed as HDF5 handles (not loaded into memory), so
    # both files must stay open for the duration of fit().
    history = model.fit(
        x=train_data,
        y=train_label,
        batch_size=64,
        epochs=1,
        shuffle="batch",
        validation_data=(test_data, test_label))

Train on 117248 samples, validate on 6144 samples
Epoch 1/1

In [None]:
# Manual cleanup: close the test HDF5 file if its handle is still open
# (for example after the training cell was interrupted).
if h5_file_test:
    h5_file_test.close()

In [15]:
import os

# Make sure the target directory exists before writing the model file.
os.makedirs("model", exist_ok=True)
model.save("model/model-0911.h5")
# model.save_weights("model-weights-" + time_str + ".h5")

In [None]:
# Model Evaluation on softmax
# Test a split of dataset.
test_final_name='dataset_test.h5'
try:
    # `[:]` reads both datasets fully into memory, so the file can be closed
    # before evaluation starts.
    with h5.File(test_final_name, 'r') as h5_file_test:
        test_data, test_label = h5_file_test['features'][:], h5_file_test['labels'][:]
    evaluation = model.evaluate(x=test_data, y=test_label, batch_size=64)
    metrics_name = model.metrics_names
    # Pair each metric name with its value.
    eval_results = list(zip(metrics_name, evaluation))
    print(eval_results)
finally:
    # Drop the references to the in-memory copies of the test split.
    test_data, test_label = None, None

# Enrollment

In [None]:
# Todo, enrollment and similarity comparison

# Evaluation

In [None]:
# Evaluate the model