In [1]:
import os
import argparse
import time
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import numpy as np
from tensorflow import keras
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

# Local Definitions
from data_generator import generate_mel_spectrogram
from models import CNN_Model, Dense_Model, AttentionRNN_Model
from helper_q_tool import generate_quantum_speech, plot_accuracy_loss, display_speech

In [2]:
# Expose only the CUDA device with index 1 to this process.
os.environ.update(CUDA_VISIBLE_DEVICES="1")

# Month/day + hour/minute stamp; used below to tag the saved model files of this run.
current_time_stamp = time.strftime("%m%d_%H%M", time.localtime())

# Locations, relative to the working directory.
training_audio_path = "dataset/"  # raw audio input
data_save_path = "data_quantum/"  # generated feature arrays are written here

In [3]:
# The ten spoken-command classes used as labels for the dataset and the models.
speech_commands = [
    "bed",
    "cat",
    "dog",
    "five",
    "happy",
    "left",
    "marvin",
    "sheila",
    "six",
    "stop",
]

In [4]:
# Run options. Each constant stands in for an argparse flag that used to be
# declared here (flag name and original help text noted per line).
epochs = 30              # --eps: epochs
batch_size = 16          # --bsize: batch size
sampling_rate = 16000    # --sr: sampling rate for input speech
model_type = 'UNET'      # --net: was an int, (0) Dense Model, (1) U-Net RNN Attention
mel_option = 'MEL'       # --mel: was an int, (0) load demo features, (1) extra Mel features
# --quanv: its help text was identical to --mel's, presumably a copy-paste.
# TODO confirm what this option is meant to select.
quantum_option = 'MEL'
partition_ratio = 100    # --port: (1/N) data ratio for encoding

In [5]:
def generate_training_data(labels, training_audio_path, sampling_rate, partition_ratio):
    """Build the Mel-spectrogram dataset, split it and persist the split to disk.

    The features and their string labels come from ``generate_mel_spectrogram``.
    Labels are integer-encoded, one-hot encoded with ``len(labels)`` columns, and
    the data is split 80/20 (stratified, shuffled, ``random_state=777``).

    The four resulting arrays are written under the module-level
    ``data_save_path`` as ``n_x_train_speech.npy``, ``n_x_test_speech.npy``,
    ``n_y_train_speech.npy`` and ``n_y_test_speech.npy``; the target directory
    is created if it does not exist yet.

    Args:
        labels: Class names; forwarded to ``generate_mel_spectrogram`` and used
            to size the one-hot encoding.
        training_audio_path: Forwarded to ``generate_mel_spectrogram``.
        sampling_rate: Forwarded to ``generate_mel_spectrogram``.
        partition_ratio: Forwarded to ``generate_mel_spectrogram``.

    Returns:
        Tuple ``(x_train, x_valid, y_train, y_valid)``.
    """
    all_wave, all_label = generate_mel_spectrogram(labels, training_audio_path, sampling_rate, partition_ratio)

    # String labels -> integer ids -> one-hot rows.
    y = LabelEncoder().fit_transform(all_label)
    y = keras.utils.to_categorical(y, num_classes=len(labels))

    x_train, x_valid, y_train, y_valid = train_test_split(
        np.array(all_wave), np.array(y), stratify=y, test_size=0.2, random_state=777, shuffle=True
    )
    # Each sample must be 3-D; the third dimension is not reported.
    height_feature, width_feature, _ = x_train[0].shape

    # np.save does not create missing parent directories, so make sure the
    # output folder exists before writing (a fresh checkout has none).
    save_dir = os.path.dirname(data_save_path)
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)

    np.save(data_save_path + "n_x_train_speech.npy", x_train)
    np.save(data_save_path + "n_x_test_speech.npy", x_valid)
    np.save(data_save_path + "n_y_train_speech.npy", y_train)
    np.save(data_save_path + "n_y_test_speech.npy", y_valid)
    print("=== Feature Shape:", height_feature, width_feature)

    return x_train, x_valid, y_train, y_valid

In [6]:
def generate_quantum_features(x_train, x_valid, kernel_radius):
    """Run the quantum pre-processing on both splits and persist the result.

    Delegates the encoding to ``generate_quantum_speech`` and writes its two
    outputs under the module-level ``data_save_path`` as ``q_train.npy`` and
    ``q_test.npy``; the target directory is created if it does not exist yet.

    Args:
        x_train: Training features, forwarded to ``generate_quantum_speech``.
        x_valid: Validation features, forwarded to ``generate_quantum_speech``.
        kernel_radius: Forwarded to ``generate_quantum_speech`` (also printed).

    Returns:
        Tuple ``(q_train, q_valid)`` as returned by ``generate_quantum_speech``.
    """
    print("Kernel Radius =", kernel_radius)
    q_train, q_valid = generate_quantum_speech(x_train, x_valid, kernel_radius)

    # np.save does not create missing parent directories. Creating the folder
    # here avoids losing the encoding result to a FileNotFoundError at the end.
    save_dir = os.path.dirname(data_save_path)
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)

    np.save(data_save_path + "q_train.npy", q_train)
    np.save(data_save_path + "q_test.npy", q_valid)

    return q_train, q_valid

In [7]:
# Build the Mel-spectrogram dataset, split it and write the four arrays to disk.
x_train, x_valid, y_train, y_valid = generate_training_data(
    labels=speech_commands,
    training_audio_path=training_audio_path,
    sampling_rate=sampling_rate,
    partition_ratio=partition_ratio,
)

100%|██████████| 10/10 [00:11<00:00,  1.10s/it]

=== Feature Shape: 60 126





In [8]:
# Reload the saved split from disk, in the order: train features, validation
# features, train labels, validation labels.
x_train, x_valid, y_train, y_valid = [
    np.load(data_save_path + array_file)
    for array_file in (
        "n_x_train_speech.npy",
        "n_x_test_speech.npy",
        "n_y_train_speech.npy",
        "n_y_test_speech.npy",
    )
]

In [None]:
# Quantum-encode both splits with kernel radius 2 and save the results.
# This is slow: the recorded progress bar shows roughly 7 s per item.
q_train, q_valid = generate_quantum_features(
    x_train=x_train,
    x_valid=x_valid,
    kernel_radius=2,
)

Kernel Radius = 2


Quantum pre-processing of train Speech::  15%|█▍        | 22/149 [02:28<14:33,  6.88s/it]

In [9]:
# Reload the quantum features written by generate_quantum_features. Both files
# only exist once that function has run to completion; until then this raises
# FileNotFoundError.
q_train, q_valid = (
    np.load(data_save_path + "q_train.npy"),
    np.load(data_save_path + "q_test.npy"),
)

FileNotFoundError: [Errno 2] No such file or directory: 'data_quantum/q_train.npy'

In [10]:
# Stops training once val_loss has not improved by at least 1e-4 for 10 epochs.
# NOTE(review): none of the fit() calls in this notebook pass early_stop.
early_stop = EarlyStopping(
    monitor='val_loss',
    mode='min',
    min_delta=0.0001,
    patience=10,
    verbose=1,
)

# Writes the model to disk whenever the monitored validation accuracy improves.
# NOTE(review): 'val_acc' is only logged if the models are compiled with a metric
# named 'acc' (otherwise the key is 'val_accuracy') - confirm against models.py.
checkpoint = ModelCheckpoint(
    filepath='checkpoints/best_demo.hdf5',
    monitor='val_acc',
    mode='max',
    save_best_only=True,
    verbose=1,
)

Pick a model to train (the comparison plot at the end needs the training histories of all four models below)

In [11]:
# Train the CNN on the Mel-spectrogram features.
# NOTE(review): x_train.shape[0] is the number of training samples, not a
# feature dimension - confirm CNN_Model's expected arguments in models.py
# (height/width would be x_train.shape[1] and x_train.shape[2]).
cnn_model = CNN_Model(x_train.shape[0], x_train.shape[1], speech_commands)
cnn_model.summary()

# Use a dedicated checkpoint callback per model: a ModelCheckpoint instance keeps
# its best score between fit() calls and always writes to the same file, so
# sharing one across models compares against another model's score and
# overwrites that model's checkpoint.
os.makedirs('checkpoints', exist_ok=True)
cnn_checkpoint = ModelCheckpoint(
    'checkpoints/' + current_time_stamp + '_cnn_best.hdf5',
    monitor='val_acc',
    verbose=1,
    save_best_only=True,
    mode='max',
)

cnn_history = cnn_model.fit(
    x=x_train,
    y=y_train,
    epochs=epochs,
    callbacks=[cnn_checkpoint],
    batch_size=batch_size,
    validation_data=(x_valid, y_valid),
)
cnn_model.save('checkpoints/' + current_time_stamp + '_cnn.keras')

  super().__init__(name, **kwargs)


In [None]:
# Train the CNN on the quantum-encoded features.
# NOTE(review): q_train.shape[0] is the number of training samples, not a
# feature dimension - confirm CNN_Model's expected arguments in models.py
# (height/width would be q_train.shape[1] and q_train.shape[2]).
quantum_cnn_model = CNN_Model(q_train.shape[0], q_train.shape[1], speech_commands)
quantum_cnn_model.summary()

# Use a dedicated checkpoint callback per model: a ModelCheckpoint instance keeps
# its best score between fit() calls and always writes to the same file, so
# sharing one across models compares against another model's score and
# overwrites that model's checkpoint.
os.makedirs('checkpoints', exist_ok=True)
quantum_cnn_checkpoint = ModelCheckpoint(
    'checkpoints/' + current_time_stamp + '_quantum_cnn_best.hdf5',
    monitor='val_acc',
    verbose=1,
    save_best_only=True,
    mode='max',
)

quantum_cnn_history = quantum_cnn_model.fit(
    x=q_train,
    y=y_train,
    epochs=epochs,
    callbacks=[quantum_cnn_checkpoint],
    batch_size=batch_size,
    validation_data=(q_valid, y_valid),
)
quantum_cnn_model.save('checkpoints/' + current_time_stamp + '_quantum_cnn.keras')

In [None]:
# Train the attention RNN on the Mel-spectrogram features. The model is built
# from the first training sample (x_train[0]).
attrnn_model = AttentionRNN_Model(x_train[0], speech_commands)
attrnn_model.summary()

# Use a dedicated checkpoint callback per model: a ModelCheckpoint instance keeps
# its best score between fit() calls and always writes to the same file, so
# sharing one across models compares against another model's score and
# overwrites that model's checkpoint.
os.makedirs('checkpoints', exist_ok=True)
attrnn_checkpoint = ModelCheckpoint(
    'checkpoints/' + current_time_stamp + '_rnn_best.hdf5',
    monitor='val_acc',
    verbose=1,
    save_best_only=True,
    mode='max',
)

attrnn_history = attrnn_model.fit(
    x=x_train,
    y=y_train,
    epochs=epochs,
    callbacks=[attrnn_checkpoint],
    batch_size=batch_size,
    validation_data=(x_valid, y_valid),
)
attrnn_model.save('checkpoints/' + current_time_stamp + '_rnn.keras')

In [None]:
# Train the attention RNN on the quantum-encoded features. The model is built
# from the first training sample (q_train[0]).
quantum_attrnn_model = AttentionRNN_Model(q_train[0], speech_commands)
quantum_attrnn_model.summary()

# Use a dedicated checkpoint callback per model: a ModelCheckpoint instance keeps
# its best score between fit() calls and always writes to the same file, so
# sharing one across models compares against another model's score and
# overwrites that model's checkpoint.
os.makedirs('checkpoints', exist_ok=True)
quantum_attrnn_checkpoint = ModelCheckpoint(
    'checkpoints/' + current_time_stamp + '_quantum_attrnn_best.hdf5',
    monitor='val_acc',
    verbose=1,
    save_best_only=True,
    mode='max',
)

quantum_attrnn_history = quantum_attrnn_model.fit(
    x=q_train,
    y=y_train,
    epochs=epochs,
    callbacks=[quantum_attrnn_checkpoint],
    batch_size=batch_size,
    validation_data=(q_valid, y_valid),
)
quantum_attrnn_model.save('checkpoints/' + current_time_stamp + '_quantum_attrnn.keras')

In [15]:
# Echo the batch size used by the fit() calls above.
print("=== Batch Size: ", batch_size, sep=" ")

=== Batch Size:  16


In [None]:
# Show the Mel features next to their quantum-encoded counterparts.
# NOTE(review): the meaning of the boolean flag is defined in helper_q_tool and
# is not visible here - TODO confirm what True selects.
display_speech(x_train, q_train, True)

In [None]:
# Same display helper with the boolean flag off.
# NOTE(review): the flag's meaning is defined in helper_q_tool and is not
# visible here - TODO confirm what False selects.
display_speech(x_train, q_train, False)

In [None]:
# Compare the four training runs; requires that all four training cells above
# have completed so that every *_history variable exists.
# NOTE(review): the meaning of the last argument ('lol') is not visible here -
# presumably a title or output tag; TODO confirm and give it a descriptive value.
plot_accuracy_loss(attrnn_history, quantum_attrnn_history, cnn_history, quantum_cnn_history, 'lol')