## Load labels

In [1]:
from pathlib import Path

# Directory holding the WAV files, expressed relative to the notebook's
# working directory.
data_path = Path("..") / "data"
data_path

WindowsPath('../data')

### List all WAV files

In [2]:
# Collect every WAV file directly inside the data directory.
# The matches are sorted because glob order depends on the filesystem, and
# the seeded train/val/test split later in the notebook depends on the order
# of this list; sorting keeps the split reproducible across machines.
wav_files = sorted(data_path.glob("*.wav"))
wav_files[:5]

[WindowsPath('../data/03-01-01-01-01-01-01.wav'),
 WindowsPath('../data/03-01-01-01-01-01-02.wav'),
 WindowsPath('../data/03-01-01-01-01-01-03.wav'),
 WindowsPath('../data/03-01-01-01-01-01-04.wav'),
 WindowsPath('../data/03-01-01-01-01-01-05.wav')]

### Map file names to their classes.

The third hyphen-separated field of each file name encodes the emotion as 01 - 08, so we convert it to zero-based class labels 0 - 7.

In [3]:
def class_from_file_name(fname):
    """Return the zero-based class id encoded in a WAV file name.

    The file name is split on '-' and the third field is read as an
    integer; 1 is subtracted so that the code ``01`` becomes class ``0``.

    Args:
        fname: File name string such as ``'03-01-01-01-01-01-01.wav'``.

    Returns:
        The integer value of the third field minus one.

    Raises:
        IndexError: If the name has fewer than three '-'-separated fields.
        ValueError: If the third field is not an integer.
    """
    fields = fname.split('-')
    emotion_code = fields[2]
    return int(emotion_code) - 1

# Lookup table from file name to zero-based class id for every WAV file.
labels = {wav.name: class_from_file_name(wav.name) for wav in wav_files}

# Peek at the first few (file name, class id) pairs.
list(labels.items())[:5]

[('03-01-01-01-01-01-01.wav', 0),
 ('03-01-01-01-01-01-02.wav', 0),
 ('03-01-01-01-01-01-03.wav', 0),
 ('03-01-01-01-01-01-04.wav', 0),
 ('03-01-01-01-01-01-05.wav', 0)]

### Extract number of classes

In [23]:
# Number of distinct class ids that actually occur in the label table.
NUM_CLASSES = len({class_id for class_id in labels.values()})
NUM_CLASSES

8

## Load Data

### Fix PYTHONPATH

Add the directory containing the VGGish source files to `sys.path` so that its modules can be imported from this notebook.

In [4]:
import os
import sys
# The notebook's working directory, and the 'vgg' directory that sits under
# the parent of that working directory.
nb_dir = Path.cwd()
vgg_dir = nb_dir.parent.joinpath('vgg')
vgg_dir

WindowsPath('D:/Work/playground/vgg-emotion-classifier/vgg')

In [5]:
# Register the vgg directory on the import path, but only once, so that
# re-running this cell does not append duplicate entries.
if not any(entry == str(vgg_dir) for entry in sys.path):
    sys.path.append(str(vgg_dir))
sys.path

['',
 'C:\\Users\\Sam\\Anaconda3\\envs\\vggec\\python36.zip',
 'C:\\Users\\Sam\\Anaconda3\\envs\\vggec\\DLLs',
 'C:\\Users\\Sam\\Anaconda3\\envs\\vggec\\lib',
 'C:\\Users\\Sam\\Anaconda3\\envs\\vggec',
 'C:\\Users\\Sam\\Anaconda3\\envs\\vggec\\lib\\site-packages',
 'C:\\Users\\Sam\\Anaconda3\\envs\\vggec\\lib\\site-packages\\IPython\\extensions',
 'C:\\Users\\Sam\\.ipython',
 'D:\\Work\\playground\\vgg-emotion-classifier\\vgg']

### Read WAV files

Read in the wav files and convert them into the correct shape for the VGGish model (this is thankfully taken care of already by the example code provided)

In [6]:
from vggish_input import wavfile_to_examples 

In [7]:
# Pass each WAV path (as a string) to wavfile_to_examples and store the
# result under the file's name.
data = dict(
    (wav.name, wavfile_to_examples(str(wav)))
    for wav in wav_files
)

In [8]:
# Sanity check: display the shape of the examples array for one known file.
# NOTE(review): presumably (num_examples, frames, mel bands) as produced by
# vggish_input — confirm against that module.
data['03-01-01-01-01-01-01.wav'].shape

(3, 96, 64)

### Split dataset

Split into

* train: 70%
* val: 15%
* test: 15%

In [10]:
from sklearn.model_selection import train_test_split

In [30]:
# Fixed seed so that both splits below are repeatable.
seed = 987234871

# Step 1: hold out 15% of all files as the test set. The remainder
# (train + validation) gets its own names so that the second split reads from
# a variable that is defined in this cell.
x_trainval_keys, x_test_keys, y_trainval, y_test = train_test_split(
    list(labels.keys()),
    list(labels.values()),
    test_size=0.15,
    random_state=seed)

# Step 2: carve the validation set out of the remainder. The fraction is
# rescaled by (1 - 0.15) so that validation is 15% of the *full* dataset,
# leaving 70% for training.
x_train_keys, x_val_keys, y_train, y_val = train_test_split(
    x_trainval_keys,
    y_trainval,
    test_size=0.15 / (1 - 0.15),
    random_state=seed)

# Every file must land in exactly one of the three subsets.
assert len(x_train_keys) + len(x_val_keys) + len(x_test_keys) == len(labels)

print(f"Training size: {len(x_train_keys)}")
print(f"Validation size: {len(x_val_keys)}")
print(f"Test size: {len(x_test_keys)}")

Training size: 1008
Validation size: 216
Training size: 216


## Load pretrained model

In [14]:
# Builds a small classifier on top of VGGish, restores the pre-trained VGGish
# weights from `checkpoint_path`, and runs the training loop.
#
# NOTE(review): this cell expects `tf`, `slim`, `keras`, `vggish_slim`,
# `vggish_params`, `FLAGS.num_batches` and `_get_examples_batch` to already be
# in scope; no visible cell imports or defines them, so confirm they are
# provided before a Restart & Run All.
checkpoint_path = '../pretrained_models/vggish_model.ckpt'

with tf.Graph().as_default(), tf.Session() as sess:

    embeddings = vggish_slim.define_vggish_slim(training=True)

    # Define a shallow classification model and associated training ops on top
    # of VGGish.
    with tf.variable_scope('emo-classifier'):
        # Width shared by the recurrent layer and the fully connected layer.
        num_units = 100

        # NOTE(review): CuDNNLSTM expects a rank-3 input
        # (batch, time, features). `embeddings` is presumably rank-2
        # (num_examples, embedding_size) — confirm, and add a time axis if so.
        seq = keras.layers.CuDNNLSTM(num_units, return_sequences=True)(embeddings)
        fc = slim.fully_connected(seq, num_units)

        # Add a classifier layer at the end, consisting of parallel logistic
        # classifiers, one per class. This allows for multi-class tasks.
        logits = slim.fully_connected(
            fc, NUM_CLASSES, activation_fn=None, scope='logits')
        tf.sigmoid(logits, name='prediction')

        # Add training ops.
        with tf.variable_scope('train'):
            global_step_tensor = tf.Variable(
                0, name='global_step', trainable=False,
                collections=[tf.GraphKeys.GLOBAL_VARIABLES,
                             tf.GraphKeys.GLOBAL_STEP])

            # Labels are assumed to be fed as a batch of multi-hot vectors,
            # with a 1 in the position of each positive class label, and 0
            # elsewhere. The notebook's `labels` dict holds integer class ids,
            # so they must be one-hot encoded before being fed here.
            # Named `labels_tensor` (not `labels`) so the notebook-level
            # `labels` dict is not overwritten by this cell.
            labels_tensor = tf.placeholder(
                tf.float32, shape=(None, NUM_CLASSES), name='labels')

            # Cross-entropy label loss.
            xent = tf.nn.sigmoid_cross_entropy_with_logits(
                logits=logits, labels=labels_tensor, name='xent')
            loss_tensor = tf.reduce_mean(xent, name='loss_op')
            tf.summary.scalar('loss', loss_tensor)

            # We use the same optimizer and hyperparameters as used to train VGGish.
            optimizer = tf.train.AdamOptimizer(
                learning_rate=vggish_params.LEARNING_RATE,
                epsilon=vggish_params.ADAM_EPSILON)
            train_op = optimizer.minimize(
                loss_tensor, global_step=global_step_tensor, name='train_op')

    # Initialize all variables in the model, and then load the pre-trained
    # VGGish checkpoint.
    sess.run(tf.global_variables_initializer())
    vggish_slim.load_vggish_slim_checkpoint(sess, checkpoint_path)

    # The input tensor is created inside vggish_slim, so it is looked up by
    # name. The label, step, loss and train ops are referenced directly
    # instead of by name, so the lookups cannot drift out of sync with the
    # variable scope names used above.
    features_tensor = sess.graph.get_tensor_by_name(
        vggish_params.INPUT_TENSOR_NAME)

    # The training loop.
    for _ in range(FLAGS.num_batches):
        batch_features, batch_labels = _get_examples_batch()
        num_steps, loss_value, _ = sess.run(
            [global_step_tensor, loss_tensor, train_op],
            feed_dict={features_tensor: batch_features,
                       labels_tensor: batch_labels})
        print('Step %d: loss %g' % (num_steps, loss_value))

INFO:tensorflow:Restoring parameters from ../pretrained_models/vggish_model.ckpt
