In [3]:
### ADLFRAMEWORK
import adlframework
from adlframework.retrievals.BlobLocalCache import BlobLocalCache
from adlframework.datasource import DataSource
from adlframework.dataentity.audio_de import AudioRecordingDataEntity
from adlframework.experiment import SimpleExperiment
from adlframework.processors.general_processors import reshape, to_np_arr
from adlframework.filters.general_filters import min_array_shape
### KERAS
from keras.losses import KLD, MAE
from keras.optimizers import Adadelta, Adam
import keras.backend as K
from keras.models import Sequential
from keras.layers import *
from keras.callbacks import ModelCheckpoint, TensorBoard
from keras.utils.training_utils import multi_gpu_model
### UTILS
import pdb
import numpy as np
from functools import partial
import tensorflow as tf
from tqdm import tqdm_notebook as tqdm

In [4]:
# Create the TensorFlow session that Keras will use. `allow_growth` asks
# TensorFlow to claim GPU memory incrementally instead of reserving it all
# when the session starts.
config = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))
tfsession = tf.Session(config=config)
# Register the session with the Keras backend.
K.set_session(tfsession)

In [5]:
### Hyperparameters

# --- Input geometry ---
fs = 44100          # presumably the audio sampling rate in Hz -- TODO confirm
input_window = 3    # seconds
input_time = 3      # same value as input_window; not referenced in the visible cells
# One window of samples with a single trailing channel axis.
input_shape = (fs * input_window, 1)

# --- Network settings (used by the Conv1D / Dropout / Activation layers) ---
stride = 8
padding = 'same'
activation = 'relu'
drop_out_prob = 0.2

# --- Training settings ---
optimizer = 'adam'
loss = 'MAE'
epochs = 100
steps_per_epoch = 100
val_steps = 2       # passed to fit_generator as validation_steps

## DATA!

In [6]:
# Ordered list of callables handed to every DataSource via the 'controllers'
# argument. The first two come from adlframework's processors module, the
# last one from its filters module.
controllers = [
    to_np_arr,
    # Reshape to (n, 1): one column, length inferred.
    partial(reshape, shape=(-1, 1)),
    # Presumably rejects arrays smaller than one full input window -- TODO confirm
    # against adlframework.filters.general_filters.min_array_shape.
    partial(min_array_shape, min_shape=input_shape),
]

In [7]:
# Root of the local AVEC cache; each retrieval is given a wav directory and
# the matching labels directory for its split.
cache_path = 'local_cache/AVEC/'
train_retrieval = BlobLocalCache(cache_path+'wav/train', cache_path+'labels/train')
val_retrieval = BlobLocalCache(cache_path+'wav/dev', cache_path+'labels/dev')
# test_retrieval = BlobLocalCache(cache_path+'wav/test', cache_path+'labels/test')

# `epochs` is deliberately NOT re-assigned in this cell: it is defined once in
# the hyperparameters cell, and a second assignment here would silently
# override any value tuned there.

# Forwarded to DataSource as 'max_mem_percent'.
max_mem = .5

Retrieval not named, so won't be cached.
Retrieval not named, so won't be cached.


In [8]:
# Keyword arguments shared by every DataSource built in this notebook.
universal_args = dict(
    window_size=input_window,            # window length (input_window is in seconds)
    timestamp_column='Timestamps',
    sampling_method='linear_interpolation',
    ignore_cache=True,
    verbosity=3,
    max_mem_percent=max_mem,
    controllers=controllers,
    batch_size=100,
    workers=16,
)

In [9]:
## Build one DataSource per split; both use the same entity class and the
## shared keyword arguments in `universal_args`.
train_ds, val_ds = (
    DataSource(split_retrieval, AudioRecordingDataEntity, **universal_args)
    for split_retrieval in (train_retrieval, val_retrieval)
)

# The test split is currently disabled (its retrieval is commented out too).
# To enable it, construct it the same way as the other two splits:
# test_ds = DataSource(test_retrieval, AudioRecordingDataEntity,
#                      **universal_args)

In [10]:
### Callbacks
# TensorBoard logging goes to ./Graph; histograms are off (histogram_freq=0).
tensorboard_cb = TensorBoard(log_dir='./Graph',
                             histogram_freq=0,
                             write_graph=True,
                             write_images=True)

# Checkpointing is currently disabled; the callback that was in use is:
#   ModelCheckpoint('weights/weights.{epoch:02d}-{val_loss:.2f}.hdf5')
callbacks = [tensorboard_cb]

## Define Network

In [11]:
# The model is built under a CPU device scope. It is later handed to
# multi_gpu_model, and the Keras documentation recommends instantiating the
# template model on the CPU for that use case.
with tf.device("/cpu:0"):
    # One row per convolutional stage:
    #   (filters, kernel_size, dropout_before_conv, max_pool_after_activation)
    conv_stages = (
        (240, 128, False, True),
        (360, 64, False, True),
        (512, 32, True, False),
        (1024, 16, True, False),
        (512, 8, True, False),
    )

    model = Sequential()
    for stage_idx, (n_filters, kernel_len, dropout_first, pool_after) in enumerate(conv_stages):
        if dropout_first:
            model.add(Dropout(rate=drop_out_prob))

        conv_options = {'strides': stride, 'padding': padding}
        if stage_idx == 0:
            # Only the first layer of a Sequential model declares the input shape.
            conv_options['input_shape'] = input_shape
        model.add(Conv1D(n_filters, kernel_len, **conv_options))
        model.add(BatchNormalization())
        model.add(Activation(activation))

        if pool_after:
            model.add(MaxPooling1D(pool_size=8))

    # Fully connected head producing two outputs.
    model.add(Flatten())
    model.add(Dense(256, activation=activation))
    model.add(Dropout(rate=drop_out_prob))
    # NOTE(review): with activation == 'relu' this output layer can only emit
    # values >= 0 -- confirm the regression targets are non-negative.
    model.add(Dense(2, activation=activation, name='emotion_cnn_output'))

In [12]:
# Make the model data-parallel across 8 GPUs.
#
# Keep a handle on the original (template) model: the Keras documentation
# states that saving (`.save` / `.save_weights`) must be done through the
# template model rather than the object returned by multi_gpu_model.
# `model` is still rebound to the parallel model so the compile / fit cells
# keep working unchanged.
#
# NOTE: run this cell once per freshly built model; running it again would
# wrap the already-parallel model a second time.
template_model = model
model = multi_gpu_model(template_model, gpus=8)

In [13]:
# Configure `model` for training with the optimizer and loss names chosen
# in the hyperparameters cell.
model.compile(optimizer=optimizer, loss=loss)

## Train!

In [14]:
# Training-loop settings, gathered in one place before the call.
fit_options = dict(
    steps_per_epoch=steps_per_epoch,
    epochs=epochs,
    callbacks=callbacks,
    validation_data=val_ds,
    validation_steps=val_steps,
)
# Train from the training DataSource, validating on the dev DataSource.
model.fit_generator(train_ds, **fit_options)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100

KeyboardInterrupt: 