In [None]:
#default_exp audio_classification

In [None]:
#export
import os
import json
import math
import tensorflow as tf
import numpy as np
import functools

2021-07-24 15:05:41.211523: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0


In [None]:
#export

import audioengine
from audioengine.utils.gpu import list_all_gpus, set_gpu_list_memory_limit
from audioengine.utils.schema import verify_audioengine_dataset, verify_audioengine_internal_audio_representation_schema
from audioengine.utils.misc import log_init, log_info, log_debug, log_error, pad_up_to
from audioengine.models import Simple1DConvNet
from audioengine.utils.wav_utils import get_max_samples_in_wav_from_directory

In [None]:
# Debug switch for this notebook: the next cell calls log_init() only when this is True.
DEBUG = True

In [None]:
# Debug-only setup: initialise audioengine logging.
if DEBUG:
    # Optional GPU memory cap, currently disabled.
    # Note: 2**13 + 2**11 evaluates to 10240, not 8192 (2**13 alone is 8192).
    #physical_gpus_list = list_all_gpus()
    #set_gpu_list_memory_limit(physical_gpus_list, limit=(2**13+2**11))
    log_init()

[INFO] Logging now set to file: /project/Development/ML/audio/logs/audioengine.log with level DEBUG


# Import Dataset

This notebook uses the single-word dataset, which is a subset of the Common Voice dataset.

In [None]:
# Location of the dataset on disk: JSON manifests plus the directory of audio clips.
DATASET_DIRECTORY = '/project/Datasets/audioengine_single_word'
DATASET_NAME = 'train.json'
VALIDATION_DATASET_NAME = 'val.json'
AUDIO_CLIPS_DIR_NAME = 'clips'

DATASET_JSON_FILEPATH = os.path.join(DATASET_DIRECTORY, DATASET_NAME)
VALIDATION_DATASET_JSON_FILEPATH = os.path.join(DATASET_DIRECTORY, VALIDATION_DATASET_NAME)
AUDIO_CLIPS_FULL_DIR_PATH = os.path.join(DATASET_DIRECTORY, AUDIO_CLIPS_DIR_NAME)

# Context managers guarantee the manifest files are closed even if json.load raises.
with open(DATASET_JSON_FILEPATH, 'r') as dataset_json_fp:
    dataset_json = json.load(dataset_json_fp)

with open(VALIDATION_DATASET_JSON_FILEPATH, 'r') as validation_dataset_json_fp:
    validation_dataset_json = json.load(validation_dataset_json_fp)

# Number of entries in the validation manifest.
print(len(validation_dataset_json['audio']))

# Only the training manifest is checked against the audioengine dataset schema here.
if(not verify_audioengine_dataset(dataset_json)):
    # log_error is used because log_critical is not imported in this notebook
    # (calling it would raise NameError instead of logging the problem).
    log_error('The dataset does not match the schema!')
else:
    log_debug('The dataset matches the schema')
    
def create_ir_json(partial_json: dict, audio_clip_directory: str, length_to_pad_to: int) -> dict:
    '''
    Load one audio clip and wrap it in an intermediate-representation (IR) record.

    The wav file is read and decoded with TensorFlow, axis 1 of the decoded
    audio is squeezed away, the result is handed to pad_up_to with the target
    shape (length_to_pad_to,), and finally a trailing axis of size 1 is appended.

    Args:
        partial_json: One dataset entry; the keys 'id', 'file_name' and
            'category_id' are read from it.
        audio_clip_directory: Directory that is joined with the entry's
            'file_name' to locate the wav file.
        length_to_pad_to: Target length passed to pad_up_to.

    Returns:
        A dict with the keys 'audio_data', 'length', 'id', 'file_name' and
        'category_id'. 'length' is the first dimension of the final tensor,
        i.e. it is measured after padding.
    '''
    record_id = partial_json['id']
    clip_name = partial_json['file_name']

    wav_bytes = tf.io.read_file(os.path.join(audio_clip_directory, clip_name))
    samples, _sample_rate = tf.audio.decode_wav(wav_bytes)
    # Squeezing axis 1 only succeeds when that axis has size 1, so the clips
    # are presumably single-channel -- TODO confirm against the dataset.
    samples = tf.squeeze(samples, axis=1)
    # The trailing 0 is presumably the fill value used for padding -- verify in pad_up_to.
    samples = pad_up_to(samples, (length_to_pad_to,), 0)
    samples = tf.expand_dims(samples, axis=-1)

    return {
        'audio_data': samples,
        'length': tf.shape(samples)[0],
        'id': record_id,
        'file_name': clip_name,
        'category_id': partial_json['category_id'],
    }

def convert_labels_list_to_tensor(label_list):
    '''
    Turn a list of labels into an int32 tensor with every value reduced by one.

    The labels are converted to a tensor, cast to float32, decremented by 1
    and cast to int32.

    Args:
        label_list: Labels to convert; anything tf.convert_to_tensor accepts.

    Returns:
        An int32 tensor holding label - 1 for every input label.
    '''
    labels_as_float = tf.cast(tf.convert_to_tensor(label_list), tf.float32)
    shifted_labels = labels_as_float - tf.constant(1, dtype=tf.float32)
    return tf.cast(shifted_labels, tf.int32)

def convert_classification_audioengine_dataset_to_IR_generator(dataset_json: dict = {}, audio_clip_directory: str = '',
                                                               batch_size: int = 256):
    '''
    Lazily yield fixed-size (features, labels) batches from an audioengine dataset.

    Holding the whole dataset in memory at once uses too much memory, so the
    clips are decoded batch by batch as the generator is consumed.

    Args:
        dataset_json: Dataset manifest. dataset_json['info']['task'] is checked
            and the entries in dataset_json['audio'] are iterated in order.
        audio_clip_directory: Directory containing the wav files referenced by
            each entry's 'file_name'.
        batch_size: Number of clips per yielded batch.

    Yields:
        Tuples (features, labels). features is the float32 stack, along a new
        leading axis, of the 'audio_data' tensors produced by create_ir_json;
        labels is the int32 tensor produced by convert_labels_list_to_tensor
        from the entries' 'category_id' values.

    Note:
        Only complete batches are yielded; the trailing
        len(dataset_json['audio']) % batch_size entries are skipped. The
        tf.data pipelines in this notebook declare a fixed batch dimension in
        their output_signature, which a shorter final batch would not match.
    '''
    #Really needs multiprocessing in the future
    if(not dataset_json['info']['task'] == 'classification'):
        # log_error is used because log_critical is not imported in this
        # notebook (calling it would raise NameError instead of logging).
        log_error('Dataset not using classification task')
    else:
        log_debug('Dataset using classification task')

    audio_dataset_section_json = dataset_json['audio']
    num_batches = len(audio_dataset_section_json) // batch_size

    # This call sits inside the generator body, so it runs again every time
    # the generator is restarted.
    max_length = get_max_samples_in_wav_from_directory(audio_clip_directory)
    for batch_index in range(num_batches):
        batch_start = batch_index * batch_size
        batch_features_list = []
        batch_labels_list = []
        for partial_json in audio_dataset_section_json[batch_start:batch_start + batch_size]:
            ir_record_json = create_ir_json(partial_json, audio_clip_directory, max_length)
            batch_labels_list.append(ir_record_json['category_id'])
            batch_features_list.append(ir_record_json['audio_data'])
        batch_features_tensor = tf.cast(tf.stack(batch_features_list, axis=0), tf.float32)
        batch_labels_tensor = tf.cast(convert_labels_list_to_tensor(batch_labels_list), tf.int32)
        yield (batch_features_tensor, batch_labels_tensor)

24509


In [None]:
#Construct the dataset
# The batch sizes are also the fixed leading dimension of the tensor shapes declared below.
BATCH_SIZE = 8
VALIDATION_BATCH_SIZE = 8
EPOCHS_COUNT = 256
STEPS_PER_EPOCH = BATCH_SIZE * 256

MODEL_SAVE_DIR = '/project/Development/ML/audio/models/final/'
MODEL_CHECKPOINT_DIR = '/project/Development/ML/audio/models/checkpoints/'
# exist_ok=True already makes these calls a no-op for an existing directory,
# so no separate os.path.exists() check is needed.
os.makedirs(MODEL_SAVE_DIR, exist_ok=True)
os.makedirs(MODEL_CHECKPOINT_DIR, exist_ok=True)

#Get more information about the dataset
num_classes = len(dataset_json['categories'])
max_length = get_max_samples_in_wav_from_directory(AUDIO_CLIPS_FULL_DIR_PATH)
# Shape of one training batch of features: (batch, samples, 1).
input_dimension = (BATCH_SIZE, max_length, 1)

# Shapes of one validation batch: features and labels.
validation_input_dimension = (VALIDATION_BATCH_SIZE, max_length, 1)
validation_label_shape = (VALIDATION_BATCH_SIZE, )


#setup model
model = Simple1DConvNet(num_classes=num_classes,
                        input_dimension=input_dimension,
                        batch_input_shape=input_dimension)

# setup loss and optimizer
# The learning rate starts at 1.0 and decays by a factor of 0.9 per 10000 steps.
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(initial_learning_rate=1.0,
                                                          decay_steps=10000,
                                                          decay_rate=0.9)
optimizer = tf.keras.optimizers.Adadelta(learning_rate=lr_schedule)
# from_logits=False: the loss treats model outputs as probabilities
# (assumes Simple1DConvNet ends in a softmax -- TODO confirm).
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)

def train_model(model, dataset_generator, validation_dataset: tf.data.Dataset,
                num_epochs: int = 0,
                log_step_count: int = 200, dataset_json: dict = {},
                audio_clip_directory: str = '', batch_size: int = 8,
                batch_input_dimension: tuple = (),
                label_tensor_shape: tuple = (),
                callbacks: list = [],
                steps_per_epoch: int = 0):
    '''
    Fit a compiled model on batches produced by a dataset generator function.

    The generator function is bound to its arguments, wrapped in a
    tf.data.Dataset that repeats indefinitely, and passed to model.fit.

    Args:
        model: Object exposing a Keras-style fit() method; it is expected to be
            compiled by the caller.
        dataset_generator: Callable accepting the keyword arguments
            dataset_json, audio_clip_directory and batch_size and returning an
            iterator of (features, labels) batches.
        validation_dataset: Dataset passed to fit() as validation_data.
        num_epochs: Number of epochs passed to fit().
        log_step_count: Currently unused; kept so existing callers keep working.
        dataset_json: Forwarded to dataset_generator.
        audio_clip_directory: Forwarded to dataset_generator.
        batch_size: Forwarded to dataset_generator.
        batch_input_dimension: Shape declared for each float32 feature batch.
        label_tensor_shape: Shape declared for each int32 label batch.
        callbacks: Callbacks passed to fit().
        steps_per_epoch: Batches drawn per epoch. Because the dataset repeats
            forever, this is what delimits an epoch.

    Returns:
        The value returned by model.fit (a History object for Keras models),
        so callers can inspect the recorded training metrics.
    '''
    generator_partial = functools.partial(dataset_generator, dataset_json=dataset_json,
                                          audio_clip_directory=audio_clip_directory, batch_size=batch_size)
    output_signature = (tf.TensorSpec(shape=batch_input_dimension, dtype=tf.float32),
                        tf.TensorSpec(shape=label_tensor_shape, dtype=tf.int32))
    dataset = tf.data.Dataset.from_generator(generator_partial, output_signature=output_signature).repeat()
    return model.fit(x=dataset, epochs=num_epochs, callbacks=callbacks, validation_data=validation_dataset,
                     steps_per_epoch=steps_per_epoch)

# Stop once val_loss has not improved for 10 epochs and restore the best weights seen.
early_stopping_callback = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=10,
    restore_best_weights=True,
)
# Save the full model (not only the weights) whenever val_loss reaches a new minimum.
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    MODEL_CHECKPOINT_DIR,
    monitor='val_loss',
    save_best_only=True,
    save_weights_only=False,
    mode='min',
    save_freq='epoch',
)

model.compile(optimizer=optimizer, loss=loss_fn, metrics=['accuracy'])

# The validation data uses the same generator function, bound to the
# validation manifest. Unlike the training dataset built in train_model, it is
# not repeated.
validation_data_generator_partial = functools.partial(
    convert_classification_audioengine_dataset_to_IR_generator,
    dataset_json=validation_dataset_json,
    audio_clip_directory=AUDIO_CLIPS_FULL_DIR_PATH,
    batch_size=VALIDATION_BATCH_SIZE,
)
validation_dataset = tf.data.Dataset.from_generator(
    validation_data_generator_partial,
    output_signature=(
        tf.TensorSpec(shape=validation_input_dimension, dtype=tf.float32),
        tf.TensorSpec(shape=validation_label_shape, dtype=tf.int32),
    ),
)

# Run the training, then print the layer summary of the model.
train_model(
    model,
    convert_classification_audioengine_dataset_to_IR_generator,
    validation_dataset,
    num_epochs=EPOCHS_COUNT,
    log_step_count=1,
    dataset_json=dataset_json,
    audio_clip_directory=AUDIO_CLIPS_FULL_DIR_PATH,
    batch_size=BATCH_SIZE,
    batch_input_dimension=input_dimension,
    label_tensor_shape=(BATCH_SIZE,),
    callbacks=[early_stopping_callback, model_checkpoint_callback],
    steps_per_epoch=STEPS_PER_EPOCH,
)
model.summary()

2021-07-24 15:05:44.044826: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcuda.so.1
2021-07-24 15:05:44.109054: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-07-24 15:05:44.109847: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 0 with properties: 
pciBusID: 0000:09:00.0 name: NVIDIA GeForce GTX 1080 Ti computeCapability: 6.1
coreClock: 1.582GHz coreCount: 28 deviceMemorySize: 10.91GiB deviceMemoryBandwidth: 451.17GiB/s
2021-07-24 15:05:44.109865: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0
2021-07-24 15:05:44.111801: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcublas.so.11
2021-07-24 15:05:44.111840: I tensorflow/stream_executor

Epoch 1/256


2021-07-24 15:05:46.274653: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudnn.so.8
2021-07-24 15:05:46.486620: I tensorflow/stream_executor/cuda/cuda_dnn.cc:359] Loaded cuDNN version 8101
2021-07-24 15:05:46.692895: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcublas.so.11
2021-07-24 15:05:46.860638: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcublasLt.so.11
2021-07-24 15:05:46.981311: W tensorflow/core/common_runtime/bfc_allocator.cc:271] Allocator (GPU_0_bfc) ran out of memory trying to allocate 3.31GiB with freed_by_count=0. The caller indicates that this is not a failure, but may mean that there could be performance gains if more memory were available.
2021-07-24 15:05:47.049556: W tensorflow/core/common_runtime/bfc_allocator.cc:271] Allocator (GPU_0_bfc) ran out of memory trying to allocate 3.31GiB with free



2021-07-24 15:16:47.057159: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 1767899136 exceeds 10% of free system memory.
2021-07-24 15:16:47.922256: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 1767899136 exceeds 10% of free system memory.
2021-07-24 15:16:50.624442: W tensorflow/python/util/util.cc:348] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.
2021-07-24 15:16:51.959800: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 1767899136 exceeds 10% of free system memory.
2021-07-24 15:16:52.504062: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 1767899136 exceeds 10% of free system memory.
2021-07-24 15:16:53.047116: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 1767899136 exceeds 10% of free system memory.


INFO:tensorflow:Assets written to: /project/Development/ML/audio/models/checkpoints/assets
Epoch 2/256
INFO:tensorflow:Assets written to: /project/Development/ML/audio/models/checkpoints/assets
Epoch 3/256
INFO:tensorflow:Assets written to: /project/Development/ML/audio/models/checkpoints/assets
Epoch 4/256
Epoch 5/256
INFO:tensorflow:Assets written to: /project/Development/ML/audio/models/checkpoints/assets
Epoch 6/256
Epoch 7/256
Epoch 8/256
Epoch 9/256
Epoch 10/256
INFO:tensorflow:Assets written to: /project/Development/ML/audio/models/checkpoints/assets
Epoch 11/256
Epoch 12/256

KeyboardInterrupt: 

In [None]:
# Persist the model, in whatever state it is currently in, to MODEL_SAVE_DIR.
# NOTE(review): if training was interrupted manually, these may not be the best
# weights seen -- confirm before relying on this export.
model.save(MODEL_SAVE_DIR)