In [1]:
########################################################################################
# library imports
########################################################################################

# disable warnings
import warnings
warnings.filterwarnings("ignore")

# generic libraries
from platform import python_version
import functools
import diskcache as dc
import time
import hashlib
import numpy as np

# tensor flow / keras related libraries
import tensorflow as tf
import tensorflow_io as tfio
from keras.utils import dataset_utils

# image processing related libraries
import librosa 
import imageio
import PIL

# from platform import python_version
# import tensorflow as tf 
# import tensorflow_io as tfio
# import os
# import numpy as np
# import datetime
# import tensorflow_hub as hub

# from keras.utils import dataset_utils
# import matplotlib.pyplot as plt

# from IPython import display
# import librosa
# import glob
# import imageio
# import PIL
# import tensorflow as tf
# import time

# print system information
print('Python Version        : ', python_version())
print('TensorFlow Version    : ', tf.__version__)
print('TensorFlow IO Version : ', tfio.__version__)
print('Librosa Version       : ', librosa.__version__)

Python Version        :  3.8.16
TensorFlow Version    :  2.10.1
TensorFlow IO Version :  0.27.0
Librosa Version       :  0.10.0


In [2]:
########################################################################################
# system constants
########################################################################################
AUDIO_DATA_DIRECTORY = "d:/data/bc"
CACHE_DIRETORY       = "d:/pipeline_cache"

MODEL_INPUT_IMAGE_WIDTH = 256
MODEL_INPUT_IMAGE_HEIGHT = 256
MODEL_INPUT_IMAGE_CHANNELS = 3


In [3]:
########################################################################################
# Create a DiskCache instance
# This cache will allow us store intermediate function results to speed up the 
# data processing pipeline
########################################################################################
cache = dc.Cache(CACHE_DIRETORY, cull_limit=0, size_limit=10**9) 


########################################################################################
# a helper function to create a hash key from a function signature and arguments
########################################################################################
def create_function_key(func, *args, **kwargs):
    partial_func = functools.partial(func, *args, **kwargs)
    func_name = partial_func.func.__name__
    func_module = partial_func.func.__module__
    args_repr = repr(partial_func.args)
    kwargs_repr = repr(sorted(partial_func.keywords.items()))

    key = f"{func_module}.{func_name}:{args_repr}:{kwargs_repr}"
    # Use hashlib to create a hash of the key for shorter and consistent length
    key_hash = hashlib.sha256(key.encode()).hexdigest()

    return key, key_hash, partial_func


########################################################################################
# Execute a function and cache the result
# If already executed, retrieve function output from the cache instead
########################################################################################
def execute_cached_function(func, *args, **kwargs):
    key_string,key,partial_func = create_function_key(func, *args, **kwargs)
    #print(f'key: {key_string} {key}')
    # Check if the result is in the cache
    if key in cache:
        result = cache[key]
        print(f"Result loaded from cache: {result}")
    else:
        # If not in cache, call the slow operation and store the result in cache
        result = partial_func()
        cache[key] = result
        print(f"Result calculated and stored in cache: {result}")
    return result

In [4]:
########################################################################################
# these helper functions load the audio data into a 'dataset' 
########################################################################################
def paths_and_labels_to_dataset(image_paths, labels, num_classes):
    path_ds = tf.data.Dataset.from_tensor_slices(image_paths)
    img_ds = path_ds.map(
        lambda path: tf.io.read_file(path), 
        num_parallel_calls=tf.data.AUTOTUNE
    )
    label_ds = dataset_utils.labels_to_dataset(
        labels, 
        'categorical', 
        num_classes)
    img_ds = tf.data.Dataset.zip((img_ds, label_ds))
    return img_ds

def create_dataset(audio_files):
    image_paths, labels, class_names = dataset_utils.index_directory(
            audio_files,
            labels="inferred",
            formats=('.ogg','.mp3','.wav','.flac'),
            class_names=None,
            shuffle=False,
            seed=42,
            follow_links=False)

    dataset = paths_and_labels_to_dataset(
        image_paths=image_paths,
        labels=labels,
        num_classes=len(class_names))
    
    return dataset, class_names

In [14]:
# create the dataset
complete_dataset, class_names = create_dataset(AUDIO_DATA_DIRECTORY)
print("class names: ", class_names)


Found 524 files belonging to 5 classes.
class names:  ['brant', 'jabwar', 'sheowl', 'spodov', 'wiltur']


In [12]:
len(train_dataset)

524

In [6]:
# testing the cache works
class ArrayProcessor:
    def sum_plus_five(self, arr, v2):
        array_sum = np.sum(arr)
        return array_sum + 5.0 + v2

# Usage example
processor = ArrayProcessor()

# Create a 2D NumPy array
arr = np.random.rand(1024, 1024)

In [7]:
%timeit -r1 -n1 processor.sum_plus_five(arr, 18)

1.63 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [8]:
%timeit -r1 -n1 execute_cached_function(processor.sum_plus_five, arr, 18)

Result calculated and stored in cache: 524025.7365223917
6.75 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [9]:
%timeit -r1 -n1 execute_cached_function(processor.sum_plus_five,arr, 18)

Result loaded from cache: 524025.7365223917
955 µs ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [10]:
def dataset_transforms(image, label):
    # reshape into standard 3 channels
    image = tf.io.parse_tensor(image, tf.float32)
    image = tf.expand_dims(image, -1)
    
    # most pre-trained model expect 3 color channels
    image = tf.repeat(image, MODEL_INPUT_IMAGE_CHANNELS, axis=2)
    
    image = tf.ensure_shape(image, [216, 128, MODEL_INPUT_IMAGE_CHANNELS])
    image = tf.image.resize(image, (MODEL_INPUT_IMAGE_WIDTH,MODEL_INPUT_IMAGE_HEIGHT), 
                            method=tf.image.ResizeMethod.LANCZOS5)
    
    # for some reason the melspecs seem rotated by 90 degrees. This corrects that.
    image = tf.image.rot90(image, k=1)
    
    # rescale to range [0,1]
    image = image - tf.reduce_min(image) 
    image = image / (tf.reduce_max(image)+0.00001)
    
    return image,label

In [11]:
train_dataset_b = ( 
                  train_dataset       
                  .shuffle(20000)
                  .map(dataset_transforms)
                  .batch(baseline_config.batch_size)
                  .cache()           
                )

validation_dataset_b = ( 
                  validation_dataset
                  .map(dataset_transforms)
                  .batch(baseline_config.batch_size)
                  .cache()
                )

test_dataset_b = ( 
                  test_dataset
                  .map(dataset_transforms)
                  .batch(baseline_config.batch_size)
                  .cache()
                )

train_dataset, class_names = create_dataset('TRAIN/')
test_dataset, _            = create_dataset('TEST/')
validation_dataset, _      = create_dataset('VALIDATION/')
print("class names: ", class_names)

NameError: name 'baseline_config' is not defined