In [9]:
import os
import tensorflow as tf
import tensorflow_hub as hub
from scipy.signal import resample_poly
import matplotlib.pyplot as plt
from IPython.display import display
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import Model


# Load YAMNet model
yamnet_model_handle = 'https://tfhub.dev/google/yamnet/1'
yamnet_model = hub.load(yamnet_model_handle)

# Constants
main_directory = '/Users/ankush/Downloads/deakin-units/data/b3'
class_names = sorted(os.listdir(main_directory))



In [10]:

# Function to get file paths and labels
def get_file_paths_and_labels(main_directory, class_names):
    filenames = []
    labels = []
    for label, class_name in enumerate(class_names):
        class_directory = os.path.join(main_directory, class_name)
        if os.path.isdir(class_directory): # Ensure it's a directory
            for file_name in os.listdir(class_directory):
                if file_name.endswith('.wav'):
                    filenames.append(os.path.join(class_directory, file_name))
                    labels.append(label)
    return filenames, labels
from scipy.signal import resample

def resample_audio(wav, num_samples):
    return resample(wav, num_samples)

def load_wav_16k_mono(filename, target_length=16000):
    """ Load a WAV file, convert it to a float tensor, resample to 16 kHz single-channel audio, and pad to target length. """
    file_contents = tf.io.read_file(filename)
    wav, sample_rate = tf.audio.decode_wav(
          file_contents,
          desired_channels=1)
    wav = tf.squeeze(wav, axis=-1)
    sample_rate = tf.cast(sample_rate, dtype=tf.float32)
    target_sample_rate = 16000.0

    # Compute the number of samples for the target sample rate
    num_samples = tf.cast(tf.shape(wav)[0], dtype=tf.float32) * target_sample_rate / sample_rate
    num_samples = tf.cast(num_samples, tf.int32)

    # Resample the wav using scipy resample
    resampled_wav = tf.numpy_function(resample_audio, [wav, num_samples], tf.float32)

    # Pad or truncate to target length
    resampled_wav = tf.cond(tf.shape(resampled_wav)[0] < target_length,
                            lambda: tf.pad(resampled_wav, [[0, target_length - tf.shape(resampled_wav)[0]]]),
                            lambda: resampled_wav[:target_length])

    return resampled_wav



# Import necessary libraries
from tensorflow.keras.layers.experimental import preprocessing

# Define the augmentation function
def audio_augmentation(wav_data):
    # Time shifting
    shift = tf.random.uniform([], -1600, 1600, dtype=tf.int32)
    wav_data = tf.roll(wav_data, shift, axis=0)
    # Adding noise (optional)
    noise = tf.random.normal(shape=tf.shape(wav_data), mean=0., stddev=0.1)
    wav_data = wav_data + noise
    return wav_data

# Modify the load_and_preprocess_data function
def load_and_preprocess_data(filename, label):
    wav_data = load_wav_16k_mono(filename)
    # Apply the augmentation
    wav_data = audio_augmentation(wav_data)
    scores, embeddings, _ = yamnet_model(wav_data)
    embeddings = tf.reduce_mean(embeddings, axis=0)  # Average across frames
    return embeddings, label




In [11]:

# Get file paths and labels
filenames, labels = get_file_paths_and_labels(main_directory, class_names)

filenames_ds = tf.data.Dataset.from_tensor_slices(filenames)
labels_ds = tf.data.Dataset.from_tensor_slices(labels)

# Zipping the datasets to create pairs of (filename, label)
main_ds = tf.data.Dataset.zip((filenames_ds, labels_ds))

# Apply loading and preprocessing
main_ds = main_ds.map(load_and_preprocess_data)

# Splitting the dataset
train_ds = main_ds.take(int(len(filenames) * 0.7))
test_ds = main_ds.skip(int(len(filenames) * 0.7))

train_ds = train_ds.cache().shuffle(1000).batch(32).prefetch(tf.data.AUTOTUNE)
test_ds = test_ds.cache().batch(32).prefetch(tf.data.AUTOTUNE)

In [12]:
from tensorflow.keras.layers import Input, Dense, Flatten
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.regularizers import l2



# Model definition
from tensorflow.keras.layers import Dropout

my_model = Sequential([
    Input(shape=(1024,), dtype=tf.float32, name='input_embedding'),
    Dense(512, activation='relu', kernel_regularizer=l2(0.01)),
    Dropout(0.5), # Adding dropout
    Dense(256, activation='relu'), # Additional hidden layer
    Dense(len(class_names))
], name='my_model')







my_model.summary()

my_model.compile(loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                 optimizer="adam",
                 metrics=['accuracy'])


Model: "my_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_3 (Dense)             (None, 512)               524800    
                                                                 
 dropout_1 (Dropout)         (None, 512)               0         
                                                                 
 dense_4 (Dense)             (None, 256)               131328    
                                                                 
 dense_5 (Dense)             (None, 16)                4112      
                                                                 
Total params: 660240 (2.52 MB)
Trainable params: 660240 (2.52 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [13]:
callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=3, restore_best_weights=True)

history = my_model.fit(train_ds, epochs=50, callbacks=callback)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50


In [14]:
loss, accuracy = my_model.evaluate(test_ds)

print("Loss: ", loss)
print("Accuracy: ", accuracy)


Loss:  8.64730167388916
Accuracy:  0.002109704539179802
