A solution tailored to your case would require more detail about your setup, so below is a more comprehensive code snippet that you can adapt instead. It assumes a directory structure in which each subdirectory is named after a class label and contains the audio files belonging to that class. The structure would look something like this:


- main_directory
    - class1
        - file1.wav
        - file2.wav
        ...
    - class2
        - file1.wav
        - file2.wav
        ...
    ...


import os
import librosa
import tensorflow as tf
import tensorflow_hub as hub
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras import layers, models

# Pretrained YAMNet model, fetched from TensorFlow Hub by its handle.
# It is called on raw waveforms further down to obtain embeddings.
yamnet_model_handle = "https://tfhub.dev/google/yamnet/1"
yamnet_model = hub.load(yamnet_model_handle)

def load_data(main_directory, *, return_encoder=False):
    """Build a dataset of YAMNet embeddings from a directory of labelled audio.

    Every immediate subdirectory of ``main_directory`` is treated as one
    class, and its name is used as the label of each ``.wav`` file directly
    inside it. Each file is loaded with librosa at 16 kHz, cut to at most
    three seconds, passed through the module-level ``yamnet_model``, and the
    resulting embeddings are averaged over axis 0 into one vector per file.

    Args:
        main_directory: Path of the directory holding one subdirectory per
            class.
        return_encoder: When true, also return the fitted ``LabelEncoder`` so
            that predicted class indices can be mapped back to the
            subdirectory names.

    Returns:
        ``(features, encoded_labels)`` as NumPy arrays, followed by the
        fitted ``LabelEncoder`` when ``return_encoder`` is true.

    Raises:
        ValueError: If no ``.wav`` file is found in any class subdirectory.
    """
    sample_rate = 16000
    max_samples = sample_rate * 3  # keep at most three seconds per clip

    labels = []
    features = []

    # Sort both listings: os.listdir returns entries in arbitrary order, and
    # a stable sample order is what makes a seeded train/test split
    # reproducible from run to run.
    for subdirectory in sorted(os.listdir(main_directory)):
        class_directory = os.path.join(main_directory, subdirectory)

        # Skip stray files that sit next to the class folders (for example
        # .DS_Store); listing them as directories raises NotADirectoryError.
        if not os.path.isdir(class_directory):
            continue

        print(f"Loading files in {subdirectory}")
        for filename in sorted(os.listdir(class_directory)):
            if not filename.endswith('.wav'):
                continue

            file_path = os.path.join(class_directory, filename)

            # librosa.load resamples to `sr` and, by default, returns mono.
            waveform, _ = librosa.load(file_path, sr=sample_rate)

            # Longer clips are truncated; shorter clips are passed through
            # unchanged (no padding is applied).
            waveform = waveform[:max_samples]

            # Only the embeddings output of the model is needed here.
            _, embeddings, _ = yamnet_model(waveform)
            feature = tf.reduce_mean(embeddings, axis=0).numpy()

            features.append(feature)
            labels.append(subdirectory)

    if not features:
        # Fail here with a clear message instead of handing empty arrays to
        # the split/training code.
        raise ValueError(
            f"No .wav files found in the class subdirectories of "
            f"{main_directory!r}"
        )

    # Map the class-name strings to integer ids
    le = LabelEncoder()
    encoded_labels = le.fit_transform(labels)

    if return_encoder:
        return np.array(features), np.array(encoded_labels), le
    return np.array(features), np.array(encoded_labels)

# Point this at your dataset root (replace 'main_directory' with your path)
features, labels = load_data('main_directory')

# Hold out 20% of the samples; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    features,
    labels,
    test_size=0.2,
    random_state=42,
)

# Small classifier on top of the extracted features: one hidden ReLU layer
# followed by a softmax with one unit per distinct label
num_classes = np.unique(labels).size
model = models.Sequential()
model.add(layers.Dense(128, activation='relu'))
model.add(layers.Dense(num_classes, activation='softmax'))

# Labels are integer class ids, hence the sparse cross-entropy loss
model.compile(
    optimizer='adam',
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    metrics=['accuracy'],
)

# Fit for 10 epochs, reporting metrics on the held-out split after each one
model.fit(
    X_train,
    y_train,
    epochs=10,
    validation_data=(X_test, y_test),
)


In [None]:
# Score the trained model on the held-out split with `evaluate`
print('\n# Evaluate on test data')
results = model.evaluate(x=X_test, y=y_test, batch_size=128)
print('test loss, test accuracy:', results)


In [None]:

def extract_embedding(file_path):
    """Return the mean YAMNet embedding of a single audio file.

    Mirrors the preprocessing used when the training features were built:
    the file is loaded with librosa at 16 kHz, cut to at most three seconds,
    passed through ``yamnet_model``, and the embeddings are averaged over
    axis 0.
    """
    waveform, _ = librosa.load(file_path, sr=16000)
    waveform = waveform[:16000 * 3]
    _, embeddings, _ = yamnet_model(waveform)
    return tf.reduce_mean(embeddings, axis=0).numpy()


# Load the file you want to predict on
file_path = 'path_to_your_file.wav'  # replace with your file path
feature = extract_embedding(file_path)

# Reshape the feature for prediction (since the model expects batches)
feature = np.expand_dims(feature, axis=0)

# Predict
prediction = model.predict(feature)

# The prediction will be an array of probabilities for each class
# We can get the class with the highest probability using argmax
predicted_class = np.argmax(prediction)

# Map the class index back to its label. The encoder fitted inside load_data
# is not available here, so the label names are rebuilt from the dataset
# directory: LabelEncoder numbers classes in sorted order of their names, and
# the names are the subdirectories that contain at least one .wav file.
data_directory = 'main_directory'  # the same path that was passed to load_data
class_names = sorted(
    name
    for name in os.listdir(data_directory)
    if os.path.isdir(os.path.join(data_directory, name))
    and any(
        entry.endswith('.wav')
        for entry in os.listdir(os.path.join(data_directory, name))
    )
)
predicted_label = class_names[predicted_class]

print('The predicted class is:', predicted_class)
print('The predicted label is:', predicted_label)