In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import soundfile as sf
import yamnet.params as params
import yamnet.yamnet as yamnet_model
import librosa
import tempfile
from collections import defaultdict
# YAMNet setup. The class-name list is what predicted score indices are
# translated through further down; the model itself is built from `params`
# and then has its weights loaded from the bundled .h5 file.
yamnet_classes = yamnet_model.class_names('yamnet/yamnet_class_map.csv')
yamnet = yamnet_model.yamnet_frames_model(params)
yamnet.load_weights('yamnet/yamnet.h5')

# Number of samples in one analysis window: one second at the model's rate.
frame_len = int(params.SAMPLE_RATE * 1)  # 1sec

# Read the whole audio file, asking librosa for the model's sample rate.
filename = 'test.m4a'
data, sr = librosa.load(filename, sr=params.SAMPLE_RATE)

# Split the audio into consecutive windows of exactly `frame_len` samples.
#
# The previous np.array_split(data, len(data) // frame_len) call divided the
# signal into N near-equal parts instead: every part came out longer than one
# second whenever the length was not an exact multiple of frame_len (so the
# per-chunk start/end times drifted from the real audio position), and it
# raised ValueError for clips shorter than one second (N == 0).
#
# The tail is zero-padded to a full window so every chunk has the same length;
# `data` itself is left untouched.
remainder = len(data) % frame_len
if remainder:
    padded_data = np.pad(data, (0, frame_len - remainder), 'constant')
else:
    padded_data = data
chunks = [
    padded_data[start:start + frame_len]
    for start in range(0, len(padded_data), frame_len)
]

# Dataframe to store the results: one row per one-second chunk.
df = pd.DataFrame(columns=[
    'start_time',
    'end_time',
    'yamnet_label',
    'yamnet_probability',
    'your_model_label',
    'your_model_probability',
])


# Classify every chunk with YAMNet, show its mel spectrogram, and hand chunks
# whose top class is 'Animal' (score above 0.3) to the second model.
# One result row is recorded per chunk.
#
# NOTE(review): `predict_on_audio` is not defined in the visible part of this
# file; it is called with the raw bytes of a WAV file and is expected to
# return a (label, probability) pair -- confirm against its definition.
plt.ion()

# Rows are collected in a plain list and turned into a DataFrame once after
# the loop: DataFrame.append was removed in pandas 2.0, and growing a frame
# row by row copies it on every iteration.
rows = []

for cnt, frame_data in enumerate(chunks):
    print(len(frame_data))
    start_time = cnt
    end_time = cnt + 1

    # Zero-pad a short chunk before anything consumes it, so the model and
    # the exported clip both see a full one-second window.
    if len(frame_data) < frame_len:
        padding = frame_len - len(frame_data)
        frame_data = np.pad(frame_data, (0, padding), 'constant')

    # model prediction: average the per-frame scores into one score per class
    scores, melspec = yamnet.predict(np.reshape(frame_data, [1, -1]), steps=1)
    yamnet_prediction = np.mean(scores, axis=0)

    # visualize input audio; clear the figure first so images from earlier
    # chunks do not pile up on the same axes
    plt.clf()
    plt.imshow(melspec.T, cmap='jet', aspect='auto', origin='lower')
    plt.pause(0.001)
    plt.show()

    # Class indices ordered by descending score; only the best one is used.
    top5_i = np.argsort(yamnet_prediction)[::-1][:5]
    top_label = yamnet_classes[top5_i[0]]
    top_probability = yamnet_prediction[top5_i[0]]

    row = {
        'start_time': start_time,
        'end_time': end_time,
        'yamnet_label': top_label,
        'yamnet_probability': top_probability,
        'your_model_label': None,
        'your_model_probability': None,
    }

    # If the top prediction is 'Animal', export the chunk as WAV bytes and
    # send it to your model.
    if top_label == 'Animal' and top_probability > 0.3:
        # Write into a temporary directory rather than reopening an open
        # NamedTemporaryFile by name, which is not permitted on Windows.
        with tempfile.TemporaryDirectory() as temp_dir:
            wav_path = os.path.join(temp_dir, 'frame.wav')
            sf.write(wav_path, frame_data, params.SAMPLE_RATE)
            with open(wav_path, 'rb') as binary_file:
                wav_bytes = binary_file.read()

        # Get prediction from your model
        your_model_prediction, your_model_probability = predict_on_audio(wav_bytes)
        row['your_model_label'] = your_model_prediction
        row['your_model_probability'] = your_model_probability

    rows.append(row)

# Materialise the collected rows, keeping the column order of `df` and any
# rows it already held.
if rows:
    new_rows = pd.DataFrame(rows, columns=df.columns)
    df = new_rows if df.empty else pd.concat([df, new_rows], ignore_index=True)

# print the DataFrame
print(df)

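# Cell output: ModuleNotFoundError: No module named 'yamnet'
# (the `yamnet` package imported at the top of the cell could not be found on the Python path)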