In [47]:
pip install tensorflow_hub

Note: you may need to restart the kernel to use updated packages.


In [45]:
pip install pydub

Note: you may need to restart the kernel to use updated packages.


In [79]:
import tensorflow as tf
import tensorflow_hub as hub
import numpy as np
import soundfile as sf
import os
from pydub import AudioSegment
from pydub.silence import detect_nonsilent
import pandas as pd

In [80]:
# Load the YAMNet model
yamnet_model_handle = 'https://tfhub.dev/google/yamnet/1'
yamnet_model = hub.load(yamnet_model_handle)

# Read class name mapping file
class_map_path = "C:/Users/93978/Documents/GitHub/Project-Echo/Extracting Animal and Bird Sounds/yamnet_class_map/yamnet_class_map.csv" 
class_names = pd.read_csv(class_map_path, sep=',', header=None).values.squeeze()



In [81]:
def analyze_audio(file_path):
    waveform, sample_rate = sf.read(file_path, dtype=np.float32)
    if len(waveform.shape) > 1:
        waveform = np.mean(waveform, axis=1)
    scores, embeddings, log_mel_spectrogram = yamnet_model(waveform)
    return scores.numpy(), class_names



In [113]:
# load audio file
audio = AudioSegment.from_file("C:/Users/93978/Documents/GitHub/Project-Echo/Extracting Animal and Bird Sounds/original audio file/9.mp3")

# Set the parameter value according to the specific situation of the audio file
min_silence_len = 500  
silence_thresh = -30    

# Extract non-silent segments
non_silent_ranges = detect_nonsilent(
    audio, 
    min_silence_len=min_silence_len, 
    silence_thresh=silence_thresh
)

# Create a directory to save the extracted audio segments
os.makedirs("extracted_segments", exist_ok=True)

# Extract and save audio segments
for i, (start, end) in enumerate(non_silent_ranges):
    segment = audio[start:end]
    segment.export(f"extracted_segments/segment_{i}.wav", format="wav")


In [114]:
# Analyze and annotate extracted audio segments
for i, (start, end) in enumerate(non_silent_ranges):
    segment_path = f"extracted_segments/segment_{i}.wav"
    segment = audio[start:end]
    segment.export(segment_path, format="wav")
    
    # Analyze segment with YAMNet
    scores, class_names = analyze_audio(segment_path)
    
    # Get the top 3 predictions
    top_scores = scores.mean(axis=0)
    top_class_indices = top_scores.argsort()[-3:][::-1]
    top_class_names = class_names[top_class_indices]
    top_scores = top_scores[top_class_indices]
    
    print(f"Segment {i}:")
    for class_name, score in zip(top_class_names, top_scores):
        print(f"  - {class_name}: {score:.3f}")


Segment 0:
  - ['66' '/t/dd00013' 'Children playing']: 0.455
  - ['67' '/m/0jbk' 'Animal']: 0.363
  - ['68' '/m/068hy' 'Domestic animals, pets']: 0.270
Segment 1:
  - ['66' '/t/dd00013' 'Children playing']: 0.369
  - ['67' '/m/0jbk' 'Animal']: 0.313
  - ['68' '/m/068hy' 'Domestic animals, pets']: 0.281
Segment 2:
  - ['index' 'mid' 'display_name']: 0.135
  - ['67' '/m/0jbk' 'Animal']: 0.069
  - ['68' '/m/068hy' 'Domestic animals, pets']: 0.064
Segment 3:
  - ['493' '/m/07s12q4' 'Crunch']: 0.191
  - ['497' '/m/0hdsk' 'Chirp tone']: 0.190
  - ['474' '/m/07qcx4z' 'Tearing']: 0.102
Segment 4:
  - ['499' '/m/07pt_g0' 'Pulse']: 0.047
  - ['index' 'mid' 'display_name']: 0.046
  - ['131' '/m/032n05' 'Whale vocalization']: 0.034
Segment 5:
  - ['66' '/t/dd00013' 'Children playing']: 0.628
  - ['67' '/m/0jbk' 'Animal']: 0.504
  - ['68' '/m/068hy' 'Domestic animals, pets']: 0.476
Segment 6:
  - ['66' '/t/dd00013' 'Children playing']: 0.519
  - ['67' '/m/0jbk' 'Animal']: 0.366
  - ['68' '/m/068hy'