# Bird Sound Classifier Training

This notebook trains a bird sound classifier on YAMNet audio embeddings and exports it to TFLite. It covers 5 species:

1. White-breasted Wood-Wren
2. House Sparrow
3. Red Crossbill
4. Chestnut-crowned Antpitta
5. Azara's Spinetail

In [None]:
import tensorflow as tf
import tensorflow_hub as hub
import numpy as np
import librosa
import os
from sklearn.model_selection import train_test_split
from tensorflow.keras import layers, models

In [None]:
# Fetch the YAMNet model from TensorFlow Hub; it is used below as the
# feature extractor for every audio clip.
YAMNET_HANDLE = 'https://tfhub.dev/google/yamnet/1'
yamnet_model = hub.load(YAMNET_HANDLE)

In [None]:
def load_audio(file_path, target_sr=16000, duration_s=30):
    """Load an audio file and force it to a fixed duration.

    The file is read with ``librosa.load(file_path, sr=target_sr)`` and the
    resulting sample array is zero-padded at the end or truncated so that it
    holds exactly ``duration_s`` seconds of audio.

    Args:
        file_path: Path of the audio file to read.
        target_sr: Sample rate passed to ``librosa.load``.
        duration_s: Desired clip length in seconds (30 by default).

    Returns:
        The sample array, ``int(sample_rate * duration_s)`` samples long.
    """
    audio, sample_rate = librosa.load(file_path, sr=target_sr)

    # Size the clip from the rate librosa actually returned instead of a
    # hard-coded 16000, so a non-default target_sr still yields duration_s
    # seconds of audio.
    target_length = int(sample_rate * duration_s)

    if len(audio) < target_length:
        # Short clip: append zeros (np.pad's default constant mode).
        audio = np.pad(audio, (0, target_length - len(audio)))
    else:
        # Long (or exact) clip: keep only the leading samples.
        audio = audio[:target_length]

    return audio

In [None]:
def extract_yamnet_features(audio):
    """Run YAMNet on ``audio`` and return only its embedding output."""
    # The model call yields three values; the first (scores) and the third
    # (mel spectrogram) are not needed here, so only the middle one is kept.
    _, embedding_output, _ = yamnet_model(audio)
    return embedding_output

In [None]:
def prepare_dataset(data_dir):
    """Build feature and label arrays from a one-folder-per-species layout.

    Every sub-directory of ``data_dir`` is treated as one class. Each ``.wav``
    file inside it (extension matched case-insensitively) is loaded with
    ``load_audio``, passed through ``extract_yamnet_features``, and the
    resulting embeddings are averaged over axis 0 into one feature vector.

    Class indices are assigned by enumerating the sub-directory names in
    sorted order, so the label mapping is reproducible between runs and is
    not affected by stray files sitting next to the species folders.

    Args:
        data_dir: Root directory containing one sub-directory per species.

    Returns:
        A ``(features, labels)`` tuple of NumPy arrays with one entry per
        audio file. Both are empty when no ``.wav`` files are found.
    """
    # Resolve the class folders up front. os.listdir() returns entries in
    # arbitrary order and may include plain files; enumerating it directly
    # would make label ids unstable and could skip or exceed valid indices.
    species_names = sorted(
        entry for entry in os.listdir(data_dir)
        if os.path.isdir(os.path.join(data_dir, entry))
    )

    features = []
    labels = []

    for label, bird_species in enumerate(species_names):
        species_dir = os.path.join(data_dir, bird_species)

        # Sorted so that sample order (and any later seeded split) is stable.
        for audio_file in sorted(os.listdir(species_dir)):
            if not audio_file.lower().endswith('.wav'):
                continue

            audio_path = os.path.join(species_dir, audio_file)
            audio = load_audio(audio_path)
            embeddings = extract_yamnet_features(audio)

            # Average the embeddings over time (axis 0) to get a single
            # fixed-size vector per clip, then store it as a NumPy array so
            # the final stacking does not have to convert tensors one by one.
            feature_vector = tf.reduce_mean(embeddings, axis=0)
            features.append(np.asarray(feature_vector))
            labels.append(label)

    return np.array(features), np.array(labels)

In [None]:
def create_model(num_classes=5, embedding_dim=1024):
    """Create and compile the dense bird classifier.

    The network is a stack of fully connected layers (256 -> 128 -> 64) with
    dropout after the first two, followed by a softmax output layer. It is
    compiled with the Adam optimizer and sparse categorical cross-entropy,
    so it expects integer class labels.

    Args:
        num_classes: Number of output classes (5 bird species by default).
        embedding_dim: Length of each input feature vector (1024 by default,
            the YAMNet embedding size this notebook uses).

    Returns:
        The compiled Keras ``Sequential`` model.
    """
    model = models.Sequential([
        # Explicit input layer instead of input_shape= on the first Dense.
        layers.Input(shape=(embedding_dim,)),
        layers.Dense(256, activation='relu'),
        layers.Dropout(0.3),
        layers.Dense(128, activation='relu'),
        layers.Dropout(0.3),
        layers.Dense(64, activation='relu'),
        layers.Dense(num_classes, activation='softmax'),
    ])

    model.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )

    return model

In [None]:
# Load and prepare dataset
DATA_DIR = 'path_to_your_bird_sound_dataset'
features, labels = prepare_dataset(DATA_DIR)

# Split dataset: 80% train / 20% test with a fixed seed for reproducibility.
# The split is stratified on the labels so both parts keep the same species
# proportions; this needs at least two recordings per species.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    labels,
    test_size=0.2,
    random_state=42,
    stratify=labels,
)

In [None]:
# Create and train model
model = create_model()

# Stop once validation loss has not improved for 5 epochs and roll back to
# the best weights seen, rather than keeping the last (possibly worse) ones.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)

# Validation uses a slice held out from the training data. Monitoring the
# test split here would let it influence when training stops, making the
# accuracy reported on X_test/y_test afterwards optimistically biased.
history = model.fit(
    X_train, y_train,
    epochs=50,
    batch_size=32,
    validation_split=0.2,
    callbacks=[early_stopping]
)

In [None]:
# Score the trained model on the held-out test split and report accuracy.
evaluation = model.evaluate(X_test, y_test)
test_loss, test_accuracy = evaluation
print(f'Test accuracy: {test_accuracy:.4f}')

In [None]:
# Convert the trained Keras model to TFLite format.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

# Write the converted model bytes to disk.
TFLITE_PATH = 'bird_classifier.tflite'
with open(TFLITE_PATH, 'wb') as tflite_file:
    tflite_file.write(tflite_model)