<a href="https://colab.research.google.com/github/Tisha-patel-2005/Basic-repo/blob/main/main1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [68]:
# Create project directories

!mkdir -p BirdAudioProject
!mkdir -p BirdAudioProject/static/css
!mkdir -p BirdAudioProject/static/js
!mkdir -p BirdAudioProject/static/data
!mkdir -p BirdAudioProject/static/audio
!mkdir -p BirdAudioProject/static/images
!mkdir -p BirdAudioProject/templates
!mkdir -p BirdAudioProject/models

In [69]:
# Install required packages

!pip install flask librosa tensorflow scikit-learn matplotlib numpy pandas flask-ngrok pyngrok



In [70]:
# Write the Xeno-Canto download script to data_collector.py (it is executed in the next cell)

%%writefile data_collector.py

import os
import requests
import pandas as pd
from tqdm import tqdm
import time

def download_xeno_canto_data(species_list, output_dir, max_recordings_per_species=10):
    """
    Download bird audio recordings from the Xeno-Canto API.

    For every species a sub-directory (spaces replaced by underscores) is
    created under ``output_dir`` and up to ``max_recordings_per_species``
    recordings are saved there as ``<Species_Name>_<n>.mp3``.

    The function is best-effort: a failed search or a failed download is
    reported on stdout and the loop moves on to the next item instead of
    aborting the whole run.

    Args:
        species_list (list): List of bird species to download
        output_dir (str): Directory to save downloaded recordings
        max_recordings_per_species (int): Maximum number of recordings per species
    """
    base_url = "https://www.xeno-canto.org/api/2/recordings"
    # Without a timeout a stalled connection would block the script forever.
    request_timeout = 30  # seconds

    # Create output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)

    for species in species_list:
        print(f"Downloading recordings for {species}...")

        # Create species directory
        species_slug = species.replace(" ", "_")
        species_dir = os.path.join(output_dir, species_slug)
        os.makedirs(species_dir, exist_ok=True)

        # Query Xeno-Canto API. "q:A" restricts the search to quality-A
        # recordings. Passing the query through ``params`` lets requests
        # URL-encode the spaces and the colon instead of sending them raw.
        try:
            response = requests.get(
                base_url,
                params={"query": f"{species} q:A"},
                timeout=request_timeout,
            )
        except requests.RequestException as e:
            print(f"Failed to query Xeno-Canto API for {species}: {e}")
            continue

        if response.status_code != 200:
            print(f"Failed to query Xeno-Canto API for {species}: {response.status_code}")
            continue

        try:
            recordings = response.json().get('recordings', [])
        except ValueError as e:
            # The body was not valid JSON (e.g. an HTML error page).
            print(f"Failed to query Xeno-Canto API for {species}: {e}")
            continue

        # Limit number of recordings
        recordings = recordings[:max_recordings_per_species]

        print(f"Found {len(recordings)} recordings for {species}")

        # Download each recording
        for i, recording in enumerate(recordings):
            try:
                # Get download link
                file_url = recording.get('file')
                if not file_url:
                    continue

                # NOTE(review): some API responses are believed to use
                # protocol-relative links ("//host/..."); confirm against the
                # API docs. Adding a scheme is harmless for absolute URLs.
                if file_url.startswith("//"):
                    file_url = "https:" + file_url

                # The numbering follows the position in the (truncated) result list.
                file_name = f"{species_slug}_{i+1}.mp3"
                file_path = os.path.join(species_dir, file_name)

                # Download file
                print(f"Downloading {file_name}...")
                audio_response = requests.get(file_url, timeout=request_timeout)

                if audio_response.status_code == 200:
                    with open(file_path, 'wb') as f:
                        f.write(audio_response.content)
                    print(f"Downloaded {file_name}")
                else:
                    print(f"Failed to download {file_name}")

                # Add a small delay to avoid overwhelming the server
                time.sleep(1)

            except Exception as e:
                # Deliberately broad: one bad recording must not stop the batch.
                print(f"Error downloading recording: {e}")

        # Add a delay between species to be nice to the API
        time.sleep(3)

# List of bird species to download
species_list = [
    "American Robin",
    "Northern Cardinal",
    "Blue Jay",
    "Barn Owl",
    "Red-tailed Hawk"
]

# Only start downloading when the file is executed as a script
# (`python data_collector.py`); importing the module must not hit the network.
if __name__ == "__main__":
    # Download data (10 recordings per species)
    download_xeno_canto_data(species_list, "bird_audio_data", max_recordings_per_species=10)

Writing data_collector.py


In [71]:
# Install the packages that data_collector.py imports (requests, pandas, tqdm)
!pip install requests pandas tqdm

# Run the data collection script
!python data_collector.py

Downloading recordings for American Robin...
Found 10 recordings for American Robin
Downloading American_Robin_1.mp3...
Downloaded American_Robin_1.mp3
Downloading American_Robin_2.mp3...
Downloaded American_Robin_2.mp3
Downloading American_Robin_3.mp3...
Downloaded American_Robin_3.mp3
Downloading American_Robin_4.mp3...
Downloaded American_Robin_4.mp3
Downloading American_Robin_5.mp3...
Downloaded American_Robin_5.mp3
Downloading American_Robin_6.mp3...
Downloaded American_Robin_6.mp3
Downloading American_Robin_7.mp3...
Downloaded American_Robin_7.mp3
Downloading American_Robin_8.mp3...
Downloaded American_Robin_8.mp3
Downloading American_Robin_9.mp3...
Downloaded American_Robin_9.mp3
Downloading American_Robin_10.mp3...
Downloaded American_Robin_10.mp3
Downloading recordings for Northern Cardinal...
Found 10 recordings for Northern Cardinal
Downloading Northern_Cardinal_1.mp3...
Downloaded Northern_Cardinal_1.mp3
Downloading Northern_Cardinal_2.mp3...
Downloaded Northern_Cardinal_2.

In [72]:
# Create a directory for bird images
!mkdir -p BirdAudioProject/static/images

# Download bird images using wget
!wget -O BirdAudioProject/static/images/american_robin.jpg "https://upload.wikimedia.org/wikipedia/commons/b/b8/Turdus-migratorius-002.jpg"
!wget -O BirdAudioProject/static/images/northern_cardinal.jpg "https://upload.wikimedia.org/wikipedia/commons/d/da/Cardinal.jpg"
!wget -O BirdAudioProject/static/images/blue_jay.jpg "https://tse2.mm.bing.net/th?id=OIP.LW7owKtD8WTiQPJjH-R1mwHaE7&pid=Api&P=0&h=180.jpg"
!wget -O BirdAudioProject/static/images/barn_owl.jpg "https://tse2.mm.bing.net/th?id=OIP.d5OM94ifQJ3NPa54gJYuSAHaEo&pid=Api&P=0&h=180.jpg"
!wget -O BirdAudioProject/static/images/red_tailed_hawk.jpg "https://upload.wikimedia.org/wikipedia/commons/5/51/Buteo_jamaicensis_-John_Heinz_National_Wildlife_Refuge_at_Tinicum%2C_Pennsylvania%2C_USA-8.jpg"
!wget -O BirdAudioProject/static/images/about-image.jpg "https://upload.wikimedia.org/wikipedia/commons/thumb/e/e7/Everglades_National_Park_Birds.jpg/1280px-Everglades_National_Park_Birds.jpg"

--2025-04-09 14:46:27--  https://upload.wikimedia.org/wikipedia/commons/b/b8/Turdus-migratorius-002.jpg
Resolving upload.wikimedia.org (upload.wikimedia.org)... 198.35.26.112, 2620:0:863:ed1a::2:b
Connecting to upload.wikimedia.org (upload.wikimedia.org)|198.35.26.112|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 368966 (360K) [image/jpeg]
Saving to: ‘BirdAudioProject/static/images/american_robin.jpg’


2025-04-09 14:46:27 (4.43 MB/s) - ‘BirdAudioProject/static/images/american_robin.jpg’ saved [368966/368966]

--2025-04-09 14:46:27--  https://upload.wikimedia.org/wikipedia/commons/d/da/Cardinal.jpg
Resolving upload.wikimedia.org (upload.wikimedia.org)... 198.35.26.112, 2620:0:863:ed1a::2:b
Connecting to upload.wikimedia.org (upload.wikimedia.org)|198.35.26.112|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 59435 (58K) [image/jpeg]
Saving to: ‘BirdAudioProject/static/images/northern_cardinal.jpg’


2025-04-09 14:46:28 (1.78 MB/s) - 

In [73]:
# Create the data directory structure
!mkdir -p BirdAudioProject/data/bird_audio
!mkdir -p BirdAudioProject/models

# Copy the downloaded data into the project structure (the originals stay in bird_audio_data/)
!cp -r bird_audio_data/* BirdAudioProject/data/bird_audio/

# Create the folders for the training and testing splits (filled later by split_data.py: 80% training, 20% testing)
!mkdir -p BirdAudioProject/data/train_bird_audio
!mkdir -p BirdAudioProject/data/test_bird_audio

In [74]:

# Write the train/test split script to split_data.py (it is executed in the next cell)
%%writefile split_data.py
import os
import shutil
import random

def _copy_into_split(species_dir, file_name, dest_dir, other_dir):
    """Copy one file into its split folder and drop any stale copy from the other split."""
    shutil.copy(os.path.join(species_dir, file_name), os.path.join(dest_dir, file_name))

    # A previous run may have assigned this file to the opposite split. Leaving
    # that copy behind would put the same recording in both train and test.
    stale_copy = os.path.join(other_dir, file_name)
    if os.path.exists(stale_copy):
        os.remove(stale_copy)


def split_data(source_dir, train_dir, test_dir, test_ratio=0.2, seed=42):
    """
    Split data into training and testing sets.

    Each species folder in ``source_dir`` is mirrored into ``train_dir`` and
    ``test_dir`` and its audio files (.wav, .mp3, .ogg) are copied into one of
    the two. The split is reproducible: files are sorted and shuffled with a
    seeded generator, so re-running the script yields the same assignment.
    Re-running with a different seed moves files between the splits rather
    than duplicating them.

    A species with a single audio file keeps that file for training (an empty
    training set is useless); every species with two or more files contributes
    at least one test file.

    Args:
        source_dir (str): Source directory containing species folders
        train_dir (str): Directory to save training data
        test_dir (str): Directory to save testing data
        test_ratio (float): Ratio of data to use for testing
        seed (int | None): Seed for the shuffle; ``None`` gives a different
            split on every run
    """
    # Create output directories
    os.makedirs(train_dir, exist_ok=True)
    os.makedirs(test_dir, exist_ok=True)

    # Get species directories (sorted: os.listdir order is arbitrary)
    species_dirs = sorted(
        d for d in os.listdir(source_dir) if os.path.isdir(os.path.join(source_dir, d))
    )

    for species in species_dirs:
        # Create species directories in train and test
        train_species_dir = os.path.join(train_dir, species)
        test_species_dir = os.path.join(test_dir, species)
        os.makedirs(train_species_dir, exist_ok=True)
        os.makedirs(test_species_dir, exist_ok=True)

        # Get audio files
        species_dir = os.path.join(source_dir, species)
        audio_files = sorted(
            f for f in os.listdir(species_dir) if f.endswith(('.wav', '.mp3', '.ogg'))
        )

        # Shuffle with a fresh generator per species so adding or removing one
        # species does not change the split of the others.
        random.Random(seed).shuffle(audio_files)

        # Calculate split
        if len(audio_files) > 1:
            test_size = max(1, int(len(audio_files) * test_ratio))
        else:
            test_size = 0
        test_files = audio_files[:test_size]
        train_files = audio_files[test_size:]

        # Copy files
        for file in train_files:
            _copy_into_split(species_dir, file, train_species_dir, test_species_dir)

        for file in test_files:
            _copy_into_split(species_dir, file, test_species_dir, train_species_dir)

        print(f"{species}: {len(train_files)} training files, {len(test_files)} testing files")

# Split the data. Guarded so that importing split_data (e.g. to reuse the
# function) does not copy files as a side effect.
if __name__ == "__main__":
    split_data("BirdAudioProject/data/bird_audio",
               "BirdAudioProject/data/train_bird_audio",
               "BirdAudioProject/data/test_bird_audio")



Overwriting split_data.py


In [75]:
# Run the split script written in the previous cell
!python split_data.py

INFO:werkzeug: * Detected change in '/content/BirdAudioProject/BirdAudioProject/split_data.py', reloading
Red-tailed_Hawk: 8 training files, 2 testing files
INFO:werkzeug: * Restarting with stat
2025-04-09 14:46:32.410173: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1744209992.435741   12525 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1744209992.443579   12525 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
Northern_Cardinal: 8 training files, 2 testing files
Barn_Owl: 8 training files, 2 testing files
Blue_Jay: 8 training files, 2 testing files
American_Robin: 8 training files, 2 testing files


In [76]:
# Create audio_processor.py
%%writefile BirdAudioProject/audio_processor.py

import os
import numpy as np
import librosa
import matplotlib.pyplot as plt

class AudioProcessor:
    """Load audio files and turn them into fixed-size MFCC inputs for the CNN."""

    def __init__(self, sample_rate=22050, n_mfcc=13, n_fft=2048, hop_length=512):
        """
        Initialize the AudioProcessor with audio processing parameters.

        Args:
            sample_rate (int): Sample rate for audio processing
            n_mfcc (int): Number of MFCC features to extract
            n_fft (int): FFT window size
            hop_length (int): Hop length for FFT
        """
        self.sample_rate = sample_rate
        self.n_mfcc = n_mfcc
        self.n_fft = n_fft
        self.hop_length = hop_length

    def load_audio(self, file_path):
        """
        Load audio file and return the signal and sample rate.

        The file is resampled to ``self.sample_rate`` while loading. Any
        loading error is printed and reported as ``(None, None)`` so that
        batch callers can skip unreadable files.

        Args:
            file_path (str): Path to the audio file

        Returns:
            tuple: (audio_signal, sample_rate), or (None, None) on failure
        """
        try:
            signal, sr = librosa.load(file_path, sr=self.sample_rate)
            return signal, sr
        except Exception as e:
            print(f"Error loading audio file: {e}")
            return None, None

    def extract_features(self, signal, feature_type='mfcc'):
        """
        Extract audio features from the signal.

        Args:
            signal (numpy.ndarray): Audio signal
            feature_type (str): Type of feature to extract
                ('mfcc' or 'mel_spectrogram')

        Returns:
            numpy.ndarray: Extracted features, or None when ``signal`` is None
            or ``feature_type`` is not supported
        """
        if signal is None:
            return None

        if feature_type == 'mfcc':
            return librosa.feature.mfcc(
                y=signal,
                sr=self.sample_rate,
                n_mfcc=self.n_mfcc,
                n_fft=self.n_fft,
                hop_length=self.hop_length
            )

        if feature_type == 'mel_spectrogram':
            return librosa.feature.melspectrogram(
                y=signal,
                sr=self.sample_rate,
                n_fft=self.n_fft,
                hop_length=self.hop_length
            )

        print(f"Feature type '{feature_type}' not supported")
        return None

    def normalize_features(self, features):
        """
        Normalize features to have zero mean and unit variance.

        Statistics are computed per row (axis 1), i.e. each coefficient is
        normalized independently over time.

        Args:
            features (numpy.ndarray): Features to normalize

        Returns:
            numpy.ndarray: Normalized features, or None when ``features`` is None
        """
        if features is None:
            return None

        mean = np.mean(features, axis=1, keepdims=True)
        std = np.std(features, axis=1, keepdims=True)
        # The small epsilon keeps constant rows from dividing by zero.
        return (features - mean) / (std + 1e-10)

    def pad_or_truncate(self, features, fixed_length=100):
        """
        Force the second axis of ``features`` to exactly ``fixed_length`` columns.

        Shorter inputs are zero-padded on the right; longer inputs are cut.

        Args:
            features (numpy.ndarray): 2-D feature matrix
            fixed_length (int): Target number of columns

        Returns:
            numpy.ndarray: Matrix with ``fixed_length`` columns
        """
        n_frames = features.shape[1]
        if n_frames < fixed_length:
            return np.pad(
                features,
                pad_width=((0, 0), (0, fixed_length - n_frames)),
                mode='constant'
            )
        return features[:, :fixed_length]

    def plot_features(self, features, feature_type='mfcc', save_path=None):
        """
        Plot extracted features.

        Args:
            features (numpy.ndarray): Features to plot
            feature_type (str): Type of feature ('mfcc' or 'mel_spectrogram')
            save_path (str, optional): Path to save the plot

        Returns:
            matplotlib.figure.Figure: Figure object (the caller is responsible
            for closing it), or None when ``features`` is None
        """
        if features is None:
            return None

        # `import librosa` alone does not load the display submodule on every
        # librosa release; import it explicitly so `librosa.display` resolves.
        import librosa.display

        fig = plt.figure(figsize=(10, 4))

        if feature_type == 'mfcc':
            librosa.display.specshow(
                features,
                x_axis='time',
                sr=self.sample_rate,
                hop_length=self.hop_length
            )
            # MFCCs are cepstral coefficients, not decibel values, so the
            # colour bar is left unlabelled instead of claiming "dB".
            plt.colorbar()
            plt.title('MFCC')

        elif feature_type == 'mel_spectrogram':
            librosa.display.specshow(
                librosa.power_to_db(features, ref=np.max),
                y_axis='mel',
                x_axis='time',
                sr=self.sample_rate,
                hop_length=self.hop_length
            )
            plt.colorbar(format='%+2.0f dB')
            plt.title('Mel spectrogram')

        if save_path:
            fig.savefig(save_path)

        return fig

    def preprocess_for_model(self, file_path, fixed_length=100):
        """
        Preprocess audio file for model input.

        Pipeline: load -> MFCC -> per-coefficient normalization ->
        pad/truncate to ``fixed_length`` frames -> add batch and channel axes.

        Args:
            file_path (str): Path to the audio file
            fixed_length (int): Fixed length for feature padding/truncation

        Returns:
            numpy.ndarray: Array of shape (1, n_mfcc, fixed_length, 1), or
            None when the file cannot be loaded
        """
        signal, sr = self.load_audio(file_path)
        if signal is None:
            return None

        # Extract MFCC features
        features = self.extract_features(signal, feature_type='mfcc')
        if features is None:
            return None

        # Normalize, then pad or truncate to fixed length
        fitted = self.pad_or_truncate(self.normalize_features(features), fixed_length)

        # Reshape for model input (assuming CNN model)
        return fitted.reshape(1, fitted.shape[0], fitted.shape[1], 1)

Overwriting BirdAudioProject/audio_processor.py


In [77]:
# Create model_trainer.py
%%writefile BirdAudioProject/model_trainer.py

import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Dropout, GlobalAveragePooling2D, BatchNormalization
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt
import pickle

class ModelTrainer:
    """Build, train, evaluate and persist the CNN species classifier."""

    def __init__(self, input_shape=(128, 128, 1)):
        """
        Initialize ModelTrainer.
        Args:
            input_shape (tuple): Shape of input features (Height, Width, Channels)
        """
        self.input_shape = input_shape
        self.model = None
        self.history = None
        self.label_encoder = LabelEncoder()

    def build_model(self, num_classes):
        """
        Build and compile the CNN model.

        Args:
            num_classes (int): Number of output classes (softmax units)

        Returns:
            The compiled Keras model (also stored on ``self.model``)
        """
        model = Sequential([
            Conv2D(32, (3, 3), activation='relu', padding='same', input_shape=self.input_shape),
            BatchNormalization(),
            MaxPooling2D((2, 2)),

            Conv2D(64, (3, 3), activation='relu', padding='same'),
            BatchNormalization(),
            MaxPooling2D((2, 2)),

            Conv2D(128, (3, 3), activation='relu', padding='same'),
            BatchNormalization(),
            MaxPooling2D((2, 2)),

            GlobalAveragePooling2D(),
            Dense(128, activation='relu'),
            Dropout(0.4),
            Dense(num_classes, activation='softmax')
        ])

        model.compile(
            optimizer='adam',
            loss='categorical_crossentropy',
            metrics=['accuracy']
        )

        self.model = model
        return model

    def _one_hot(self, encoded_y):
        """One-hot encode integer labels using the encoder's full class count."""
        # Passing num_classes explicitly keeps the width equal to the number of
        # classes seen during fit, even when the given labels miss some of them.
        return to_categorical(encoded_y, num_classes=len(self.label_encoder.classes_))

    def train(self, X, y, epochs=50, batch_size=32, validation_split=0.2, save_best_model_path=None):
        """
        Train the model with early stopping and optional model saving.

        Args:
            X: Feature array accepted by the model
            y: Class labels (fitted into ``self.label_encoder``)
            epochs (int): Maximum number of epochs
            batch_size (int): Mini-batch size
            validation_split (float): Fraction of the data held out for validation
            save_best_model_path (str, optional): When given, the best model
                (lowest validation loss) is checkpointed to this path

        Returns:
            The Keras History object (also stored on ``self.history``)

        Raises:
            ValueError: If ``build_model`` has not been called
        """
        if self.model is None:
            raise ValueError("Model not built. Call build_model first.")

        # Encode labels
        encoded_y = self.label_encoder.fit_transform(y)
        categorical_y = self._one_hot(encoded_y)

        # Split
        X_train, X_val, y_train, y_val = train_test_split(
            X, categorical_y, test_size=validation_split, random_state=42
        )

        # Callbacks
        callbacks = [
            EarlyStopping(patience=8, restore_best_weights=True, monitor='val_loss')
        ]
        if save_best_model_path:
            callbacks.append(ModelCheckpoint(save_best_model_path, save_best_only=True, monitor='val_loss'))

        # Train
        history = self.model.fit(
            X_train, y_train,
            epochs=epochs,
            batch_size=batch_size,
            validation_data=(X_val, y_val),
            callbacks=callbacks,
            verbose=1
        )

        self.history = history
        return history

    def evaluate(self, X_test, y_test):
        """
        Evaluate the model on test data.

        Returns:
            tuple: (loss, accuracy)

        Raises:
            ValueError: If no model is available
        """
        if self.model is None:
            raise ValueError("Model not built or trained.")

        encoded_y = self.label_encoder.transform(y_test)
        # Without an explicit class count, a test set that lacks the
        # highest-index class would produce a target matrix narrower than the
        # model's output and evaluation would fail with a shape mismatch.
        categorical_y = self._one_hot(encoded_y)

        loss, accuracy = self.model.evaluate(X_test, categorical_y, verbose=0)
        print(f"Test Loss: {loss:.4f}")
        print(f"Test Accuracy: {accuracy:.4f}")
        return loss, accuracy

    def predict(self, X):
        """
        Predict class labels and probabilities.

        Returns:
            tuple: (predicted_labels, probabilities)

        Raises:
            ValueError: If no model is available
        """
        if self.model is None:
            raise ValueError("Model not trained.")

        probabilities = self.model.predict(X)
        predicted_indices = np.argmax(probabilities, axis=1)
        predicted_labels = self.label_encoder.inverse_transform(predicted_indices)
        return predicted_labels, probabilities

    def save_model(self, model_path, encoder_path=None):
        """
        Save model and label encoder.

        Args:
            model_path (str): Destination for the Keras model
            encoder_path (str, optional): Destination for the pickled label encoder

        Raises:
            ValueError: If no model is available
        """
        if self.model is None:
            raise ValueError("Model not trained.")

        self.model.save(model_path)

        if encoder_path:
            with open(encoder_path, 'wb') as f:
                pickle.dump(self.label_encoder, f)

    def load_model(self, model_path, encoder_path=None):
        """
        Load saved model and label encoder.

        The encoder is only loaded when ``encoder_path`` is given and exists;
        otherwise the current (possibly unfitted) encoder is kept.
        """
        self.model = tf.keras.models.load_model(model_path)

        if encoder_path and os.path.exists(encoder_path):
            # SECURITY: pickle.load executes arbitrary code from the file.
            # Only load encoder files produced by this project.
            with open(encoder_path, 'rb') as f:
                self.label_encoder = pickle.load(f)

    def plot_training_history(self, save_path=None):
        """
        Plot training history.

        Args:
            save_path (str, optional): When given, the figure is also saved there

        Returns:
            matplotlib.figure.Figure: Figure with accuracy and loss panels

        Raises:
            ValueError: If ``train`` has not been run
        """
        if self.history is None:
            raise ValueError("Model not trained yet.")

        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))

        ax1.plot(self.history.history['accuracy'])
        ax1.plot(self.history.history['val_accuracy'])
        ax1.set_title('Accuracy')
        ax1.set_xlabel('Epoch')
        ax1.set_ylabel('Accuracy')
        ax1.legend(['Train', 'Val'])

        ax2.plot(self.history.history['loss'])
        ax2.plot(self.history.history['val_loss'])
        ax2.set_title('Loss')
        ax2.set_xlabel('Epoch')
        ax2.set_ylabel('Loss')
        ax2.legend(['Train', 'Val'])

        plt.tight_layout()
        if save_path:
            plt.savefig(save_path)
        return fig


Writing BirdAudioProject/model_trainer.py


In [78]:
# Create train_model.py
%%writefile BirdAudioProject/train_model.py

import os
import numpy as np
import pandas as pd
from audio_processor import AudioProcessor
from model_trainer import ModelTrainer

def train_model_with_data(data_dir, output_dir, fixed_length=100):
    """
    Train a bird species identification model with audio data.

    Every audio file (.wav, .mp3, .ogg) found in the species sub-folders of
    ``data_dir`` is converted with ``AudioProcessor.preprocess_for_model`` -
    the same routine the web app uses at prediction time - so training and
    inference inputs cannot drift apart. Files that fail to load are skipped.

    Written to ``output_dir``: ``training_history.png``,
    ``bird_species_model.h5`` and ``label_encoder.pkl``.

    Args:
        data_dir (str): Directory containing audio files organized by species
        output_dir (str): Directory to save model and results
        fixed_length (int): Number of MFCC frames each sample is padded or
            truncated to

    Raises:
        ValueError: If no audio file under ``data_dir`` could be processed
    """
    # Create output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)

    # Initialize audio processor
    processor = AudioProcessor(sample_rate=22050, n_mfcc=13)

    # Lists to store features and labels
    features_list = []
    labels_list = []

    # Process each species directory
    species_dirs = [d for d in os.listdir(data_dir) if os.path.isdir(os.path.join(data_dir, d))]

    print(f"Found {len(species_dirs)} species directories")

    for species in species_dirs:
        species_dir = os.path.join(data_dir, species)
        print(f"Processing {species}...")

        # Process each audio file in the species directory
        audio_files = [f for f in os.listdir(species_dir) if f.endswith(('.wav', '.mp3', '.ogg'))]

        for audio_file in audio_files:
            file_path = os.path.join(species_dir, audio_file)

            # Load, extract MFCCs, normalize and pad/truncate in one step.
            # The result already has shape (1, n_mfcc, fixed_length, 1).
            features = processor.preprocess_for_model(file_path, fixed_length=fixed_length)
            if features is None:
                continue

            # Add to lists
            features_list.append(features)
            labels_list.append(species)

    # Fail with a clear message instead of an IndexError further down.
    if not features_list:
        raise ValueError(f"No usable audio files found in {data_dir}")

    # Stack the per-file batches: (samples, height, width, channels)
    X = np.concatenate(features_list, axis=0)
    y = np.array(labels_list)

    print(f"Feature shape: {X.shape}")
    print(f"Number of samples: {len(y)}")
    print(f"Unique species: {np.unique(y)}")

    # Initialize and build model
    input_shape = (X.shape[1], X.shape[2], 1)
    trainer = ModelTrainer(input_shape=input_shape)

    num_classes = len(np.unique(y))
    trainer.build_model(num_classes)

    # Train model
    trainer.train(X, y, epochs=50, batch_size=32, validation_split=0.2)

    # Plot and save training history
    trainer.plot_training_history(save_path=os.path.join(output_dir, 'training_history.png'))

    # Save model and encoder
    model_path = os.path.join(output_dir, 'bird_species_model.h5')
    encoder_path = os.path.join(output_dir, 'label_encoder.pkl')
    trainer.save_model(model_path, encoder_path)

    print(f"Model saved to {model_path}")
    print(f"Label encoder saved to {encoder_path}")

if __name__ == "__main__":
    # Command-line entry point. The defaults assume the script is started from
    # the BirdAudioProject directory: training audio in data/train_bird_audio
    # and the model written to models/, where app.py looks for it.
    import argparse

    parser = argparse.ArgumentParser(
        description="Train the bird species identification model."
    )
    parser.add_argument(
        "--data-dir",
        default="data/train_bird_audio",
        help="Directory containing one sub-folder of audio files per species",
    )
    parser.add_argument(
        "--output-dir",
        default="models",
        help="Directory to write the model, label encoder and training plot to",
    )
    args = parser.parse_args()

    train_model_with_data(args.data_dir, args.output_dir)

Writing BirdAudioProject/train_model.py


In [79]:
# Create app.py
%%writefile BirdAudioProject/app.py

import os
import json
import numpy as np
from flask import Flask, render_template, request, jsonify
import tensorflow as tf
from audio_processor import AudioProcessor
import pickle
import librosa
import matplotlib.pyplot as plt
import io
import base64
from werkzeug.utils import secure_filename

app = Flask(__name__)

# Locations of the trained model, the pickled label encoder and the upload
# folder, plus the accepted upload extensions. All paths are relative, so they
# resolve against the directory the app is started from.
MODEL_PATH = 'models/bird_species_model.h5'
ENCODER_PATH = 'models/label_encoder.pkl'
UPLOAD_FOLDER = 'static/uploads'
ALLOWED_EXTENSIONS = {'wav', 'mp3', 'ogg'}

# Make sure the upload folder exists before the first request arrives.
os.makedirs(UPLOAD_FOLDER, exist_ok=True)

# Initialize audio processor (default parameters)
processor = AudioProcessor()

# Load bird information
def load_bird_info():
    """Read ``static/data/bird_info.json`` and return its parsed content.

    Any failure (missing file, invalid JSON, ...) is printed and an empty
    dict is returned instead, so the app can start without the data file.
    """
    bird_catalog = {}
    try:
        with open('static/data/bird_info.json', 'r') as info_file:
            bird_catalog = json.load(info_file)
    except Exception as load_error:
        print(f"Error loading bird info: {load_error}")
    return bird_catalog

# Species metadata, loaded once at import time ({} when the file is unavailable).
bird_info = load_bird_info()

# Module-level handles for the classifier and its label encoder. They stay
# None until load_model_and_encoder() finds the corresponding files.
model = None
label_encoder = None

def load_model_and_encoder():
    """Populate the module-level ``model`` and ``label_encoder`` from disk.

    Each artifact is loaded only when its file exists; a missing file is
    reported on stdout and the corresponding global is left untouched.
    Any exception raised while loading is printed, not propagated.
    """
    global model, label_encoder
    try:
        if not os.path.exists(MODEL_PATH):
            print(f"Model file not found at {MODEL_PATH}")
        else:
            model = tf.keras.models.load_model(MODEL_PATH)
            print("Model loaded successfully")

        if not os.path.exists(ENCODER_PATH):
            print(f"Label encoder file not found at {ENCODER_PATH}")
        else:
            with open(ENCODER_PATH, 'rb') as encoder_file:
                label_encoder = pickle.load(encoder_file)
            print("Label encoder loaded successfully")
    except Exception as load_error:
        print(f"Error loading model or encoder: {load_error}")

# Try to load model and encoder at startup. load_model_and_encoder() already
# catches Exception internally, so this outer handler is only a second line of
# defence. If either object is still None afterwards, /identify retries the
# load on the next request.
try:
    load_model_and_encoder()
except Exception as e:
    print(f"Could not load model at startup: {e}")

def allowed_file(filename):
    """Return True when ``filename`` has an extension listed in ALLOWED_EXTENSIONS.

    The extension is the text after the last dot, compared case-insensitively;
    names without a dot are rejected.
    """
    _, dot, extension = filename.rpartition('.')
    if not dot:
        return False
    return extension.lower() in ALLOWED_EXTENSIONS

@app.route('/')
def index():
    """Serve the landing page by rendering the ``index.html`` template."""
    return render_template('index.html')

@app.route('/about')
def about():
    """Serve the about page by rendering the ``about.html`` template."""
    return render_template('about.html')

def _render_spectrogram_b64(file_path):
    """Render a mel spectrogram of the audio file and return it as a base64 PNG string."""
    # `import librosa` alone does not load the display submodule on every
    # librosa release; import it explicitly so `librosa.display` resolves.
    import librosa.display

    y, sr = librosa.load(file_path)
    S = librosa.feature.melspectrogram(y=y, sr=sr)

    fig = plt.figure(figsize=(10, 4))
    try:
        librosa.display.specshow(librosa.power_to_db(S, ref=np.max), y_axis='mel', x_axis='time')
        plt.colorbar(format='%+2.0f dB')
        plt.title('Mel spectrogram')

        # Save plot to a bytes buffer
        buf = io.BytesIO()
        fig.savefig(buf, format='png')
    finally:
        # Always release the figure; in a long-running server every leaked
        # figure stays in pyplot's registry and keeps its memory.
        plt.close(fig)

    # Convert to base64 for embedding in HTML
    return base64.b64encode(buf.getvalue()).decode('utf-8')


@app.route('/identify', methods=['POST'])
def identify_bird():
    """
    Identify the bird species in an uploaded recording.

    Expects a multipart POST with the audio in the ``audio_file`` field.
    The upload is stored in UPLOAD_FOLDER, run through the classifier and
    answered with JSON containing the predicted species, its descriptive
    data, the prediction confidence and a base64-encoded mel spectrogram.

    Returns:
        A JSON response. Errors use ``{'error': ...}`` with status 400 for
        bad uploads and 500 when the model is unavailable or processing fails.
    """
    if 'audio_file' not in request.files:
        return jsonify({'error': 'No file part'}), 400

    file = request.files['audio_file']

    if file.filename == '':
        return jsonify({'error': 'No selected file'}), 400

    if not allowed_file(file.filename):
        return jsonify({'error': 'Invalid file type'}), 400

    # Save the uploaded file. secure_filename() can strip a name down to
    # nothing or remove its stem (e.g. non-ASCII names), which would lose the
    # extension; fall back to a generated name that keeps the validated one.
    filename = secure_filename(file.filename)
    if not allowed_file(filename):
        import uuid
        extension = file.filename.rsplit('.', 1)[1].lower()
        filename = f"upload_{uuid.uuid4().hex}.{extension}"
    file_path = os.path.join(UPLOAD_FOLDER, filename)
    file.save(file_path)

    # Check if model is loaded; retry once in case the files appeared after startup
    if model is None or label_encoder is None:
        try:
            load_model_and_encoder()
            if model is None or label_encoder is None:
                return jsonify({'error': 'Model not available'}), 500
        except Exception as e:
            return jsonify({'error': f'Error loading model: {str(e)}'}), 500

    try:
        # Process audio file
        features = processor.preprocess_for_model(file_path)

        if features is None:
            return jsonify({'error': 'Failed to process audio file'}), 400

        # Make prediction
        probabilities = model.predict(features)[0]
        predicted_index = np.argmax(probabilities)
        predicted_species = str(label_encoder.inverse_transform([predicted_index])[0])
        confidence = float(probabilities[predicted_index])

        # Generate spectrogram for visualization
        spectrogram_b64 = _render_spectrogram_b64(file_path)

        # Get bird info. Start from defaults so an entry that lacks one of the
        # keys still yields a complete response instead of a KeyError.
        bird_data = {
            'common_name': predicted_species,
            'scientific_name': 'Unknown',
            'description': 'No information available for this species.',
            'image': '/static/images/placeholder.jpg'
        }
        bird_data.update(bird_info.get(predicted_species, {}))

        # Return results
        return jsonify({
            'species': predicted_species,
            'common_name': bird_data['common_name'],
            'scientific_name': bird_data['scientific_name'],
            'description': bird_data['description'],
            'image': bird_data['image'],
            'confidence': confidence,
            'spectrogram': spectrogram_b64
        })

    except Exception as e:
        return jsonify({'error': f'Error during identification: {str(e)}'}), 500

@app.route('/train', methods=['POST'])
def train_model_endpoint():
    """Placeholder for a training endpoint; it only returns an informational JSON message."""
    # This would be a more complex endpoint for training the model
    # In a real application, this might trigger a background job
    return jsonify({'message': 'Training functionality not implemented in this demo'})

if __name__ == '__main__':
    # Debug mode turns on the Werkzeug interactive debugger, which lets anyone
    # who can reach the server run arbitrary Python. Keep it off unless it is
    # explicitly requested through the FLASK_DEBUG environment variable.
    import os

    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in ('1', 'true', 'yes', 'on')
    app.run(debug=debug_enabled)

Overwriting BirdAudioProject/app.py


In [80]:
# Create layout.html
%%writefile BirdAudioProject/templates/layout.html

<!DOCTYPE html>
<!--
    Base layout shared by every page: site header with navigation, a main
    content area and a footer. Child templates extend this file and fill the
    "content" and "scripts" blocks.
-->
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Bird Species Identifier</title>
    <link rel="stylesheet" href="{{ url_for('static', filename='css/style.css') }}">
    <link href="https://fonts.googleapis.com/css2?family=Roboto:wght@300;400;500;700&display=swap" rel="stylesheet">
</head>
<body>
    <header>
        <div class="container">
            <h1>Bird Species Identifier</h1>
            <nav>
                <ul>
                    <!-- NOTE(review): url_for() expects Flask endpoints named 'index' and 'about'; confirm against app.py -->
                    <li><a href="{{ url_for('index') }}">Home</a></li>
                    <li><a href="{{ url_for('about') }}">About</a></li>
                </ul>
            </nav>
        </div>
    </header>

    <main>
        <div class="container">
            <!-- Page-specific markup supplied by the child template -->
            {% block content %}{% endblock %}
        </div>
    </main>

    <footer>
        <div class="container">
            <!-- The empty .current-year span is filled in by static/js/main.js once the DOM is ready -->
            <p>&copy; <span class="current-year"></span> Bird Species Identifier Project</p>
        </div>
    </footer>

    <!-- Shared helpers load first so page-specific scripts below can rely on them -->
    <script src="{{ url_for('static', filename='js/main.js') }}"></script>
    {% block scripts %}{% endblock %}
</body>
</html>

Writing BirdAudioProject/templates/layout.html


In [81]:
# Create index.html
%%writefile BirdAudioProject/templates/index.html

{% extends "layout.html" %}

{% block content %}
<!--
    Home page: an upload form for a bird recording plus a results panel that
    stays hidden until the page script fills it from the /identify response.
    Element ids in this block are looked up by that script, so keep them in sync.
-->
<section class="hero">
    <h2>Identify Bird Species from Audio</h2>
    <p>Upload a bird call or song recording to identify the species using our machine learning model.</p>
</section>

<section class="upload-section">
    <div class="upload-container">
        <h3>Upload Audio File</h3>
        <!-- No action/method attributes: submission is intercepted by the page script and sent with fetch() -->
        <form id="upload-form" enctype="multipart/form-data">
            <div class="file-input">
                <!-- The native input is hidden by the stylesheet; the label acts as the visible trigger -->
                <input type="file" id="audio-file" name="audio_file" accept=".wav,.mp3,.ogg">
                <label for="audio-file">Choose a file</label>
                <span id="file-name">No file chosen</span>
            </div>
            <div class="audio-controls">
                <!-- Preview player; shown once a file has been chosen -->
                <audio id="audio-player" controls style="display: none;"></audio>
            </div>
            <button type="submit" id="identify-btn">Identify Bird</button>
        </form>
        <!-- Spinner; toggled by the page script while a request is in flight -->
        <div id="loading" style="display: none;">
            <div class="spinner"></div>
            <p>Analyzing audio...</p>
        </div>
    </div>
</section>

<section class="results-section" id="results-section" style="display: none;">
    <h3>Identification Results</h3>
    <div class="results-container">
        <div class="bird-info">
            <div class="bird-image">
                <!-- Placeholder served from static/, matching the fallback image path used by the /identify handler -->
                <img id="bird-image" src="{{ url_for('static', filename='images/placeholder.jpg') }}" alt="Bird Image">
            </div>
            <div class="bird-details">
                <h4 id="bird-name"></h4>
                <p id="scientific-name"></p>
                <div class="confidence-meter">
                    <span>Confidence:</span>
                    <div class="meter">
                        <div id="confidence-bar"></div>
                    </div>
                    <span id="confidence-value"></span>
                </div>
                <p id="bird-description"></p>
            </div>
        </div>
        <div class="audio-analysis">
            <h4>Audio Analysis</h4>
            <div class="spectrogram">
                <!-- Replaced with a base64 PNG data URI once a result arrives -->
                <img id="spectrogram" src="{{ url_for('static', filename='images/placeholder.jpg') }}" alt="Audio Spectrogram">
            </div>
        </div>
    </div>
</section>
{% endblock %}

{% block scripts %}
<script>
// Upload flow for the home page: preview the chosen recording, POST it to
// /identify, then fill the results section from the JSON response.
document.addEventListener('DOMContentLoaded', function() {
    const uploadForm = document.getElementById('upload-form');
    const audioFileInput = document.getElementById('audio-file');
    const fileNameSpan = document.getElementById('file-name');
    const audioPlayer = document.getElementById('audio-player');
    const identifyButton = document.getElementById('identify-btn');
    const loadingDiv = document.getElementById('loading');
    const resultsSection = document.getElementById('results-section');

    // Object URL backing the current preview, tracked so it can be revoked.
    let previewUrl = null;

    // Restore the idle UI state once a request has finished (success or failure).
    function finishRequest() {
        loadingDiv.style.display = 'none';
        identifyButton.disabled = false;
    }

    // Update file name display when file is selected
    audioFileInput.addEventListener('change', function() {
        // Every createObjectURL() result keeps its blob alive until revoked,
        // so release the previous preview before creating a new one.
        if (previewUrl) {
            URL.revokeObjectURL(previewUrl);
            previewUrl = null;
        }

        if (this.files && this.files[0]) {
            const file = this.files[0];
            fileNameSpan.textContent = file.name;

            // Display audio player for preview
            previewUrl = URL.createObjectURL(file);
            audioPlayer.src = previewUrl;
            audioPlayer.style.display = 'block';
        } else {
            fileNameSpan.textContent = 'No file chosen';
            // Drop the now-revoked source so the hidden player holds no stale URL
            audioPlayer.removeAttribute('src');
            audioPlayer.load();
            audioPlayer.style.display = 'none';
        }
    });

    // Handle form submission
    uploadForm.addEventListener('submit', function(e) {
        e.preventDefault();

        if (!audioFileInput.files || audioFileInput.files.length === 0) {
            alert('Please select an audio file first.');
            return;
        }

        const formData = new FormData(uploadForm);

        // Show loading spinner and block repeat submissions while in flight
        loadingDiv.style.display = 'flex';
        resultsSection.style.display = 'none';
        identifyButton.disabled = true;

        // Send request to server
        fetch('/identify', {
            method: 'POST',
            body: formData
        })
        .then(response => {
            if (!response.ok) {
                // The error body may not be JSON (e.g. an HTML error page);
                // fall back to a generic message instead of a parse error.
                return response.json()
                    .catch(() => ({}))
                    .then(data => {
                        throw new Error((data && data.error) || 'Error processing request');
                    });
            }
            return response.json();
        })
        .then(data => {
            finishRequest();

            // Display results
            document.getElementById('bird-name').textContent = data.common_name;
            document.getElementById('scientific-name').textContent = data.scientific_name;
            document.getElementById('bird-description').textContent = data.description;
            document.getElementById('bird-image').src = data.image;

            // Set confidence bar (server sends a 0..1 fraction)
            const confidencePercent = Math.round(data.confidence * 100);
            document.getElementById('confidence-bar').style.width = confidencePercent + '%';
            document.getElementById('confidence-value').textContent = confidencePercent + '%';

            // Set spectrogram (base64-encoded PNG)
            document.getElementById('spectrogram').src = 'data:image/png;base64,' + data.spectrogram;

            // Show results section
            resultsSection.style.display = 'block';
        })
        .catch(error => {
            finishRequest();
            alert('Error: ' + error.message);
        });
    });
});
</script>
{% endblock %}

Writing BirdAudioProject/templates/index.html


In [82]:
# Create about.html
%%writefile BirdAudioProject/templates/about.html

{% extends "layout.html" %}

{% block content %}
<!-- Static "About" page: describes how the identifier works, the technology stack and the data sources. -->
<section class="about-section">
    <h2>About Bird Species Identifier</h2>
    <p>The Bird Species Identifier is a machine learning application that can identify bird species from audio recordings of their calls and songs.</p>

    <div class="about-content">
        <div class="about-text">
            <h3>How It Works</h3>
            <p>Our system uses a convolutional neural network (CNN) trained on a dataset of bird vocalizations. When you upload an audio file:</p>
            <ol>
                <li>The audio is processed to extract Mel-frequency cepstral coefficients (MFCCs)</li>
                <li>These features are fed into our trained neural network</li>
                <li>The model predicts the most likely bird species</li>
                <li>We display the results along with information about the identified species</li>
            </ol>

            <h3>Technologies Used</h3>
            <ul>
                <li>Python for backend processing and machine learning</li>
                <li>TensorFlow and Keras for the neural network model</li>
                <li>Librosa for audio processing</li>
                <li>Flask for the web application</li>
                <li>HTML, CSS, and JavaScript for the user interface</li>
            </ul>
        </div>

        <div class="about-image">
            <!-- NOTE(review): expects static/images/about-image.jpg to exist; confirm the file is provided -->
            <img src="{{ url_for('static', filename='images/about-image.jpg') }}" alt="Birds in natural habitat">
        </div>
    </div>

    <div class="dataset-info">
        <h3>Dataset Information</h3>
        <p>Our model was trained on a diverse dataset of bird vocalizations, including various species from different habitats and regions. The audio samples were collected from various sources including:</p>
        <ul>
            <li>Xeno-canto - A community database of bird sounds from around the world</li>
            <li>Cornell Lab of Ornithology's Macaulay Library</li>
            <li>Field recordings by ornithologists and bird enthusiasts</li>
        </ul>
    </div>
</section>
{% endblock %}

Writing BirdAudioProject/templates/about.html


In [83]:
# Create style.css
%%writefile BirdAudioProject/static/css/style.css

/* Base styles */
/* Design tokens shared by the rules below.
   --border-color, --error-color and --success-color are declared here but not
   referenced by any rule in this stylesheet; --success-color has the same
   value as --primary-color. */
:root {
    --primary-color: #4CAF50;
    --secondary-color: #2E7D32;
    --accent-color: #8BC34A;
    --text-color: #333;
    --light-text: #fff;
    --background-color: #f5f5f5;
    --card-background: #fff;
    --border-color: #ddd;
    --error-color: #f44336;
    --success-color: #4CAF50;
}

/* Global reset: drop default spacing and size every box by its border */
* {
    margin: 0;
    padding: 0;
    box-sizing: border-box;
}

/* 'Roboto' is loaded from Google Fonts by layout.html; sans-serif is the fallback */
body {
    font-family: 'Roboto', sans-serif;
    line-height: 1.6;
    color: var(--text-color);
    background-color: var(--background-color);
}

/* Centered page-width wrapper used inside header, main and footer */
.container {
    width: 90%;
    max-width: 1200px;
    margin: 0 auto;
    padding: 0 15px;
}

/* Header styles */
header {
    background-color: var(--primary-color);
    color: var(--light-text);
    padding: 1rem 0;
    box-shadow: 0 2px 5px rgba(0, 0, 0, 0.1);
}

/* Title on the left, navigation on the right, vertically centered */
header .container {
    display: flex;
    justify-content: space-between;
    align-items: center;
}

header h1 {
    font-size: 1.8rem;
    font-weight: 500;
}

/* Horizontal navigation list without bullets */
nav ul {
    display: flex;
    list-style: none;
}

nav ul li {
    margin-left: 1.5rem;
}

nav ul li a {
    color: var(--light-text);
    text-decoration: none;
    font-weight: 500;
    transition: color 0.3s;
}

nav ul li a:hover {
    color: var(--accent-color);
}

/* Main content styles */
main {
    padding: 2rem 0;
    /* NOTE(review): 130px presumably approximates the combined header and
       footer height so the footer sits at the bottom on short pages; confirm */
    min-height: calc(100vh - 130px);
}

/* Hero section */
.hero {
    text-align: center;
    padding: 2rem 0;
    margin-bottom: 2rem;
}

.hero h2 {
    font-size: 2.5rem;
    margin-bottom: 1rem;
    color: var(--secondary-color);
}

/* Intro paragraph: width-capped and centered for readability */
.hero p {
    font-size: 1.2rem;
    max-width: 800px;
    margin: 0 auto;
    color: #666;
}

/* Upload section */
.upload-section {
    margin-bottom: 3rem;
}

/* Card that holds the upload form and the loading indicator */
.upload-container {
    background-color: var(--card-background);
    border-radius: 8px;
    padding: 2rem;
    box-shadow: 0 2px 10px rgba(0, 0, 0, 0.1);
    max-width: 600px;
    margin: 0 auto;
}

.upload-container h3 {
    margin-bottom: 1.5rem;
    color: var(--secondary-color);
    text-align: center;
}

/* Stacks the styled label above the chosen-file name */
.file-input {
    margin-bottom: 1.5rem;
    display: flex;
    flex-direction: column;
}

/* The native control is hidden; the <label for="audio-file"> below is the
   visible trigger.
   NOTE(review): display: none also removes the input from keyboard focus
   order; consider a visually-hidden technique if keyboard access matters. */
.file-input input[type="file"] {
    display: none;
}

/* Label styled to look like a button */
.file-input label {
    background-color: var(--primary-color);
    color: white;
    padding: 10px 20px;
    border-radius: 4px;
    cursor: pointer;
    text-align: center;
    margin-bottom: 10px;
    transition: background-color 0.3s;
}

.file-input label:hover {
    background-color: var(--secondary-color);
}

/* Text showing the selected file name (updated by the page script) */
#file-name {
    font-size: 0.9rem;
    color: #666;
    text-align: center;
}

.audio-controls {
    margin-bottom: 1.5rem;
}

.audio-controls audio {
    width: 100%;
}

/* Element selector: applies to every <button> on the site, full width */
button {
    background-color: var(--primary-color);
    color: white;
    border: none;
    padding: 12px 24px;
    border-radius: 4px;
    cursor: pointer;
    font-size: 1rem;
    font-weight: 500;
    width: 100%;
    transition: background-color 0.3s;
}

button:hover {
    background-color: var(--secondary-color);
}

/* Loading spinner */
/* The element carries an inline style="display: none;" in index.html, which
   overrides this rule until the upload script sets display to 'flex'. */
#loading {
    display: flex;
    flex-direction: column;
    align-items: center;
    justify-content: center;
    margin-top: 1rem;
}

/* Ring with one colored edge; the spin animation rotates it continuously */
.spinner {
    border: 4px solid rgba(0, 0, 0, 0.1);
    border-radius: 50%;
    border-top: 4px solid var(--primary-color);
    width: 40px;
    height: 40px;
    animation: spin 1s linear infinite;
    margin-bottom: 1rem;
}

/* One full clockwise turn per cycle */
@keyframes spin {
    0% { transform: rotate(0deg); }
    100% { transform: rotate(360deg); }
}

/* Results section */
.results-section {
    margin-top: 3rem;
}

.results-section h3 {
    text-align: center;
    margin-bottom: 1.5rem;
    color: var(--secondary-color);
}

/* Card wrapping the bird details and the spectrogram */
.results-container {
    background-color: var(--card-background);
    border-radius: 8px;
    padding: 2rem;
    box-shadow: 0 2px 10px rgba(0, 0, 0, 0.1);
}

/* Image and details side by side; they wrap onto separate rows when narrow */
.bird-info {
    display: flex;
    flex-wrap: wrap;
    margin-bottom: 2rem;
    gap: 2rem;
}

/* Takes one share of the row (details take two) */
.bird-image {
    flex: 1;
    min-width: 250px;
}

.bird-image img {
    width: 100%;
    border-radius: 8px;
    box-shadow: 0 2px 5px rgba(0, 0, 0, 0.1);
}

.bird-details {
    flex: 2;
    min-width: 300px;
}

.bird-details h4 {
    font-size: 1.8rem;
    margin-bottom: 0.5rem;
    color: var(--secondary-color);
}

#scientific-name {
    font-style: italic;
    color: #666;
    margin-bottom: 1rem;
}

.confidence-meter {
    margin: 1.5rem 0;
}

/* Track of the confidence bar; overflow hidden keeps the fill inside the rounded ends */
.meter {
    height: 20px;
    background-color: #e0e0e0;
    border-radius: 10px;
    margin: 10px 0;
    overflow: hidden;
}

/* Fill of the confidence bar: starts empty, the page script sets its width
   as a percentage and the transition animates the change */
#confidence-bar {
    height: 100%;
    background-color: var(--primary-color);
    width: 0%;
    transition: width 1s ease-in-out;
}

#confidence-value {
    font-weight: bold;
}

.audio-analysis {
    margin-top: 2rem;
}

.audio-analysis h4 {
    margin-bottom: 1rem;
    color: var(--secondary-color);
}

.spectrogram {
    text-align: center;
}

/* Never let the spectrogram image overflow its card */
.spectrogram img {
    max-width: 100%;
    border-radius: 8px;
    box-shadow: 0 2px 5px rgba(0, 0, 0, 0.1);
}

/* About page styles */
.about-section {
    max-width: 900px;
    margin: 0 auto;
}

.about-section h2 {
    text-align: center;
    margin-bottom: 1.5rem;
    color: var(--secondary-color);
}

/* Text and image side by side; they wrap onto separate rows when narrow */
.about-content {
    display: flex;
    flex-wrap: wrap;
    gap: 2rem;
    margin: 2rem 0;
}

/* Text gets three shares of the row, the image two */
.about-text {
    flex: 3;
    min-width: 300px;
}

.about-image {
    flex: 2;
    min-width: 250px;
}

.about-image img {
    width: 100%;
    border-radius: 8px;
    box-shadow: 0 2px 5px rgba(0, 0, 0, 0.1);
}

.about-section h3 {
    color: var(--secondary-color);
    margin: 1.5rem 0 1rem;
}

/* The global reset removes list indentation, so restore it for these lists */
.about-section ul, .about-section ol {
    margin-left: 1.5rem;
    margin-bottom: 1.5rem;
}

/* Card around the dataset description */
.dataset-info {
    background-color: var(--card-background);
    border-radius: 8px;
    padding: 1.5rem;
    box-shadow: 0 2px 5px rgba(0, 0, 0, 0.1);
    margin-top: 2rem;
}

/* Footer styles */
footer {
    background-color: var(--primary-color);
    color: var(--light-text);
    padding: 1rem 0;
    text-align: center;
}

/* Responsive styles */
/* Viewports up to 768px wide: smaller headings and a stacked results layout */
@media (max-width: 768px) {
    header h1 {
        font-size: 1.5rem;
    }

    .hero h2 {
        font-size: 2rem;
    }

    /* Stack the bird image above the details instead of side by side */
    .bird-info {
        flex-direction: column;
    }

    .bird-image, .bird-details {
        width: 100%;
    }
}

Writing BirdAudioProject/static/css/style.css


In [84]:
# Create main.js
%%writefile BirdAudioProject/static/js/main.js

// Main JavaScript file for Bird Species Identifier

/**
 * Format a Date as a short en-US string containing the year, abbreviated
 * month, day, and two-digit hour and minute.
 *
 * @param {Date} date - The date to format.
 * @returns {string} The formatted date and time.
 */
function formatDateTime(date) {
  return date.toLocaleDateString('en-US', {
      year: 'numeric',
      month: 'short',
      day: 'numeric',
      hour: '2-digit',
      minute: '2-digit'
  });
}

/**
 * Check whether a file fits within a size limit.
 *
 * @param {{size: number}} file - Object exposing its size in bytes.
 * @param {number} maxSizeMB - Limit in megabytes (1 MB = 1024 * 1024 bytes).
 * @returns {boolean} false only when the file is larger than the limit.
 */
function validateFileSize(file, maxSizeMB) {
  const limitInBytes = maxSizeMB * 1024 * 1024;
  // Negating the "too large" test keeps the result identical for every input
  return !(file.size > limitInBytes);
}

// Once the DOM is ready, write the current four-digit year into every
// element that carries the 'current-year' class (used by the footer).
document.addEventListener('DOMContentLoaded', () => {
  const year = new Date().getFullYear();

  for (const node of document.querySelectorAll('.current-year')) {
      node.textContent = year;
  }
});

Writing BirdAudioProject/static/js/main.js


In [85]:
# Create bird_info.json
%%writefile BirdAudioProject/static/data/bird_info.json

{
"American_Robin": {
  "common_name": "American Robin",
  "scientific_name": "Turdus migratorius",
  "description": "The American Robin is a migratory songbird of the true thrush genus and Turdidae, the wider thrush family. It is widely distributed throughout North America, wintering from southern Canada to central Mexico and along the Pacific Coast.",
  "image": "/static/images/american_robin.jpg"
},
"Northern_Cardinal": {
  "common_name": "Northern Cardinal",
  "scientific_name": "Cardinalis cardinalis",
  "description": "The Northern Cardinal is a bird in the genus Cardinalis. It is also known colloquially as the redbird, common cardinal, red cardinal, or just cardinal. It can be found in southeastern Canada, through the eastern United States from Maine to Minnesota to Texas, and south through Mexico, Belize, and Guatemala.",
  "image": "/static/images/northern_cardinal.jpg"
},
"Blue_Jay": {
  "common_name": "Blue Jay",
  "scientific_name": "Cyanocitta cristata",
  "description": "The Blue Jay is a passerine bird in the family Corvidae, native to eastern North America. It is resident through most of eastern and central United States and southern Canada, although western populations may be migratory.",
  "image": "/static/images/blue_jay.jpg"
},
"Barn_Owl": {
  "common_name": "Barn Owl",
  "scientific_name": "Tyto alba",
  "description": "The Barn Owl is the most widely distributed species of owl in the world and one of the most widespread of all species of birds. It is found almost everywhere in the world except for the polar and desert regions, Asia north of the Himalayas, most of Indonesia, and some Pacific islands.",
  "image": "/static/images/barn_owl.jpg"
},
"Red_Tailed_Hawk": {
  "common_name": "Red-tailed Hawk",
  "scientific_name": "Buteo jamaicensis",
  "description": "The Red-tailed Hawk is a bird of prey that breeds throughout most of North America, from the interior of Alaska and northern Canada to as far south as Panama and the West Indies. It is one of the most common members within the genus of Buteo in North America or worldwide.",
  "image": "/static/images/red_tailed_hawk.jpg"
}
}

Writing BirdAudioProject/static/data/bird_info.json


In [86]:
# Create data_acquisition.py
%%writefile BirdAudioProject/data_acquisition.py

import os
import requests
import pandas as pd
import numpy as np
import librosa
import soundfile as sf
from tqdm import tqdm

def download_xeno_canto_data(species_list, output_dir, max_recordings_per_species=10, timeout=30):
    """
    Download bird audio recordings from Xeno-Canto API.

    Each species gets its own sub-directory of ``output_dir`` (spaces in the
    species name become underscores) and its recordings are saved there as
    ``<Species_Name>_<n>.mp3``. Failures are reported with ``print`` and
    skipped, so one unreachable species or recording does not abort the run.

    Args:
        species_list (list): List of bird species to download
        output_dir (str): Directory to save downloaded recordings
        max_recordings_per_species (int): Maximum number of recordings per species
        timeout (float): Seconds to wait on each HTTP request before giving up

    Returns:
        None
    """
    base_url = "https://www.xeno-canto.org/api/2/recordings"

    # Create output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)

    for species in species_list:
        print(f"Downloading recordings for {species}...")

        # Create species directory
        species_slug = species.replace(" ", "_")
        species_dir = os.path.join(output_dir, species_slug)
        os.makedirs(species_dir, exist_ok=True)

        # Query Xeno-Canto API. Passing the query through `params` lets
        # requests URL-encode it, and the timeout stops a stalled connection
        # from hanging the whole download.
        try:
            response = requests.get(base_url, params={"query": species}, timeout=timeout)
        except requests.RequestException as e:
            print(f"Failed to query Xeno-Canto API for {species}: {e}")
            continue

        if response.status_code != 200:
            print(f"Failed to query Xeno-Canto API for {species}")
            continue

        try:
            recordings = response.json().get('recordings', [])
        except ValueError as e:
            # Body was not valid JSON
            print(f"Invalid response from Xeno-Canto API for {species}: {e}")
            continue

        # Limit number of recordings
        recordings = recordings[:max_recordings_per_species]

        print(f"Found {len(recordings)} recordings for {species}")

        # Download each recording
        for i, recording in enumerate(recordings):
            try:
                # Get download link
                file_url = recording.get('file')
                if not file_url:
                    continue

                # Protocol-relative links ("//host/path") have no scheme that
                # requests can use, so default them to HTTPS.
                if file_url.startswith("//"):
                    file_url = "https:" + file_url

                # Files are always stored with an .mp3 extension; the number
                # is the recording's position in the API response.
                file_name = f"{species_slug}_{i+1}.mp3"
                file_path = os.path.join(species_dir, file_name)

                # Download file
                print(f"Downloading {file_name}...")
                audio_response = requests.get(file_url, timeout=timeout)

                if audio_response.status_code == 200:
                    with open(file_path, 'wb') as f:
                        f.write(audio_response.content)
                    print(f"Downloaded {file_name}")
                else:
                    print(f"Failed to download {file_name}")

            except Exception as e:
                # Best effort: report and move on to the next recording
                print(f"Error downloading recording: {e}")

def _gaussian_envelope(t, width):
    """Bell-shaped amplitude envelope centred at 1.5 s with the given width."""
    return np.exp(-0.5 * ((t - 1.5) / width) ** 2)


def _robin_call(t, duration, rng):
    """Robin-like chirps: 0.1 s bursts of a 2 kHz tone, four per second."""
    chirp_rate = 4  # chirps per second
    chirp_duration = 0.1
    signal = np.zeros_like(t)

    for j in range(int(duration * chirp_rate)):
        chirp_start = j / chirp_rate
        chirp_mask = (t >= chirp_start) & (t < chirp_start + chirp_duration)
        signal[chirp_mask] = 0.5 * np.sin(2 * np.pi * 2000 * t[chirp_mask])

    return signal


def _cardinal_call(t, duration, rng):
    """Cardinal-like whistle: a 1.5 kHz tone under a narrow envelope."""
    return 0.5 * np.sin(2 * np.pi * 1500 * t) * _gaussian_envelope(t, 0.5)


def _jay_call(t, duration, rng):
    """Jay-like harsh call: 1.2 kHz plus 2.4 kHz, gated on and off at 2 Hz."""
    signal = 0.3 * np.sin(2 * np.pi * 1200 * t) + 0.2 * np.sin(2 * np.pi * 2400 * t)
    signal *= (np.sin(2 * np.pi * 2 * t) > 0).astype(float)  # Add amplitude modulation
    return signal


def _owl_call(t, duration, rng):
    """Owl-like screech: an 800 Hz tone under a wide envelope plus extra noise."""
    signal = 0.4 * np.sin(2 * np.pi * 800 * t) * _gaussian_envelope(t, 1.0)
    signal += 0.1 * rng.normal(0, 1, len(t))
    return signal


def _hawk_call(t, duration, rng):
    """Hawk-like scream: a 600 Hz tone with a 1.2 kHz harmonic."""
    signal = 0.5 * np.sin(2 * np.pi * 600 * t) * _gaussian_envelope(t, 0.8)
    signal += 0.3 * np.sin(2 * np.pi * 1200 * t) * _gaussian_envelope(t, 0.8)
    return signal


# Species name -> generator of its characteristic call. Insertion order is the
# order in which species directories are created and filled.
_SYNTHETIC_CALLS = {
    "American_Robin": _robin_call,
    "Northern_Cardinal": _cardinal_call,
    "Blue_Jay": _jay_call,
    "Barn_Owl": _owl_call,
    "Red_Tailed_Hawk": _hawk_call,
}


def generate_synthetic_data(output_dir, num_samples=5, seed=None):
    """
    Generate synthetic bird call data for testing when real data is not available.

    For each of five simulated species, ``num_samples`` three-second signals
    sampled at 22050 Hz are synthesised, mixed with background noise,
    peak-normalised and written to ``<output_dir>/<species>/<species>_<n>.wav``.
    Samples of one species differ only in their random noise.

    Args:
        output_dir (str): Directory to save synthetic data
        num_samples (int): Number of samples per species
        seed (int, optional): Seed for the noise generator. With a seed the
            generated signals are reproducible; with None (default) NumPy's
            global random state is used, exactly as before.

    Returns:
        None
    """
    # A private RandomState keeps seeded runs from disturbing global state
    rng = np.random if seed is None else np.random.RandomState(seed)

    # Create output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)

    # Parameters for synthetic audio
    sr = 22050  # Sample rate
    duration = 3  # Duration in seconds

    # The time axis is identical for every sample, so build it once
    t = np.linspace(0, duration, int(sr * duration), endpoint=False)

    for bird, make_call in _SYNTHETIC_CALLS.items():
        # Create species directory
        species_dir = os.path.join(output_dir, bird)
        os.makedirs(species_dir, exist_ok=True)

        print(f"Generating synthetic data for {bird}...")

        for i in range(num_samples):
            signal = make_call(t, duration, rng)

            # Add some background noise
            noise = 0.05 * rng.normal(0, 1, len(signal))
            signal = signal + noise

            # Normalize to a peak amplitude of 1 (skip if the signal is silent)
            peak = np.max(np.abs(signal))
            if peak > 0:
                signal = signal / peak

            # Save as WAV file
            file_path = os.path.join(species_dir, f"{bird}_{i+1}.wav")
            sf.write(file_path, signal, sr)

            print(f"Generated {file_path}")

if __name__ == "__main__":
    # Example usage
    # Species names as passed to the Xeno-Canto query. The download function
    # only replaces spaces with underscores when naming directories, so
    # "Red-tailed Hawk" would be stored as "Red-tailed_Hawk", which differs
    # from the "Red_Tailed_Hawk" name used by generate_synthetic_data().
    species_list = [
        "American Robin",
        "Northern Cardinal",
        "Blue Jay",
        "Barn Owl",
        "Red-tailed Hawk"
    ]

    # Uncomment to download real data from Xeno-Canto
    # download_xeno_canto_data(species_list, "data/bird_audio")

    # Generate synthetic data for testing
    # (relative path: written under the current working directory)
    generate_synthetic_data("data/synthetic_bird_audio")

Writing BirdAudioProject/data_acquisition.py


In [87]:
# Create test_model.py
%%writefile BirdAudioProject/test_model.py

import os
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from audio_processor import AudioProcessor
import pickle
from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sns

def test_model(model_path, encoder_path, test_data_dir):
    """
    Test the trained model on test data.

    Every audio file under ``test_data_dir/<species>/`` is preprocessed and
    classified. The function prints the accuracy, a classification report and
    the misclassified files, and saves the confusion matrix to
    ``confusion_matrix.png`` in the current working directory. Species
    directories the label encoder does not know are skipped with a message,
    and the function returns early when no usable audio is found.

    Args:
        model_path (str): Path to the saved model
        encoder_path (str): Path to the saved label encoder
        test_data_dir (str): Directory containing test audio files organized by species

    Returns:
        None
    """
    # Load model
    model = tf.keras.models.load_model(model_path)

    # Load label encoder
    # NOTE: pickle.load can execute arbitrary code contained in the file, so
    # only point this at encoder files you created yourself.
    with open(encoder_path, 'rb') as f:
        label_encoder = pickle.load(f)

    known_species = set(label_encoder.classes_)

    # Initialize audio processor
    processor = AudioProcessor()

    # Lists to store features and true labels
    X_test = []
    y_true = []
    file_paths = []

    # Process each species directory
    species_dirs = [d for d in os.listdir(test_data_dir) if os.path.isdir(os.path.join(test_data_dir, d))]

    for species in species_dirs:
        # A label the encoder has never seen cannot be transformed later on,
        # so skip it here instead of failing after all predictions are made.
        if species not in known_species:
            print(f"Skipping {species}: not a class known to the label encoder")
            continue

        species_dir = os.path.join(test_data_dir, species)
        print(f"Processing test data for {species}...")

        # Process each audio file in the species directory
        audio_files = [f for f in os.listdir(species_dir) if f.endswith(('.wav', '.mp3', '.ogg'))]

        for audio_file in audio_files:
            file_path = os.path.join(species_dir, audio_file)

            # Preprocess audio for model input
            features = processor.preprocess_for_model(file_path)

            if features is not None:
                X_test.append(features[0])  # Remove batch dimension
                y_true.append(species)
                file_paths.append(file_path)

    if not X_test:
        # Nothing to reshape or predict on; avoid an IndexError further down
        print("No usable test audio found; nothing to evaluate.")
        return

    # Convert lists to arrays and add the trailing channel axis
    X_test = np.array(X_test)
    X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], X_test.shape[2], 1)
    y_true = np.array(y_true)

    # Make predictions
    y_pred_probs = model.predict(X_test)
    y_pred_indices = np.argmax(y_pred_probs, axis=1)
    y_pred = label_encoder.inverse_transform(y_pred_indices)

    # Encode true labels
    y_true_indices = label_encoder.transform(y_true)

    # Calculate accuracy
    accuracy = np.mean(y_pred == y_true)
    print(f"Test Accuracy: {accuracy:.4f}")

    # Print classification report
    print("\nClassification Report:")
    print(classification_report(y_true, y_pred))

    # Plot confusion matrix. Passing every class index keeps the matrix the
    # same size as the tick labels even when a class never occurs in the
    # test data or in the predictions.
    class_indices = np.arange(len(label_encoder.classes_))
    plt.figure(figsize=(10, 8))
    cm = confusion_matrix(y_true_indices, y_pred_indices, labels=class_indices)
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=label_encoder.classes_,
                yticklabels=label_encoder.classes_)
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.title('Confusion Matrix')
    plt.tight_layout()

    # Save confusion matrix, then release the figure
    plt.savefig('confusion_matrix.png')
    plt.close()

    # Print misclassified examples
    print("\nMisclassified Examples:")
    for i, (true, pred, file_path) in enumerate(zip(y_true, y_pred, file_paths)):
        if true != pred:
            print(f"File: {os.path.basename(file_path)}")
            print(f"True: {true}, Predicted: {pred}")
            # Probability the model assigned to the (wrong) predicted class
            print(f"Probabilities: {y_pred_probs[i][y_pred_indices[i]]:.4f}")
            print()

if __name__ == "__main__":
    # Example usage
    # All three paths are relative, so they resolve against the current
    # working directory the script is started from.
    model_path = "models/bird_species_model.h5"
    encoder_path = "models/label_encoder.pkl"
    test_data_dir = "data/test_bird_audio"

    test_model(model_path, encoder_path, test_data_dir)

Writing BirdAudioProject/test_model.py


In [88]:
# Create README.md
%%writefile BirdAudioProject/README.md

# Bird Species Identification from Audio

This project uses machine learning to identify bird species from audio recordings of their calls and songs. It includes a complete pipeline from audio processing to model training and a web interface for making predictions.

## Features

- Audio processing and feature extraction using librosa
- Convolutional Neural Network (CNN) for bird species classification
- Flask web application for uploading and analyzing bird audio
- Visualization of audio spectrograms and model predictions
- Synthetic data generation for testing when real data is unavailable

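## Project Structure

- `app.py` – Flask web application
- `data_acquisition.py` – Xeno-Canto download and synthetic data generation
- `train_with_real_data.py` – model training script
- `test_model.py` – model evaluation script
- `templates/` – HTML templates (`layout.html`, `index.html`, `about.html`)
- `static/` – CSS, JavaScript, bird information data, audio and images
- `models/` – saved model files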

Writing BirdAudioProject/README.md


In [89]:
# Create a modified training script
%%writefile BirdAudioProject/train_with_real_data.py

import os
import numpy as np
from audio_processor import AudioProcessor
from model_trainer import ModelTrainer

def _fit_to_length(features, fixed_length):
    """Zero-pad or truncate a 2-D feature matrix along axis 1 to exactly `fixed_length` columns."""
    n_frames = features.shape[1]
    if n_frames < fixed_length:
        return np.pad(
            features,
            pad_width=((0, 0), (0, fixed_length - n_frames)),
            mode='constant'
        )
    return features[:, :fixed_length]


def train_model_with_data(data_dir, output_dir, fixed_length=100):
    """
    Train a bird species identification model with audio data.

    Every sub-directory of `data_dir` is treated as one species; each audio
    file inside it is converted to normalized MFCC features, padded or
    truncated to `fixed_length` time steps, and used as one training sample
    labelled with the directory name.

    Args:
        data_dir (str): Directory containing audio files organized by species
        output_dir (str): Directory to save model and results
        fixed_length (int): Number of time steps every feature matrix is
            padded/truncated to (default 100, the previous hard-coded value)

    Raises:
        ValueError: If no audio file under `data_dir` could be turned into
            features (previously this surfaced as an opaque IndexError).
    """
    audio_extensions = ('.wav', '.mp3', '.ogg')

    # Create output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)

    # Initialize audio processor
    processor = AudioProcessor(sample_rate=22050, n_mfcc=13)

    # Lists to store features and labels
    features_list = []
    labels_list = []

    # Process each species directory (sorted so sample order is reproducible)
    species_dirs = sorted(
        d for d in os.listdir(data_dir) if os.path.isdir(os.path.join(data_dir, d))
    )

    print(f"Found {len(species_dirs)} species directories")

    for species in species_dirs:
        species_dir = os.path.join(data_dir, species)
        print(f"Processing {species}...")

        # Match extensions case-insensitively so files such as "call.WAV"
        # are not silently skipped.
        audio_files = sorted(
            f for f in os.listdir(species_dir) if f.lower().endswith(audio_extensions)
        )

        for audio_file in audio_files:
            file_path = os.path.join(species_dir, audio_file)

            # Load and preprocess audio; the processor signals failure with None
            signal, sr = processor.load_audio(file_path)
            if signal is None:
                continue

            # Extract features
            features = processor.extract_features(signal, feature_type='mfcc')
            if features is None:
                continue

            # Normalize, then force a consistent number of time steps
            normalized_features = processor.normalize_features(features)
            normalized_features = _fit_to_length(normalized_features, fixed_length)

            features_list.append(normalized_features)
            labels_list.append(species)

    if not features_list:
        raise ValueError(
            f"No usable audio files found under {data_dir!r}; "
            f"expected one sub-directory per species containing "
            f"{', '.join(audio_extensions)} files"
        )

    # Convert lists to arrays
    X = np.array(features_list)
    y = np.array(labels_list)

    # Reshape features for CNN input: (samples, height, width, channels)
    X = X.reshape(X.shape[0], X.shape[1], X.shape[2], 1)

    print(f"Feature shape: {X.shape}")
    print(f"Number of samples: {len(y)}")
    print(f"Unique species: {np.unique(y)}")

    # Initialize and build model
    input_shape = (X.shape[1], X.shape[2], 1)
    trainer = ModelTrainer(input_shape=input_shape)

    num_classes = len(np.unique(y))
    trainer.build_model(num_classes)

    # Train model
    # NOTE(review): samples are grouped by species; if ModelTrainer.train
    # forwards validation_split straight to Keras, the validation slice is
    # taken from the end of the arrays — confirm it shuffles/stratifies.
    trainer.train(X, y, epochs=50, batch_size=32, validation_split=0.2)

    # Plot and save training history
    trainer.plot_training_history(save_path=os.path.join(output_dir, 'training_history.png'))

    # Save model and encoder
    model_path = os.path.join(output_dir, 'bird_species_model.h5')
    encoder_path = os.path.join(output_dir, 'label_encoder.pkl')
    trainer.save_model(model_path, encoder_path)

    print(f"Model saved to {model_path}")
    print(f"Label encoder saved to {encoder_path}")

if __name__ == "__main__":
    # Script entry point: train on the default training-data directory and
    # write the model, encoder and history plot into "models".
    train_model_with_data("data/train_bird_audio", "models")

Writing BirdAudioProject/train_with_real_data.py


In [90]:
# 8. Train the model with real data
%cd BirdAudioProject
!python train_with_real_data.py

/content/BirdAudioProject/BirdAudioProject/BirdAudioProject
2025-04-09 14:46:48.110706: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1744210008.210642   12554 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1744210008.240266   12554 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-04-09 14:46:48.334579: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-04-09 14:46:57.868947: E external/local_xla/xla/stream_execu

In [91]:
!pip install flask pyngrok




In [92]:
import os
from getpass import getpass

from pyngrok import ngrok

# Register the ngrok authtoken (needed once per Colab session).
# The token is a secret: it is read from the NGROK_AUTHTOKEN environment
# variable or prompted for, never stored in the notebook. Any token that was
# previously committed in this cell should be revoked in the ngrok dashboard.
ngrok_authtoken = os.environ.get("NGROK_AUTHTOKEN") or getpass("ngrok authtoken: ")
ngrok.set_auth_token(ngrok_authtoken)



Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


In [93]:
!pip install flask-ngrok pyngrok
!ngrok authtoken 2uo5kJSJJTKVT5hTWceu9gv0JDT_36trhvECETpBAFYespywL

Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


In [94]:
import os

# Make sure the project directory exists (no error if it already does).
os.makedirs("BirdAudioProject", exist_ok=True)


In [95]:
# create app.py
%%writefile BirdAudioProject/app.py

# app.py

import os
import json
import numpy as np
from flask import Flask, render_template, request, jsonify
import tensorflow as tf
from audio_processor import AudioProcessor
import pickle
import librosa
import librosa.display
import matplotlib.pyplot as plt
import io
import base64
from werkzeug.utils import secure_filename

app = Flask(__name__)

# Configuration
# All paths are relative, so they resolve against the process working
# directory the server is started from.
MODEL_PATH = 'models/bird_species_model.h5'    # trained Keras model file
ENCODER_PATH = 'models/label_encoder.pkl'      # pickled label encoder
UPLOAD_FOLDER = 'static/uploads'               # where uploaded audio is saved
ALLOWED_EXTENSIONS = {'wav', 'mp3', 'ogg'}     # lower-case, without the dot

# Create the upload directory at import time so the first upload cannot fail
# on a missing folder.
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
# NOTE(review): constructed with default arguments, while the training script
# uses AudioProcessor(sample_rate=22050, n_mfcc=13) — confirm the defaults
# produce the same features the model was trained on.
processor = AudioProcessor()

# Load bird info from JSON
def load_bird_info():
    """
    Read the species metadata used to enrich prediction responses.

    Returns:
        dict: Parsed contents of static/data/bird_info.json, or an empty
        dict when the file is missing, unreadable, not valid JSON, or does
        not contain a JSON object at the top level.
    """
    try:
        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open('static/data/bird_info.json', 'r', encoding='utf-8') as f:
            data = json.load(f)
    except (OSError, ValueError) as e:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        print(f"Error loading bird info: {e}")
        return {}

    # Callers use dict-style lookups, so anything else (list, string, number)
    # is treated as a load failure instead of breaking later.
    if not isinstance(data, dict):
        print(f"Error loading bird info: expected a JSON object, got {type(data).__name__}")
        return {}
    return data

bird_info = load_bird_info()

# Load model and label encoder
model, label_encoder = None, None

def load_model_and_encoder():
    """
    Populate the module-level `model` and `label_encoder` from disk.

    Each artifact is loaded independently and only if it is not already in
    memory, so a failure loading one does not prevent loading the other and
    a retry does not reload a model that is already available. Failures are
    reported on stdout and leave the corresponding global as None.
    """
    global model, label_encoder

    if model is None:
        try:
            if os.path.exists(MODEL_PATH):
                model = tf.keras.models.load_model(MODEL_PATH)
                print("Model loaded successfully")
            else:
                print(f"Model file not found at {MODEL_PATH}")
        except Exception as e:
            print(f"Error loading model/encoder: {e}")

    if label_encoder is None:
        try:
            if os.path.exists(ENCODER_PATH):
                # SECURITY: pickle.load executes arbitrary code from the file;
                # only ever point ENCODER_PATH at a file this project wrote.
                with open(ENCODER_PATH, 'rb') as f:
                    label_encoder = pickle.load(f)
                print("Label encoder loaded successfully")
            else:
                print(f"Label encoder file not found at {ENCODER_PATH}")
        except Exception as e:
            print(f"Error loading model/encoder: {e}")

load_model_and_encoder()

# Helper to check file extension
def allowed_file(filename):
    """Return True when `filename` ends in an extension listed in ALLOWED_EXTENSIONS (case-insensitive)."""
    _stem, dot, extension = filename.rpartition('.')
    if not dot:
        # No dot at all means there is no extension to validate.
        return False
    return extension.lower() in ALLOWED_EXTENSIONS

# Routes
@app.route('/')
def index():
    """Serve the landing page by rendering the `index.html` template."""
    return render_template('index.html')

@app.route('/about')
def about():
    """Serve the about page by rendering the `about.html` template."""
    return render_template('about.html')

def _render_mel_spectrogram_b64(file_path):
    """Render a mel spectrogram of the audio at `file_path` and return it as a base64-encoded PNG string."""
    # A standalone Figure is used instead of pyplot: pyplot keeps global state
    # that is not safe across concurrent requests, and it retains every figure
    # that is not explicitly closed (e.g. when rendering raises midway).
    from matplotlib.figure import Figure

    y, sr = librosa.load(file_path)
    S = librosa.feature.melspectrogram(y=y, sr=sr)

    fig = Figure(figsize=(10, 4))
    ax = fig.subplots()
    img = librosa.display.specshow(
        librosa.power_to_db(S, ref=np.max), y_axis='mel', x_axis='time', ax=ax
    )
    fig.colorbar(img, ax=ax, format='%+2.0f dB')
    ax.set_title('Mel spectrogram')

    buf = io.BytesIO()
    fig.savefig(buf, format='png')
    return base64.b64encode(buf.getvalue()).decode('utf-8')


@app.route('/identify', methods=['POST'])
def identify_bird():
    """
    Classify an uploaded bird recording.

    Expects a multipart form upload under the field name `audio_file`.

    Returns:
        A JSON response. On success it contains `species`, `common_name`,
        `scientific_name`, `description`, `image`, `confidence` and a
        base64 PNG `spectrogram`. On failure it contains `error` with
        status 400 (missing/empty/unsupported upload, or audio that could
        not be processed) or 500 (model unavailable, prediction error).
    """
    if 'audio_file' not in request.files:
        return jsonify({'error': 'No file part'}), 400

    file = request.files['audio_file']

    if file.filename == '':
        return jsonify({'error': 'No selected file'}), 400

    if not (file and allowed_file(file.filename)):
        return jsonify({'error': 'Invalid file type'}), 400

    # Check the model before touching the disk so an unusable server does not
    # accumulate uploads it can never process.
    if model is None or label_encoder is None:
        load_model_and_encoder()
        if model is None or label_encoder is None:
            return jsonify({'error': 'Model not available'}), 500

    filename = secure_filename(file.filename)
    file_path = os.path.join(UPLOAD_FOLDER, filename)
    file.save(file_path)

    try:
        features = processor.preprocess_for_model(file_path)
        if features is None:
            return jsonify({'error': 'Failed to process audio file'}), 400

        probabilities = model.predict(features)[0]
        predicted_index = np.argmax(probabilities)
        predicted_species = str(label_encoder.inverse_transform([predicted_index])[0])
        confidence = float(probabilities[predicted_index])

        spectrogram_b64 = _render_mel_spectrogram_b64(file_path)

        # Start from placeholder values and overlay whatever bird_info knows,
        # so an entry that lacks some keys does not fail the whole request.
        bird_data = {
            'common_name': predicted_species,
            'scientific_name': 'Unknown',
            'description': 'No information available.',
            'image': '/static/images/placeholder.jpg'
        }
        bird_data.update(bird_info.get(predicted_species, {}))

        return jsonify({
            'species': predicted_species,
            'common_name': bird_data['common_name'],
            'scientific_name': bird_data['scientific_name'],
            'description': bird_data['description'],
            'image': bird_data['image'],
            'confidence': confidence,
            'spectrogram': spectrogram_b64
        })

    except Exception as e:
        return jsonify({'error': f'Error during prediction: {str(e)}'}), 500

@app.route('/train', methods=['POST'])
def train_model_endpoint():
    """Placeholder for a training endpoint: performs no training and always returns a fixed JSON message."""
    return jsonify({'message': 'Training feature is not implemented in this demo.'})

if __name__ == '__main__':
    # Debug mode turns on the Werkzeug interactive debugger, which allows
    # arbitrary code execution from the browser. This app is exposed through
    # a public tunnel, so debug is opt-in via FLASK_DEBUG=1 instead of always on.
    app.run(debug=os.environ.get('FLASK_DEBUG', '0') == '1')


Writing BirdAudioProject/app.py


In [96]:
import os
import subprocess
import sys
import time

from pyngrok import ngrok

FLASK_PORT = 5000  # Flask's default port; app.py calls app.run() without overriding it

# app.py is written to BirdAudioProject/app.py relative to the kernel's
# working directory; fall back to the current directory when the kernel is
# already inside the project folder.
APP_DIR = "BirdAudioProject" if os.path.isfile(os.path.join("BirdAudioProject", "app.py")) else "."

# Re-running this cell must not leave a stale server holding the port.
if "flask_process" in globals() and flask_process.poll() is None:
    flask_process.terminate()
    flask_process.wait()

# Run the Flask app as a child process (stoppable with flask_process.terminate())
# from the project directory so its relative paths (models/, static/) resolve.
flask_process = subprocess.Popen([sys.executable, "app.py"], cwd=APP_DIR)

# Give the server a moment to start and fail loudly if it exited immediately.
time.sleep(3)
if flask_process.poll() is not None:
    raise RuntimeError(f"app.py exited early with code {flask_process.returncode}")

# Create a tunnel to the Flask app
public_url = ngrok.connect(FLASK_PORT)
print("Your app is running on:", public_url)


Your app is running on: NgrokTunnel: "https://ec6e-34-125-52-51.ngrok-free.app" -> "http://localhost:5000"
