In [1]:
import os

import librosa
import numpy as np
import soundfile as sf
import tensorflow as tf
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
from tensorflow.keras import Sequential
from tensorflow.keras.layers import (
    BatchNormalization,
    Conv2D,
    Dense,
    Dropout,
    Flatten,
    GlobalAveragePooling2D,
    Input,
    MaxPooling2D,
)


In [2]:
# Dataset root folder and the class sub-folders read from it.
# A class's position in BIRD_CLASSES is used as its integer label.
DATA_DIR = 'augmented_data'
BIRD_CLASSES = [
    'collared_dove',
    'indian_mayna',
    'kingfisher',
    'nightangale',
    'owl',
    'sparrow',
    'noise',
    'unknown',
]


In [3]:
def load_wav_16k_mono(filename):
    """Load an audio file as a mono waveform resampled to 16 kHz.

    Args:
        filename: Path to the audio file; passed straight to ``librosa.load``.

    Returns:
        The waveform array produced by ``librosa.load`` (1-D, because the
        channels are mixed down to mono).
    """
    # mono=True makes librosa.load downmix the channels itself, so the result
    # is already single-channel and needs no separate to_mono pass.
    # The returned sample rate is always the requested 16000 and is not needed.
    wav, _ = librosa.load(filename, sr=16000, mono=True)
    return wav

In [4]:
def preprocess(file_path, frame_length=320, frame_step=32, target_shape=(150, 257)):
    """Turn an audio file into a fixed-size magnitude spectrogram.

    Args:
        file_path: Path to the audio file to load.
        frame_length: STFT window length in samples.
        frame_step: STFT hop size in samples.
        target_shape: ``(time_steps, frequency_bins)`` of the returned
            spectrogram. With the default ``frame_length`` the STFT produces
            257 frequency bins, so the default target leaves that axis as is.

    Returns:
        A tensor of shape ``(target_shape[0], target_shape[1], 1)``.
    """
    # Load the audio file as a 16 kHz mono waveform
    wav = load_wav_16k_mono(file_path)

    # Magnitude of the STFT, shape (time, frequency)
    magnitude = tf.abs(
        tf.signal.stft(wav, frame_length=frame_length, frame_step=frame_step)
    )

    # Add a trailing channel axis so the spectrogram is image-like: (time, frequency, 1)
    magnitude = tf.expand_dims(magnitude, axis=-1)

    # Force a fixed size. resize_with_crop_or_pad works around the centre:
    # longer clips keep only their middle `time_steps` frames, shorter clips
    # are zero-padded evenly on both sides.
    time_steps, frequency_bins = target_shape
    return tf.image.resize_with_crop_or_pad(magnitude, time_steps, frequency_bins)


In [None]:
# Preprocess one example clip to get its spectrogram, shape (time, frequency, 1)
sample_path = os.path.join(DATA_DIR, 'collared_dove', 'collared_dove_sample1_sample10_aug3.wav')
spect = preprocess(sample_path)

# Swap the first two axes so frequency runs along the rows (y-axis) and time along the columns (x-axis)
spect = tf.transpose(spect, perm=[1, 0, 2])

# Remove the channel dimension for plotting
spect = tf.squeeze(spect, axis=-1)

# Plot the spectrogram
plt.figure(figsize=(30, 20))
plt.imshow(spect, aspect='auto', origin='lower', cmap='inferno')
# The values are linear STFT magnitudes (tf.abs), not decibels, so label them as such
plt.colorbar(label='Magnitude')
plt.title('Spectrogram')
plt.xlabel('Time frame')
plt.ylabel('Frequency bin')
plt.show()


In [6]:
# Load audio files and labels into NumPy arrays
def load_dataset(data_dir, classes):
    """Preprocess every ``.wav`` file of every class folder into arrays.

    Args:
        data_dir: Root folder containing one sub-folder per class.
        classes: Sequence of class folder names; the position of a name in
            this sequence is the integer label given to its files.

    Returns:
        A ``(data, labels)`` tuple of NumPy arrays: ``data`` stacks the
        spectrogram returned by ``preprocess`` for each file and ``labels``
        holds the matching class index. Both are empty if no ``.wav`` file
        is found.

    Raises:
        OSError: Propagated from ``os.listdir`` when a class folder is
            missing or unreadable.
    """
    data = []
    labels = []

    for label, bird_class in enumerate(classes):
        class_dir = os.path.join(data_dir, bird_class)
        # os.listdir returns entries in arbitrary order; sorting keeps the
        # sample order (and therefore any seeded split of it) reproducible.
        for file_name in sorted(os.listdir(class_dir)):
            if not file_name.endswith('.wav'):  # Ensure only audio files are processed
                continue

            # Preprocess the audio file to get the spectrogram
            spectrogram = preprocess(os.path.join(class_dir, file_name))

            # Store a plain array per sample so the final stacking is a
            # straightforward NumPy operation rather than a conversion of
            # a list of tensor objects.
            data.append(np.asarray(spectrogram))
            labels.append(label)

    # Convert lists to numpy arrays
    return np.array(data), np.array(labels)



# Load the dataset
X, y = load_dataset(DATA_DIR, BIRD_CLASSES)

# Stratified, seeded 80/20 split into training and testing sets
# NOTE(review): file names such as '..._sample10_aug3.wav' suggest several
# augmented variants per source recording. A per-file split may place variants
# of one recording in both sets and inflate test accuracy — confirm, and split
# by source recording if so.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)

# Convert to TensorFlow datasets.
# Keras does not shuffle a tf.data.Dataset passed to fit(), so the training
# pipeline shuffles itself; by default the order is redrawn every epoch.
train_dataset = (
    tf.data.Dataset.from_tensor_slices((X_train, y_train))
    .shuffle(buffer_size=len(X_train), seed=42)
    .batch(16)
    .prefetch(tf.data.AUTOTUNE)
)
# The evaluation data keeps a fixed order
test_dataset = (
    tf.data.Dataset.from_tensor_slices((X_test, y_test))
    .batch(16)
    .prefetch(tf.data.AUTOTUNE)
)


In [None]:
# Define the model: three Conv -> BatchNorm -> MaxPool stages, global average
# pooling, then a small dense classifier head.
model = Sequential([
    # Per-sample input shape taken from the data itself: (time, frequency, channels)
    Input(shape=X_train.shape[1:]),

    Conv2D(32, (3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D((2, 2)),

    Conv2D(64, (3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D((2, 2)),

    Conv2D(128, (3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D((2, 2)),

    # Collapse each feature map to a single value instead of flattening
    GlobalAveragePooling2D(),

    Dense(128, activation='relu'),
    Dropout(0.5),  # Reduce overfitting

    # One output probability per entry in BIRD_CLASSES
    Dense(len(BIRD_CLASSES), activation='softmax')  # Multi-class classification
])

# Compile the model; the sparse loss matches the integer class labels in y
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# View the model summary
model.summary()


In [None]:
# Train the model.
# The held-out test set doubles as validation data here, so the 'val_*'
# curves below are test-set metrics.
history = model.fit(train_dataset, epochs=50, validation_data=test_dataset)

# Plot training and validation accuracy/loss side by side
fig, (ax_acc, ax_loss) = plt.subplots(1, 2, figsize=(14, 5))

ax_acc.plot(history.history['accuracy'], label='accuracy')
ax_acc.plot(history.history['val_accuracy'], label='val_accuracy')
ax_acc.set(xlabel='Epoch', ylabel='Accuracy', title='Accuracy')
ax_acc.legend()

ax_loss.plot(history.history['loss'], label='loss')
ax_loss.plot(history.history['val_loss'], label='val_loss')
ax_loss.set(xlabel='Epoch', ylabel='Loss', title='Loss')
ax_loss.legend()

fig.tight_layout()
plt.show()


In [None]:
# Write the trained model to disk under a fixed file name
model_path = 'bird_classification_model.h5'
model.save(model_path)
