In [1]:
# Install the Kaggle command-line client; the `kaggle datasets download` call further down needs it.
!pip install kaggle




In [1]:
import getpass
import os

# SECURITY: a real Kaggle username and API key used to be hardcoded in this cell.
# Anything saved in a shared notebook must be treated as leaked: revoke that key in the
# Kaggle account settings and generate a new one.
#
# The Kaggle CLI reads credentials either from a kaggle.json file in its config directory
# or from the KAGGLE_USERNAME / KAGGLE_KEY environment variables. Only prompt when neither
# source is already available, and never store the values in the notebook itself.
_kaggle_config_dir = os.environ.get(
    'KAGGLE_CONFIG_DIR', os.path.join(os.path.expanduser('~'), '.kaggle')
)
if not os.path.isfile(os.path.join(_kaggle_config_dir, 'kaggle.json')):
    if not os.environ.get('KAGGLE_USERNAME'):
        os.environ['KAGGLE_USERNAME'] = input('Kaggle username: ').strip()
    if not os.environ.get('KAGGLE_KEY'):
        # getpass keeps the key out of the rendered cell output.
        os.environ['KAGGLE_KEY'] = getpass.getpass('Kaggle API key: ').strip()



In [2]:
# Download the dataset archive with the Kaggle CLI (uses the credentials configured above).
!kaggle datasets download -d ttahara/birdsong-resampled-train-audio-00
# Extract the archive quietly (-q) into dataset_folder (-d), which the later cells read from.
!unzip -q birdsong-resampled-train-audio-00.zip -d dataset_folder


Downloading birdsong-resampled-train-audio-00.zip to /content
100% 11.7G/11.7G [02:18<00:00, 96.4MB/s]
100% 11.7G/11.7G [02:18<00:00, 91.3MB/s]


In [None]:
import os

import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
from keras.utils import img_to_array
from PIL import Image


def spectral_gate_and_spectrogram(audio_file, threshold=20, frame_length=2048, hop_length=512, target_size=(128, 128)):
    """Noise-gate an audio file and render its mel spectrogram as a normalized image array.

    The signal is transformed with an STFT, every time-frequency bin whose level is
    not strictly above ``threshold`` dB is silenced, and the gated signal is
    resynthesized. Its mel spectrogram is drawn with matplotlib, saved as a PNG next
    to the audio file, reloaded, resized and scaled by 1/255.

    Args:
        audio_file: Path of the audio file to process.
        threshold: Gate level in dB, compared against ``librosa.amplitude_to_db`` of
            the STFT magnitude (called with its default reference).
        frame_length: FFT size handed to ``librosa.stft`` as ``n_fft``.
        hop_length: Hop between STFT frames, used for both analysis and resynthesis.
        target_size: Size handed to ``PIL.Image.resize``, i.e. ``(width, height)``.

    Returns:
        tuple: ``(img_array, spectrogram_path)``. ``img_array`` is the resized image
        converted with ``img_to_array`` and divided by 255; ``spectrogram_path`` is
        the path of the PNG written beside the audio file.
    """
    # sr=None keeps the file's own sampling rate instead of resampling on load.
    y, sr = librosa.load(audio_file, sr=None)
    stft = librosa.stft(y, n_fft=frame_length, hop_length=hop_length)
    magnitude, phase = librosa.magphase(stft)
    magnitude_db = librosa.amplitude_to_db(magnitude)

    # Apply the gate to the LINEAR magnitude. Multiplying the dB matrix by the mask
    # would set gated bins to 0 dB, which converts back to an amplitude of 1.0
    # (i.e. it would inject energy) instead of silencing them.
    mask = magnitude_db > threshold
    filtered_magnitude = magnitude * mask
    y_filtered = librosa.istft(filtered_magnitude * phase, hop_length=hop_length)

    S = librosa.feature.melspectrogram(y=y_filtered, sr=sr)
    S_DB = librosa.power_to_db(S, ref=np.max)

    # Render the spectrogram without axes or padding and save it beside the audio file.
    spectrogram_path = os.path.splitext(audio_file)[0] + "_spectrogram.png"
    fig = plt.figure(figsize=(10, 4))
    try:
        librosa.display.specshow(S_DB, sr=sr, x_axis='time', y_axis='mel')
        plt.axis('off')
        plt.savefig(spectrogram_path, bbox_inches='tight', pad_inches=0)
    finally:
        # Always release the figure: this runs once per file and figures accumulate otherwise.
        plt.close(fig)

    # Reload the rendered image and shrink it to the network input size.
    # Image.LANCZOS is the same filter as the removed Image.ANTIALIAS alias.
    with Image.open(spectrogram_path) as img:
        resized = img.resize(target_size, Image.LANCZOS)

    # NOTE(review): matplotlib PNGs are usually RGBA, so the array presumably has
    # 4 channels -- confirm before fixing the model's input shape.
    img_array = img_to_array(resized) / 255.0

    return img_array, spectrogram_path


def get_audio_files_and_labels(dataset_dir, extensions=('.wav', '.mp3', '.ogg', '.flac', '.m4a')):
    """Collect audio file paths and species labels from a one-folder-per-species tree.

    Every sub-directory of ``dataset_dir`` is treated as one species; its name is the
    label of each audio file found directly inside it. Only regular files whose name
    ends with one of ``extensions`` (case-insensitive) are returned, so the
    ``*_spectrogram.png`` images that preprocessing writes next to the audio, and any
    other stray files, are not fed back into the audio loader on a re-run.

    Args:
        dataset_dir: Root directory containing one sub-directory per species.
        extensions: Iterable of file-name suffixes accepted as audio files.

    Returns:
        tuple: ``(audio_files, labels)`` -- two lists of equal length, ordered by
        species name and then file name so repeated runs see the same order.
    """
    suffixes = tuple(ext.lower() for ext in extensions)
    audio_files = []
    labels = []

    # os.listdir order is arbitrary; sorting keeps the later seeded split reproducible.
    for species in sorted(os.listdir(dataset_dir)):
        species_dir = os.path.join(dataset_dir, species)
        if not os.path.isdir(species_dir):
            continue
        for file_name in sorted(os.listdir(species_dir)):
            file_path = os.path.join(species_dir, file_name)
            if os.path.isfile(file_path) and file_name.lower().endswith(suffixes):
                audio_files.append(file_path)
                labels.append(species)

    return audio_files, labels

def preprocess_dataset(audio_files):
    """Convert audio files into a single array of spectrogram images.

    Args:
        audio_files: Iterable of audio file paths.

    Returns:
        numpy.ndarray: The image arrays produced by ``spectral_gate_and_spectrogram``
        for each file, stacked along a new first axis in input order.
    """
    spectrograms = []
    for file in audio_files:
        # The helper returns (image_array, png_path); only the array is model input.
        # Appending the whole tuple would mix arrays with path strings in np.array().
        spectrogram, _ = spectral_gate_and_spectrogram(file)
        spectrograms.append(spectrogram)
    return np.array(spectrograms)

# Folder the downloaded archive was extracted into; each sub-directory is treated as one species.
dataset_dir = 'dataset_folder'
# Gather file paths with their species labels, then turn the files into spectrogram arrays.
audio_files, labels = get_audio_files_and_labels(dataset_dir)
spectrograms = preprocess_dataset(audio_files)


  img = img.resize(target_size, Image.ANTIALIAS)


In [None]:
from sklearn.preprocessing import LabelEncoder
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split

# Map each species name to an integer id, then one-hot encode the ids for the softmax output.
# `encoder` is kept at notebook level because predict_species() uses it to decode predictions.
encoder = LabelEncoder()
encoded_labels = encoder.fit_transform(labels)
categorical_labels = to_categorical(encoded_labels)

# Hold out 20% of the samples; random_state fixes the shuffle so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(spectrograms, categorical_labels, test_size=0.2, random_state=42)


In [None]:
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

def build_model(input_shape, num_classes):
    """Assemble and compile the small CNN used to classify spectrogram images.

    Args:
        input_shape: Shape of one input sample, given to the first convolution.
        num_classes: Number of units in the final softmax layer.

    Returns:
        The compiled ``Sequential`` model (Adam optimizer, categorical
        cross-entropy loss, accuracy metric).
    """
    net = Sequential()

    # Two convolution + max-pooling stages (32 then 64 filters, 3x3 kernels).
    net.add(Conv2D(32, (3, 3), activation='relu', input_shape=input_shape))
    net.add(MaxPooling2D(2, 2))
    net.add(Conv2D(64, (3, 3), activation='relu'))
    net.add(MaxPooling2D(2, 2))

    # Classifier head: flatten, one hidden dense layer with dropout, softmax output.
    net.add(Flatten())
    net.add(Dense(64, activation='relu'))
    net.add(Dropout(0.5))
    net.add(Dense(num_classes, activation='softmax'))

    net.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return net

# Derive the network dimensions from the data: every axis of X_train after the first is the
# per-sample input shape, and the width of the one-hot labels is the number of classes.
model = build_model(input_shape=X_train.shape[1:], num_classes=y_train.shape[1])
# NOTE(review): the held-out test split is also passed as validation data, so the reported
# validation metrics are measured on the test set -- confirm this is intended.
model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test))

# Save the trained model to the file that predict_species() loads by default.
model.save('bird_classification_model.h5')


In [None]:
from keras.models import load_model

def predict_species(audio_file, model_path='bird_classification_model.h5'):
    """Predict the species label for a single audio file.

    The saved model is loaded from ``model_path``, the audio file is converted with
    ``spectral_gate_and_spectrogram``, and the class with the highest predicted score
    is mapped back to its original label.

    Relies on the notebook-level ``encoder`` (the fitted label encoder) being defined.

    Args:
        audio_file: Path of the audio file to classify.
        model_path: Path of the saved Keras model to load.

    Returns:
        The result of ``encoder.inverse_transform`` for the single predicted class
        index, i.e. a one-element sequence holding the predicted label.
    """
    model = load_model(model_path)
    # The helper returns (image_array, png_path); only the array is fed to the model.
    spectrogram, _ = spectral_gate_and_spectrogram(audio_file)
    # The model expects a batch axis, so wrap the single image as a batch of one.
    batch = np.expand_dims(spectrogram, axis=0)
    prediction = model.predict(batch)
    predicted_label = np.argmax(prediction, axis=1)
    return encoder.inverse_transform(predicted_label)

# Example usage
# species = predict_species('path_to_new_audio_file.wav')
