In [1]:
!pip install numpy librosa scikit-learn matplotlib tensorflow
!pip install --upgrade --force-reinstall librosa
!pip install --upgrade --force-reinstall resampy



Collecting librosa
  Using cached librosa-0.10.1-py3-none-any.whl (253 kB)
Collecting audioread>=2.1.9 (from librosa)
  Using cached audioread-3.0.1-py3-none-any.whl (23 kB)
Collecting numpy!=1.22.0,!=1.22.1,!=1.22.2,>=1.20.3 (from librosa)
  Using cached numpy-1.26.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (18.2 MB)
Collecting scipy>=1.2.0 (from librosa)
  Using cached scipy-1.12.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (38.4 MB)
Collecting scikit-learn>=0.20.0 (from librosa)
  Using cached scikit_learn-1.4.1.post1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (12.1 MB)
Collecting joblib>=0.14 (from librosa)
  Using cached joblib-1.3.2-py3-none-any.whl (302 kB)
Collecting decorator>=4.3.0 (from librosa)
  Using cached decorator-5.1.1-py3-none-any.whl (9.1 kB)
Collecting numba>=0.51.0 (from librosa)
  Using cached numba-0.59.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (3.7 MB)
Collecting soundfile>=0.12.1 (from libros

Collecting resampy
  Using cached resampy-0.4.3-py3-none-any.whl (3.1 MB)
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/pip/_internal/cli/base_command.py", line 169, in exc_logging_wrapper
    status = run_func(*args)
  File "/usr/local/lib/python3.10/dist-packages/pip/_internal/cli/req_command.py", line 242, in wrapper
    return func(self, options, args)
  File "/usr/local/lib/python3.10/dist-packages/pip/_internal/commands/install.py", line 377, in run
    requirement_set = resolver.resolve(
  File "/usr/local/lib/python3.10/dist-packages/pip/_internal/resolution/resolvelib/resolver.py", line 92, in resolve
    result = self._result = resolver.resolve(
  File "/usr/local/lib/python3.10/dist-packages/pip/_vendor/resolvelib/resolvers.py", line 546, in resolve
    state = resolution.resolve(requirements, max_rounds=max_rounds)
  File "/usr/local/lib/python3.10/dist-packages/pip/_vendor/resolvelib/resolvers.py", line 427, in resolve
    failure_cause

In [3]:
!unzip pos.zip -d ./
!unzip neg.zip -d ./


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: ./__MACOSX/pos/._608.wav  
  inflating: ./pos/90.wav            
  inflating: ./__MACOSX/pos/._90.wav  
  inflating: ./pos/146.wav           
  inflating: ./__MACOSX/pos/._146.wav  
  inflating: ./pos/620.wav           
  inflating: ./__MACOSX/pos/._620.wav  
  inflating: ./pos/634.wav           
  inflating: ./__MACOSX/pos/._634.wav  
  inflating: ./pos/152.wav           
  inflating: ./__MACOSX/pos/._152.wav  
  inflating: ./pos/47.wav            
  inflating: ./__MACOSX/pos/._47.wav  
  inflating: ./pos/807.wav           
  inflating: ./__MACOSX/pos/._807.wav  
  inflating: ./pos/53.wav            
  inflating: ./__MACOSX/pos/._53.wav  
  inflating: ./pos/813.wav           
  inflating: ./__MACOSX/pos/._813.wav  
  inflating: ./pos/1.wav             
  inflating: ./__MACOSX/pos/._1.wav  
  inflating: ./pos/185.wav           
  inflating: ./__MACOSX/pos/._185.wav  
  inflating: ./pos/191.wav           
  in

In [None]:
!pip install librosa matplotlib scikit-learn tensorflow seaborn

import os
import librosa
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Dropout, BatchNormalization, LeakyReLU
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
import seaborn as sns

def features_extractor(file):
    """Summarise one audio file as a single 1-D feature vector.

    The file is loaded with librosa (``res_type='kaiser_fast'``) and five
    descriptors are computed, each averaged over time and concatenated in
    this order: MFCCs (40 coefficients), chroma STFT, mel spectrogram in dB,
    spectral contrast, tonnetz.

    Parameters
    ----------
    file : path of the audio file to read.

    Returns
    -------
    numpy.ndarray or None
        The concatenated feature vector, or ``None`` if any step raised
        (the error is printed rather than propagated).
    """
    try:
        signal, rate = librosa.load(file, res_type='kaiser_fast')

        # Each entry is the time-average of one descriptor; the order here
        # defines the layout of the final feature vector.
        time_averaged = [
            np.mean(librosa.feature.mfcc(y=signal, sr=rate, n_mfcc=40).T, axis=0),
            np.mean(librosa.feature.chroma_stft(y=signal, sr=rate).T, axis=0),
            np.mean(
                librosa.power_to_db(librosa.feature.melspectrogram(y=signal, sr=rate)),
                axis=1,
            ),
            np.mean(librosa.feature.spectral_contrast(y=signal, sr=rate), axis=1),
            np.mean(librosa.feature.tonnetz(y=signal, sr=rate).T, axis=0),
        ]
        return np.hstack(time_averaged)
    except Exception as e:
        # Best effort: report the problem and let the caller decide what to do.
        print(f"Error processing {file}: {e}")
        return None

def load_data(pos_dir='pos/', neg_dir='neg/'):
    """Build a DataFrame of feature vectors and labels from two clip folders.

    Every visible ``.wav`` file in ``pos_dir`` is labelled 1 and every one in
    ``neg_dir`` is labelled 0. Features come from ``features_extractor``.

    Parameters
    ----------
    pos_dir : directory holding the positive clips.
    neg_dir : directory holding the negative clips.

    Returns
    -------
    pandas.DataFrame
        Columns ``'features'`` (one feature vector per row) and ``'class'``.

    Raises
    ------
    ValueError
        If no clip could be turned into a feature vector.
    """
    data = []
    for label, directory in zip([1, 0], [pos_dir, neg_dir]):
        if not os.path.isdir(directory):
            print(f"Directory {directory} not found.")
            continue
        # os.listdir order is arbitrary; sorting keeps the row order (and so
        # any seeded train/test split downstream) reproducible.
        for filename in sorted(os.listdir(directory)):
            if filename.lower().endswith('.wav') and not filename.startswith('.'):
                file_path = os.path.join(directory, filename)
                features = features_extractor(file_path)
                if features is None:
                    # The extractor signals failure with None; such a row
                    # cannot be stacked into the training matrix.
                    print(f"Skipping {file_path}: feature extraction failed.")
                    continue
                data.append([features, label])
            else:
                print(f"Skipping non-wav file: {filename}")
    if not data:
        raise ValueError("No valid data could be loaded. Please check the contents of the pos/ and neg/ directories.")
    return pd.DataFrame(data, columns=['features', 'class'])

def define_model(input_shape):
    """Build and compile the dense classifier used for the two-class task.

    The network has three fully connected blocks (256, 512, 512 units), each
    followed by LeakyReLU(0.1), batch normalisation and 50% dropout, then a
    2-unit softmax output. Every hidden Dense layer carries an L2 penalty of
    0.001.

    Parameters
    ----------
    input_shape : int
        Number of features in one input vector.

    Returns
    -------
    A compiled Sequential model (categorical cross-entropy loss, accuracy
    metric, Adam with learning rate 1e-4).
    """
    model = Sequential([
        Dense(256, input_shape=(input_shape,), kernel_regularizer=l2(0.001)),
        LeakyReLU(alpha=0.1),
        BatchNormalization(),
        Dropout(0.5),
        Dense(512, kernel_regularizer=l2(0.001)),
        LeakyReLU(alpha=0.1),
        BatchNormalization(),
        Dropout(0.5),
        Dense(512, kernel_regularizer=l2(0.001)),
        LeakyReLU(alpha=0.1),
        BatchNormalization(),
        Dropout(0.5),
        Dense(2, activation='softmax')
    ])
    model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer=Adam(learning_rate=0.0001))
    return model


def plot_results(history, X_test, y_test, model):
    """Plot the training history and evaluate the model on the test set.

    Produces, in order: accuracy and loss learning curves, a confusion-matrix
    heatmap, the printed test accuracy, and a 2-D t-SNE scatter of the test
    features coloured by true class.

    Parameters
    ----------
    history : object returned by ``model.fit``; its ``history`` dict must hold
        ``'accuracy'``, ``'val_accuracy'``, ``'loss'`` and ``'val_loss'``.
    X_test : 2-D array of test feature vectors (one row per sample).
    y_test : one-hot encoded test labels.
    model : trained model exposing ``predict``.
    """
    # Plot learning curves
    plt.figure(figsize=(14, 5))
    plt.subplot(1, 2, 1)
    plt.plot(history.history['accuracy'], label='Training Accuracy')
    plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
    plt.title('Model Accuracy')
    plt.ylabel('Accuracy')
    plt.xlabel('Epoch')
    plt.legend()

    plt.subplot(1, 2, 2)
    plt.plot(history.history['loss'], label='Training Loss')
    plt.plot(history.history['val_loss'], label='Validation Loss')
    plt.title('Model Loss')
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.legend()
    plt.tight_layout()
    plt.show()

    # Predictions for confusion matrix
    predictions = np.argmax(model.predict(X_test), axis=1)
    y_true = np.argmax(y_test, axis=1)
    # Pin the label set so the matrix stays 2x2 even when one class is
    # missing from the test split or from the predictions.
    cm = confusion_matrix(y_true, predictions, labels=[0, 1])
    sns.heatmap(cm, annot=True, fmt="d")
    plt.title("Confusion Matrix")
    plt.xlabel("Predicted label")
    plt.ylabel("True label")
    plt.show()

    # Accuracy
    print(f"Test Accuracy: {accuracy_score(y_true, predictions) * 100:.2f}%")

    # t-SNE Visualization
    n_samples = len(X_test)
    if n_samples < 2:
        print("Skipping t-SNE visualization: at least 2 test samples are required.")
        return
    # t-SNE requires perplexity < n_samples; the library default of 30 would
    # raise on a small test set, so cap it.
    tsne = TSNE(n_components=2, random_state=42, perplexity=min(30.0, n_samples - 1))
    X_test_2d = tsne.fit_transform(X_test)
    plt.figure(figsize=(10, 8))
    plt.scatter(X_test_2d[:, 0], X_test_2d[:, 1], c=y_true, cmap='viridis', alpha=0.5)
    plt.colorbar()
    plt.title("t-SNE visualization of Test set")
    plt.show()

def main():
    """Train the classifier on the pos/ and neg/ clips and report results.

    Loads the features, splits them 80/20 with a fixed seed, trains for 20
    epochs, saves the model to ``bird_song_model.hdf5`` and plots the
    evaluation. Returns early (after printing a message) when either data
    directory is missing.

    Raises
    ------
    ValueError
        If no usable feature vector remains after loading.
    """
    # Check if 'pos/' and 'neg/' directories exist
    if not os.path.isdir('pos/') or not os.path.isdir('neg/'):
        print("pos/ or neg/ directories not found. Make sure they are uploaded or mounted correctly.")
        return

    df = load_data()
    # features_extractor returns None for files it could not process; such
    # rows cannot be stacked into a rectangular feature matrix.
    usable = df['features'].map(lambda feats: feats is not None)
    df = df[usable].reset_index(drop=True)
    if df.empty:
        raise ValueError("Loaded DataFrame is empty. No valid data could be loaded.")

    X = np.array(df['features'].tolist())
    # num_classes is pinned to match the model's 2-unit softmax output even
    # if only one class happens to be present in the loaded data.
    y = to_categorical(np.array(df['class'].tolist()), num_classes=2)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    model = define_model(X_train.shape[1])
    history = model.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_test, y_test), verbose=1)

    # Save the trained model
    model.save('bird_song_model.hdf5')

    plot_results(history, X_test, y_test, model)

if __name__ == "__main__":
    main()


In [27]:
from google.colab import files
from tensorflow.keras.models import load_model
import librosa
import numpy as np

# Define the features_extractor function according to your model's training
def features_extractor(file_path):
    """Extract the inference-time feature vector for one audio file.

    The preprocessing mirrors the training-time extractor defined earlier in
    this notebook, so the model sees inputs from the same distribution:
    the audio is loaded with librosa's default resampling and
    ``res_type='kaiser_fast'`` (not at the file's native rate), and tonnetz
    is computed on the raw signal (not on its harmonic component).

    Features are time-averaged and concatenated in the training order:
    MFCCs (40), chroma STFT, mel spectrogram in dB, spectral contrast, tonnetz.

    Parameters
    ----------
    file_path : path of the audio file to read.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(1, n_features)``, ready for ``model.predict``.
    """
    audio_data, sr = librosa.load(file_path, res_type='kaiser_fast')
    mfccs = librosa.feature.mfcc(y=audio_data, sr=sr, n_mfcc=40)
    chroma_stft = librosa.feature.chroma_stft(y=audio_data, sr=sr)
    mel_spec = librosa.feature.melspectrogram(y=audio_data, sr=sr)
    spectral_contrast = librosa.feature.spectral_contrast(y=audio_data, sr=sr)
    tonnetz = librosa.feature.tonnetz(y=audio_data, sr=sr)
    features = np.hstack([np.mean(mfccs, axis=1), np.mean(chroma_stft, axis=1), np.mean(librosa.power_to_db(mel_spec), axis=1),
                          np.mean(spectral_contrast, axis=1), np.mean(tonnetz, axis=1)])
    # Add the leading batch dimension expected by model.predict.
    return features.reshape(1, -1)

# Define the detect_motif function
def detect_motif(model, file_path):
    """Report whether the model finds a motif in the given audio file.

    The file is converted to features with ``features_extractor``, passed to
    ``model.predict``, and the second column of the first prediction row is
    compared against 0.5 (class 1 is taken to be the motif class).

    Parameters
    ----------
    model : object exposing ``predict`` that returns a 2-D array of scores.
    file_path : path of the audio file to analyse.
    """
    print(f"Processing {file_path} to detect motifs...")
    class_scores = model.predict(features_extractor(file_path))
    motif_score = class_scores[0, 1]  # Assuming class 1 is the motif
    verdict = "Motif found!" if motif_score > 0.5 else "No motif detected."
    print(verdict)

# Main function to upload a specific file and process it
def main():
    """Run motif detection on ``bird.wav`` with the saved model.

    If the audio file is not present in the working directory, the user is
    prompted to upload it through Colab's file picker; the function returns
    early when the expected file is not among the uploads. The model is then
    loaded from ``bird_song_model.hdf5`` and applied to the file.
    """
    # Imported here because this cell uses os but does not import it itself;
    # without this the cell fails with NameError on a fresh kernel.
    import os

    # Hardcoding the filename for direct use
    audio_file_name = "bird.wav"

    # Check if the audio file is present
    if not os.path.exists(audio_file_name):
        print(f"{audio_file_name} not found. Please upload the file.")
        uploaded = files.upload()  # Prompts user to upload the bird.wav file
        # Compare against the configured name so the check stays correct if
        # audio_file_name is ever changed.
        if audio_file_name not in uploaded:
            print("File not uploaded correctly. Please try again.")
            return

    model_path = 'bird_song_model.hdf5'  # Assuming the model is in the root directory
    model = load_model(model_path)  # Load the pre-trained model

    # Process the uploaded audio file to detect motifs
    detect_motif(model, audio_file_name)

if __name__ == "__main__":
    main()


bird.wav not found. Please upload the file.


Saving bird.wav to bird.wav
Processing bird.wav to detect motifs...
Motif found!
