In [5]:
import os
import librosa
import numpy as np
import pandas as pd
import librosa.display
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.utils import to_categorical


# Directory containing bee sound audio files
audio_dir = "/content/drive/MyDrive/Colab Notebooks/piping_database/"
output_csv = "/content/drive/MyDrive/Colab Notebooks/piping_database/bee_sound_labels.csv"

# Define conditions
conditions = {
    "Normal Worker Hum": (30, 50, 100, 250),
    "Swarming - Worker Piping": (51, 70, 100, 250),
    "Swarming - Queen Piping": (50, 100, 330, 430),
    "Potential Stress or Excited State": (None, None, 220, 290),
}

def extract_features(file_path):
    try:
        # Load the audio file
        y, sr = librosa.load(file_path, sr=None)




        # Compute RMS (Root Mean Square) for the audio
        rms = np.sqrt(np.mean(y**2))
        # Apply a small constant to avoid log(0), RMS should be greater than a small threshold.
        amplitude_db = 20 * np.log10(rms + 1e-6)  # Avoiding log(0) by adding a small value

        # STFT for frequency analysis
        stft = np.abs(librosa.stft(y))
        frequencies = librosa.fft_frequencies(sr=sr)
        dominant_frequency = frequencies[np.argmax(np.sum(stft, axis=1))]
           # Calculate additional features
        bandwidth = np.max(frequencies) - np.min(frequencies)  # Spectral bandwidth
        spectral_centroid = np.mean(librosa.feature.spectral_centroid(y=y, sr=sr))

        # MFCCs (Mel-frequency cepstral coefficients)
        mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
        mfccs_mean = np.mean(mfccs, axis=1)

        return amplitude_db, dominant_frequency, mfccs_mean
    except Exception as e:
        print(f"Error processing {file_path}: {e}")
        return None, None, None


def create_spectrogram(file_path, output_dir):
    y, sr = librosa.load(file_path, sr=None)
    plt.figure(figsize=(10, 4))
    S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128)
    S_DB = librosa.power_to_db(S, ref=np.max)
    spectrogram_path = os.path.join(output_dir, os.path.basename(file_path).replace(".wav", ".png"))
    librosa.display.specshow(S_DB, x_axis='time', y_axis='mel', sr=sr, fmax=8000)
    plt.colorbar(format='%+2.0f dB')
    plt.title('Mel Spectrogram')
    plt.savefig(spectrogram_path)
    plt.close()
    return spectrogram_path

def assign_condition(amplitude_db, dominant_frequency, bandwidth=None, spectral_centroid=None):
    if amplitude_db > -45:  # Updated amplitude threshold
        if 100 <= dominant_frequency <= 250:
            if bandwidth and bandwidth < 200:
                return "Normal Worker Hum"
        elif 220 <= dominant_frequency <= 290:
            return "Potential Stress or Excited State"
    if -60 < amplitude_db <= -45:
        if 100 <= dominant_frequency <= 250:
            return "Swarming - Worker Piping"
        elif 330 <= dominant_frequency <= 430:
            return "Swarming - Queen Piping"
    return "Unknown"

def categorize_files(audio_dir):
    data = []
    for root, _, files in os.walk(audio_dir):
        for file_name in files:
            if file_name.endswith(".wav") or file_name.endswith(".mp3"):
                file_path = os.path.join(root, file_name)
                amplitude_db, dominant_frequency, mfccs = extract_features(file_path)
                if amplitude_db is not None:
                    condition = assign_condition(amplitude_db, dominant_frequency)
                    data.append((file_name, amplitude_db, dominant_frequency, condition, mfccs))
    return data

def train_classifier(features, labels):
    X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)
    classifier = RandomForestClassifier(n_estimators=100, random_state=42)
    classifier.fit(X_train, y_train)
    y_pred = classifier.predict(X_test)
    print(classification_report(y_test, y_pred))
    return classifier

def create_cnn_model(input_shape, num_classes):
    model = Sequential([
        Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
        MaxPooling2D((2, 2)),
        Conv2D(64, (3, 3), activation='relu'),
        MaxPooling2D((2, 2)),
        Flatten(),
        Dense(128, activation='relu'),
        Dense(num_classes, activation='softmax')
    ])
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

# Main program
def main():
    print("Processing audio files...")
    data = categorize_files(audio_dir)

    if data:
        df = pd.DataFrame(data, columns=["file_name", "amplitude_db", "dominant_frequency", "condition", "mfccs"])

        # Save labeled data
        print(f"Saving results to {output_csv}...")
        df.to_csv(output_csv, index=False)

        # Train a model using MFCCs and labels
        features = np.array([mfccs for _, _, _, _, mfccs in data])
        labels = np.array([condition for _, _, _, condition, _ in data])

        print("Training classifier...")
        train_classifier(features, labels)
    else:
        print("No audio files found or processed.")

if __name__ == "__main__":
    main()


Processing audio files...
Saving results to /content/drive/MyDrive/Colab Notebooks/piping_database/bee_sound_labels.csv...
Training classifier...
                                   precision    recall  f1-score   support

Potential Stress or Excited State       0.00      0.00      0.00         1
         Swarming - Worker Piping       0.00      0.00      0.00         1
                          Unknown       0.91      1.00      0.95        20

                         accuracy                           0.91        22
                        macro avg       0.30      0.33      0.32        22
                     weighted avg       0.83      0.91      0.87        22



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
