In [14]:
# -*- coding: utf-8 -*-

import sys
import pandas as pd
import numpy as np
import librosa
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import (
    roc_auc_score,
    confusion_matrix,
    precision_score,
    recall_score,
    ConfusionMatrixDisplay,
)

import numpy as np
import pandas as pd
from ast import literal_eval

from concurrent.futures import ThreadPoolExecutor, as_completed




In [2]:
def extract_features(file_path):
    """
    Extract features from audio file using librosa.

    Args:
        file_path (str): Path to the audio file.

    Returns:
        np.array: Extracted features.
    """
    audio, sr = librosa.load(file_path)
    result = np.array([])

    # MFCC
    mfccs = np.mean(librosa.feature.mfcc(y=audio, sr=sr).T, axis=0)
    result = np.hstack((result, mfccs))

    # Chroma
    stft = np.abs(librosa.stft(audio))
    chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sr).T, axis=0)
    result = np.hstack((result, chroma))

    # Mel-scaled spectrogram
    mel = np.mean(librosa.feature.melspectrogram(y=audio, sr=sr).T, axis=0)
    result = np.hstack((result, mel))

    return result


def process_data():
    """Read Csv with fileanme and generate spectrogram for each sample

    Returns:
        DataFrame: dataframe with all data
    """
    # data_csv_path = sys.argv[1]
    data_csv_path = "RAW.csv"
    df = pd.read_csv(data_csv_path)

    # Initialize a list to store the results
    spectrograms = []

    with ThreadPoolExecutor() as executor:
        futures = [
            executor.submit(extract_features, row["filename"])
            for _, row in df.iterrows()
        ]

        for future in as_completed(futures):
            try:
                spectrogram = future.result()
                spectrograms.append(spectrogram)
            except Exception as exc:
                print(f"Generated an exception: {exc}")

    # Convert the list of spectrograms into a DataFrame
    spectrogram_df = pd.DataFrame(spectrograms)

    # Concatenate the original DataFrame with the new DataFrame containing spectrograms
    df = pd.concat([df, spectrogram_df], axis=1)

    return df






In [3]:
df = process_data()
df.head()

Unnamed: 0,siteId,filename,species,0,1,2,3,4,5,6,...,150,151,152,153,154,155,156,157,158,159
0,1,A:\Documents\Capstone\Dataset\Extracted\Site01...,"E. coqui - co, E. coqui - qui, E. wightmanae",-239.740738,51.801659,-68.505341,48.076183,-0.365455,43.309589,-31.691626,...,1.2e-05,6e-06,4e-06,4e-06,5e-06,4e-06,5e-06,7e-06,3e-06,1.894625e-07
1,1,A:\Documents\Capstone\Dataset\Extracted\Site01...,"E. coqui - co, E. coqui - qui, E. wightmanae",-242.454758,46.531017,-76.802185,41.409466,12.214672,43.399223,-39.117214,...,1.4e-05,7e-06,5e-06,5e-06,5e-06,5e-06,5e-06,6e-06,3e-06,2.409685e-07
2,1,A:\Documents\Capstone\Dataset\Extracted\Site01...,"E. coqui - co, E. coqui - qui, E. wightmanae",-219.443344,46.16124,-95.875694,47.622898,17.900232,44.626976,-33.330956,...,3.3e-05,3.1e-05,2.7e-05,1.4e-05,1.3e-05,1.6e-05,1.9e-05,2e-05,9e-06,4.669851e-07
3,1,A:\Documents\Capstone\Dataset\Extracted\Site01...,"E. coqui - co, E. coqui - qui, E. wightmanae",-235.995483,53.128262,-90.414513,38.268307,22.369974,39.09473,-35.36005,...,2.7e-05,1.8e-05,1.5e-05,1e-05,8e-06,9e-06,1.1e-05,1.1e-05,6e-06,8.626405e-07
4,1,A:\Documents\Capstone\Dataset\Extracted\Site01...,"E. coqui - co, E. coqui - qui, E. wightmanae",-215.102142,45.698521,-99.809929,36.31823,31.34408,37.819267,-33.294186,...,9.8e-05,5.1e-05,3.6e-05,3.8e-05,4.2e-05,4.2e-05,5.9e-05,4.8e-05,2.5e-05,1.352183e-06


In [4]:
x = df.drop(
    columns=["filename", "species"]
)  # Adjust this to include only feature columns
# Convert all column names to strings
x.columns = x.columns.astype(str)

x

Unnamed: 0,siteId,0,1,2,3,4,5,6,7,8,...,150,151,152,153,154,155,156,157,158,159
0,1,-239.740738,51.801659,-68.505341,48.076183,-0.365455,43.309589,-31.691626,19.327181,-4.303411,...,1.179595e-05,5.730149e-06,4.331661e-06,4.032777e-06,4.504478e-06,4.489876e-06,5.217792e-06,7.434868e-06,3.252247e-06,1.894625e-07
1,1,-242.454758,46.531017,-76.802185,41.409466,12.214672,43.399223,-39.117214,24.669601,-13.220637,...,1.378657e-05,6.668717e-06,4.991001e-06,4.670110e-06,4.714180e-06,4.585213e-06,5.118298e-06,5.801096e-06,2.555159e-06,2.409685e-07
2,1,-219.443344,46.161240,-95.875694,47.622898,17.900232,44.626976,-33.330956,15.888066,-10.194343,...,3.258029e-05,3.053538e-05,2.730950e-05,1.411799e-05,1.337321e-05,1.553343e-05,1.854990e-05,1.981999e-05,8.528460e-06,4.669851e-07
3,1,-235.995483,53.128262,-90.414513,38.268307,22.369974,39.094730,-35.360050,21.712530,-15.581470,...,2.700963e-05,1.828149e-05,1.463376e-05,9.784726e-06,8.377148e-06,8.812436e-06,1.082565e-05,1.134505e-05,6.472015e-06,8.626405e-07
4,1,-215.102142,45.698521,-99.809929,36.318230,31.344080,37.819267,-33.294186,21.369223,-14.718131,...,9.811153e-05,5.148281e-05,3.647849e-05,3.826631e-05,4.219232e-05,4.168539e-05,5.900799e-05,4.766534e-05,2.524724e-05,1.352183e-06
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1545,14,-393.008453,101.178017,-79.029175,28.293989,28.579849,39.117920,-4.398160,-8.549830,25.789724,...,9.911903e-07,7.633814e-07,6.386384e-07,6.014852e-07,6.094005e-07,5.244206e-07,4.654867e-07,4.065945e-07,2.103827e-07,2.030373e-08
1546,14,-357.060486,86.556892,-102.962212,53.054485,1.244374,41.770462,-6.510597,-18.457352,22.713778,...,1.137464e-06,8.839895e-07,7.812621e-07,8.371287e-07,1.265145e-06,1.530953e-06,1.277714e-06,6.588891e-07,2.814223e-07,6.534739e-08
1547,14,-377.759125,101.542320,-99.980896,45.115143,22.382904,23.369156,-5.359596,-17.448772,13.056074,...,3.981077e-06,1.693332e-06,9.041005e-07,7.401843e-07,6.317388e-07,5.661995e-07,4.605900e-07,3.382758e-07,1.616416e-07,1.892943e-08
1548,14,-382.037750,88.057480,-89.982834,44.273216,13.101156,44.574432,-5.251865,-14.950739,26.366344,...,9.574214e-07,7.652221e-07,6.425955e-07,6.224543e-07,7.346852e-07,9.050754e-07,7.329835e-07,5.039424e-07,2.141876e-07,1.818880e-08


In [5]:
y = df["species"]

# Encode the target labels as integers
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)
y



array([10, 10, 10, ...,  9,  9,  9])

In [24]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)

classifier = RandomForestClassifier(n_estimators=600, max_depth=18, min_samples_leaf=3)

classifier.fit(x_train, y_train)

y_pred = classifier.predict_proba(
    x_test,
)

accuracy = roc_auc_score(y_test, y_pred, multi_class="ovr")
print("Accuracy :", accuracy)




Accuracy : 0.9962154033143414
