In [12]:
import os
import librosa
import numpy as np
import pandas as pd

In [13]:
# === CONFIGURATION ===
# All tunable inputs for the MFCC extraction live here.

# Folder with the source .wav files.
# NOTE: machine-specific absolute path; edit this when running elsewhere.
audio_folder = r"C:\Users\leona\Documents\Thesis_Project_UACH\Temp\Dataset\BeesAnna\sound_files"

# Where the per-file MFCC matrices (.npy) are written.
output_folder = "mfcc_npy_files/"

# Label table (CSV), given relative to the working directory.
# Built with os.path.join so the separator is correct on every OS
# (a literal backslash only works as a separator on Windows).
csv_path = os.path.join("Datasets", "BeeHive_audio_updated.csv")

# Sampling rate passed to librosa.load (audio is resampled to this rate).
sample_rate = 22050

# Number of MFCC coefficients computed per frame.
n_mfcc = 13

In [14]:
# Create the output folder (including any missing parent directories).
# exist_ok=True makes this a no-op when the folder is already there, so the
# cell can be re-run safely.
os.makedirs(output_folder, exist_ok=True)

In [15]:
# Read the label table and index it by file name.
# Surrounding whitespace is trimmed from the "file name" column first so that
# index lookups by file name are not defeated by stray spaces in the CSV.
labels_df = (
    pd.read_csv(csv_path)
    .assign(**{"file name": lambda frame: frame["file name"].str.strip()})
    .set_index("file name")
)

In [16]:
# Accumulator for the audio-file -> MFCC-file -> label linkage.
# The processing loop appends one dict per successfully processed file, with
# keys "file_name", "mfcc_file" and "queen_status".
metadata = []

In [17]:
# Extract MFCC features from every .wav file in `audio_folder`, save each
# matrix as a .npy file in `output_folder`, and record the file -> label
# linkage in `metadata`.


def _mfcc_npy_name(wav_name):
    """Return the .npy file name for a .wav file name.

    Only the trailing extension is swapped for "_mfcc.npy"; a ".wav" that
    happens to occur elsewhere in the name is left untouched.
    """
    stem, _ext = os.path.splitext(wav_name)
    return stem + "_mfcc.npy"


def _lookup_queen_status(labels, raw_key):
    """Return the "queen status" value for `raw_key`, or None if `labels` has no such row.

    `labels` is a DataFrame indexed by file name. If the key is duplicated in
    the index, the first matching row is used.
    """
    if raw_key not in labels.index:
        return None
    match = labels.loc[raw_key]
    # A duplicated index key makes .loc return a DataFrame instead of a single
    # row; without this, a whole Series would be stored as the label.
    if isinstance(match, pd.DataFrame):
        match = match.iloc[0]
    return match["queen status"]


# Start from an empty list so that re-running this cell does not append
# duplicate rows to the metadata.
metadata = []
unlabeled_count = 0

# isdir (rather than exists) also rejects a path that points to a regular file,
# which os.listdir could not handle.
if not os.path.isdir(audio_folder):
    print(f"Error: The folder '{audio_folder}' does not exist. Please check the path.")
else:
    # Sorted so the processing order (and the metadata row order) is
    # reproducible; os.listdir returns entries in arbitrary order.
    for filename in sorted(os.listdir(audio_folder)):
        # Case-insensitive match so files named *.WAV are not skipped.
        if not filename.lower().endswith(".wav"):
            continue
        try:
            file_path = os.path.join(audio_folder, filename)
            y, sr = librosa.load(file_path, sr=sample_rate)
            mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
            mfcc = mfcc.T  # Shape: [frames, n_mfcc]

            # Save MFCC matrix
            npy_name = _mfcc_npy_name(filename)
            np.save(os.path.join(output_folder, npy_name), mfcc)

            # Link to label using raw filename: the part of the name before
            # the first "__" plus ".raw" is the key used in the label table.
            raw_key = filename.split("__")[0] + ".raw"
            label = _lookup_queen_status(labels_df, raw_key)
            if label is None:
                unlabeled_count += 1

            metadata.append({"file_name": filename, "mfcc_file": npy_name, "queen_status": label})

        except Exception as e:
            # Best effort: report the failing file and continue with the rest.
            print(f"Error processing {filename}: {e}")

    if unlabeled_count:
        print(f"Warning: {unlabeled_count} file(s) had no matching label and were recorded with queen_status=None.")

In [18]:
# Write the collected file/label linkage to disk as a CSV (one row per
# processed audio file, no index column), then report how many files were handled.
metadata_table = pd.DataFrame(metadata)
metadata_table.to_csv("mfcc_metadata.csv", index=False)

summary_message = f"Done! Processed {len(metadata)} files. MFCCs saved to '{output_folder}'"
print(summary_message)

Done! Processed 7100 files. MFCCs saved to 'mfcc_npy_files/'
