## Updated preprocessing pipeline that uses spectrogram data instead of images

Previously, we trained the machine learning model using the 1-1, 1-2 and 1-3 notebooks (TODO: link each notebook, using its number as the link text, e.g. 1-1). In those notebooks we take the audio file, split it into windows that are labelled for each class, turn each window into a spectrogram, plot it with matplotlib to turn it into an image, save the image to a file, and load the image back as a NumPy array; that image data is what the model was trained on.
This notebook aims to change several of those preprocessing steps by training the model on the spectrogram data before it is passed to matplotlib to be turned into an image.
Two key benefits:
1. Much faster preprocessing due to vectorisation of the windows
2. Better training accuracy, as we are providing the raw data to the ML model.

In [None]:
import yaml
import os

from src.anemonefish_acoustics.data import postprocess_prediction
from src.anemonefish_acoustics.utils.logger import get_logger

# Notebook-wide logger obtained from the project's `get_logger` helper.
# NOTE(review): the variable is named `logging`, the same name as the stdlib
# module; a later `import logging` in this notebook would rebind it — confirm
# the name is intentional.
logging = get_logger(__name__)


## Load config

In [None]:
# --- Load Configuration from YAML ---
# Reads the preprocessing settings from a single YAML file into the
# module-level `config` mapping.

# !!! UPDATE THIS PATH TO YOUR CONFIG FILE !!!
CONFIG_PATH = '/Volumes/InsightML/NAS/3_Lucia_Yllan/Clown_Fish_Acoustics/data/2_training_datasets/preprocessing_config_template.yaml'

# Load configuration
logging.info(f"Loading configuration from: {CONFIG_PATH}")
# Decode explicitly as UTF-8 so parsing does not depend on the machine's
# locale encoding.
with open(CONFIG_PATH, 'r', encoding='utf-8') as f:
    # safe_load builds only plain Python objects from the document.
    config = yaml.safe_load(f)

# An empty (or non-mapping) YAML document would otherwise only fail later
# with a cryptic "'NoneType' object is not subscriptable" error.
if not isinstance(config, dict):
    raise ValueError(
        f"Configuration file must contain a YAML mapping at the top level: {CONFIG_PATH}"
    )

# Extract configuration values
# Top-level keys of the YAML config.
WORKSPACE_BASE_PATH = config['workspace_base_path']
DATASET_VERSION = config['dataset_version']
RAW_DATA_SITE = config['raw_data_site']
ANNOTATION_VERSION = config['annotation_version']
CLASSES = config['classes']

# Construct paths based on new directory structure
# Both input directories share the <workspace>/data/1_raw/<site> prefix.
_raw_site_parts = (WORKSPACE_BASE_PATH, 'data', '1_raw', RAW_DATA_SITE)
INPUT_AUDIO_DIR = os.path.join(*_raw_site_parts, 'audio')
INPUT_ANNOTATIONS_DIR = os.path.join(*_raw_site_parts, ANNOTATION_VERSION)
OUTPUT_AUDIO_FILES_DIR = os.path.join(
    WORKSPACE_BASE_PATH, 'data', '_cache', '1_generate_training_audio'
)

# Audio processing parameters (the `audio_processing` section of the config)
_audio_cfg = config['audio_processing']
WINDOW_SIZE_SECONDS = _audio_cfg['window_size_seconds']
SLIDE_SECONDS = _audio_cfg['slide_seconds']
MIN_SEGMENT_DURATION_SECONDS = _audio_cfg['min_segment_duration_seconds']

# Noise padding parameters (the `noise_padding` section of the config)
_noise_cfg = config['noise_padding']
NOISE_PADDING_RATIO = _noise_cfg['padding_ratio']
MIN_NOISE_DURATION_FOR_SHORTENING = _noise_cfg['min_duration_seconds']
MAX_NOISE_DURATION_FOR_SHORTENING = _noise_cfg['max_duration_seconds']

# Create output directories for each class
# Map every class name to its own sub-directory of the output base directory...
OUTPUT_CLASS_DIRS = {
    class_name: os.path.join(OUTPUT_AUDIO_FILES_DIR, class_name)
    for class_name in CLASSES
}
# ...then make sure each one exists (existing directories are left untouched).
for class_dir in OUTPUT_CLASS_DIRS.values():
    os.makedirs(class_dir, exist_ok=True)

# Log configuration
# Echo every resolved setting so the run's log shows the values it used.
logging.info("=== Configuration Loaded ===")
logging.info(f"Dataset Version: {DATASET_VERSION}")
logging.info(f"Raw Data Site: {RAW_DATA_SITE}")
logging.info(f"Annotation Version: {ANNOTATION_VERSION}")
logging.info(f"Classes: {CLASSES}")
logging.info(f"Input Audio Directory: {INPUT_AUDIO_DIR}")
logging.info(f"Input Annotations Directory: {INPUT_ANNOTATIONS_DIR}")
logging.info(f"Output Base Directory: {OUTPUT_AUDIO_FILES_DIR}")
for class_name, class_dir in OUTPUT_CLASS_DIRS.items():
    logging.info(f"  - {class_name}: {class_dir}")
logging.info(f"Audio Window Size: {WINDOW_SIZE_SECONDS}s")
logging.info(f"Sliding Window Hop: {SLIDE_SECONDS}s")
logging.info(f"Minimum Segment Duration: {MIN_SEGMENT_DURATION_SECONDS}s")
# round() instead of int(): int() truncates, so binary floating-point error
# made e.g. a ratio of 0.29 print as "28%" (0.29 * 100 == 28.999999999999996).
logging.info(f"Noise Padding Ratio: {NOISE_PADDING_RATIO} ({round(NOISE_PADDING_RATIO * 100)}%)")

# Validate input directories exist
# Each entry pairs a required input directory with the message to log if it is
# missing. A missing directory is only reported; execution continues.
for _required_dir, _not_found_msg in (
    (INPUT_AUDIO_DIR, f"Input audio directory not found: {INPUT_AUDIO_DIR}"),
    (INPUT_ANNOTATIONS_DIR, f"Input annotations directory not found: {INPUT_ANNOTATIONS_DIR}"),
):
    if os.path.isdir(_required_dir):
        continue
    logging.critical(_not_found_msg)
    logging.critical("Please check your configuration file.")

# Pipeline
1. Load audio file and classes data
2. Split audio file into classes and noise
3. 