# Project Echo - Experiment Benchmarking Framework

This notebook provides an interactive interface to the benchmarking framework. It allows you to run various experiments with different model architectures and augmentation strategies, and compare their performance.

## Overview

The benchmarking framework is designed to systematically evaluate different combinations of:
- Model architectures (EfficientNet, MobileNet, ResNet, etc.)
- Audio augmentation strategies
- Image/spectrogram augmentation strategies

Results are collected and visualized to help identify the best-performing configurations for bird sound classification.

## 0.1 Install Required Libraries
This section is for preparing the required libraries when you run this notebook remotely, for example on a Vast.ai instance or Google Colab.
Make sure you start from a clean Python 3.9+ kernel.
Details on how to set this up are in the README.

In [None]:
# NOTE(review): although this section is titled "Install Required Libraries", this
# cell installs nothing; it only creates and renders a default IntSlider -
# presumably a smoke test that ipywidgets renders in this frontend (confirm).
from ipywidgets import IntSlider
from IPython.display import display
slider = IntSlider()
display(slider)

## 1. Import Required Libraries

In [None]:
# --- CONFIGURATION FOR DEVCONTAINER ---
import os
import sys


def find_base_path(start_dir=None, devcontainer_root="/workspace", marker="config"):
    """Locate the project root, i.e. the directory that contains ``marker``.

    Candidates are probed in this order: the devcontainer mount point, the
    starting directory, and the parent of the starting directory.

    Args:
        start_dir: Directory to start from; defaults to the current working directory.
        devcontainer_root: Where the workspace is mounted inside the devcontainer.
        marker: Name of the sub-directory that identifies the project root.

    Returns:
        A ``(path, origin)`` tuple. ``origin`` is ``"devcontainer"``, ``"local"`` or
        ``"parent"`` for the candidate that matched, or ``None`` when no candidate
        contains ``marker`` (``path`` is then the starting directory).
    """
    start_dir = os.path.abspath(os.getcwd() if start_dir is None else start_dir)
    candidates = (
        (devcontainer_root, "devcontainer"),
        (start_dir, "local"),
        (os.path.dirname(start_dir), "parent"),
    )
    for candidate, origin in candidates:
        # isdir (not exists): a regular file that happens to be called `config`
        # does not make its folder the project root.
        if os.path.isdir(os.path.join(candidate, marker)):
            return candidate, origin
    return start_dir, None


base_path, base_path_origin = find_base_path()
if base_path_origin == "devcontainer":
    # Running in devcontainer with proper mount
    print("✓ Running in devcontainer environment")
elif base_path_origin == "local":
    # Running locally
    print("✓ Running in local environment")
elif base_path_origin is None:
    # Keep going with the current directory so the checks below show what is missing
    print("❌ ERROR: Cannot find config directory")
    print(f"Current directory: {base_path}")
    print(f"Available files/dirs: {os.listdir(base_path)}")

print(f"Using base path: {base_path}")

# Add to Python path
if base_path not in sys.path:
    sys.path.insert(0, base_path)

# Verify required directories exist
required_dirs = ['config', 'utils']
for dir_name in required_dirs:
    dir_path = os.path.join(base_path, dir_name)
    exists = os.path.isdir(dir_path)
    print(f"{dir_name} directory: {'✓' if exists else '❌'} {dir_path}")

# --- END CONFIGURATION ---

In [None]:
# Import necessary libraries
import os
import sys
import tensorflow as tf
import matplotlib.pyplot as plt
from ipywidgets import widgets
from IPython.display import display, HTML, clear_output

# A bare /workspace directory is not enough to conclude that we are inside the
# devcontainer: the project is only mounted there when /workspace/config exists.
# Probe the same candidates as the devcontainer configuration cell (mount point,
# current directory, parent directory) so both cells agree on the project root.
_cwd = os.getcwd()
for _candidate, _environment in (
    ("/workspace", "devcontainer"),
    (_cwd, "local"),
    (os.path.dirname(_cwd), "local"),
):
    if os.path.isdir(os.path.join(_candidate, 'config')):
        actual_module_path_inside_container = _candidate
        break
else:
    # Nothing matched: fall back to the current directory; the existence
    # checks printed below then show which directory is missing.
    actual_module_path_inside_container, _environment = _cwd, "local"
print(f"Running in {_environment} environment")

print(f"Using module path: {actual_module_path_inside_container}")
if actual_module_path_inside_container not in sys.path:
    sys.path.insert(0, actual_module_path_inside_container)

# Verify required directories
config_path = os.path.join(actual_module_path_inside_container, 'config')
utils_path = os.path.join(actual_module_path_inside_container, 'utils')
print(f"Config directory exists: {os.path.exists(config_path)}")
print(f"Utils directory exists: {os.path.exists(utils_path)}")

from config.experiment_configs import EXPERIMENTS

## 2. Configuration

Set up the directories and options for benchmarking.
Make sure to update these in the `system_config.py` file in the `config` folder.

In [None]:
# Import directories from system_config
from config.system_config import SC

# Get directory paths from system config
DATA_DIR = SC['AUDIO_DATA_DIRECTORY']
CACHE_DIR = SC['CACHE_DIRECTORY']
OUTPUT_DIR = SC['OUTPUT_DIRECTORY']

print("Using directories from system_config:")
print(f"Data Directory: {DATA_DIR}")
print(f"Cache Directory: {CACHE_DIR}")
print(f"Output Directory: {OUTPUT_DIR}")

# Create output directory if it doesn't exist
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Configure GPU memory growth BEFORE anything initialises the GPUs. TensorFlow only
# honours the setting when it is applied first, and tf.test.gpu_device_name() below
# creates the devices, so the informational prints must come afterwards.
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        print(f"GPU support enabled: {len(gpus)} GPU(s) found")
    except RuntimeError as err:
        # Raised when the GPUs were already initialised earlier in this kernel
        # session; report it instead of aborting the whole cell.
        print(f"Could not enable GPU memory growth: {err}")
else:
    print("No GPU support found, running on CPU")

# GPU info
print("Physical GPUs:", gpus)
print("Built with CUDA:", tf.test.is_built_with_cuda())
print("GPU name:", tf.test.gpu_device_name())

In [None]:
# Summarise every configured experiment in one table
import pandas as pd

# Fields copied from each experiment definition, in column order
summary_fields = (
    "name",
    "model",
    "audio_augmentation",
    "image_augmentation",
    "epochs",
    "batch_size",
)

experiment_data = []
for exp in EXPERIMENTS:
    # Plain indexing on purpose: a definition missing one of the fields fails loudly
    experiment_data.append({field: exp[field] for field in summary_fields})

experiments_df = pd.DataFrame(experiment_data)
display(experiments_df)

## 3. Interactive Experiment Selection
Use the widgets below to select experiments and set directories.

In [None]:
def _directory_text(label, initial_path):
    """Build one wide text box for editing a directory path.

    Each call creates its own Layout object so the boxes stay independent.
    """
    return widgets.Text(
        value=initial_path,
        description=label,
        style={'description_width': 'initial'},
        layout=widgets.Layout(width='80%'),
    )


# Directory inputs, pre-filled with the values read from system_config
data_dir_widget = _directory_text('Data Directory:', DATA_DIR)
cache_dir_widget = _directory_text('Cache Directory:', CACHE_DIR)
output_dir_widget = _directory_text('Output Directory:', OUTPUT_DIR)
dir_widgets_box = widgets.VBox([data_dir_widget, cache_dir_widget, output_dir_widget])

# Multi-select list of experiments; label and value are both the experiment name
experiment_names = [exp["name"] for exp in EXPERIMENTS]
experiment_options = list(zip(experiment_names, experiment_names))
experiment_widget = widgets.SelectMultiple(
    options=experiment_options,
    description='Select Experiments:',
    disabled=False,
    style={'description_width': 'initial'},
    layout=widgets.Layout(width='50%', height='200px'),
)

# Action buttons (click handlers are attached in a later cell)
run_selected_button = widgets.Button(
    description='Run Selected Experiments',
    button_style='primary',
    tooltip='Run the selected experiments',
)
run_all_button = widgets.Button(
    description='Run All Experiments',
    tooltip='Run all experiments',
)
generate_report_button = widgets.Button(
    description='Generate Report Only',
    button_style='info',
    tooltip='Generate a report from existing results',
)
buttons_box = widgets.HBox([run_selected_button, run_all_button, generate_report_button])

# Bordered area that receives the log output of the experiment runs
output_area = widgets.Output(layout={'border': '1px solid black', 'width': '90%', 'height': '300px'})

# Assemble the control panel top to bottom: directories, selection, actions
controls_box = widgets.VBox([
    widgets.HTML("<h3>Directory Configuration:</h3>"),
    dir_widgets_box,
    widgets.HTML("<hr><h3>Experiment Selection:</h3>"),
    experiment_widget,
    widgets.HTML("<hr><h3>Actions:</h3>"),
    buttons_box,
])

display(controls_box, output_area)

## 4. Experiment Runner Functions

In [None]:
from utils.optimised_engine_pipeline import train_model
import importlib
import config.system_config  # Ensure module import

# A previous execution of this cell may already have attached its handlers to the
# buttons. Remember them so they can be detached before the new ones are bound;
# otherwise a single click would launch every experiment once per cell execution.
_stale_run_selected_handler = globals().get("run_selected_experiments")
_stale_run_all_handler = globals().get("run_all_experiments")


def _apply_directory_settings():
    """Copy the directory widgets' values into ``SC`` and create the directories.

    Returns:
        True when the configuration was applied, False when a path is blank.
    """
    new_data_dir = data_dir_widget.value
    new_cache_dir = cache_dir_widget.value
    new_output_dir = output_dir_widget.value

    # os.makedirs('') raises, so reject blank paths with a readable message instead
    if not all(path.strip() for path in (new_data_dir, new_cache_dir, new_output_dir)):
        print("❌ Data, cache and output directories must not be empty.")
        return False

    # Update config directly in memory (SC is mutated in place, not rebound)
    from config.system_config import SC
    SC['AUDIO_DATA_DIRECTORY'] = new_data_dir
    SC['CACHE_DIRECTORY'] = new_cache_dir
    SC['OUTPUT_DIRECTORY'] = new_output_dir
    print("System configuration updated in memory.")
    print(f"  - Data Dir: {SC['AUDIO_DATA_DIRECTORY']}")
    print(f"  - Cache Dir: {SC['CACHE_DIRECTORY']}")
    print(f"  - Output Dir: {SC['OUTPUT_DIRECTORY']}")

    # Ensure directories exist
    # NOTE(review): a missing data directory is created as well, so a mistyped
    # path yields an empty input folder - confirm this is intended.
    for path in (new_data_dir, new_cache_dir, new_output_dir):
        os.makedirs(path, exist_ok=True)
    return True


def _run_experiment(exp_config):
    """Train a single experiment and print its outcome; errors are reported, not raised."""
    import traceback

    print(f"\nRunning experiment: {exp_config['name']}")
    try:
        # NOTE(review): only model, epochs and batch_size are forwarded; the
        # audio/image augmentation settings of the experiment are not passed here -
        # confirm that train_model obtains them some other way.
        model, _history = train_model(
            model_name=exp_config['model'],
            epochs=exp_config.get('epochs'),
            batch_size=exp_config.get('batch_size')
        )
        print(f"✅ Training completed for experiment: {exp_config['name']}")
        if model is not None:
            model.summary(print_fn=print)
    except Exception:
        # Keep going with the remaining experiments; show the full traceback in the log
        print(f"❌ An error occurred during training for experiment {exp_config['name']}:")
        traceback.print_exc()
    print("-" * 40)


def _run_experiments(experiment_names):
    """Run the named experiments in order, logging everything to ``output_area``."""
    output_area.clear_output(wait=True)
    with output_area:
        print("Starting experiment run...")

        # Validate first so that a click with nothing to run has no side effects
        if not experiment_names:
            print("No experiment selected. Please select at least one experiment.")
            return

        if not _apply_directory_settings():
            return

        for exp_name in experiment_names:
            exp_config = next((exp for exp in EXPERIMENTS if exp["name"] == exp_name), None)
            if exp_config is None:
                print(f"Experiment {exp_name} not found in EXPERIMENTS.")
                continue
            _run_experiment(exp_config)


def run_selected_experiments(b):
    """Run experiments based on widget selections.
    Updates configuration in memory and ensures directories exist.

    Args:
        b: The clicked button, as passed by ipywidgets; not used.
    """
    _run_experiments(list(experiment_widget.value))


def run_all_experiments(b):
    """Run every experiment listed in ``EXPERIMENTS``, ignoring the selection.

    Args:
        b: The clicked button, as passed by ipywidgets; not used.
    """
    _run_experiments([exp["name"] for exp in EXPERIMENTS])


# Bind buttons, first detaching whatever an earlier execution of this cell attached.
# generate_report_button is left unbound: no report routine is available in this notebook.
for _button, _handler, _stale_handler in (
    (run_selected_button, run_selected_experiments, _stale_run_selected_handler),
    (run_all_button, run_all_experiments, _stale_run_all_handler),
):
    if _stale_handler is not None:
        _button.on_click(_stale_handler, remove=True)
    _button.on_click(_handler)

## 5. View Previous Results (under development)

In [None]:
import pandas as pd
from pathlib import Path
import os

def load_results(output_dir=None):
    """Load the newest ``comparison_results_*.csv`` table found in ``output_dir``.

    Args:
        output_dir: Directory to search. When omitted, the directory currently stored
            in ``SC['OUTPUT_DIRECTORY']`` is used. It is looked up at call time, so an
            output directory changed after this function was defined is honoured.

    Returns:
        The selected CSV as a DataFrame, or None (after printing the reason) when the
        directory does not exist or contains no matching file.
    """
    if output_dir is None:
        output_dir = SC['OUTPUT_DIRECTORY']
    # isdir rather than exists: a path that points at a file cannot be listed
    if not os.path.isdir(output_dir):
        print(f"Results directory does not exist: {output_dir}")
        return None
    csv_files = [f for f in os.listdir(output_dir) if f.startswith("comparison_results_") and f.endswith(".csv")]
    if not csv_files:
        print("No comparison results found. Run experiments or generate a report first.")
        return None
    # "Latest" is the lexicographically greatest file name - assumes the part after
    # the prefix sorts chronologically (e.g. a zero-padded timestamp); TODO confirm.
    latest_csv = max(csv_files)
    csv_path = os.path.join(output_dir, latest_csv)
    results_df = pd.read_csv(csv_path)
    print(f"Loaded results from: {csv_path}")
    return results_df

# Load the newest comparison table into the notebook-level `results_df`, which
# the visualisation section reads; load_results() returns None when it finds nothing.
results_df = load_results()
if results_df is not None:
    display(results_df)

## 6. Visualise Results

In [None]:
import seaborn as sns

def visualize_results(results_df):
    """Draw a 2x2 grid of bar charts summarising the experiment results.

    Panels: test accuracy, F1 score and training time per experiment, plus the mean
    test accuracy per model.

    Args:
        results_df: DataFrame with the columns 'Experiment', 'Model', 'Test Accuracy',
            'F1 Score' and 'Training Time (min)'. May be None or empty.

    Returns:
        None. A message is printed instead of a figure when there is nothing to plot
        or when a required column is missing.
    """
    if results_df is None or len(results_df) == 0:
        print("No results available to visualize.")
        return

    # Fail with a readable message rather than a KeyError halfway through the figure
    required_columns = ['Experiment', 'Model', 'Test Accuracy', 'F1 Score', 'Training Time (min)']
    missing_columns = [column for column in required_columns if column not in results_df.columns]
    if missing_columns:
        print(f"Cannot visualize results; missing column(s): {', '.join(missing_columns)}")
        return

    fig, axes = plt.subplots(2, 2, figsize=(14, 8))

    # The three per-experiment panels differ only in metric and title
    per_experiment_panels = (
        (axes[0, 0], 'Test Accuracy', 'Test Accuracy by Experiment'),
        (axes[0, 1], 'F1 Score', 'F1 Score by Experiment'),
        (axes[1, 0], 'Training Time (min)', 'Training Time by Experiment (minutes)'),
    )
    for ax, metric, title in per_experiment_panels:
        sns.barplot(x='Experiment', y=metric, data=results_df, ax=ax)
        ax.set_title(title)
        # Experiment names tend to be long: slant them and anchor at the right end
        for label in ax.get_xticklabels():
            label.set_rotation(45)
            label.set_ha('right')

    # Average by Model
    model_ax = axes[1, 1]
    model_comparison = results_df.groupby('Model')['Test Accuracy'].mean().reset_index()
    sns.barplot(x='Model', y='Test Accuracy', data=model_comparison, ax=model_ax)
    model_ax.set_title('Average Accuracy by Model')

    # One layout pass once every panel exists
    fig.tight_layout(pad=3.0)
    plt.show()

# Plot only when the results section loaded a table; skipping the call for None
# also avoids printing the "No results available" message a second time.
if results_df is not None:
    visualize_results(results_df)