In [None]:
# 1. Install Dependencies
!pip install optuna librosa pyyaml pandas matplotlib torchaudio

In [None]:
# 2. Clone Repository
!git clone https://github.com/Quarkisinproton/IndianBatsModel.git
%cd IndianBatsModel

In [None]:
# 3. Patch Codebase (Fixes & Features)
import re

# The line in train.py after which the validation-loss report is inserted.
TRAIN_PATCH_TARGET = 'print(f"Training curves saved to {plot_path}")'


def fix_missing_colon(source):
    """Return `source` with the `if not ann_list continue` typo repaired.

    The text is returned unchanged when the typo is not present.
    """
    return source.replace("if not ann_list continue", "if not ann_list: continue")


def add_final_val_loss_report(source):
    """Insert a `FINAL_VAL_LOSS` print after the training-curves message.

    The inserted lines reuse the indentation of the matched line, so the patch
    stays syntactically valid whatever nesting level that line lives at.

    Args:
        source: Full text of train.py.

    Returns:
        The patched text, or None when no line starts with the target
        statement (after indentation).
    """
    pattern = re.compile(
        r'^(?P<indent>[ \t]*)' + re.escape(TRAIN_PATCH_TARGET) + r'(?P<rest>.*)$',
        re.MULTILINE,
    )

    def _append_report(match):
        indent = match.group('indent')
        report = (
            f'\n\n'
            f'{indent}# Report final validation loss for hyperparameter tuning\n'
            f'{indent}if val_losses:\n'
            f'{indent}    print(f"FINAL_VAL_LOSS: {{val_losses[-1]}}")'
        )
        # Keep the matched line verbatim and append the report beneath it.
        return match.group(0) + report

    patched, replacements = pattern.subn(_append_report, source)
    return patched if replacements else None


# A. Fix Syntax Error in whombat_project_to_wombat.py
file_path = 'MainShitz/data_prep/whombat_project_to_wombat.py'
try:
    with open(file_path, 'r') as f:
        content = f.read()
    fixed_content = fix_missing_colon(content)
    # Only rewrite the file when the typo was actually present.
    if fixed_content != content:
        with open(file_path, 'w') as f:
            f.write(fixed_content)
        print("Fixed syntax error in whombat_project_to_wombat.py")
except FileNotFoundError:
    print(f"Warning: {file_path} not found.")

# B. Patch train.py to report Final Validation Loss
train_script_path = 'MainShitz/train.py'
try:
    with open(train_script_path, 'r') as f:
        content = f.read()

    if "FINAL_VAL_LOSS" in content:
        # Already patched (or natively supported): keep the cell idempotent.
        print("train.py already contains FINAL_VAL_LOSS reporting.")
    else:
        patched_content = add_final_val_loss_report(content)
        if patched_content is None:
            print("WARNING: Could not find target string to patch train.py.")
        else:
            with open(train_script_path, 'w') as f:
                f.write(patched_content)
            print("Successfully patched train.py")
except FileNotFoundError:
    print(f"Warning: {train_script_path} not found.")

In [None]:
# 4. Create smart_tuner.py
# The script is held in a regular (non-raw) triple-quoted string, so every
# backslash escape meant for the generated file is doubled (e.g. "\\n").
tuner_code = """
import optuna
import yaml
import os
import subprocess
import sys

BASE_CONFIG_PATH = 'configs/config.yaml'
N_TRIALS = 20


class TrainingFailed(RuntimeError):
    '''Raised when a training run crashes or reports no validation loss.'''


def parse_final_val_loss(output):
    '''Return the last parseable FINAL_VAL_LOSS value in `output`, or None.'''
    final_val_loss = None
    for line in output.splitlines():
        if "FINAL_VAL_LOSS:" in line:
            try:
                final_val_loss = float(line.split("FINAL_VAL_LOSS:")[1].strip())
            except ValueError:
                # Malformed marker line: keep any earlier value and keep scanning.
                continue
    return final_val_loss


def objective(trial):
    '''Train once with sampled hyperparameters and return the final validation loss.

    Raises TrainingFailed when the training subprocess exits non-zero or never
    prints a FINAL_VAL_LOSS line, so the trial is recorded as failed instead of
    being scored with a made-up loss value.
    '''
    # 1. Suggest Hyperparameters
    learning_rate = trial.suggest_float("learning_rate", 1e-5, 1e-1, log=True)
    batch_size = trial.suggest_categorical("batch_size", [8, 16, 32])
    weight_decay = trial.suggest_float("weight_decay", 1e-6, 1e-3, log=True)

    print(f"\\n--- Trial {trial.number} ---")
    print(f"Params: lr={learning_rate}, bs={batch_size}, wd={weight_decay}")

    # 2. Load Base Config
    if not os.path.exists(BASE_CONFIG_PATH):
        raise FileNotFoundError(f"Config file not found: {BASE_CONFIG_PATH}")

    with open(BASE_CONFIG_PATH, 'r') as f:
        # An empty YAML file loads as None; fall back to an empty mapping.
        config = yaml.safe_load(f) or {}

    if 'train' not in config:
        config['train'] = {}

    config['train']['learning_rate'] = learning_rate
    config['train']['batch_size'] = batch_size
    config['train']['weight_decay'] = weight_decay

    # Make sure the checkpoint directory exists before training writes to it.
    os.makedirs('models', exist_ok=True)
    model_save_path = os.path.join('models', f'trial_{trial.number}.pth')
    config['train']['model_save_path'] = model_save_path

    temp_config_path = f'temp_config_{trial.number}.yaml'
    with open(temp_config_path, 'w') as f:
        yaml.dump(config, f)

    # 3. Run Training
    cmd = [sys.executable, "-m", "MainShitz.train", "--config", temp_config_path]

    try:
        result = subprocess.run(cmd, capture_output=True, text=True, check=True)
    except subprocess.CalledProcessError as e:
        print(f"Training failed for trial {trial.number}")
        print("Error:", e.stderr)
        raise TrainingFailed(f"training exited with code {e.returncode}") from e
    finally:
        # The per-trial config is only needed while the subprocess runs.
        if os.path.exists(temp_config_path):
            os.remove(temp_config_path)

    final_val_loss = parse_final_val_loss(result.stdout)
    if final_val_loss is None:
        print("Warning: Could not find FINAL_VAL_LOSS in output.")
        raise TrainingFailed("FINAL_VAL_LOSS missing from training output")

    return final_val_loss


if __name__ == "__main__":
    study = optuna.create_study(direction="minimize")
    print("Starting Hyperparameter Optimization...")
    # Trials raising TrainingFailed are recorded as failed and the study moves
    # on; any other exception still aborts the run.
    study.optimize(objective, n_trials=N_TRIALS, catch=(TrainingFailed,))

    completed = [
        t for t in study.trials
        if t.state == optuna.trial.TrialState.COMPLETE
    ]

    print("\\n" + "="*40)
    print("Optimization Complete")
    print("="*40)
    if not completed:
        # best_params / best_value are undefined without a completed trial.
        print("No trial completed successfully; see the errors above.")
        print("="*40)
        sys.exit(1)

    print(f"Completed trials: {len(completed)} / {len(study.trials)}")
    print("Best Hyperparameters:")
    for key, value in study.best_params.items():
        print(f"  {key}: {value}")
    print(f"Best Validation Loss: {study.best_value}")
    print("="*40)
"""

with open('smart_tuner.py', 'w') as f:
    f.write(tuner_code)
print("Created smart_tuner.py")

In [None]:
# 5. Prepare Data (bat annotations + noise annotations)
import os
import json
import glob
import librosa
import sys

# Ensure we are in the right directory for imports
if os.path.basename(os.getcwd()) != 'IndianBatsModel':
    if os.path.exists('IndianBatsModel'):
        os.chdir('IndianBatsModel')
    sys.path.append('.')

from MainShitz.data_prep.wombat_to_spectrograms import process_all as generate_spectrograms
from MainShitz.data_prep.whombat_project_to_wombat import convert_whombat_project_to_wombat_jsons

# --- CONFIGURATION ---

# 1. Input Paths (Adjust these to match your Kaggle Dataset structure)
#    These are the folders containing your .wav files
RAW_AUDIO_DIRS = [
    '/kaggle/input/annotations-tenuis-ceylonicus/Pip ceylonicus',
    '/kaggle/input/annotations-tenuis-ceylonicus/Pip._tenuis'
]

#    These are the JSON exports from Whombat
WHOMBAT_PROJECT_JSONS = [
    '/kaggle/input/annotations-tenuis-ceylonicus/tenuis annotations.json',
    '/kaggle/input/annotations-tenuis-ceylonicus/Pip ceylonicus.json',
]

#    Path to your noise data (UPDATE THIS if your folder name is different)
#    NOTE(review): this cell previously contained two pasted copies that
#    disagreed on this path ('/kaggle/input/noise-data' vs the value below);
#    the value from the later, complete copy is kept — confirm it is correct.
NOISE_AUDIO_DIR = '/kaggle/input/noice-files/Noise'

# 2. Output Paths (In the writable /kaggle/working directory)
JSON_DIR = '/kaggle/working/data/annotations_json_folder'
SPECT_OUT = '/kaggle/working/data/processed/spectrograms'

os.makedirs(JSON_DIR, exist_ok=True)
os.makedirs(SPECT_OUT, exist_ok=True)


def audio_duration(path):
    """Return the duration of the audio file at `path`, across librosa versions."""
    try:
        return librosa.get_duration(path=path)
    except TypeError:
        # Fallback for librosa versions that reject the `path` keyword.
        return librosa.get_duration(filename=path)


# --- EXECUTION ---

# 1. Convert Bat Annotations
print("Converting Bat Annotations...")
for pj in WHOMBAT_PROJECT_JSONS:
    # Only pass 2 arguments (Input File, Output Dir)
    convert_whombat_project_to_wombat_jsons(pj, JSON_DIR)

# 2. Generate Noise Annotations
print("Generating Noise Annotations...")
noise_files = glob.glob(os.path.join(NOISE_AUDIO_DIR, "*.wav"))
print(f"Found {len(noise_files)} noise files.")
if not noise_files:
    print(f"Warning: no .wav files found in {NOISE_AUDIO_DIR}")

noise_annotations = []
for nf in noise_files:
    try:
        # Create a simple annotation for the whole file
        noise_annotations.append({
            "start": 0.0,
            "end": audio_duration(nf),
            "label": "Noise",
            "filename": os.path.basename(nf)
        })
    except Exception as e:
        # Best effort: report an unreadable file and carry on with the rest.
        print(f"Error reading {nf}: {e}")

noise_json_path = os.path.join(JSON_DIR, "noise_annotations.json")
with open(noise_json_path, 'w') as f:
    json.dump(noise_annotations, f, indent=4)

# 3. Generate Spectrograms
print("Generating Spectrograms...")

# Combine bat audio dirs and noise audio dir into one list for the processor
ALL_AUDIO_DIRS = RAW_AUDIO_DIRS + [NOISE_AUDIO_DIR]

# NOTE(review): `generate_spectrograms` is imported above but never called in
# this cell, so SPECT_OUT is never populated here. TODO: add the call once its
# signature in MainShitz/data_prep/wombat_to_spectrograms.py is confirmed.

In [None]:
# 6. Update Config and Run Tuner
import yaml

config_path = 'configs/config.yaml'
with open(config_path, 'r') as f:
    config = yaml.safe_load(f)

# Update data path
config['data']['processed_data_path'] = SPECT_OUT
config['data']['train_spectrograms'] = SPECT_OUT

# Set epochs for tuning (e.g., 5 epochs per trial)
config['train']['epochs'] = 5

with open(config_path, 'w') as f:
    yaml.dump(config, f)

print("Config updated. Starting Tuner...")

!python smart_tuner.py