# Generate animal YAML files

This notebook shows which animals in `metadata/animals_entries.xlsx` already have YAML files and provides a helper to (re)generate YAML metadata for selected animals. Any failures during generation are reported per animal without stopping the batch.


In [None]:
from __future__ import annotations

import shutil
import tempfile
from pathlib import Path
from typing import Iterable

import pandas as pd

from social_imaging_scripts.metadata import build_metadata_from_excel

def find_repo_root(start: Path) -> Path:
    '''Return the closest directory at or above *start* containing ``metadata/``.

    The search begins at *start* itself and then walks up through its parents,
    so the notebook can be launched from the repository root, from
    exampleNotebooks/, or from any deeper sub-directory.

    If no ancestor contains a ``metadata`` directory the parent of *start* is
    returned, which matches the previous one-level-up behaviour.
    '''
    for candidate in (start, *start.parents):
        if (candidate / 'metadata').is_dir():
            return candidate
    return start.parent


# Determine repository root even when this notebook is executed from
# exampleNotebooks/ (or a directory nested further below the repository root)
REPO_ROOT = find_repo_root(Path.cwd().resolve())
XLSX_PATH = REPO_ROOT / 'metadata/animals_entries.xlsx'
YAML_DIR = REPO_ROOT / 'metadata/animals'
# Make sure the output directory exists before anything tries to write into it
YAML_DIR.mkdir(parents=True, exist_ok=True)


In [None]:
# Display the resolved repository root so a wrong working directory is easy to spot
REPO_ROOT

In [None]:

# Overview table: one row per animal in the workbook, flagged by whether a
# YAML file named after its animal_id is already present in YAML_DIR
animals_df = pd.read_excel(XLSX_PATH, sheet_name='Animals')
animals_df['animal_id'] = animals_df['animal_id'].astype(str).str.strip()
existing_yaml = {yaml_file.stem for yaml_file in YAML_DIR.glob('*.yaml')}
status_df = (
    animals_df[['animal_id']]
    .assign(yaml_exists=animals_df['animal_id'].isin(existing_yaml))
    .sort_values('animal_id')
    .reset_index(drop=True)
)
status_df


In [None]:

def build_yaml_for_animals(animal_ids: Iterable[str]) -> tuple[list[str], dict[str, str]]:
    '''Generate YAML metadata for the requested animals.

    For each requested animal a temporary workbook containing only that
    animal's rows is written and handed to ``build_metadata_from_excel`` with
    ``YAML_DIR`` as the output directory. Errors are collected per animal so a
    single bad entry never stops the batch.

    *animal_ids* are converted with ``str`` and stripped; blank entries are
    skipped. A single string is treated as one animal ID instead of being
    iterated character by character.

    An existing ``<animal_id>.yaml`` is moved out of the way before generation
    and put back if generation fails, so a failed run does not destroy the
    previous file.

    Returns a tuple ``(successes, failures)`` where *successes* is a list of
    animal IDs for which generation finished without raising and *failures*
    maps the animal ID to the exception message raised during generation (or
    the exception type name when the message is empty).
    '''

    # A bare string is itself iterable; wrap it so it counts as one ID.
    if isinstance(animal_ids, str):
        animal_ids = [animal_ids]

    animals_df = pd.read_excel(XLSX_PATH, sheet_name='Animals')
    sessions_df = pd.read_excel(XLSX_PATH, sheet_name='Sessions')
    settings_df = pd.read_excel(XLSX_PATH, sheet_name='2p_settings')

    # Normalise the key columns once so the comparisons below are plain
    # string matches on stripped values.
    animals_df['animal_id'] = animals_df['animal_id'].astype(str).str.strip()
    sessions_df['animal_id'] = sessions_df['animal_id'].astype(str).str.strip()
    sessions_df['stack_id'] = sessions_df['stack_id'].astype(str).str.strip()
    settings_df['session_id'] = settings_df['session_id'].astype(str).str.strip()

    successes: list[str] = []
    failures: dict[str, str] = {}

    for raw_id in animal_ids:
        animal_id = str(raw_id).strip()
        if not animal_id:
            continue

        mask = animals_df['animal_id'] == animal_id
        if not mask.any():
            failures[animal_id] = 'animal_id not found in workbook'
            continue

        out_path = YAML_DIR / f'{animal_id}.yaml'
        tmp_dir = None
        backup_path = None
        try:
            # Created inside the try so that a failure here (e.g. an ID that
            # does not form a valid directory prefix) is reported for this
            # animal instead of aborting the whole batch.
            tmp_dir = Path(tempfile.mkdtemp(prefix=f'animal_{animal_id}_'))
            tmp_xlsx = tmp_dir / 'animal_subset.xlsx'

            sessions_subset = sessions_df[sessions_df['animal_id'] == animal_id].copy()
            # Settings rows are matched to this animal through the sessions'
            # stack_id values (already normalised above).
            settings_subset = settings_df[
                settings_df['session_id'].isin(sessions_subset['stack_id'])
            ].copy()

            # NOTE(review): the settings are read from sheet '2p_settings' but
            # written as 'two_photon_settings' -- confirm this is the sheet
            # name build_metadata_from_excel expects.
            with pd.ExcelWriter(tmp_xlsx, engine='openpyxl') as writer:
                animals_df[mask].to_excel(writer, sheet_name='Animals', index=False)
                sessions_subset.to_excel(writer, sheet_name='Sessions', index=False)
                settings_subset.to_excel(writer, sheet_name='two_photon_settings', index=False)

            # Clear the target path before building, but keep the old file in
            # the temporary directory so it can be restored on failure.
            if out_path.exists():
                backup_path = tmp_dir / 'previous.yaml'
                shutil.move(str(out_path), str(backup_path))

            build_metadata_from_excel(tmp_xlsx, YAML_DIR)
            successes.append(animal_id)
        except Exception as exc:
            # Some exceptions carry no message; fall back to the type name so
            # the report is never blank.
            failures[animal_id] = str(exc) or type(exc).__name__
            if backup_path is not None and backup_path.exists():
                try:
                    shutil.move(str(backup_path), str(out_path))
                except OSError as restore_exc:
                    failures[animal_id] += (
                        f' (previous YAML could not be restored: {restore_exc})'
                    )
        finally:
            # Best-effort cleanup; leftover temp files must not fail the batch.
            if tmp_dir is not None:
                shutil.rmtree(tmp_dir, ignore_errors=True)

    return successes, failures


In [None]:

# Animals whose YAML files should be (re)generated.
# Edit this list as needed before running the cell.
target_animals = ['L395_f10', 'L395_f11', 'L331_f01', 'L395_f06']

successes, failures = build_yaml_for_animals(target_animals)

# Summarise the batch: successful IDs first, then one line per failure
print('YAML generation complete.')
print('  successes:', successes)
if not failures:
    print('  failures: none')
else:
    print('  failures:')
    for failed_id in failures:
        print(f'    {failed_id}: {failures[failed_id]}')


In [None]:

# Re-scan YAML_DIR so the status table reflects the files present after generation
existing_yaml = set(yaml_file.stem for yaml_file in YAML_DIR.glob('*.yaml'))
has_yaml = status_df['animal_id'].isin(existing_yaml)
status_df['yaml_exists'] = has_yaml
status_df.sort_values(by='animal_id').reset_index(drop=True)
