# GSM8K GA Orchestrator
End-to-end experiment runner and live monitor (population=50, 30 generations).

In [None]:
# Build the config object via load_config() and echo the settings used below
from src.utils.config import load_config

cfg = load_config()

# Model settings, printed as one line
print(
    f'Using provider={cfg.model_provider}, model={cfg.model_name}, '
    f'temp={cfg.temperature}, max_tokens={cfg.max_tokens}'
)
print('Paths:', cfg.paths)

# Values read from the 'population' and 'evaluation' sections of the raw config
population_cfg = cfg.raw['population']
print(
    'Population size (config):', population_cfg['population_size'],
    'Max generations:', population_cfg['max_generations'],
)
evaluation_cfg = cfg.raw['evaluation']
print('Concurrency limit:', evaluation_cfg['concurrency_limit'])

## Data: Ensure GSM8K subsets exist

In [None]:
import os

# Dataset files checked by this cell (paths are relative to the working directory)
required = [
    'data/gsm8k_primary_eval.jsonl',
    'data/gsm8k_validation.jsonl',
    'data/gsm8k_final_test.jsonl',
]

# Print one line per file stating whether the path exists on disk
for p in required:
    is_present = os.path.exists(p)
    print(f'Found {p}:', is_present)

## Seeds: preview of the 50 curated prompts

In [None]:
from src.genetics.seeds import SEED_PROMPTS

# Report how many seed prompts are available
seed_count = len(SEED_PROMPTS)
print('Curated seeds:', seed_count)

# Show the first five seeds, numbered from 1 and cut to 120 characters each
for rank, seed in enumerate(SEED_PROMPTS[:5], start=1):
    print(f'{rank}.', seed[:120])

## Run full 30-generation experiment (population=50) with live progress

In [None]:
import threading, time, json
import matplotlib.pyplot as plt
from IPython.display import clear_output, display
from src.genetics.controller import evolve
# Metrics log location: '<logs dir>/metrics.jsonl', with a fallback logs dir
logs_dir = cfg.paths.get('logs', 'data/results/logs')
metrics_path = logs_dir + '/metrics.jsonl'


def _run():
    """Thread entry point: call evolve() with no arguments."""
    evolve()


# Run the evolution on a background thread so this cell can keep executing
t = threading.Thread(target=_run)
t.start()
# Metrics reader used by the live monitor and the final summary
def read_metrics(path):
    """Parse a JSON-lines metrics file into parallel per-record lists.

    The file may be read while another thread is still appending to it, so
    blank lines, lines that are not valid JSON (for example a record that is
    only half written) and lines that are not JSON objects are skipped
    instead of raising.

    Args:
        path: Path of the metrics file. A missing file is not an error.

    Returns:
        A 7-tuple ``(gens, bests, avgs, divs, hits, calls, best_text)``.
        The first six items are lists with one entry per parsed record, taken
        from the keys ``generation``, ``best_fitness``, ``avg_fitness``,
        ``diversity``, ``cache_hit_rate`` (default ``0.0``) and ``api_calls``
        (default ``0``). ``best_text`` is the last truthy ``best_text`` value
        seen, or ``None`` if there was none.
    """
    gens, bests, avgs, divs, hits, calls = [], [], [], [], [], []
    best_text = None

    # EAFP: open directly instead of exists()+open(), which can race with
    # the file being created or removed in between.
    try:
        handle = open(path, 'r')
    except FileNotFoundError:
        return gens, bests, avgs, divs, hits, calls, best_text

    with handle as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            try:
                r = json.loads(line)
            except json.JSONDecodeError:
                # Most likely a record still being written; skip it for now.
                continue
            if not isinstance(r, dict):
                continue
            gens.append(r.get('generation'))
            bests.append(r.get('best_fitness'))
            avgs.append(r.get('avg_fitness'))
            divs.append(r.get('diversity'))
            hits.append(r.get('cache_hit_rate', 0.0))
            calls.append(r.get('api_calls', 0))
            bt = r.get('best_text')
            if bt:
                best_text = bt
    return gens, bests, avgs, divs, hits, calls, best_text
# Live monitor loop: redraw the dashboard while the background thread runs
while t.is_alive():
    clear_output(wait=True)
    print('Running evolution...')
    gens, bests, avgs, divs, hits, calls, best_text = read_metrics(metrics_path)
    if gens:
        fig, axs = plt.subplots(2, 3, figsize=(14, 8))
        # One (axes, series, title) entry per populated panel
        panels = [
            (axs[0, 0], bests, 'Best Fitness'),
            (axs[0, 1], avgs, 'Average Fitness'),
            (axs[0, 2], divs, 'Diversity'),
            (axs[1, 0], hits, 'Cache Hit Rate'),
            (axs[1, 1], calls, 'API Calls'),
        ]
        for ax, series, title in panels:
            ax.plot(gens, series)
            ax.set_title(title)
        axs[1, 2].axis('off')  # sixth panel is intentionally left blank
        plt.tight_layout()
        display(fig)
        plt.close(fig)  # close after display so figures do not accumulate
        if best_text:
            # The newlines must be '\n' escapes: a quoted string cannot span
            # physical lines.
            print('\nCurrent best prompt preview:\n', best_text[:400])
    else:
        print('Waiting for metrics...')
    # Wait up to 5 seconds between refreshes, but return immediately once the
    # thread finishes instead of always sleeping the full interval.
    t.join(timeout=5)
# Final draw: re-read the metrics once more and render the finished dashboard
clear_output(wait=True)
gens, bests, avgs, divs, hits, calls, best_text = read_metrics(metrics_path)
print('Evolution finished. Generations logged:', len(gens))
if gens:
    fig, axs = plt.subplots(2, 3, figsize=(14, 8))
    # One (axes, series, title) entry per populated panel
    panels = [
        (axs[0, 0], bests, 'Best Fitness'),
        (axs[0, 1], avgs, 'Average Fitness'),
        (axs[0, 2], divs, 'Diversity'),
        (axs[1, 0], hits, 'Cache Hit Rate'),
        (axs[1, 1], calls, 'API Calls'),
    ]
    for ax, series, title in panels:
        ax.plot(gens, series)
        ax.set_title(title)
    axs[1, 2].axis('off')  # sixth panel is intentionally left blank
    plt.tight_layout()
    display(fig)
    plt.close(fig)
    if best_text:
        # The newlines must be '\n' escapes: a quoted string cannot span
        # physical lines.
        print('\nFinal best prompt (truncated):\n', best_text[:800])


## Save final results

In [None]:
# Write the last best prompt recorded in the metrics log to the results directory
import os
import json
import shutil

metrics_path = cfg.paths.get('logs','data/results/logs') + '/metrics.jsonl'

# Scan the whole log and keep the last truthy 'best_text' value
best_text = None
if os.path.exists(metrics_path):
    with open(metrics_path, 'r') as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            try:
                r = json.loads(line)
            except json.JSONDecodeError:
                # Skip blank/truncated records instead of aborting the save
                continue
            bt = r.get('best_text') if isinstance(r, dict) else None
            if bt:
                best_text = bt

if best_text:
    results_dir = cfg.paths.get('results','data/results')
    # The results directory may not exist yet; create it so open() cannot
    # fail with FileNotFoundError.
    os.makedirs(results_dir, exist_ok=True)
    outp = os.path.join(results_dir, 'best_prompt_final.txt')
    # Explicit UTF-8 so prompts with non-ASCII characters save on any platform
    with open(outp, 'w', encoding='utf-8') as f:
        f.write(best_text)
    print('Saved final best prompt to', outp)
else:
    print('No best_text found in metrics.')
