In [None]:
import json
from pathlib import Path
from typing import Dict, List
from tusoai import tusoai


In [None]:
import os  # imported here so this cell runs on its own; only used for the key lookups below

# ==========================================================
# Provider and credentials
# ==========================================================
# Keys are read from environment variables so that secrets never have to be
# typed into (and saved with) the notebook. When a variable is not set the
# value falls back to '' -- the same placeholder this cell used originally.
openrouter = False  # Whether the LLM is accessed through OpenRouter

# Env var consulted: OPENROUTER_API_KEY when `openrouter` is True, otherwise OPENAI_API_KEY.
api_key = os.environ.get("OPENROUTER_API_KEY" if openrouter else "OPENAI_API_KEY", "")
# Env var consulted: SEMANTIC_SCHOLAR_API_KEY.
semantic_scholar_api_key = os.environ.get("SEMANTIC_SCHOLAR_API_KEY", "")

# ==========================================================
# Configuration for TusoAI
# ==========================================================

# LLM settings
LLM_MODEL       = "gpt-4o-mini"   # LLM to use
temperature     = 0.5             # LLM temperature

# Task settings
task_description = "single cell RNA-seq imputation"
data_available   = "an AnnData object"   # e.g., single-cell RNA-seq
features_available = None                # Describe existing features if relevant

# File paths
initial_file = "denoise_initial.py"             # Initial template file path
filename     = "single_cell_denoise/denoise"  # File path for optimization

# Optimization hints (guide TusoAI)
hints = [
    'Make sure to store the denoised data in adata.obsm["denoised"].',
    "Keep the function header, input, output the same.",
]

# Search / instruction parameters
instruction_count = 10    # Number of instructions per-category (per refinement)
paper_searches    = 10    # Max papers to extract
num_cat           = 10    # Number of categories (before refinement)
num_init          = 5     # Number of initial solutions to construct

# Evolution / optimization loop parameters
n_generations      = 10000   # Number of cluster-evolve rounds
children_per_model = 1       # Each model spawns children per generation
bug_retries        = 3       # Attempts at fixing bugs during optimization
initial_bug_fix_attempts = 5 # Attempts at fixing bugs for initial solutions
timeout            = 600     # Timeout for each execution (seconds)
skip_timeout       = True    # Skip runs that timeout instead of debugging
drop_island_iter   = 2       # Iterations before lowering count of solution pools

# Prompt sampling / feedback
n_feedback_buffer  = 5       # Number of feedback samples
prompt_samples     = 3       # Instructions sampled per iteration
alter_info_samples = 3       # Diagnostic prompts sampled per iteration
prompt_decay       = 1.1     # Update prior of category usefulness

# Control options
use_initial   = False   # Keep original template code as an initial solution
TIME_LIMIT    = 60 * 60 * 8.0   # Total runtime limit for optimization (8 hours, in seconds)
val_limit     = 1.0    # Validation metric limit (avoid overfitting)
debug_mode    = False  # Print debugging statements if True


In [None]:
# Create the tusoai client from the configured key; later cells hand this
# object back to tusoai through their `client=` argument.
client = tusoai.initialize(
    api_key,
    openrouter=openrouter,
)

In [None]:
# Build summaries for the task description. The Semantic Scholar key is passed
# as `api_key` and `paper_searches` as `top_n`.
summaries = tusoai.make_summaries(
    task_description,
    api_key=semantic_scholar_api_key,
    top_n=paper_searches,
    client=client,
    model=LLM_MODEL,
)


In [None]:
# Derive `num_cat` categories from the task/data descriptions and the summaries.
categories = tusoai.make_categories(
    task_description,
    data_available,
    num_cat=num_cat,
    summaries=summaries,
    client=client,
    model=LLM_MODEL,
)


In [None]:
# Generate instructions from the categories and summaries; `instruction_count`
# is passed positionally as the fifth argument.
instructions = tusoai.make_instructions(
    task_description,
    data_available,
    categories,
    summaries,
    instruction_count,
    client=client,
    model=LLM_MODEL,
)


In [None]:
# Construct the initial solutions; `num_init` is passed positionally as the
# third argument and the summaries as the fourth.
solutions = tusoai.make_solutions(
    task_description,
    data_available,
    num_init,
    summaries,
    client=client,
    model=LLM_MODEL,
)


In [None]:
# Compute `probabilities` from the categories and the initial solutions; the
# result is passed to discover_method as `probabilities=` further down.
probabilities = tusoai.make_probabilities(
    task_description,
    data_available,
    categories,
    solutions,
    client=client,
    model=LLM_MODEL,
)

In [None]:
import json
from pathlib import Path

# Diagnostic prompt definitions; the path is relative to the current working directory.
json_path = Path("tusoai", "diagnostic_prompts.json")

# Read the file as UTF-8 text and parse it into `alter_info_prompts`.
with json_path.open(encoding="utf-8") as f:
    alter_info_prompts = json.load(f)


In [None]:
# Run the optimisation. The call returns a pair, unpacked into `best_model` and
# `optimization_history`. Most keywords forward the notebook variable of the
# same name; the ones that differ are marked inline.
best_model, optimization_history = tusoai.discover_method(
    llm_model=LLM_MODEL,                # notebook variable: LLM_MODEL
    temperature=temperature,
    client=client,
    prompts=instructions,               # notebook variable: instructions
    probabilities=probabilities,
    reference_filename=initial_file,    # notebook variable: initial_file
    initialisations=solutions,          # notebook variable: solutions
    n_generations=n_generations,
    children_per_model=children_per_model,
    bug_retries=bug_retries,
    initial_bug_fix_attempts=initial_bug_fix_attempts,
    timeout=timeout,
    n_feedback_buffer=n_feedback_buffer,
    skip_timeout=skip_timeout,
    drop_island_iter=drop_island_iter,
    prompt_samples=prompt_samples,
    alter_info_samples=alter_info_samples,
    prompt_decay=prompt_decay,
    hints=hints,
    filename=filename,
    use_initial=use_initial,
    TIME_LIMIT=TIME_LIMIT,
    task_description=task_description,
    val_limit=val_limit,
    debug=debug_mode,                   # notebook variable: debug_mode
    alter_info_prompts=alter_info_prompts,
)
                                                          