In [1]:
import concurrent.futures

from virtual_lab.constants import CONSISTENT_TEMPERATURE, CREATIVE_TEMPERATURE
from virtual_lab.prompts import CODING_RULES, create_merge_prompt
from virtual_lab.run_meeting import run_meeting
from virtual_lab.utils import load_summaries

from nanobody_constants import (
    background_prompt,
    nanobody_prompt,
    discussions_dir,
    discussions_phase_to_dir,
    generic_agent,
    principal_investigator,
    immunologist,
    immunologist_ft,
    team_members,
    num_rounds,
    num_iterations,
)

## Setup

In [2]:
# All ablation outputs are saved in an "ablations" subdirectory of the discussions directory
ablations_dir = discussions_dir / "ablations"
ablations_dir.mkdir(exist_ok=True, parents=True)

## Generic agent vs specialized agent vs finetuned agent (knowledge)

The cell below compares the knowledge of a generic agent, a specialized agent (Immunologist), and a finetuned agent (Immunologist-FT).

In [None]:
# Ask the same knowledge question to the generic, specialized, and finetuned agents
agenda = (
    "What are some of the RBD mutations of the KP.3 and JN.1 variants of the SARS-CoV-2 "
    "spike protein that do not appear in previous variants?"
)

# Each entry pairs an agent with the label used in its saved file name
agents_names = [
    (generic_agent, "generic"),
    (immunologist, "immunologist"),
    (immunologist_ft, "immunologist_ft"),
]

# One individual meeting per agent, saved as knowledge_<label> in the ablations directory
for member, label in agents_names:
    run_meeting(
        meeting_type="individual",
        team_member=member,
        agenda=agenda,
        temperature=CONSISTENT_TEMPERATURE,
        save_dir=ablations_dir,
        save_name=f"knowledge_{label}",
    )

## Generic agent vs specialized agent vs multi-agent (coding)

The cell below runs a generic agent on the AlphaFold-Multimer coding task. For the specialized agent and multi-agent comparison, see `discussions/alphafold/discussion_4.md`. The first-round answer is the product of just the specialized agent (Computational Biologist), while the final-round answer is the product of the multi-agent team (Computational Biologist and Scientific Critic).

In [None]:
# AlphaFold-Multimer coding agenda: shared project context followed by the task description.
# The adjacent string literals below concatenate into a single agenda string.
agenda = (
    f"{background_prompt} {nanobody_prompt} "
    "Now you must use AlphaFold-Multimer to predict the structure of a nanobody-antigen complex and evaluate its binding. "
    "I will run AlphaFold-Multimer on several nanobody-antigen complexes and you need to process the outputs. "
    "Please write a complete Python script that takes as input a directory containing PDB files "
    "where each PDB file contains one nanobody-antigen complex predicted by AlphaFold-Multimer "
    "and outputs a CSV file containing the AlphaFold-Multimer confidence of each nanobody-antigen complex "
    "in terms of the interface pLDDT."
)

# Individual meeting with the generic agent only, using the coding rules
run_meeting(
    meeting_type="individual",
    team_member=generic_agent,
    agenda=agenda,
    agenda_rules=CODING_RULES,
    temperature=CREATIVE_TEMPERATURE,
    save_dir=ablations_dir,
    save_name="alphafold_generic",
)

## Prompt engineering

The cells below illustrate the effect of prompt engineering on tool selection. For the answer using the original agenda and agenda questions, see `discussions/tools_selection/merged.md`.

In [None]:
# Tools selection - setup
# Prior context for the tools selection meetings: the merged project specification discussion
project_specification_merged_path = discussions_phase_to_dir["project_specification"] / "merged.json"
tools_selection_prior_summaries = load_summaries(
    discussion_paths=[project_specification_merged_path]
)
print(f"Number of prior summaries: {len(tools_selection_prior_summaries)}")

# Each prompt variant gets its own subdirectory under this directory
tools_selection_dir = ablations_dir / "tools_selection"

In [4]:
# Tools selection - agendas and questions
# Project context shared by every agenda variant (note the trailing space)
tools_selection_context = f"{background_prompt} {nanobody_prompt} "

# Agenda variants, each less specific than the one before it; index-aligned with
# tools_selection_questions below
tools_selection_agendas = [
    # Variant 1: names ML/computational tools, asks for 5-10 tools and how they would be used
    tools_selection_context
    + "Now you need to select machine learning and/or computational tools to implement this nanobody design approach. "
    "Please list several tools (5-10) that would be relevant to this nanobody design approach "
    "and how they could be used in the context of this project.",
    # Variant 2: drops the "machine learning and/or computational" qualifier
    tools_selection_context
    + "Now you need to select tools to implement this nanobody design approach. "
    "Please list several tools (5-10) that would be relevant to this nanobody design approach "
    "and how they could be used in the context of this project.",
    # Variant 3: additionally drops the requested number of tools
    tools_selection_context
    + "Now you need to select tools to implement this nanobody design approach. "
    "Please list several tools that would be relevant to this nanobody design approach "
    "and how they could be used in the context of this project.",
    # Variant 4: additionally drops the request to explain how the tools would be used
    tools_selection_context
    + "Now you need to select tools to implement this nanobody design approach. "
    "Please list several tools that would be relevant to this nanobody design approach.",
]

# Follow-up question shared by the first three variants
tools_usage_question = "For each tool, how could it be used for designing modified nanobodies?"

# One tuple of agenda questions per agenda variant above
tools_selection_questions = [
    (
        "What machine learning and/or computational tools could be used for this nanobody design approach (list 5-10)?",
        tools_usage_question,
    ),
    (
        "What tools could be used for this nanobody design approach (list 5-10)?",
        tools_usage_question,
    ),
    (
        "What tools could be used for this nanobody design approach?",
        tools_usage_question,
    ),
    (
        "What tools could be used for this nanobody design approach?",
    ),
]

In [None]:
# Tools selection – ablations
# For each selected prompt variant: run the parallel team meetings, then merge their
# summaries in an individual meeting with the principal investigator.

# Zero-based indices of the prompt variants to run. Only the last variant (saved under
# "prompt_4") is run here; add indices to this set to run other variants.
prompt_indices_to_run = {3}

# The agendas and questions are paired by position, so the two lists must line up
assert len(tools_selection_agendas) == len(tools_selection_questions), \
    "Each tools selection agenda needs a matching tuple of agenda questions"

for i, (agenda, agenda_questions) in enumerate(zip(tools_selection_agendas, tools_selection_questions)):
    if i not in prompt_indices_to_run:
        continue

    # Create save directory
    save_dir = tools_selection_dir / f"prompt_{i + 1}"
    save_dir.mkdir(parents=True, exist_ok=True)

    # Run meetings in parallel, one thread per iteration
    with concurrent.futures.ThreadPoolExecutor() as executor:
        futures = [
            executor.submit(
                run_meeting,
                meeting_type="team",
                team_lead=principal_investigator,
                team_members=team_members,
                summaries=tools_selection_prior_summaries,
                agenda=agenda,
                agenda_questions=agenda_questions,
                save_dir=save_dir,
                save_name=f"discussion_{iteration_num + 1}",
                temperature=CREATIVE_TEMPERATURE,
                num_rounds=num_rounds,
            ) for iteration_num in range(num_iterations)
        ]

        # Let every meeting finish before checking for failures
        concurrent.futures.wait(futures)

        # wait() does not re-raise worker exceptions; result() does, so a failed meeting
        # stops the cell here instead of being merged from incomplete or stale summaries
        for future in futures:
            future.result()

    # Load summaries (sorted because glob yields paths in arbitrary order)
    summaries = load_summaries(discussion_paths=sorted(save_dir.glob("discussion_*.json")))
    print(f"Number of summaries: {len(summaries)}")

    # Create merge prompt
    merge_prompt = create_merge_prompt(agenda=agenda, agenda_questions=agenda_questions)

    # Run merge meeting
    run_meeting(
        meeting_type="individual",
        team_member=principal_investigator,
        summaries=summaries,
        agenda=merge_prompt,
        save_dir=save_dir,
        save_name="merged",
        temperature=CONSISTENT_TEMPERATURE,
        num_rounds=num_rounds,
    )

## Parallel meetings

To see the effect of parallel meetings, simply compare the individual discussions (e.g., `discussion_1.md` through `discussion_5.md`) with the result of the parallel meeting (e.g., `merged.md`) for any of the discussions.