In [None]:
import concurrent.futures
import shutil

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from scipy.stats import fisher_exact, wilcoxon

from virtual_lab.agent import Agent
from virtual_lab.constants import CONSISTENT_TEMPERATURE, CREATIVE_TEMPERATURE
from virtual_lab.prompts import create_merge_prompt
from virtual_lab.run_meeting import run_meeting
from virtual_lab.utils import load_summaries

from nanobody_constants import (
    background_prompt,
    nanobody_prompt,
    discussions_phase_to_dir,
    generic_team_lead,
    generic_team,
    principal_investigator,
    immunologist,
    team_members,
    num_rounds,
    num_iterations,
)

## Setup

In [None]:
# Root directory that receives every ablation artifact written by this notebook
ablations_dir = discussions_phase_to_dir["ablations"]
ablations_dir.mkdir(exist_ok=True, parents=True)

# Plot styling shared by all figures below
figsize = (10, 6)
fontsize = 18
colors = [
    '#2166AC',  # Blue
    '#FDDBC7',  # Light Pink
    '#B2182B',  # Dark Red
]

## Generic vs specialized agent

Compare a team of generic agents to a team of specialized agents.

In [None]:
# Project specification - prompts
# The agenda is assembled from adjacent string literals (one sentence or clause
# per line); the resulting text is a single space-separated paragraph.
project_specification_agenda = (
    f"{background_prompt} "
    "Please create an antibody/nanobody design approach to solve this problem. "
    "Decide whether you will design antibodies or nanobodies. "
    "For your choice, decide whether you will design the antibodies/nanobodies de novo "
    "or whether you will modify existing antibodies/nanobodies. "
    "If modifying existing antibodies/nanobodies, please specify which antibodies/nanobodies "
    "to start with as good candidates for targeting the newest variant of the SARS-CoV-2 spike protein. "
    "If designing antibodies/nanobodies de novo, please describe how you will propose antibody/nanobody candidates."
)

# Questions passed to each meeting as `agenda_questions`, alongside the agenda
project_specification_questions = (
    "Will you design standard antibodies or nanobodies?",
    "Will you design antibodies/nanobodies de novo or will you modify existing antibodies/nanobodies (choose only one)?",
    "If modifying existing antibodies/nanobodies, which precise antibodies/nanobodies will you modify (please list 3-4)?",
    "If designing antibodies/nanobodies de novo, how exactly will you propose antibody/nanobody candidates?",
)

# Directory holding the project specification ablation (one sub-directory per team)
project_specification_dir = ablations_dir / "project_specification"
project_specification_dir.mkdir(exist_ok=True, parents=True)

In [None]:
# Copy the pre-existing five scientist project specification discussions
# into the ablation directory so both teams live side by side.
# dirs_exist_ok=True keeps this cell re-runnable: without it copytree raises
# FileExistsError as soon as "scientist_team" exists (e.g. on a second run, or
# after the scaled-up discussions below have written into that directory).
# Files already present in the destination that are not in the source are left
# untouched; files with the same name are overwritten by the source copy.
shutil.copytree(
    discussions_phase_to_dir["project_specification"],
    project_specification_dir / "scientist_team",
    dirs_exist_ok=True,
)

In [None]:
# Project specification - discussion (generic team)
# Launch `num_iterations` independent generic-team meetings in parallel.
# Each meeting is given save_name "discussion_<k>" (1-based) inside the
# "generic_team" sub-directory.
with concurrent.futures.ThreadPoolExecutor() as executor:
    generic_discussion_futures = [
        executor.submit(
            run_meeting,
            meeting_type="team",
            team_lead=generic_team_lead,
            team_members=generic_team,
            agenda=project_specification_agenda,
            agenda_questions=project_specification_questions,
            save_dir=project_specification_dir / "generic_team",
            save_name=f"discussion_{iteration_num + 1}",
            temperature=CREATIVE_TEMPERATURE,
            num_rounds=num_rounds,
        ) for iteration_num in range(num_iterations)
    ]
    concurrent.futures.wait(generic_discussion_futures)

# concurrent.futures.wait() only blocks until the futures finish; it never
# re-raises an exception from a worker. Calling result() on each future
# surfaces the first failed meeting instead of silently dropping it. This runs
# after every meeting has completed, so successful meetings are unaffected.
for future in generic_discussion_futures:
    future.result()

In [None]:
# Project specification - merge (generic team)
# Merge only discussion_1 ... discussion_5. The pattern is deliberately narrower
# than "discussion_*" because the scaled-up cell below writes additional
# discussion_<k>.json files into the same directory.
# "[1-5]" is a character range; the previous "[1|2|3|4|5]" was a character
# class that also matched a literal "|" (glob has no alternation operator).
# Sorting makes the order of the summaries independent of filesystem ordering.
generic_discussion_paths = sorted(
    (project_specification_dir / "generic_team").glob("discussion_[1-5].json")
)
project_specification_summaries = load_summaries(discussion_paths=generic_discussion_paths)
print(f"Number of summaries: {len(project_specification_summaries)}")

project_specification_merge_prompt = create_merge_prompt(
    agenda=project_specification_agenda,
    agenda_questions=project_specification_questions,
)

# Single-agent meeting in which the generic team lead receives the individual
# summaries and the merge prompt; saved as "merged" next to the discussions.
run_meeting(
    meeting_type="individual",
    team_member=generic_team_lead,
    summaries=project_specification_summaries,
    agenda=project_specification_merge_prompt,
    save_dir=project_specification_dir / "generic_team",
    save_name="merged",
    temperature=CONSISTENT_TEMPERATURE,
    num_rounds=num_rounds,
)

In [None]:
# Project specification - discussion (scaled up generic team and scientist team)
# Total number of discussions per team after scaling up. Discussions
# 1..num_iterations are produced/copied by earlier cells, so only
# num_iterations+1..total_discussions are run here.
total_discussions = 50

scaled_discussion_futures = []

for team_lead, team, team_name in [
    (generic_team_lead, generic_team, "generic_team"),
    (principal_investigator, team_members, "scientist_team"),
]:
    with concurrent.futures.ThreadPoolExecutor() as executor:
        team_futures = [
            executor.submit(
                run_meeting,
                meeting_type="team",
                team_lead=team_lead,
                team_members=team,
                agenda=project_specification_agenda,
                agenda_questions=project_specification_questions,
                save_dir=project_specification_dir / team_name,
                save_name=f"discussion_{iteration_num + 1}",
                temperature=CREATIVE_TEMPERATURE,
                num_rounds=num_rounds,
            ) for iteration_num in range(num_iterations, total_discussions)
        ]
        concurrent.futures.wait(team_futures)
    scaled_discussion_futures.extend(team_futures)

# concurrent.futures.wait() does not re-raise worker exceptions, so a failed
# meeting would otherwise go unnoticed. The check is done only after BOTH teams
# have finished, so a failure in one team never prevents the other from running.
for future in scaled_discussion_futures:
    future.result()

In [None]:
# Load design choices
# One CSV with per-team columns; the analysis cell below reads the
# "<Team>: Antibody or Nanobody" and "<Team>: New or Existing" columns.
design_choices_path = project_specification_dir / "design_choices.csv"
design_choices = pd.read_csv(design_choices_path)

In [None]:
# Analyze statistical significance of both design decisions:
#   1. antibody vs nanobody, and
#   2. modifying existing candidates vs anything else (de novo or both).
# Each decision is summarised as a 2x2 table (rows: teams) and tested with
# Fisher's exact test.
team_index = ["Generic Team", "Scientist Team"]

# Per-team tallies of each answer
generic_protein_counts = design_choices["Generic Team: Antibody or Nanobody"].value_counts()
scientist_protein_counts = design_choices["Scientist Team: Antibody or Nanobody"].value_counts()

generic_design_counts = design_choices["Generic Team: New or Existing"].value_counts()
scientist_design_counts = design_choices["Scientist Team: New or Existing"].value_counts()

# Rows: [nanobody count, everything that is not nanobody]
protein_table = [
    [counts.get("Nanobody", 0), counts.sum() - counts.get("Nanobody", 0)]
    for counts in (generic_protein_counts, scientist_protein_counts)
]

print(pd.DataFrame(protein_table, index=team_index, columns=["Nanobody", "Antibody"]))
print()

protein_oddsratio, protein_p_value = fisher_exact(protein_table)
print("Fisher's Exact Test Results:")
print(f"Odds Ratio: {protein_oddsratio:.3f}, p-value: {protein_p_value:.3f}")
print()

# Rows: [existing count, everything that is not existing]
design_table = [
    [counts.get("Existing", 0), counts.sum() - counts.get("Existing", 0)]
    for counts in (generic_design_counts, scientist_design_counts)
]

print(pd.DataFrame(design_table, index=team_index, columns=["Existing", "Both or New"]))
print()

design_oddsratio, design_p_value = fisher_exact(design_table)
print("Fisher's Exact Test Results:")
print(f"Odds Ratio: {design_oddsratio:.3f}, p-value: {design_p_value:.3f}")

In [None]:
# Add statistical significance annotations
def annotate_significance(ax, p_value, x1, x2, y):
    """Draw a significance bracket between two x positions and label it.

    The label is '***' for p < 0.001, '**' for p < 0.01, '*' for p < 0.05 and
    'ns' (not significant) otherwise.

    Args:
        ax: Object providing ``plot`` and ``text`` (a matplotlib Axes in this notebook).
        p_value: The p-value to translate into a significance label.
        x1: x position of the left end of the bracket.
        x2: x position of the right end of the bracket.
        y: Reference height; the bracket bar sits at ``y - 2``, its ticks reach
            down to ``y - 3`` and the label is anchored at ``y - 1``.

    The label size comes from the notebook-level ``fontsize`` variable.
    """
    # Ordered from strictest to loosest; the first cutoff the p-value beats wins
    cutoffs = ((0.001, '***'), (0.01, '**'), (0.05, '*'))
    significance = next(
        (stars for cutoff, stars in cutoffs if p_value < cutoff),
        'ns',  # Not significant
    )

    bar_y = y - 2
    tick_y = y - 3
    label_y = y - 1

    # Horizontal bar of the bracket
    ax.plot([x1, x2], [bar_y, bar_y], color='black', linewidth=1)
    # Short vertical ticks at both ends
    for x_end in (x1, x2):
        ax.plot([x_end, x_end], [tick_y, bar_y], color='black', linewidth=1)
    # Significance label centred above the bar
    ax.text((x1 + x2) / 2, label_y, significance, ha='center', va='bottom', fontsize=fontsize)

In [None]:
# Plot protein choices
# Stacked bar per team: nanobody count at the bottom, antibody count on top.
fig, ax = plt.subplots(figsize=figsize)

team_labels = ["Generic Team", "Scientist Team"]
nanobody_heights = [
    counts.get("Nanobody", 0) for counts in (generic_protein_counts, scientist_protein_counts)
]
antibody_heights = [
    counts.get("Antibody", 0) for counts in (generic_protein_counts, scientist_protein_counts)
]

ax.bar(team_labels, nanobody_heights, label="Nanobody", color=colors[0])
ax.bar(team_labels, antibody_heights, bottom=nanobody_heights, label="Antibody", color=colors[2])

# Bracket between the two bars (x positions 0 and 1), placed just under the
# upper y-limit set below
annotate_significance(ax, protein_p_value, 0, 1, 55)

ax.set_ylabel("Count", fontsize=fontsize)
plt.xticks(fontsize=fontsize)
plt.yticks(fontsize=fontsize)
ax.legend(fontsize=fontsize)
ax.set_ylim(0, 58)
fig.savefig(project_specification_dir / "protein_counts.pdf", bbox_inches='tight')

In [None]:
# Plot design choices
# Stacked bar per team with three layers, bottom to top: Existing, Both, New.
fig, ax = plt.subplots(figsize=figsize)

team_labels = ["Generic Team", "Scientist Team"]
design_layers = [
    ("Existing", colors[0]),
    ("Both", colors[1]),
    ("New", colors[2]),
]

# Running height of each team's stack; each layer starts where the previous ended
stack_base = [0, 0]
for category, layer_color in design_layers:
    layer_heights = [
        generic_design_counts.get(category, 0),
        scientist_design_counts.get(category, 0),
    ]
    ax.bar(team_labels, layer_heights, bottom=stack_base, label=category, color=layer_color)
    stack_base = [base + height for base, height in zip(stack_base, layer_heights)]

# Bracket between the two bars (x positions 0 and 1), placed just under the
# upper y-limit set below
annotate_significance(ax, design_p_value, 0, 1, 55)

ax.set_ylabel("Count", fontsize=fontsize)
plt.xticks(fontsize=fontsize)
plt.yticks(fontsize=fontsize)
ax.legend(fontsize=fontsize)
ax.set_ylim(0, 58)
fig.savefig(project_specification_dir / "design_counts.pdf", bbox_inches='tight')

## Specialized vs finetuned agent

### Question and Answer

See `finetune_agents.ipynb` for the question and answer pairs and evaluation.

### Knowledge

Compare the knowledge of a specialized agent (Immunologist) and a finetuned agent (Immunologist-FT).

In [None]:
# Fine-tuned counterpart of the Immunologist: identical title, expertise, goal
# and role, so the two agents differ only in the model passed to Agent.
finetuned_immunologist_model = "ft:gpt-4o-mini-2024-07-18:personal:sars-cov-2-variants-kp-3-and-jn-1:AzeSvPAe"

immunologist_ft = Agent(
    model=finetuned_immunologist_model,
    title=immunologist.title,
    expertise=immunologist.expertise,
    goal=immunologist.goal,
    role=immunologist.role,
)

In [None]:
# Ask both agents the same knowledge question in an individual meeting
agenda = "What are some of the RBD mutations of the KP.3 and JN.1 variants of the SARS-CoV-2 spike protein?"

# (agent, name used in the saved file) pairs
agents_names = [
    (immunologist, "immunologist"),
    (immunologist_ft, "immunologist_ft"),
]

# Arguments that are identical for every agent
shared_meeting_kwargs = dict(
    meeting_type="individual",
    agenda=agenda,
    save_dir=ablations_dir,
    temperature=CONSISTENT_TEMPERATURE,
)

for agent, agent_name in agents_names:
    # Saved as knowledge_<agent_name> directly under the ablations directory
    run_meeting(
        team_member=agent,
        save_name=f"knowledge_{agent_name}",
        **shared_meeting_kwargs,
    )

## Single vs multi agent

Look at `discussions/alphafold/discussion_4.md` to compare the effect of a single agent vs a multi-agent interaction. The first round answer is the product of just a single specialized agent (Computational Biologist) while the final round answer is the product of the multi-agent interaction (Computational Biologist and Scientific Critic).

Also, see below for a human analysis of the effects of single agent vs multi-agent interactions for coding.

In [None]:
# Load the coding evaluation table; the cells below use its "Topic" and
# "# Enhancements" columns.
coding_eval_path = ablations_dir / "coding_eval.csv"
coding_eval = pd.read_csv(coding_eval_path)

In [None]:
# One-sample Wilcoxon signed-rank test on the "# Enhancements" column:
# alternative="greater" makes the test one-sided.
# The first return value of wilcoxon() is the test statistic, not an odds ratio
# (the previous name `oddsratio` was carried over from the Fisher tests above).
statistic, p_value = wilcoxon(coding_eval["# Enhancements"], alternative="greater")
print("Wilcoxon Signed-Rank Test Results:")
print(f"Statistic: {statistic:.3f}, p-value: {p_value:.3e}")

In [None]:
# Swarm plot of the number of enhancements per topic.
# seaborn is imported in the notebook's top import cell, so all dependencies
# are declared in one place and a missing package fails fast on Run All.
# Explicit fig/ax are used to match the other plotting cells in this notebook.
fig, ax = plt.subplots(figsize=figsize)

# Reference line at zero enhancements
ax.axhline(y=0, color='black', linestyle='-', linewidth=2)
sns.swarmplot(data=coding_eval, x='Topic', y='# Enhancements', size=18, ax=ax)

ax.set_xlabel("")
plt.xticks(fontsize=fontsize)
plt.yticks(fontsize=fontsize)
ax.set_ylabel("# Enhancements", fontsize=fontsize)
fig.savefig(ablations_dir / "coding_eval.pdf", bbox_inches='tight')

## Single vs parallel meetings

To see the effect of parallel meetings, compare the individual discussions (e.g., `discussion_1.md` through `discussion_5.md`) with the merged result of those parallel meetings (e.g., `merged.md`) for any of the discussions in the `discussions` directory.

## Prompt engineering

Illustrate the effect of prompt engineering on tool selection. For the answer using the original agenda and agenda questions, see `discussions/tools_selection/merged.md`.

In [None]:
# Tools selection - setup
# The merged project specification is the only prior summary handed to the
# tools selection meetings.
merged_project_specification_path = discussions_phase_to_dir["project_specification"] / "merged.json"
tools_selection_prior_summaries = load_summaries(
    discussion_paths=[merged_project_specification_path],
)
print(f"Number of prior summaries: {len(tools_selection_prior_summaries)}")

# Parent directory for the per-prompt ablation outputs
tools_selection_dir = ablations_dir / "tools_selection"

In [None]:
# Tools selection - agendas and questions
# Four prompt variants, each less specific than the previous one. Agendas and
# questions are paired by position (agenda k goes with question tuple k).
tools_selection_agendas = [
    # Variant 1: names the kind of tools, asks for 5-10 tools and their usage
    (
        f"{background_prompt} {nanobody_prompt} "
        "Now you need to select machine learning and/or computational tools to implement this nanobody design approach. "
        "Please list several tools (5-10) that would be relevant to this nanobody design approach "
        "and how they could be used in the context of this project."
    ),
    # Variant 2: no longer names the kind of tools
    (
        f"{background_prompt} {nanobody_prompt} "
        "Now you need to select tools to implement this nanobody design approach. "
        "Please list several tools (5-10) that would be relevant to this nanobody design approach "
        "and how they could be used in the context of this project."
    ),
    # Variant 3: additionally drops the requested number of tools
    (
        f"{background_prompt} {nanobody_prompt} "
        "Now you need to select tools to implement this nanobody design approach. "
        "Please list several tools that would be relevant to this nanobody design approach "
        "and how they could be used in the context of this project."
    ),
    # Variant 4: additionally drops the request to explain how each tool is used
    (
        f"{background_prompt} {nanobody_prompt} "
        "Now you need to select tools to implement this nanobody design approach. "
        "Please list several tools that would be relevant to this nanobody design approach."
    ),
]

# Follow-up question shared by the first three variants
tool_usage_question = "For each tool, how could it be used for designing modified nanobodies?"
# Unconstrained tool question shared by the last two variants
open_tools_question = "What tools could be used for this nanobody design approach?"

tools_selection_questions = [
    (
        "What machine learning and/or computational tools could be used for this nanobody design approach (list 5-10)?",
        tool_usage_question,
    ),
    (
        "What tools could be used for this nanobody design approach (list 5-10)?",
        tool_usage_question,
    ),
    (
        open_tools_question,
        tool_usage_question,
    ),
    (
        open_tools_question,
    ),
]

In [None]:
# Tools selection - ablations
# For a prompt variant: run `num_iterations` parallel team meetings, then merge
# their summaries in an individual meeting led by the principal investigator.
for i, (agenda, agenda_questions) in enumerate(zip(tools_selection_agendas, tools_selection_questions)):
    # NOTE(review): only the variant at index 3 (saved as "prompt_4") is run; every
    # other variant is skipped. Presumably the others were generated in earlier
    # runs - confirm before relying on Restart & Run All to regenerate them.
    if i != 3:
        continue

    # Create save directory
    save_dir = tools_selection_dir / f"prompt_{i + 1}"
    save_dir.mkdir(parents=True, exist_ok=True)

    # Run meetings
    with concurrent.futures.ThreadPoolExecutor() as executor:
        discussion_futures = [
            executor.submit(
                run_meeting,
                meeting_type="team",
                team_lead=principal_investigator,
                team_members=team_members,
                summaries=tools_selection_prior_summaries,
                agenda=agenda,
                agenda_questions=agenda_questions,
                save_dir=save_dir,
                save_name=f"discussion_{iteration_num + 1}",
                temperature=CREATIVE_TEMPERATURE,
                num_rounds=num_rounds,
            ) for iteration_num in range(num_iterations)
        ]
        concurrent.futures.wait(discussion_futures)

    # concurrent.futures.wait() does not re-raise worker exceptions. Surface any
    # failed meeting here, before merging, so the merge is never silently built
    # from an incomplete set of discussions.
    for future in discussion_futures:
        future.result()

    # Load summaries (sorted so their order does not depend on the filesystem)
    summaries = load_summaries(discussion_paths=sorted(save_dir.glob("discussion_*.json")))
    print(f"Number of summaries: {len(summaries)}")

    # Create merge prompt
    merge_prompt = create_merge_prompt(agenda=agenda, agenda_questions=agenda_questions)

    # Run merge meeting
    run_meeting(
        meeting_type="individual",
        team_member=principal_investigator,
        summaries=summaries,
        agenda=merge_prompt,
        save_dir=save_dir,
        save_name="merged",
        temperature=CONSISTENT_TEMPERATURE,
        num_rounds=num_rounds,
    )