In [None]:
import concurrent.futures
import json
from pathlib import Path

from virtual_lab.constants import CONSISTENT_TEMPERATURE, CREATIVE_TEMPERATURE
from virtual_lab.prompts import (
    CODING_RULES,
    REWRITE_PROMPT,
    create_merge_prompt,
)
from virtual_lab.run_meeting import run_meeting
from virtual_lab.utils import load_summaries

from knowledge_base_constants import (
    background_prompt,
    # nanobody_prompt,
    num_iterations,
    num_rounds,
    discussions_phase_to_dir,
    principal_investigator,
    team_members,
    scientific_critic,
    tech_lead,
    data_curator,
    validation_scientist,
)

## Team selection

In [2]:
# Team selection - prompts
# Agenda for the team-selection phase. It is passed as `agenda` to `run_meeting`
# in the discussion cell and wrapped by `create_merge_prompt` in the merge cell.
# The prompt starts with the shared `background_prompt`, asks for three
# `Agent(...)` definitions, and supplies one example Agent (titled
# "Principal Investigator (Alzheimer's KG)") as the output template.
# NOTE: everything between the triple quotes is part of the prompt text, so
# explanatory notes must stay out here rather than inside the string.
team_selection_agenda = f"""{background_prompt}
TASK: Define 3 distinct Agents to form the AlzKB Implementation Team.

OUTPUT FORMAT: Python `Agent()` objects ONLY. No conversational filler.
Do not include yourself.
TEMPLATE:
Agent(
    title="Principal Investigator (Alzheimer's KG)",
    expertise=(
        "Lead scientist specializing in Alzheimer's Disease (AD) data integration. "
        "Expert in constructing heterogeneous Knowledge Graphs connecting clinical phenotypes, "
        "neuroimaging features, genetic biomarkers (e.g., APOE), and tau/amyloid pathology."
    ),
    role=(
        "1. Define rigorous schemas aligning with standard ontologies (e.g., SNOMED CT, Gene Ontology). "
        "2. Direct the Tech Lead to prioritize high-confidence data sources (e.g., ADNI, AMP-AD). "
        "3. Review extraction pipelines for precision over recall to prevent hallucinated associations. "
        "4. Enforce strict validation protocols for entity resolution across multi-modal datasets."
    ),
)
"""

In [3]:
# Team selection - discussion
# Launch `num_iterations` individual meetings with the Principal Investigator
# in parallel threads. Each submission gets its own `save_name`
# (discussion_1, discussion_2, ...) in the team-selection directory.
with concurrent.futures.ThreadPoolExecutor() as executor:
    team_selection_futures = [
        executor.submit(
            run_meeting,
            meeting_type="individual",
            team_member=principal_investigator,
            agenda=team_selection_agenda,
            save_dir=discussions_phase_to_dir["team_selection"],
            save_name=f"discussion_{iteration_num + 1}",
            temperature=CREATIVE_TEMPERATURE,
        )
        for iteration_num in range(num_iterations)
    ]
    # Block until every meeting has finished (successfully or not).
    concurrent.futures.wait(team_selection_futures)

# `wait` only reports completion; an exception raised inside a worker stays
# stored on its future. Calling `result()` re-raises the first such exception
# so a failed meeting is not silently skipped before the merge step.
for team_selection_future in team_selection_futures:
    team_selection_future.result()

Rounds (+ Final Round):   0%|                                                                                                                                                                  | 0/1 [00:00<?, ?it/s]
Team:   0%|                                                                                                                                                                                    | 0/2 [00:00<?, ?it/s][A

Rounds (+ Final Round):   0%|                                                                                                                                                                  | 0/1 [00:00<?, ?it/s][A[A


Team:   0%|                                                                                                                                                                                    | 0/2 [00:13<?, ?it/s][A[A[A


Rounds (+ Final Round): 100%|████████████████████████████████████████████████████████████████████████████████████████████

Input token count: 274
Output token count: 454
Tool token count: 0
Max token length: 728
Cost: $0.00
Time: 0:16


Team:   0%|                                                                                                                                                                                    | 0/2 [00:21<?, ?it/s]
Rounds (+ Final Round): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:21<00:00, 21.23s/it]

Input token count: 274
Output token count: 428
Tool token count: 0
Max token length: 702
Cost: $0.00
Time: 0:23





In [4]:
# Team selection - merge
# Collect the saved discussion files for this phase. `Path.glob` yields entries
# in an unspecified order, so sort them to feed the summaries to the merge
# meeting in the same order on every run.
team_selection_discussion_paths = sorted(
    discussions_phase_to_dir["team_selection"].glob("discussion_*.json")
)
team_selection_summaries = load_summaries(
    discussion_paths=team_selection_discussion_paths,
)
print(f"Number of summaries: {len(team_selection_summaries)}")

# Wrap the original agenda in the merge instructions.
team_selection_merge_prompt = create_merge_prompt(agenda=team_selection_agenda)

# Single individual meeting with the Principal Investigator over all loaded
# summaries; the result is saved under the name "merged".
run_meeting(
    meeting_type="individual",
    team_member=principal_investigator,
    summaries=team_selection_summaries,
    agenda=team_selection_merge_prompt,
    save_dir=discussions_phase_to_dir["team_selection"],
    save_name="merged",
    temperature=CONSISTENT_TEMPERATURE,
)

Number of summaries: 5


Rounds (+ Final Round):   0%|                                                                                                                                                                  | 0/1 [00:00<?, ?it/s]
Team:   0%|                                                                                                                                                                                    | 0/2 [00:10<?, ?it/s][A
Rounds (+ Final Round): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:10<00:00, 10.05s/it]


Input token count: 1,978
Output token count: 521
Tool token count: 0
Max token length: 2,499
Cost: $0.01
Time: 0:12


## Project specification

In [None]:
# Project specification - prompts (Optimized)
# Agenda for the project-specification phase: the shared `background_prompt`
# followed by the task statement, the three required output sections, and the
# constraints. It is passed as `agenda` to the team meetings and is also the
# input to `create_merge_prompt` in the merge cell.
# NOTE: the triple-quoted text is the prompt itself; keep notes outside it.

project_specification_agenda = f"""{background_prompt}

TASK: Define the Technical Specification for AlzKB.
The team must agree on the architectural foundation before implementation.

OUTPUT REQUIREMENTS:
1. SCHEMA CORE: Define top 6 Entity Types and 6 Edge Types (Must be biologically specific, e.g., 'phosphorylates' not 'interacts').
2. DATA STRATEGY: Select 4 priority Data Sources (e.g., ADNI) and define the Evidence Scoring mechanism (e.g., 'Clinical vs. Preclinical').
3. SUCCESS METRICS: Define 3 quantitative KPIs for coverage and precision.

CONSTRAINTS: Be specific. No fluff. Prioritize AD-specific nuances (e.g., Tau isoforms).
"""

# Tuple of three agenda questions (schema, data ingestion, validation), one per
# output requirement above. Passed as `agenda_questions` alongside the agenda.
project_specification_questions = (
    "PROPOSE SCHEMA: List the top 6 Node Types and 6 specific Edge Types essential for AD reasoning. Justify why these support hypothesis generation.",
    "DATA INGESTION: Which 4 external databases are critical for Day 1 import? How will we handle conflicting evidence (e.g., human vs. mouse data) in the graph structure?",
    "VALIDATION: Define the 'Gold Standard'. How exactly will we measure Precision and Recall? (e.g., 'Manually curated dataset of 100 triples')."
)

In [3]:
# Project specification - discussion
# Launch `num_iterations` team meetings in parallel threads, led by the
# Principal Investigator with `team_members`, each running `num_rounds` rounds.
# Each submission gets its own `save_name` (discussion_1, discussion_2, ...)
# in the project-specification directory.
with concurrent.futures.ThreadPoolExecutor() as executor:
    project_specification_futures = [
        executor.submit(
            run_meeting,
            meeting_type="team",
            team_lead=principal_investigator,
            team_members=team_members,
            agenda=project_specification_agenda,
            agenda_questions=project_specification_questions,
            save_dir=discussions_phase_to_dir["project_specification"],
            save_name=f"discussion_{iteration_num + 1}",
            temperature=CREATIVE_TEMPERATURE,
            num_rounds=num_rounds,
        )
        for iteration_num in range(num_iterations)
    ]
    # Block until every meeting has finished (successfully or not).
    concurrent.futures.wait(project_specification_futures)

# `wait` only reports completion; an exception raised inside a worker stays
# stored on its future. Calling `result()` re-raises the first such exception
# so a failed meeting is not silently skipped before the merge step.
for project_specification_future in project_specification_futures:
    project_specification_future.result()

Rounds (+ Final Round):   0%|                                                                                                                                                                  | 0/4 [00:00<?, ?it/s]
Team:   0%|                                                                                                                                                                                    | 0/5 [00:00<?, ?it/s][A

Rounds (+ Final Round):   0%|                                                                                                                                                                  | 0/4 [00:00<?, ?it/s][A[A


Team:   0%|                                                                                                                                                                                    | 0/5 [00:00<?, ?it/s][A[A[A



Rounds (+ Final Round):   0%|                                                                                           

Input token count: 181,079
Output token count: 21,779
Tool token count: 0
Max token length: 24,101
Time: 16:53


In [4]:
# Project specification - merge
# Collect the saved discussion files for this phase. `Path.glob` yields entries
# in an unspecified order, so sort them to feed the summaries to the merge
# meeting in the same order on every run.
project_specification_discussion_paths = sorted(
    discussions_phase_to_dir["project_specification"].glob("discussion_*.json")
)
project_specification_summaries = load_summaries(
    discussion_paths=project_specification_discussion_paths,
)
print(f"Number of summaries: {len(project_specification_summaries)}")

# Wrap the original agenda and its questions in the merge instructions.
project_specification_merge_prompt = create_merge_prompt(
    agenda=project_specification_agenda,
    agenda_questions=project_specification_questions,
)

# Single individual meeting with the Principal Investigator over all loaded
# summaries, run with `num_rounds` rounds; the result is saved as "merged".
run_meeting(
    meeting_type="individual",
    team_member=principal_investigator,
    summaries=project_specification_summaries,
    agenda=project_specification_merge_prompt,
    save_dir=discussions_phase_to_dir["project_specification"],
    save_name="merged",
    temperature=CONSISTENT_TEMPERATURE,
    num_rounds=num_rounds,
)

Number of summaries: 5


Rounds (+ Final Round):   0%|                                                                                                                                                                  | 0/4 [00:00<?, ?it/s]
Team:   0%|                                                                                                                                                                                    | 0/2 [00:00<?, ?it/s][A
Team:  50%|██████████████████████████████████████████████████████████████████████████████████████                                                                                      | 1/2 [00:28<00:28, 28.05s/it][A
Team: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:56<00:00, 28.40s/it][A
Rounds (+ Final Round):  25%|██████████████████████████████████████▌                                                                   

ValueError: Run failed: failed

## Tools selection

In [None]:
# Tools selection - prompts
# The agenda is built from `background_prompt` only: `nanobody_prompt` is not
# imported into this notebook (its import line is commented out), so
# interpolating it raises NameError. The wording targets the AlzKB knowledge
# base rather than nanobody design.
# NOTE(review): confirm the reworded prompt text matches the intended task.
tools_selection_agenda = (
    f"{background_prompt} "
    "Now you need to select machine learning and/or computational tools to implement AlzKB "
    "according to the agreed project specification. "
    "Please list several tools (5-10) that would be relevant to building this knowledge base "
    "and how they could be used in the context of this project. "
    "If selecting machine learning tools, please prioritize pre-trained models "
    "(e.g., pre-trained biomedical language models or entity-linking models) for simplicity."
)

# Questions the meeting must answer, one per required part of the output.
tools_selection_questions = (
    "What machine learning and/or computational tools could be used to build AlzKB (list 5-10)?",
    "For each tool, how could it be used for constructing or validating the knowledge base?",
)

# Load the merged project-specification summary as prior context for this phase.
tools_selection_prior_summaries = load_summaries(
    discussion_paths=[discussions_phase_to_dir["project_specification"] / "merged.json"])
print(f"Number of prior summaries: {len(tools_selection_prior_summaries)}")