In [3]:
from langchain_openai import ChatOpenAI
import os
from dotenv import load_dotenv
from openai import OpenAI
import nest_asyncio
nest_asyncio.apply()
import asyncio  # Add this import
import os
from dotenv import load_dotenv
import json
from APIs.combinedapi import PubMedProcessor
%autoawait asyncio
import datetime
from IPython.display import display, HTML
from difflib import unified_diff




# Load variables from a local .env file (if present) BEFORE any client is
# built, so that OPENAI_API_KEY is already in the process environment.
load_dotenv()

# The key is read from the environment instead of being written into the
# notebook (the previous code passed the literal text "OPENAI_API_KEY" as the
# key). When the variable is unset, None is passed and key resolution is left
# to the client libraries.
openai_client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

# Single LangChain chat model instance (previously constructed twice in a row).
llm = ChatOpenAI(api_key=os.environ.get("OPENAI_API_KEY"))



In [5]:
# System message shared by the drafting, revising and merging chat-completion
# calls in the cells below (COT_agent, get_revise_answer, get_draft_tot*, ratt).
chat_prompt = "You are ChatGPT, a large language model trained by OpenAI, based on the GPT-4 architecture and you cite the papers you use in your answers using Harvard Style."

# Task prompt (the "question") passed to main() and main_ratt() further down.
# Fixes relative to the earlier text: "every third of fourth amino acid" ->
# "every third or fourth amino acid", and "##Context:" -> "## Context:" so the
# heading matches the other markdown headings in the prompt.
working_hypothesis_prompt = """ 
# Scientific Rationale for Gamma Secretase in Alzheimer's Disease


## Target Information 
### Develop a scientific rationale for the following:
                             
    **Given target:** Gamma secretase
    **Given disease:** Alzheimer's disease
    **Given mode of action:** Gamma secretase is a multi-subunit protease complex that cleaves type I transmembrane proteins, including the amyloid precursor protein (APP) leading to the generation of amyloid-beta (Aβ) peptides.

## Context:
Aβ is a family of secreted peptides generated from the sequential cleavages of the type 1 membrane protein APP by beta-secretase (BACE) and gamma-secretase (GSEC), respectively. 
BACE cleaves APP in the luminal domain, releasing the N-terminal soluble APPβ domain and leaving the C-terminal fragment, APP-CTF, which remains in the membrane. 
Subsequently, the APP-CTF is recruited to GSEC, a complex comprising four subunits, including PS, which harbors the active site. GSEC first cuts APP-CTF at the epsilon-cleavage site located close to the inner leaflet of the membrane. 
This cleavage event produces either Aβ48 or Aβ49 and the APP intracellular domain (AICD). The membrane-retained Aβ48 or Aβ49 is then further processed by GSEC in a continuous cascade of proteolytical events at every third or fourth amino acid, where the N-terminal product of each reaction becomes the substrate for the next GSEC cleavage event.
Accordingly, GSEC processes APP-CTF along two main product lines, Aβ49 → 46 → 43 → 40 → 37… and Aβ48 → 45 → 42 → 38…, respectively (Takami et al., 2009; Matsumura et al., 2014; Olsson et al., 2014). During this processing cascade, Aβ43 and shorter Aβ peptides stochastically escape further processing by GSEC and are released into the extracellular space. 
As a result, Aβ peptides varying from 30 to 43 amino acids in length are secreted into the extracellular space. Among all secreted Aβ, Aβ40 is the most abundant in human CSF, followed by Aβ38, Aβ42, and Aβ37 (Liu et al., 2022). In cognitively normal individuals, Aβ42 and Aβ43 represent a smaller portion of the total secreted Aβ (Liu et al., 2022).
These longer forms of Aβ seed the formation of Aβ-amyloid aggregates, a key step in the formation of amyloid plaques (Veugelen et al., 2016), as illustrated in Figure 1. Aβ42, which is produced in higher amounts than Aβ43, is the most abundant Aβ in amyloid plaques (Welander et al., 2009).

## Task 1: Develop Scientific Rationale

### Working Hypothesis
- Detailed description of the idea
- Unmet medical need
- Suitability for combination therapy
- Predictive biomarkers
- Clinical relevance of existing biomarkers

"""

# System prompt describing a 'scientific rationale' persona.
# NOTE(review): no call visible in this notebook passes this prompt (the
# chat-completion calls below use chat_prompt) — confirm whether it is still needed.
scientific_rationale_system_prompt = """You are 'scientific rationale', an expert in target discovery and assessment at a pharmaceutical company, you get commission when you assess and discovery a new target. 
                        You provide scientific rationales to support the selection of drug targets for a specific disease.
                        You are meticulous in your analysis and provide evidence-based assessments to guide drug discovery efforts.
                        Your role is to:
                        1. Your primary role is to develop comprehensive scientific rationales for given targets in specified diseases using available literature. """


# Instruction asking the model to merge the answers of several agents into one.
# NOTE(review): the functions below each define their own local `refine_prompt`,
# so this module-level value appears unused in the visible code — confirm.
refine_prompt ='''
Referencing the answers provided by all agents, synthesize a more detailed and comprehensive response by integrating all relevant details from these answers. Ensure logical coherence and provide ONLY THE MERGED ANSWER AS THE OUTPUT, omitting any discussion of the comparison process or analytical thoughts.
'''

In [18]:
import json
from typing import Dict, Any
import openai
import asyncio
from typing import Dict, Any


async def pubmed_paperqa(query: str, max_results: int = 10, email: str = "") -> str:
    """Search PubMed Central for ``query`` and return the processed results as JSON text.

    The same string is handed to ``PubMedProcessor.full_process`` both as the
    PubMed search query and as the document query (per the original author's
    note, the most relevant chunks are selected with paperQA).

    Args:
        query: Search expression.
        max_results: Passed through to ``full_process``. Defaults to 10, the
            value that used to be hardcoded in the body.
        email: Contact address given to ``PubMedProcessor``. When empty, the
            ``PUBMED_EMAIL`` environment variable is used, falling back to the
            address that was previously hardcoded here so existing runs behave
            the same.

    Returns:
        The dictionary returned by ``full_process`` serialised with
        ``json.dumps(..., indent=4)`` — i.e. a ``str``, not a dict (the old
        ``Dict[str, Any]`` annotation was wrong).
    """
    # Prefer an explicit argument, then the environment, then the legacy default.
    contact_email = email or os.environ.get("PUBMED_EMAIL", "sanazkazemi@hotmail.com")
    print(f"pubmed_paperqa called with query: {query}, max_results: {max_results}")

    pubmed_instance = PubMedProcessor(contact_email)
    results_dict = await pubmed_instance.full_process(query, query, max_results)

    return json.dumps(results_dict, indent=4)

# To run this in a Jupyter notebook cell:
# query = "Alzheimer's disease and gamma secretase"
# results = await pubmed_paperqa(query)
# print(results)

from difflib import unified_diff
import html

def generate_diff_html(text1, text2, fromfile='Original', tofile='Modified'):
    """Render a unified diff between two texts as a self-contained HTML fragment.

    Args:
        text1: Original text.
        text2: Modified text.
        fromfile: Label shown in the '---' header line.
        tofile: Label shown in the '+++' header line.

    Returns:
        A string containing a <style> block, a container with one element per
        diff line (HTML-escaped), and a <script> defining ``toggleDiff`` for the
        collapse/expand button. When the texts are identical the container is
        empty because ``unified_diff`` yields nothing.
    """
    diff = unified_diff(text1.splitlines(keepends=True),
                        text2.splitlines(keepends=True),
                        fromfile=fromfile, tofile=tofile, n=3)
    
    html_output = ['''
    <style>
        .diff-container {
            font-family: monospace;
            white-space: pre-wrap;
            word-wrap: break-word;
            background-color: #f8f9fa;
            border: 1px solid #dee2e6;
            border-radius: 4px;
            padding: 10px;
            margin-bottom: 20px;
        }
        .diff-header {
            color: #6c757d;
            margin-bottom: 10px;
        }
        .diff-add {
            background-color: #e6ffec;
            color: #24292e;
        }
        .diff-sub {
            background-color: #ffebe9;
            color: #24292e;
        }
        .diff-line {
            display: block;
            margin-bottom: 0;
            padding: 2px 0;
        }
        .collapse-button {
            background-color: #007bff;
            color: white;
            border: none;
            padding: 5px 10px;
            margin-bottom: 10px;
            cursor: pointer;
            border-radius: 4px;
        }
        .hidden {
            display: none;
        }
    </style>
    <div class="diff-container">
    <button class="collapse-button" onclick="toggleDiff(this)">Collapse/Expand Diff</button>
    <div class="diff-content">
    ''']
    
    # unified_diff emits exactly two file-header lines ('--- ...', '+++ ...')
    # before the first hunk. Identify them by position rather than by prefix:
    # a removed line whose text starts with "--" is rendered as "---..." (and
    # an added "++..." line as "+++..."), which a prefix test would wrongly
    # style as a header.
    for position, line in enumerate(diff):
        escaped = html.escape(line)
        if position < 2:
            html_output.append(f'<div class="diff-header">{escaped}</div>')
        elif line.startswith('+'):
            html_output.append(f'<span class="diff-line diff-add">{escaped}</span>')
        elif line.startswith('-'):
            html_output.append(f'<span class="diff-line diff-sub">{escaped}</span>')
        else:
            # Context lines and '@@' hunk markers share the neutral style.
            html_output.append(f'<span class="diff-line">{escaped}</span>')
    
    html_output.append('''
    </div>
    </div>
    <script>
    function toggleDiff(button) {
        var content = button.nextElementSibling;
        if (content.style.display === "none") {
            content.style.display = "block";
            button.textContent = "Collapse Diff";
        } else {
            content.style.display = "none";
            button.textContent = "Expand Diff";
        }
    }
    </script>
    ''')
    
    return ''.join(html_output)




def pick_best_query(queries: list, question: str, answer: str) -> str:
    """Ask the model to choose the most relevant query from ``queries``.

    Args:
        queries: Candidate search queries (strings).
        question: The original question / task prompt.
        answer: The answer text the query should help verify.

    Returns:
        The text content of the model's first completion choice.
    """

    gpt_prompt ="""
    for the given question and answer, pick the best query 
    from the list of queries that you think is most relevant especially to the last few sentences of the answer.
    ## IMPORTANT:
    Just return the best query. Do not add any additional information.
    """

    # One candidate per line. The previous f-string interpolated `[queries]`,
    # i.e. the repr of a list nested inside a list, which showed the model
    # Python quoting/escaping instead of the plain query text.
    query_lines = "\n".join(str(candidate) for candidate in queries)

    best_query = openai.chat.completions.create(

     model = "gpt-4o-mini",
        messages=[
            {
                "role": "system",
                "content": "you are a scientific researcher, you are tasked with finding the best query to search for scientific papers on PubMed."            
        },
            {
                "role": "user",
                # The answer section used to be labelled "###Question:" as well.
                "content": f"##Instruction:{gpt_prompt}\n\n###Question: {question}\n\n###Answer: {answer}\n\n##Queries:\n{query_lines}\n\n"
            }
        ],
        temperature=1 # here you can adjust the temperature to get more or less creative search terms
    ).choices[0].message.content

    return best_query


def get_query(question, answer, num_queries) -> str:
    """Generate ``num_queries`` candidate PubMed queries and return the best one.

    Each candidate comes from a separate chat-completion call; a failed call is
    logged and skipped. The surviving candidates are passed to
    ``pick_best_query``.

    Args:
        question: The original question / task prompt.
        answer: The answer text to be verified (the prompt stresses its last sentences).
        num_queries: Number of candidate queries to request.

    Returns:
        The query chosen by ``pick_best_query``.

    Raises:
        RuntimeError: If no candidate query could be generated.
    """
    query_prompt = """ You are a scientific researcher, 
                        you are tasked with finding the best query to search 
                        for scientific papers on PubMed.
                                                    
                            I want to verify the content correctness of the given answer especially the last few sentences.
                            Please summarize the content with the corresponding question.
                            This summarization will be used as a query to search PubMed.
                            The query should be short but needs to be specific to ensure PubMed can find related papers.
                            You can also use search syntax to make the query short and clear enough for the search engine to find relevant language data.
                            Try to make the query as relevant as possible to the last few sentences of the answer provided.
                            **IMPORTANT**
                            Just output the query directly. DO NOT add additional explanations or introducement in the answer unless you are asked to.

                        The following worked very well for me in the past in terms of generating the highest number of results use it as a guide:
                        ###Example:
                        "{Target}" AND "{Disease}" AND ("{relevant_keyword}" OR "{relevant_keyword_1}" OR "{relevant_keyword_n}") and so on.
                        ##IMPORTANT:
                        Just provide the query. Do not add any additional information.
                        DO NOT copy the given example"""
    

    queries = []

    for i in range(num_queries):
        try:
            query = openai.chat.completions.create(
                model="gpt-4o-mini",
                messages=[
                    {
                        "role": "system",
                        "content": "You are ChatGPT, a large language model trained by OpenAI, based on the GPT-4 architecture."

                    },
                    {
                        "role": "user",
                        "content": f"##Question: {question}\n\n##Content: {answer}\n\n##Instruction: {query_prompt}"
                    }
                ],
                temperature=1
            ).choices[0].message.content
        except Exception as e:
            # Best effort: report the failure and move on. The append below is
            # skipped, so a failed call can no longer raise NameError (first
            # iteration) or re-append the previous iteration's query.
            print(f"error {e}")
            continue

        print(f"query {i}: {query}")
        queries.append(query)

    if not queries:
        raise RuntimeError("get_query: no search query could be generated")

    # Arguments follow pick_best_query(queries, question, answer); the earlier
    # call passed answer and question in swapped positions.
    best_query = pick_best_query(queries, question, answer)
    print(f"best query: {best_query}")

    return best_query




async def main(question: str, answer, num_queries: int):
    """Build a search query for the question/answer pair and run the PubMed lookup.

    Returns whatever ``pubmed_paperqa`` returns for the cleaned query.
    """
    raw_query = get_query(question, answer, num_queries)

    # Strip one pair of enclosing single quotes when both are present
    # (not elegant, but the lookup does not work otherwise).
    is_quoted = raw_query.startswith("'") and raw_query.endswith("'")
    unwrapped = raw_query[1:-1] if is_quoted else raw_query

    # Turn backslash-escaped single quotes back into plain ones.
    cleaned_query = unwrapped.replace("\\'", "'")
    print(f"Cleaned search query: {cleaned_query}")

    results = await pubmed_paperqa(cleaned_query)
    print(results)  # final output of the lookup

    return results


In [7]:
# Sample answer text used by the next cell to exercise main() end to end.
answer = """ notch signalling is relevant to GSEC development and drug discovery in Alzheimer's disease."""


In [None]:
# Runs query generation and the PubMed lookup for the sample answer. The bare
# top-level `await` relies on the %autoawait magic enabled in the first cell.
# NOTE(review): the saved output of this cell ends in CancelledError, so the
# recorded run did not complete.
results = await main(answer=answer, question = working_hypothesis_prompt, num_queries=5)

query 0: "Gamma secretase" AND "Alzheimer's disease" AND ("Notch signaling" OR "drug discovery")
query 1: "Gamma secretase" AND "Alzheimer's disease" AND ("amyloid-beta" OR "APP cleavage" OR "Aβ peptides")
query 2: "Gamma secretase" AND "Alzheimer's disease" AND ("Notch signaling" OR "Amyloid-beta" OR "APP cleavage" OR "PS subunit" OR "Aβ peptides")
query 3: "Gamma secretase" AND "Alzheimer's disease" AND ("APP cleavage" OR "amyloid-beta" OR "GSEC processing" OR "Aβ42 plaques")
query 4: "Gamma secretase" AND "Alzheimer's disease" AND ("Notch signaling" OR "APP cleavage" OR "Aβ peptides" OR "amyloid plaques")
best query: '"Gamma secretase" AND "Alzheimer\'s disease" AND ("Notch signaling" OR "APP cleavage" OR "Aβ peptides" OR "amyloid plaques")'
Cleaned search query: "Gamma secretase" AND "Alzheimer's disease" AND ("Notch signaling" OR "APP cleavage" OR "Aβ peptides" OR "amyloid plaques")
pubmed_paperqa called with query: "Gamma secretase" AND "Alzheimer's disease" AND ("Notch signaling

CancelledError: 

In [19]:
# Run configuration read as module-level globals.
# NOTE(review): num_agents is not read by the functions visible in this
# notebook — ratt()/main_ratt() receive num_agents as an argument instead.
num_agents = 1
# ratt() copies this into step_num and loops over range(1, step_num), so a
# value of 1 means only the initial draft step runs.
num_steps = 1
# ratt() performs the final "merge all steps" completion only when this equals
# 'combine_each_step'; otherwise it returns the last step's answer.
final_output_mode = 'combine_each_step'


In [11]:
import openai

def COT_agent(question):
    """Produce one chain-of-thought answer, used as a baseline for comparison to RATT.

    question: str: the prompt to answer; a fixed step-by-step drafting
        instruction is appended to it before it is sent as the user message.

    Returns the text content of the first completion choice. The system
    message is the module-level ``chat_prompt``.
    """

    step_by_step_instruction = '''
IMPORTANT:
Try to answer this question/instruction with step-by-step thoughts and make the answer more structured.
Use `\n\n` to split the answer into several paragraphs.
Just respond to the instruction directly. DO NOT add additional explanations or introducement in the answer unless you are asked to.
'''

    conversation = [
        # should this be scientific rationale prompt or something less specific?
        {"role": "system", "content": chat_prompt},
        {"role": "user", "content": question + step_by_step_instruction},
    ]

    # Single request; only the first choice is used.
    response = openai.chat.completions.create(
        model="gpt-4o-mini",
        messages=conversation,
        temperature=0.5,
    )
    first_choice = response.choices[0]

    return first_choice.message.content


In [15]:
from datetime import datetime


def split_draft(draft, split_char='\n\n'):
    """Split a draft into its non-blank paragraphs.

    Args:
        draft: Text to split.
        split_char: Paragraph separator (default: a blank line, '\\n\\n').

    Returns:
        The pieces of ``draft`` in order, with pieces that are empty or
        whitespace-only removed. Kept pieces are returned unmodified.

    Blank pieces appear whenever the draft has runs of more than two newlines
    or a trailing separator. Downstream, RAG() runs one retrieval + revision
    round per paragraph, so blank pieces only cost extra API calls.
    """
    return [piece for piece in draft.split(split_char) if piece.strip()]


def get_revise_answer(question, answer, retrieved_data):
    """Ask the model to revise ``answer`` using the retrieved literature.

    Args:
        question: The original question / task prompt.
        answer: The current answer text to check and revise.
        retrieved_data: Retrieved article content; it is interpolated into the
            user message under the "Pubmed central retrieved articles" heading.

    Returns:
        The text content of the model's first completion choice.
    """
    # The instruction now names PubMed Central as the evidence source, matching
    # the heading used in the user message (it previously said "WIKI pages").
    revise_prompt = '''
I want to revise the answer according to the retrieved PubMed Central articles related to the question.
You need to check whether the answer is correct.
If you find some errors in the answer, revise the answer to make it better.
If you find some necessary details are ignored, add it to make the answer more plausible according to the related text.
If you find that a part of the answer is correct and does not require any additional details, maintain that part of the answer unchanged. Directly output the original content of that part without any modifications.
**IMPORTANT**
Try to keep the structure (multiple paragraphs with its subtitles) in the revised answer and make it more structured for understanding.
Split the paragraphs with `\n\n` characters.
Just output the revised answer directly. DO NOT add additional explanations or announcement in the revised answer unless you are asked to.
'''
    revised_answer = openai.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {
                "role": "system",
                "content": chat_prompt
            },
            {
                "role": "user",
                "content": f"##Pubmed central retrieved articles: {retrieved_data}\n\n##Question: {question}\n\n##previous Answer: {answer}\n\n##Instruction: {revise_prompt}"
            }
        ],
        temperature=0.5
    ).choices[0].message.content
    
    
    return revised_answer



async def RAG(question, draft_paragraphs):
    """Grow an answer paragraph by paragraph, revising it against retrieved papers.

    args:
    question: str: the prompt to answer
    draft_paragraphs: list: the paragraphs of a draft, processed in order

    For every paragraph the text accumulated so far (not just the new
    paragraph) is used both to retrieve literature via ``main`` and as the
    input to ``get_revise_answer``. Returns the accumulated answer after the
    last paragraph.
    """
    answer = ""

    for position, paragraph in enumerate(draft_paragraphs, start=1):
        answer = answer + '\n\n' + paragraph

        # Retrieval and revision both see the whole accumulated answer.
        retrieved = await main(question, answer, num_queries=2)
        candidate = get_revise_answer(question, answer, retrieved)

        if candidate != answer:
            # Show what the revision changed before adopting it.
            display(HTML(generate_diff_html(answer, candidate)))
            answer = candidate

        print(f"Completed iteration {position}/{len(draft_paragraphs)}")

        separator = '+' * 80
        print(separator + '\n\n')
        print(f"RESULT OF PUBMED API:\n{answer}")

    return answer




async def get_draft_tot_initial(question: str, num_agents: int):
    """Generate initial drafts from ``num_agents`` agents and merge them into one answer.

    For each agent: request a step-by-step draft, split it into paragraphs and
    pass them through ``RAG``. After all agents have finished, one further
    completion merges the agents' revised drafts.

    Args:
        question: The prompt to answer.
        num_agents: Number of independent drafts to generate (must be >= 1).

    Returns:
        The merged answer text (content of the first completion choice).

    Raises:
        ValueError: If ``num_agents`` is less than 1. Previously this case ended
            in an UnboundLocalError at the return statement.
    """
    if num_agents < 1:
        raise ValueError("num_agents must be at least 1")

    draft_prompt = """
            IMPORTANT:
            Try to answer this question/instruction with step-by-step thoughts and make the answer more structured.
            Use `\n\n` to split the answer into several paragraphs.
            Just respond to the instruction directly. DO NOT add additional explanations or introducement in the answer unless you are asked to.
            """

    refine_prompt = """
            Referencing the answers provided by all agents, synthesize a more detailed and comprehensive response by integrating all relevant details from these answers. 
            Ensure logical coherence and provide ONLY THE MERGED ANSWER AS THE OUTPUT, omitting any discussion of the comparison process or analytical thoughts."""

    agent_drafts = []
    for i in range(num_agents):
        draft = openai.chat.completions.create(
            model="gpt-4o",
            messages=[
                {
                    "role": "system",
                    "content": chat_prompt
                },
                {
                    "role": "user",
                    "content": question + draft_prompt
                }
            ],
            temperature=0.5
        ).choices[0].message.content
        print(f"####################draft {i}: {draft}########################################")

        print("Processing draft...")
        draft_paragraphs = split_draft(draft)

        draft_modified = await RAG(question, draft_paragraphs)

        agent_drafts.append(f"Agent{i+1}: {draft_modified}")

        print(f"[INFO] Agent{i + 1}/{num_agents} retrieved draft...")

    # Merge once, after every agent has contributed. This step used to sit
    # inside the loop, issuing a merge request per agent and discarding all but
    # the last result.
    agent_input = '\n\n'.join(agent_drafts) + '\n\n' + refine_prompt

    final_draft = openai.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {
                "role": "system",
                "content": chat_prompt
            },
            {
                "role": "user",
                "content": agent_input
            }
        ],
        temperature=0.5
    ).choices[0].message.content

    print(f"{datetime.now()} - Final draft: {final_draft}")

    return final_draft

async def get_draft_tot(question, previous_answer, num_agents):
    """Expand ``previous_answer`` with ``num_agents`` new drafts and fold them back in.

    Steps:
      1. Each agent is asked to expand the previous answer for the question;
         its draft is split into paragraphs and passed through ``RAG``.
      2. The agents' revised drafts are merged by one completion.
      3. A final completion revises ``previous_answer`` using the merged draft
         as supplementary material.

    Args:
        question: The prompt to answer.
        previous_answer: The answer produced by the previous step.
        num_agents: Number of independent drafts to generate (must be >= 1).

    Returns:
        The revised answer text (content of the first completion choice).

    Raises:
        ValueError: If ``num_agents`` is less than 1.
    """
    if num_agents < 1:
        raise ValueError("num_agents must be at least 1")

    # f-string: the question and previous answer are interpolated here. The
    # earlier plain string sent the literal text "{question}" and
    # "{previous_answer}" to the model, so neither was ever used.
    draft_prompt = f""" Base your response on the provided question and the previous answer. Expand the answer by adding more details to enhance its comprehensiveness. Ensure that the expansion maintains logical coherence and enriches the details, making the response more thorough and well-structured.
        Question: {question}
        Previous Answer: {previous_answer}
        IMPORTANT:
        Answer the full question with step-by-step thoughts and make the answer more structural.
        Use `\n\n` to split the answer into several paragraphs.
        Just respond to the instruction directly. DO NOT add additional explanations or introducement in the answer unless you are asked to. """

    refine_prompt = """ 
        Referencing the answers provided by all agents, synthesize a more detailed and comprehensive response by integrating all relevant details from these answers. Ensure logical coherence and provide ONLY THE MERGED ANSWER AS THE OUTPUT, omitting any discussion of the comparison process or analytical thoughts.
"""

    agents_drafts = []
    for i in range(num_agents):
        draft = openai.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {
                    "role": "system",
                    "content": chat_prompt
                },
                {
                    "role": "user",
                    "content": draft_prompt
                }
            ],
            temperature=0.5
        ).choices[0].message.content

        draft_paragraphs = split_draft(draft)

        draft_modified = await RAG(question, draft_paragraphs)

        agents_drafts.append(f"Agent{i+1}: {draft_modified}")
    
    agents_input = '\n\n'.join(agents_drafts) + '\n\n' + refine_prompt

    final_draft_raw = openai.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {
                "role": "system",
                "content": chat_prompt
            },
            {
                "role": "user",
                "content": agents_input
            }
        ],
        temperature=0.5
    ).choices[0].message.content

    print(f"##########Final draft raw #########################: {final_draft_raw}...")

    # Built after final_draft_raw exists so both answers can be interpolated
    # (also an f-string now; see the note on draft_prompt above).
    revise_prompt = f"""
            Based on the original answer and an additional supplementary answer, generate a response that is richer in detail and logically coherent. Review the original answer:
        1. If any part of the answer is correct and requires no further details, retain that portion unchanged and output it directly as it is.
        2. For parts that may be improved or lack necessary details, enhance them by integrating information from the supplementary answer to make the response more comprehensive and accurate.
        3. If you identify any errors within the answers, correct these errors while ensuring that the revised content remains logically coherent.
        Original Answer: {previous_answer}
        Supplementary Answer: {final_draft_raw}

        **IMPORTANT**
        Ensure the revised answer maintains a structured format (multiple paragraphs with subtitles) for better clarity. 
        Separate the paragraphs with `\n\n` characters. Output only the enhanced answer directly, without any extra explanations or announcements unless specifically requested."""
    
    final_draft = openai.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {
                "role": "system",
                "content": chat_prompt
            },
            {
                "role": "user",
                "content": revise_prompt
            }
        ],
        temperature=0.5
    ).choices[0].message.content

    return final_draft





In [20]:
from datetime import datetime


async def ratt(question, num_agents):
    """Run the multi-step draft -> retrieve -> revise pipeline plus a CoT baseline.

    Reads two module-level settings: ``num_steps`` (how many steps to run) and
    ``final_output_mode`` (whether the per-step answers are merged at the end).

    Returns a tuple ``(draft_cot, answer)`` where ``draft_cot`` is the
    ``COT_agent`` baseline and ``answer`` is either the merged answer (mode
    'combine_each_step') or the answer of the last step.
    """
    total_steps = num_steps

    def _show_draft(text):
        # Print a draft between the banner lines used in the run log.
        print("##################### DRAFT #######################")
        print(text)
        print("#####################  END  #######################")

    # Step 1: merged initial draft from all agents.
    print(f"{datetime.now()} [INFO] Retrieving Step 1 draft...")
    draft = await get_draft_tot_initial(question, num_agents)
    print(f"{datetime.now()} [INFO] Step 1 draft returned")
    _show_draft(draft)

    print(f"{datetime.now()} [INFO] Processing draft...")
    draft_paragraphs = split_draft(draft)
    print(f"{datetime.now()} [INFO] Draft split into {len(draft_paragraphs)} parts")

    previous_answer = await RAG(question, draft_paragraphs)
    each_step_drafts = [f"Step 1 \n: {previous_answer}"]

    # Steps 2..total_steps: each one expands the answer of the step before it.
    for step in range(2, total_steps + 1):
        print(f"{datetime.now()} [INFO] Retrieving Step {step} draft...")
        draft = await get_draft_tot(question, previous_answer, num_agents=num_agents)
        print(f"{datetime.now()} [INFO] Step {step} draft returned")
        _show_draft(draft)

        print(f"{datetime.now()} [INFO] Processing draft...")
        draft_paragraphs = split_draft(draft)
        print(f"{datetime.now()} [INFO] Draft split into {len(draft_paragraphs)} parts")

        # This step's revised answer becomes the input of the next step.
        previous_answer = await RAG(question, draft_paragraphs)
        each_step_drafts.append(f"Step {step} \n: {previous_answer}")

    draft_cot = COT_agent(question) # for comparison

    if final_output_mode == 'combine_each_step':
        refine_prompt = """
                        Referencing the answers provided by each step, synthesize a more detailed and comprehensive response by integrating all relevant details from these answers. Ensure logical coherence and provide ONLY THE MERGED ANSWER AS THE OUTPUT, omitting any discussion of the comparison process or analytical thoughts.
                        """
        merge_request = '\n\n'.join(each_step_drafts) + '\n\n' + refine_prompt
        merged = openai.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": chat_prompt},
                {"role": "user", "content": merge_request},
            ],
            temperature=1.0,
        )
        previous_answer = merged.choices[0].message.content

    return draft_cot, previous_answer



In [21]:
import weave
 

# answer_cot, answer_ratt = ratt(chatgpt_prompt, num_agents=1)

@weave.op()
async def main_ratt(question, num_agents):
    """Weave-traced entry point: await ``ratt`` and hand back its two results.

    Args:
        question: Prompt forwarded unchanged to ``ratt``.
        num_agents: Agent count forwarded unchanged to ``ratt``.

    Returns:
        A ``(cot_result, ratt_result)`` tuple, unpacked from what ``ratt``
        returns and re-packed in the same order.
    """
    cot_result, ratt_result = await ratt(question, num_agents)
    return cot_result, ratt_result

# Start the Weave project *before* invoking the op: calls made to a
# @weave.op function prior to weave.init() are executed but not recorded,
# so initialising afterwards would leave this run untraced.
weave.init('RATT')

# nest_asyncio.apply() (first cell) lets asyncio.run be used from inside the
# notebook's already-running event loop.
answer_cot, answer_ratt = asyncio.run(main_ratt(working_hypothesis_prompt, num_agents=1))

2024-08-06 23:23:40.115473 [INFO] Retrieving Step 1 draft...
####################draft 0: ## Scientific Rationale for Gamma Secretase in Alzheimer's Disease

### Working Hypothesis

Gamma secretase inhibition can reduce the production of amyloid-beta (Aβ) peptides, particularly Aβ42, which is implicated in the formation of amyloid plaques in Alzheimer's disease (AD). By targeting gamma secretase, it is possible to decrease the aggregation and deposition of Aβ42, potentially slowing the progression of AD.

### Detailed Description of the Idea

Gamma secretase is a key enzyme in the production of Aβ peptides from the amyloid precursor protein (APP). The enzyme cleaves APP to produce various Aβ peptides, including Aβ42, which is highly prone to aggregation and is a major component of amyloid plaques found in AD patients. Inhibiting gamma secretase could reduce the levels of Aβ42, thereby decreasing plaque formation and neurotoxicity associated with AD.

### Unmet Medical Need

Alzheimer's

Completed iteration 1/16
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


RESULT OF PUBMED API:
## Scientific Rationale for Gamma Secretase in Alzheimer's Disease

### Working Hypothesis
The gamma-secretase (GSEC) complex plays a pivotal role in the pathogenesis of Alzheimer's disease (AD) through its function in the proteolytic processing of the amyloid precursor protein (APP). GSEC cleaves APP to generate amyloid-beta (Aβ) peptides, particularly Aβ42 and Aβ43, which are critical in the formation of amyloid plaques, a hallmark of AD. The accumulation of these longer Aβ peptides in the brain is believed to trigger neurotoxic cascades leading to synaptic dysfunction and neuronal death. This highlights the importance of GSEC as a therapeutic target in AD, aiming to modulate its activity to reduce the production of pathogenic Aβ species.

 

### Unmet Medical Need
Currently, there is no cure for Alzheimer's disease, and existing treatments primarily focus

Completed iteration 2/16
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


RESULT OF PUBMED API:
### Working Hypothesis
The gamma-secretase (GSEC) complex plays a pivotal role in the pathogenesis of Alzheimer's disease (AD) through its function in the proteolytic processing of the amyloid precursor protein (APP). GSEC cleaves APP to generate amyloid-beta (Aβ) peptides, particularly Aβ42 and Aβ43, which are critical in the formation of amyloid plaques, a hallmark of AD. The accumulation of these longer Aβ peptides in the brain is believed to trigger neurotoxic cascades leading to synaptic dysfunction and neuronal death. This highlights the importance of GSEC as a therapeutic target in AD, aiming to modulate its activity to reduce the production of pathogenic Aβ species.

### Unmet Medical Need
Currently, there is no cure for Alzheimer's disease, and existing treatments primarily focus on symptomatic relief rather than addressing the underlying disease me

Completed iteration 3/16
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


RESULT OF PUBMED API:
### Working Hypothesis
The gamma-secretase (GSEC) complex plays a pivotal role in the pathogenesis of Alzheimer's disease (AD) through its function in the proteolytic processing of the amyloid precursor protein (APP). GSEC cleaves APP to generate amyloid-beta (Aβ) peptides, particularly Aβ42 and Aβ43, which are critical in the formation of amyloid plaques, a hallmark of AD. The accumulation of these longer Aβ peptides in the brain is believed to trigger neurotoxic cascades leading to synaptic dysfunction and neuronal death. This highlights the importance of GSEC as a therapeutic target in AD, aiming to modulate its activity to reduce the production of pathogenic Aβ species.

### Unmet Medical Need
Currently, there is no cure for Alzheimer's disease, and existing treatments primarily focus on symptomatic relief rather than addressing the underlying disease me

Completed iteration 4/16
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


RESULT OF PUBMED API:
### Working Hypothesis
The gamma-secretase (GSEC) complex plays a pivotal role in the pathogenesis of Alzheimer's disease (AD) through its function in the proteolytic processing of the amyloid precursor protein (APP). GSEC cleaves APP to generate amyloid-beta (Aβ) peptides, particularly Aβ42 and Aβ43, which are critical in the formation of amyloid plaques, a hallmark of AD. The accumulation of these longer Aβ peptides in the brain is believed to trigger neurotoxic cascades leading to synaptic dysfunction and neuronal death. This highlights the importance of GSEC as a therapeutic target in AD, aiming to modulate its activity to reduce the production of pathogenic Aβ species.

### Unmet Medical Need
Currently, there is no cure for Alzheimer's disease, and existing treatments primarily focus on symptomatic relief rather than addressing the underlying disease me

Completed iteration 5/16
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


RESULT OF PUBMED API:
### Working Hypothesis
The gamma-secretase (GSEC) complex plays a pivotal role in the pathogenesis of Alzheimer's disease (AD) through its function in the proteolytic processing of the amyloid precursor protein (APP). GSEC cleaves APP to generate amyloid-beta (Aβ) peptides, particularly Aβ42 and Aβ43, which are critical in the formation of amyloid plaques, a hallmark of AD. The accumulation of these longer Aβ peptides in the brain is believed to trigger neurotoxic cascades leading to synaptic dysfunction and neuronal death. This highlights the importance of GSEC as a therapeutic target in AD, aiming to modulate its activity to reduce the production of pathogenic Aβ species.

### Unmet Medical Need
Currently, there is no cure for Alzheimer's disease, and existing treatments primarily focus on symptomatic relief rather than addressing the underlying disease me

Completed iteration 6/16
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


RESULT OF PUBMED API:
### Working Hypothesis
The gamma-secretase (GSEC) complex plays a pivotal role in the pathogenesis of Alzheimer's disease (AD) through its function in the proteolytic processing of the amyloid precursor protein (APP). GSEC cleaves APP to generate amyloid-beta (Aβ) peptides, particularly Aβ42 and Aβ43, which are critical in the formation of amyloid plaques, a hallmark of AD. The accumulation of these longer Aβ peptides in the brain is believed to trigger neurotoxic cascades leading to synaptic dysfunction and neuronal death. This highlights the importance of GSEC as a therapeutic target in AD, aiming to modulate its activity to reduce the production of pathogenic Aβ species.

### Unmet Medical Need
Currently, there is no cure for Alzheimer's disease, and existing treatments primarily focus on symptomatic relief rather than addressing the underlying disease me

Completed iteration 7/16
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


RESULT OF PUBMED API:
### Working Hypothesis
The gamma-secretase (GSEC) complex plays a pivotal role in the pathogenesis of Alzheimer's disease (AD) through its function in the proteolytic processing of the amyloid precursor protein (APP). GSEC cleaves APP to generate amyloid-beta (Aβ) peptides, particularly Aβ40 and Aβ42, which are critical in the formation of amyloid plaques, a hallmark of AD. The accumulation of these longer Aβ peptides in the brain is believed to trigger neurotoxic cascades leading to synaptic dysfunction and neuronal death. This highlights the importance of GSEC as a therapeutic target in AD, aiming to modulate its activity to reduce the production of pathogenic Aβ species.

### Unmet Medical Need
Currently, there is no cure for Alzheimer's disease, and existing treatments primarily focus on symptomatic relief rather than addressing the underlying disease me

Completed iteration 8/16
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


RESULT OF PUBMED API:
### Working Hypothesis
The gamma-secretase (GSEC) complex plays a pivotal role in the pathogenesis of Alzheimer's disease (AD) through its function in the proteolytic processing of the amyloid precursor protein (APP). GSEC cleaves APP to generate amyloid-beta (Aβ) peptides, particularly Aβ40 and Aβ42, which are critical in the formation of amyloid plaques, a hallmark of AD. The accumulation of these longer Aβ peptides in the brain is believed to trigger neurotoxic cascades leading to synaptic dysfunction and neuronal death. This highlights the importance of GSEC as a therapeutic target in AD, aiming to modulate its activity to reduce the production of pathogenic Aβ species.

### Unmet Medical Need
Currently, there is no cure for Alzheimer's disease, and existing treatments primarily focus on symptomatic relief rather than addressing the underlying disease me

Completed iteration 10/16
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


RESULT OF PUBMED API:
### Working Hypothesis
The gamma-secretase (GSEC) complex plays a pivotal role in the pathogenesis of Alzheimer's disease (AD) through its function in the proteolytic processing of the amyloid precursor protein (APP). GSEC cleaves APP to generate amyloid-beta (Aβ) peptides, particularly Aβ40 and Aβ42, which are critical in the formation of amyloid plaques, a hallmark of AD. The accumulation of these longer Aβ peptides in the brain is believed to trigger neurotoxic cascades leading to synaptic dysfunction and neuronal death. This highlights the importance of GSEC as a therapeutic target in AD, aiming to modulate its activity to reduce the production of pathogenic Aβ species.

### Unmet Medical Need
Currently, there is no cure for Alzheimer's disease, and existing treatments primarily focus on symptomatic relief rather than addressing the underlying disease m

Completed iteration 11/16
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


RESULT OF PUBMED API:
### Working Hypothesis
The gamma-secretase (GSEC) complex plays a pivotal role in the pathogenesis of Alzheimer's disease (AD) through its function in the proteolytic processing of the amyloid precursor protein (APP). GSEC cleaves APP to generate amyloid-beta (Aβ) peptides, particularly Aβ40 and Aβ42, which are critical in the formation of amyloid plaques, a hallmark of AD. The accumulation of these longer Aβ peptides in the brain is believed to trigger neurotoxic cascades leading to synaptic dysfunction and neuronal death. This highlights the importance of GSEC as a therapeutic target in AD, aiming to modulate its activity to reduce the production of pathogenic Aβ species.

### Unmet Medical Need
Currently, there is no cure for Alzheimer's disease, and existing treatments primarily focus on symptomatic relief rather than addressing the underlying disease m

Completed iteration 12/16
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


RESULT OF PUBMED API:
### Working Hypothesis
The gamma-secretase (GSEC) complex plays a pivotal role in the pathogenesis of Alzheimer's disease (AD) through its function in the proteolytic processing of the amyloid precursor protein (APP). GSEC cleaves APP to generate amyloid-beta (Aβ) peptides, particularly Aβ40 and Aβ42, which are critical in the formation of amyloid plaques, a hallmark of AD. The accumulation of these longer Aβ peptides in the brain is believed to trigger neurotoxic cascades leading to synaptic dysfunction and neuronal death. This highlights the importance of GSEC as a therapeutic target in AD, aiming to modulate its activity to reduce the production of pathogenic Aβ species.

### Unmet Medical Need
Currently, there is no cure for Alzheimer's disease, and existing treatments primarily focus on symptomatic relief rather than addressing the underlying disease m

Completed iteration 13/16
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


RESULT OF PUBMED API:
### Working Hypothesis
The gamma-secretase (GSEC) complex plays a pivotal role in the pathogenesis of Alzheimer's disease (AD) through its function in the proteolytic processing of the amyloid precursor protein (APP). GSEC cleaves APP to generate amyloid-beta (Aβ) peptides, particularly Aβ40 and Aβ42, which are critical in the formation of amyloid plaques, a hallmark of AD. The accumulation of these longer Aβ peptides in the brain is believed to trigger neurotoxic cascades leading to synaptic dysfunction and neuronal death. This highlights the importance of GSEC as a therapeutic target in AD, aiming to modulate its activity to reduce the production of pathogenic Aβ species.

### Unmet Medical Need
Currently, there is no cure for Alzheimer's disease, and existing treatments primarily focus on symptomatic relief rather than addressing the underlying disease m

In [None]:
# First, make sure you have all the necessary imports and have defined all the functions from your original code

# Then, create test cells for each function:

# Test split_draft
def test_split_draft():
    test_draft = "Paragraph 1\n\nParagraph 2\n\nParagraph 3"
    result = split_draft(test_draft)
    assert len(result) == 3, f"Expected 3 paragraphs, got {len(result)}"
    print("split_draft test passed")

test_split_draft()

# Test get_revise_answer
def test_get_revise_answer():
    question = "What is the capital of France?"
    answer = "The capital of France is London."
    retrieved_data = "The capital of France is Paris."
    result = get_revise_answer(question, answer, retrieved_data)
    print(f"Revised answer: {result}")
    assert "Paris" in result, f"Expected 'Paris' in the revised answer"
    print("get_revise_answer test passed")

test_get_revise_answer()

# Test RAG
def test_RAG():
    question = "What are the primary colors?"
    draft_paragraphs = ["The primary colors are red and blue.", "Yellow is also a primary color."]
    result = RAG(question, draft_paragraphs)
    print(f"RAG result: {result}")
    assert "red" in result.lower() and "blue" in result.lower() and "yellow" in result.lower(), f"Expected all primary colors in the result"
    print("RAG test passed")

test_RAG()

# Test get_draft_tot_initial
def test_get_draft_tot_initial():
    question = "Explain the water cycle."
    num_agents = 2
    result = get_draft_tot_initial(question, num_agents)
    print(f"Number of drafts: {len(result)}")
    for i, draft in enumerate(result):
        print(f"Draft {i+1}:\n{draft}\n")
    assert len(result) == 2, f"Expected 2 drafts, got {len(result)}"
    print("get_draft_tot_initial test passed")

test_get_draft_tot_initial()

split_draft test passed
Revised answer: The capital of France is Paris. 


get_revise_answer test passed
Completed iteration 1/2


  api_response = main(question, answer, num_queries=2)  # Now using the entire answer instead of just the paragraph
Object allocated at (most recent call last):
  File "/var/folders/02/fvfmdq_j709g1tthj47t5fcm0000gn/T/ipykernel_17808/1479425663.py", lineno 50
    api_response = main(question, answer, num_queries=2)  # Now using the entire answer instead of just the paragraph


Completed iteration 2/2
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


RESULT OF PUBMED API:
The primary colors are red, blue, and yellow.

In color theory, primary colors are defined as colors that cannot be created by mixing other colors together. They serve as the foundation for creating a wide range of other colors through various combinations.

In the additive color model, which is used in contexts like television and computer screens, the primary colors are red, green, and blue (RGB). By mixing these colors in different ways, a broad spectrum of colors can be produced.

In contrast, the subtractive color model, commonly used in painting and printing, identifies red, blue, and yellow as the primary colors. When mixed together, these colors can create a variety of other hues.

It's important to note that in the subtractive color model, yellow is indeed considered a primary color, while in the additive model, the primary colors are red, green, and

  result = RAG(question, draft_paragraphs)
Object allocated at (most recent call last):
  File "/var/folders/02/fvfmdq_j709g1tthj47t5fcm0000gn/T/ipykernel_17808/1479425663.py", lineno 50
    api_response = main(question, answer, num_queries=2)  # Now using the entire answer instead of just the paragraph


####################draft 0: The water cycle, also known as the hydrological cycle, is a continuous process that describes the movement of water within the Earth's atmosphere, land, and bodies of water. The cycle can be broken down into several key stages:

1. **Evaporation**: The process begins when the sun heats up water in rivers, lakes, and oceans. This heat causes the water to evaporate, turning it into water vapor that rises into the atmosphere. Plants also contribute to this process through transpiration, where water is absorbed by roots and released as vapor from leaves.

2. **Condensation**: As water vapor rises, it cools and condenses into tiny droplets, forming clouds. This process occurs when the air temperature drops and the water vapor reaches its dew point, leading to the formation of clouds in the atmosphere.

3. **Precipitation**: Eventually, the water droplets in clouds combine to form larger droplets. When they become heavy enough, they fall back to the Earth's surfa

KeyboardInterrupt: 