## Imports, data and model setup

In [1]:
"""Imports"""
import json

import yaml
from langchain.chat_models import ChatOpenAI
from langchain.prompts.chat import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    AIMessagePromptTemplate,
    HumanMessagePromptTemplate,
)
from langchain.schema import (
    AIMessage,
    HumanMessage,
    SystemMessage
)

from cot import Collection

In [4]:
"""data"""

# Load each dataset and immediately reduce it to a single sampled item
# (random_samples=True, seed=0), so the full collection is never kept under the name.
worldtree = Collection(["worldtree"], verbose=False).select(
    split="train",
    number_samples=1,
    random_samples=True,
    seed=0,
)

medqa = Collection(["med_qa"], verbose=False).select(
    split="test",
    number_samples=1,
    random_samples=True,
    seed=0,
)

Loading worldtree...
Loading med_qa...


In [5]:
"""Chat model"""
# Model used for the first experiments; swap the two lines below to try GPT-4.
model = 'gpt-3.5-turbo'
# model = 'gpt-4'

# temperature=0 is requested; a single retry and a 600 s request timeout are allowed.
chat = ChatOpenAI(
    model_name=model,
    temperature=0,
    max_retries=1,
    request_timeout=600,
)

## Initial experimentation

In [16]:
"""Templates"""

# The system prompt is kept in a file and read verbatim as text (it is not parsed as YAML).
# An explicit encoding is passed so the prompt is decoded the same way on every platform
# instead of depending on the locale's default encoding.
# NOTE(review): 'sytem_short_list.yaml' looks like a misspelling of 'system_...'; the path is
# left untouched because it has to match the file name on disk -- confirm.
with open('../reflection_messages/input_system/sytem_short_list.yaml', 'r', encoding='utf-8') as file:
    template = file.read()
system_message_prompt = SystemMessagePromptTemplate.from_template(template)

# Human message: the question followed by its answer options; both are filled in at format time.
human_template = """Answer the question:{question}\nwith the following answer options:{answer_options}"""
human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)

In [7]:
# Combine the system and human templates into one chat prompt.
chat_prompt = ChatPromptTemplate.from_messages(
    [system_message_prompt, human_message_prompt]
)

# Fill the prompt with an example medical multiple-choice question and request a completion.
# The call is the last expression of the cell, so its result is shown as the cell output.
chat(
    chat_prompt.format_prompt(
        question="A 77-year-old woman presents to the emergency room with the complaints of fever, malaise, and night sweats. She recently observed an enlargement of her axillary lymph nodes, which she examines on a weekly basis. She has a remote history of breast cancer in her 60s that was treated with radiation and chemotherapy. She also reports a history of extensive travel to Africa and a 30-pack-year history of smoking. On physical exam, several axillary lymph nodes are palpable with a large non-tender palpable mass in her right axilla measuring 10 x 8 cm. Fine-needle aspiration demonstrates what the pathologist describes as \"a centroblastic and immunoblastic cell presence, suspicious for non-Hodgkin’s lymphoma (NHL)–diffuse large B cell variant\". Which of the following risk factors is responsible for this patient’s condition?",
        answer_options="""
A: Travel to Africa
B: Axillary lymph node involvement
C: Previous radiation therapy
D: Female gender
E: Previous breast cancer""",
    ).to_messages()
)

AIMessage(content='Reasoning:\nThe patient is a 77-year-old woman who presents with fever, malaise, and night sweats. She has a history of breast cancer in her 60s that was treated with radiation and chemotherapy. She also reports a history of extensive travel to Africa and a 30-pack-year history of smoking. On physical exam, several axillary lymph nodes are palpable with a large non-tender palpable mass in her right axilla measuring 10 x 8 cm. Fine-needle aspiration demonstrates what the pathologist describes as "a centroblastic and immunoblastic cell presence, suspicious for non-Hodgkin’s lymphoma (NHL)–diffuse large B cell variant". We are asked to determine which of the following risk factors is responsible for this patient\'s condition.\n\nAnswer:\nE: Previous breast cancer\n\nRule check:\nRule 1: The reasoning is accurate and breaks down the question to determine which risk factor is responsible for the patient\'s condition.\nRule 2: The response considers all relevant informatio

In [None]:
# observe that GPT-3.5 gives no explanation

"""content='Reasoning:\nThe patient is a 77-year-old woman who presents with fever, malaise, and night sweats. 
She has a history of breast cancer in her 60s that was treated with radiation and chemotherapy. 
She also reports a history of extensive travel to Africa and a 30-pack-year history of smoking. 
On physical exam, several axillary lymph nodes are palpable with a large non-tender palpable mass 
in her right axilla measuring 10 x 8 cm. Fine-needle aspiration demonstrates what the pathologist 
describes as "a centroblastic and immunoblastic cell presence, suspicious for non-Hodgkin’s lymphoma 
(NHL)–diffuse large B cell variant". We are asked to determine which of the following risk factors is 
responsible for this patient\'s condition.\n\nAnswer:\nE: Previous breast cancer\n\n
Rule check:\nRule 1: The reasoning is accurate and breaks down the question to determine which risk factor
 is responsible for the patient\'s condition.\nRule 2: The response considers all relevant information, 
 including the patient\'s history of breast cancer, extensive travel to Africa, and smoking history, 
 as well as the physical exam findings and fine-needle aspiration results.
 \nRule 3: The response is sensitive to the numerical information provided'
"""

### new prompt with answer at the end

In [8]:
# System prompt variant that places the answer at the end of the response: the schema
# requested from the model is Reasoning -> Rule check (three rules) -> Final answer.
# The text contains no `{...}` placeholders, so it is used as-is by the prompt template.
system_template="""You are a scientific problem solver.

Your input consist of multiple choice questions. Based on the input, reason about the question and determine which of the answer options is the <final answer>.

Analyse the reasoning and answer conform the following rules, judging whether the reasoning and answer adhere to it.

Based on the judgment, change the reasoning and answer if needed.

Rules:

1. The response interprets the question accurately and breaks it down if necessary. <judgment>
2. The response considers all relevant information, and does not consider irrelevant information. <judgment>
3. The response is sensitive to the numerical information provided by the user, accurately interpreting and incorporating it into the response. <judgment>

Aim for an informative level of detail. Ensure that every step logically follows up on all previous steps and that processes in subsequent steps are informed by previous steps. Format your response in YAML, following this schema:

Reasoning:
<Reasoning about the question>

Rule check:
Rule 1: <judgment>
Rule 2: <judgment>
Rule 3: <judgment>

Final answer:
<final answer based on the reasoning and the judgment>
"""
# Rebinds `system_message_prompt`, replacing the prompt loaded from file in the earlier cell.
system_message_prompt = SystemMessagePromptTemplate.from_template(system_template)

In [9]:
# Re-create the GPT-3.5 chat model and rebuild the prompt so that it picks up the
# current `system_message_prompt`.
model = 'gpt-3.5-turbo'
chat = ChatOpenAI(
    model_name=model,
    temperature=0,
    max_retries=1,
    request_timeout=600,
)
chat_prompt = ChatPromptTemplate.from_messages(
    [system_message_prompt, human_message_prompt]
)

# Request a completion for the same example question; the result is the cell output.
chat(
    chat_prompt.format_prompt(
        question="A 77-year-old woman presents to the emergency room with the complaints of fever, malaise, and night sweats. She recently observed an enlargement of her axillary lymph nodes, which she examines on a weekly basis. She has a remote history of breast cancer in her 60s that was treated with radiation and chemotherapy. She also reports a history of extensive travel to Africa and a 30-pack-year history of smoking. On physical exam, several axillary lymph nodes are palpable with a large non-tender palpable mass in her right axilla measuring 10 x 8 cm. Fine-needle aspiration demonstrates what the pathologist describes as \"a centroblastic and immunoblastic cell presence, suspicious for non-Hodgkin’s lymphoma (NHL)–diffuse large B cell variant\". Which of the following risk factors is responsible for this patient’s condition?",
        answer_options="""
A: Travel to Africa
B: Axillary lymph node involvement
C: Previous radiation therapy
D: Female gender
E: Previous breast cancer""",
    ).to_messages()
)

AIMessage(content='Reasoning:\nThe patient is a 77-year-old woman who presents with fever, malaise, and night sweats. She has a history of breast cancer in her 60s that was treated with radiation and chemotherapy. She also reports a history of extensive travel to Africa and a 30-pack-year history of smoking. On physical exam, several axillary lymph nodes are palpable with a large non-tender palpable mass in her right axilla measuring 10 x 8 cm. Fine-needle aspiration demonstrates what the pathologist describes as "a centroblastic and immunoblastic cell presence, suspicious for non-Hodgkin’s lymphoma (NHL)–diffuse large B cell variant". We are asked to determine which of the following risk factors is responsible for this patient\'s condition.\n\nRule check:\nRule 1: The question is interpreted accurately and broken down if necessary.\nRule 2: All relevant information is considered, and irrelevant information is not considered.\nRule 3: There is numerical information provided, but it is 

In [None]:
# GPT-3.5 actually follows the requested response format better than GPT-4 does in the next section
"""content='Reasoning:\nThe patient is a 77-year-old woman who presents with fever, malaise, and night sweats. 
She has a history of breast cancer in her 60s that was treated with radiation and chemotherapy. 
She also reports a history of extensive travel to Africa and a 30-pack-year history of smoking.
 On physical exam, several axillary lymph nodes are palpable with a large non-tender palpable mass in her right axilla
   measuring 10 x 8 cm. Fine-needle aspiration demonstrates what the pathologist describes as "a centroblastic
     and immunoblastic cell presence, suspicious for non-Hodgkin’s lymphoma (NHL)–diffuse large B cell variant".
       We are asked to determine which of the following risk factors is responsible for this patient\'s condition.
       \n\nRule check:\nRule 1: The question is interpreted accurately and broken down if necessary.
       \nRule 2: All relevant information is considered, and irrelevant information is not considered.
       \nRule 3: There is numerical information provided, but it is not relevant to this question.\n\n
       Final answer:\nThe patient\'s history of previous breast cancer is a risk factor for developing 
       non-Hodgkin\'s lymphoma. Therefore, the correct answer is E: Previous breast cancer.'"""

In [10]:
# Same experiment with GPT-4: only the model name differs from the GPT-3.5 cell.
model = 'gpt-4'
chat = ChatOpenAI(
    model_name=model,
    temperature=0,
    max_retries=1,
    request_timeout=600,
)

chat_prompt = ChatPromptTemplate.from_messages(
    [system_message_prompt, human_message_prompt]
)

# Request a completion for the same example question; the result is the cell output.
chat(
    chat_prompt.format_prompt(
        question="A 77-year-old woman presents to the emergency room with the complaints of fever, malaise, and night sweats. She recently observed an enlargement of her axillary lymph nodes, which she examines on a weekly basis. She has a remote history of breast cancer in her 60s that was treated with radiation and chemotherapy. She also reports a history of extensive travel to Africa and a 30-pack-year history of smoking. On physical exam, several axillary lymph nodes are palpable with a large non-tender palpable mass in her right axilla measuring 10 x 8 cm. Fine-needle aspiration demonstrates what the pathologist describes as \"a centroblastic and immunoblastic cell presence, suspicious for non-Hodgkin’s lymphoma (NHL)–diffuse large B cell variant\". Which of the following risk factors is responsible for this patient’s condition?",
        answer_options="""
A: Travel to Africa
B: Axillary lymph node involvement
C: Previous radiation therapy
D: Female gender
E: Previous breast cancer""",
    ).to_messages()
)

AIMessage(content="Reasoning:\nThe question asks us to identify the risk factor responsible for the patient's condition, which is suspected to be non-Hodgkin's lymphoma (NHL)–diffuse large B cell variant. We need to consider the risk factors mentioned in the question and determine which one is most likely to be responsible for the patient's condition.\n\nA: Travel to Africa - While travel to Africa may expose a person to various health risks, it is not a known risk factor for NHL.\nB: Axillary lymph node involvement - This is a symptom of the patient's condition, not a risk factor.\nC: Previous radiation therapy - Radiation therapy is a known risk factor for developing NHL, as it can cause DNA damage and increase the risk of developing cancer.\nD: Female gender - Gender is not a significant risk factor for NHL.\nE: Previous breast cancer - While having a history of cancer may increase the risk of developing another cancer, the radiation therapy used to treat the breast cancer is a more

In [None]:
#gpt-4 does not adhere to answer format

"""content="Reasoning:\nThe question asks us to identify the risk factor responsible for the patient's condition,
 which is suspected to be non-Hodgkin's lymphoma (NHL)–diffuse large B cell variant. We need to consider the risk 
 factors mentioned in the question and determine which one is most likely to be responsible for the patient's condition
 .\n\nA: Travel to Africa - While travel to Africa may expose a person to various health risks, it is not a known risk 
 factor for NHL.\nB: Axillary lymph node involvement - This is a symptom of the patient's condition, not a risk factor.
 \nC: Previous radiation therapy - Radiation therapy is a known risk factor for developing NHL, as it can cause DNA 
 damage and increase the risk of developing cancer.\nD: Female gender - Gender is not a significant risk factor for NHL.
 \nE: Previous breast cancer - While having a history of cancer may increase the risk of developing another cancer, the 
 radiation therapy used to treat the breast cancer is a more direct risk factor for NHL.\n\nRule check:\n
 Rule 1: Accurate interpretation and breakdown of the question. (Pass)\n
 Rule 2: Consideration of all relevant information and exclusion of irrelevant information. (Pass)\n
 Rule 3: Sensitivity to the numerical information"
 """

## Define templates

In [None]:
# TODO: load the different templates from a JSON/Python file instead of defining them inline

In [25]:
# System template 1: baseline prompt -- role and task description only, with no rule
# list and no response schema.
# NOTE: every template cell rebinds the same name `system_message_prompt`, so the template
# cell executed last before `chat_prompt` is built decides which system prompt is used.
system_template="""You are a scientific problem solver.

Your input consist of multiple choice questions. Based on the input, reason about the question and determine which of the answer options is the <final answer>.


"""
system_message_prompt = SystemMessagePromptTemplate.from_template(system_template)

In [36]:
# System template 2: baseline plus the self-review instructions and a trailing
# <final answer ...> placeholder.
# NOTE(review): the text refers to "the following rules", but this variant contains no
# rule list -- confirm that this is intended.

system_template="""You are a scientific problem solver.

Your input consist of multiple choice questions. Based on the input, reason about the question and determine which of the answer options is the <final answer>.

Analyse the reasoning and answer conform the following rules, judging whether the reasoning and answer adhere to it.

Based on the judgment, change the reasoning and answer if needed.

<final answer based on the reasoning and the judgment>
"""
# Rebinds `system_message_prompt`; the previously defined system prompt is replaced.
system_message_prompt = SystemMessagePromptTemplate.from_template(system_template)

In [7]:
# System template 3: three rules, each with a judging hint in angle brackets, followed by
# a response schema of Reasoning -> Rule check -> Final answer.

system_template="""You are a scientific problem solver. Your input consists of multiple choice questions. Based on the input, reason about the question and determine which of the answer options is the <answer>.

Analyze the reasoning and answer conform to the defined rules, judging whether the reasoning and answer adhere to it.

Based on the judgment, change the reasoning and answer if needed.

Rules:

1. The response interprets the question accurately and breaks it down if necessary. <judge whether the question could mean somethings else>
2. The response considers all relevant information, and does not consider irrelevant information. <judge whether more or  less information should be considered>
3. The response picks the most logical answer option based on common sense. <judge whether the chosen answer is logical and whether another answer option could be more logical>

Aim for an informative level of detail. Ensure that every step logically follows up on all previous steps and that processes in subsequent steps are informed by previous steps. Format your response following this schema:

Reasoning:
<Reasoning about the question>

Rule check:
Rule 1: <judgment>
Rule 2: <judgment>
Rule 3: <judgment>

Final answer:
<final answer based on the reasoning and the judgment>
"""
# Rebinds `system_message_prompt`; the previously defined system prompt is replaced.
system_message_prompt = SystemMessagePromptTemplate.from_template(system_template)

In [18]:
# System template 4: same rules and response schema as template 3; the second paragraph
# instead asks the model to act as a critical evaluator of its own reasoning and answer.

system_template="""You are a scientific problem solver. Your input consists of multiple choice questions. Based on the input, reason about the question and determine which of the answer options is the <answer>.

Then, act as a critical evaluator and judge whether the reasoning and answer adhere to the given rules.

Based on the judgment, change the reasoning and answer if needed.

Rules:

1. The response interprets the question accurately and breaks it down if necessary. <judge whether the question could mean somethings else>
2. The response considers all relevant information, and does not consider irrelevant information. <judge whether more or  less information should be considered>
3. The response picks the most logical answer option based on common sense. <judge whether the chosen answer is logical and whether another answer option could be more logical>

Aim for an informative level of detail. Ensure that every step logically follows up on all previous steps and that processes in subsequent steps are informed by previous steps. Format your response following this schema:

Reasoning:
<Reasoning about the question>

Rule check:
Rule 1: <judgment>
Rule 2: <judgment>
Rule 3: <judgment>

Final answer:
<final answer based on the reasoning and the judgment>
"""
# Rebinds `system_message_prompt`; the previously defined system prompt is replaced.
system_message_prompt = SystemMessagePromptTemplate.from_template(system_template)



In [9]:
# Human template: the question, its answer options, and an instruction on how to state
# the final answer.
# The letter range is intentionally not hard-coded: the datasets used in this notebook
# differ in their number of options (the worldtree item has 4 choices, the med_qa item
# has 5), so the former "(A-D)" wrongly excluded option E. A stray trailing apostrophe
# was removed from the prompt as well.
# Idea noted for later: request the answer in JSON format with the key 'final_answer'.
human_template = """Answer the question:{question}\nwith the following answer options:{answer_options}\nProvide the final answer as the single letter of the chosen answer option."""
human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)

In [19]:
# Assemble the chat prompt from whichever system and human templates are currently bound.
chat_prompt = ChatPromptTemplate.from_messages(
    [system_message_prompt, human_message_prompt]
)

In [15]:
# Bundle the chat model and the assembled prompt; this dict is what the generation cells
# pass to `system_generate`.
# NOTE: the dict stores references to the objects bound to `chat` and `chat_prompt` at the
# moment this cell runs. Re-run this cell after rebuilding either of them, otherwise
# generation silently keeps using the previous model/prompt.
input_dict = {'chat': chat, 'chat_prompt': chat_prompt}

## Generate reasoning with system prompt

In [16]:
# Generate reasoning for the sampled worldtree training item with the bundled model and prompt.
worldtree.system_generate(
    input_dict,
    name='worldtree',
    split='train',
)

Generating worldtree...


In [None]:
#chatgpt system template 3

"""The question asks which of the given characteristics is a learned behavior. 
Being tall and having curly hair are genetic traits and not learned behaviors. 
Having freckles is also a genetic trait and not a learned behavior. 
Being a good cook, on the other hand, is a skill that can be learned through practice and experience.\n\n
Rule check:\nRule 1: The question is straightforward and does not require any further breakdown.\n
Rule 2: All the given characteristics are considered, and only the relevant information is used to determine the answer.\n
Rule 3: The most logical answer based on common sense is C) being a good cook.\n\n
Final answer:\nC) being a good cook.
"""

In [None]:
#gpt-4 system template 4

"""A learned behavior is a behavior that is acquired through experience or learning, rather than being inherited 
genetically. In this case, we can analyze each characteristic to determine if it is a learned behavior or not:
\nA) Being tall - This is a genetic trait, not a learned behavior.\nB) Having curly hair - This is also a genetic trait,
 not a learned behavior.\nC) Being a good cook - This is a skill that can be learned and improved through practice,
   making it a learned behavior.\nD) Having freckles - This is a genetic trait, not a learned behavior.\n\n
   Rule check:\nRule 1: The question is interpreted accurately and broken down as necessary.\
    nRule 2: All relevant information is considered, and no irrelevant information is considered.\n
    Rule 3: The most logical answer option based on common sense is chosen.\n\n
    Final answer:\nC) being a good cook.
"""

In [17]:
# Display the first item of the worldtree train split (the collection was reduced to one
# sample when it was loaded) to inspect its fields after generation.
worldtree['worldtree']['train'][0]

{'id': 'worldtree_train_1577',
 'ref_id': '',
 'question': 'A parent and a child share several characteristics. Both individuals are tall, have curly hair, are good cooks, and have freckles. Which of these characteristics is a learned behavior?',
 'type': 'multiplechoice',
 'choices': ['being tall',
  'having curly hair',
  'being a good cook',
  'having freckles'],
 'context': '',
 'cot': ['Skills are learned characteristics.',
  'A behavior is a kind of characteristic.',
  'Cooking is a kind of skill for preparing food.'],
 'answer': ['being a good cook'],
 'generated_cot': [{'id': '90fc13ba-0bd0-4b5e-b848-4845cb9565c3',
   'fragments_version': '0.01',
   'instruction': '',
   'cot_trigger': '',
   'cot_trigger_template': '',
   'prompt_text': '',
   'cot': 'Reasoning: \nThe question asks which of the given characteristics is a learned behavior. Being tall and having curly hair are genetic traits and not learned behaviors. Having freckles is also a genetic trait and not a learned beh

In [32]:
# Run with system prompt 4: generate reasoning for the sampled med_qa test item.
medqa.system_generate(
    input_dict,
    name='med_qa',
    split='test',
)

Generating med_qa...


In [33]:
# Display the first item of the med_qa test split (the collection was reduced to one
# sample when it was loaded) to inspect its fields after generation.
medqa['med_qa']['test'][0]

{'id': 'med_qa_test_788',
 'ref_id': '',
 'question': 'A 26-year-old gravida 2 para 1 at 24 weeks gestation is admitted to the labor and delivery suite with mild abdominal cramps, uterine contractions, and a watery vaginal discharge. She has a history of preterm birth. The vital signs are as follows: blood pressure 125/80 mm Hg; heart rate 100/min; respiratory rate 13/min; and temperature 36.6℃ (97.9℉). The pelvic examination reveals cervical softening and shortening. Transvaginal ultrasound shows a cervical length of 12 mm, which is consistent with preterm labor. A tocolytic and a single dose of betamethasone are administered. Betamethasone stimulates which fetal cells?',
 'type': 'multiplechoice',
 'choices': ['Type I pneumocytes',
  'Goblet cells',
  'Bronchial epithelial cells',
  'Type II pneumocytes',
  'Vascular smooth myocytes'],
 'context': '',
 'cot': [],
 'answer': ['Type II pneumocytes'],
 'generated_cot': [{'id': 'fbe571bb-7b03-4997-95d1-f02debd9a5af',
   'fragments_versio

In [None]:
""""
Betamethasone is a corticosteroid that is administered to pregnant women at risk of preterm labor to help 
accelerate fetal lung maturation. The primary target of betamethasone in the fetal lungs is the Type II pneumocytes.
 These cells are responsible for producing and secreting surfactant, a substance that reduces surface tension in 
 the alveoli and prevents lung collapse. By stimulating Type II pneumocytes, betamethasone helps to increase 
 surfactant production, which improves the chances of the preterm infant being able to breathe effectively 
 after birth.\n\nRule check:\nRule 1: The question is interpreted accurately and focuses on the fetal cells 
 stimulated by betamethasone.\nRule 2: The relevant information is considered, and no irrelevant information is 
 included.\nRule 3: The most logical answer option based on common sense and medical knowledge is chosen.
 \n\nFinal answer:\nD) Type II pneumocytes'


"""

## Templates from Biosimulator


In [None]:
#question template
"""
Given the scientific context in the paragraph below, 
simulate the following experiment with at least 5 steps and increased novelty: 
"{experiment}". As the final outcome provide an answer to the following question: "{question}"

Scientific context:
{introduction_truncated}"""


In [None]:
#system template:

"""
You are a science-based, comprehensive and mechanistic simulator of biomedical processes across all levels of biology: molecular, cellular, organ, and organism.

Your input consists of simulation parameters. Based on the input, you simulate all relevant processes that unfold step-by-step until a final outcome can be directly inferred from the simulation

Simulation rules:
- Begin the simulation at the level of biology matching the input best.
- Ensure that each step logically informs the next step.
- Use as many steps as necessary.
- Conclude the simulation with a final outcome, once it can be directly inferred from the simulation steps.

Aim for an informative level of detail. Ensure that every step logically follows up on all previous steps and that processes in subsequent steps are informed by previous steps. Format your response in YAML, following this schema:

parameters:
  - <first relevant parameter for simulation>
  - <second relevant parameter for simulation>
  - ...
simulation:
  - step: 1
    level: <Indicate the level of biology of this step.>
    facts: <Provide a comprehensive overview of facts about the entities and processes you are considering, including facts that are not stated in the query. Attempt to include gene regulation, protein interactions, cell types, tissue functions, and organ functions that might influence the step and its consequences. Avoid repeating any facts that you already provided. Mention facts that might become relevant later. Provide references for all facts you list at the end only using the provided structure and indicate it using [1].>
    consequence: <Generate the most probable consequence, given the facts and any previously generated consequences. Given the level of biology of this step, provide fitting specifics like different types of entities (e.g. small molecules, RNA, DNA, proteins, metabolites, cell types, tissues, organs) and processes (e.g., interactions, pathways, biological processes, biochemical reactions, tissue function, organ function). For example: decreased <gene X> expression by <cell type X>, increased <metabolite X> levels in the blood, changed <morphology X> in <tissue X>, increased <organ X> function, systemic <phenotype X>, etc.>
    probability: <The probability of the consequence happening on a scale of 0 to 100.>
    explanation: <Offer a good explanation as evidence that supports the listed consequence.>
    novelty: <How novel or unconventional is your reasoning on a scale of 0 to 100.>
  - step: 2
  ...
conclusion:
  outcome: <Infer the final outcome from the last simulation step.>
  explanation: <Offer a good explanation as evidence that supports the final outcome.>

references:
  "[1]": "<authors> <year> <journal> <title of the publication>"
  "[2]": "<authors> <year> <journal> <title of the publication>"
  ...



"""

## Extra

In [None]:
from cot import Collection
