## Imports, data and model setup

In [2]:
"""Imports"""
import os

import yaml
from langchain.chat_models import ChatOpenAI
from langchain.prompts.chat import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    AIMessagePromptTemplate,
    HumanMessagePromptTemplate,
)
from langchain.schema import (
    AIMessage,
    HumanMessage,
    SystemMessage
)

from cot import Collection

In [24]:
"""data"""

# WorldTree: request one randomly chosen sample (seed=0) from the train split.
# Only the selection is kept under the name `worldtree`.
worldtree = Collection(["worldtree"], verbose=False).select(
    split="train",
    number_samples=1,
    random_samples=True,
    seed=0,
)

# MedQA: same request, but against the test split.
medqa = Collection(["med_qa"], verbose=False).select(
    split="test",
    number_samples=1,
    random_samples=True,
    seed=0,
)

Loading worldtree...
Loading med_qa...


In [5]:
"""Chat model"""
# Name of the OpenAI chat model used by the cells below; the commented line is the
# alternative that was compared against in the notes further down.
model = 'gpt-3.5-turbo'
# model = 'gpt-4'
# Chat client shared by the "Initial experimentation" cells. All settings are
# forwarded to ChatOpenAI as given (timeout presumably in seconds — confirm).
chat = ChatOpenAI(
        temperature=0,
        model_name=model,
        request_timeout=600,
        max_retries=1,
    )

## Initial experimentation

In [16]:
"""Templates"""

# System prompt: the file's full text is used verbatim as the template body (it is
# read as plain text, not parsed as YAML). The encoding is pinned so the prompt
# does not depend on the platform's default locale encoding.
# NOTE(review): the file name is spelled 'sytem_short_list.yaml' here but
# 'system_short_list.yaml' in the absolute-path cell at the end — confirm which exists.
with open('../reflection_messages/input_system/sytem_short_list.yaml', 'r', encoding='utf-8') as file:
    template = file.read()
system_message_prompt = SystemMessagePromptTemplate.from_template(template)

# Human prompt with two placeholders: {question} and {answer_options}.
human_template="""Answer the question:{question}\nwith the following answer options:{answer_options}"""
human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)

# Combined prompt: system message first, then the human message.
chat_prompt = ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt])

In [7]:
# Manual check on one hand-written medical question: fill the {answer_options} and
# {question} placeholders of chat_prompt, convert the result to a message list, and
# get a chat completion from the formatted messages. The returned message is the
# cell's displayed value.
chat(chat_prompt.format_prompt(answer_options="""
A: Travel to Africa
B: Axillary lymph node involvement
C: Previous radiation therapy
D: Female gender
E: Previous breast cancer""", question="A 77-year-old woman presents to the emergency room with the complaints of fever, malaise, and night sweats. She recently observed an enlargement of her axillary lymph nodes, which she examines on a weekly basis. She has a remote history of breast cancer in her 60s that was treated with radiation and chemotherapy. She also reports a history of extensive travel to Africa and a 30-pack-year history of smoking. On physical exam, several axillary lymph nodes are palpable with a large non-tender palpable mass in her right axilla measuring 10 x 8 cm. Fine-needle aspiration demonstrates what the pathologist describes as \"a centroblastic and immunoblastic cell presence, suspicious for non-Hodgkin’s lymphoma (NHL)–diffuse large B cell variant\". Which of the following risk factors is responsible for this patient’s condition?").to_messages())

AIMessage(content='Reasoning:\nThe patient is a 77-year-old woman who presents with fever, malaise, and night sweats. She has a history of breast cancer in her 60s that was treated with radiation and chemotherapy. She also reports a history of extensive travel to Africa and a 30-pack-year history of smoking. On physical exam, several axillary lymph nodes are palpable with a large non-tender palpable mass in her right axilla measuring 10 x 8 cm. Fine-needle aspiration demonstrates what the pathologist describes as "a centroblastic and immunoblastic cell presence, suspicious for non-Hodgkin’s lymphoma (NHL)–diffuse large B cell variant". We are asked to determine which of the following risk factors is responsible for this patient\'s condition.\n\nAnswer:\nE: Previous breast cancer\n\nRule check:\nRule 1: The reasoning is accurate and breaks down the question to determine which risk factor is responsible for the patient\'s condition.\nRule 2: The response considers all relevant informatio

In [None]:
# observe that GPT-3.5 gives no explanation

"""content='Reasoning:\nThe patient is a 77-year-old woman who presents with fever, malaise, and night sweats. 
She has a history of breast cancer in her 60s that was treated with radiation and chemotherapy. 
She also reports a history of extensive travel to Africa and a 30-pack-year history of smoking. 
On physical exam, several axillary lymph nodes are palpable with a large non-tender palpable mass 
in her right axilla measuring 10 x 8 cm. Fine-needle aspiration demonstrates what the pathologist 
describes as "a centroblastic and immunoblastic cell presence, suspicious for non-Hodgkin’s lymphoma 
(NHL)–diffuse large B cell variant". We are asked to determine which of the following risk factors is 
responsible for this patient\'s condition.\n\nAnswer:\nE: Previous breast cancer\n\n
Rule check:\nRule 1: The reasoning is accurate and breaks down the question to determine which risk factor
 is responsible for the patient\'s condition.\nRule 2: The response considers all relevant information, 
 including the patient\'s history of breast cancer, extensive travel to Africa, and smoking history, 
 as well as the physical exam findings and fine-needle aspiration results.
 \nRule 3: The response is sensitive to the numerical information provided'
"""

In [8]:
# Switch the system prompt to template 0. The file's text is used verbatim as the
# template body; the encoding is pinned so it does not depend on the platform default.
with open('../reflection_messages/input_system/sytem_template_0.yaml', 'r', encoding='utf-8') as file:
    template = file.read()
system_message_prompt = SystemMessagePromptTemplate.from_template(template)
# Rebuild the combined prompt: chat_prompt was assembled from the previously loaded
# system message, so without this the newly loaded template would never be sent.
chat_prompt = ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt])

In [None]:
#gpt-3.5 actually follows the instructions better than gpt-4 in the next section

#gpt-3.5
"""content='Reasoning:\nThe patient is a 77-year-old woman who presents with fever, malaise, and night sweats. 
She has a history of breast cancer in her 60s that was treated with radiation and chemotherapy. 
She also reports a history of extensive travel to Africa and a 30-pack-year history of smoking.
 On physical exam, several axillary lymph nodes are palpable with a large non-tender palpable mass in her right axilla
   measuring 10 x 8 cm. Fine-needle aspiration demonstrates what the pathologist describes as "a centroblastic
     and immunoblastic cell presence, suspicious for non-Hodgkin’s lymphoma (NHL)–diffuse large B cell variant".
       We are asked to determine which of the following risk factors is responsible for this patient\'s condition.
       \n\nRule check:\nRule 1: The question is interpreted accurately and broken down if necessary.
       \nRule 2: All relevant information is considered, and irrelevant information is not considered.
       \nRule 3: There is numerical information provided, but it is not relevant to this question.\n\n
       Final answer:\nThe patient\'s history of previous breast cancer is a risk factor for developing 
       non-Hodgkin\'s lymphoma. Therefore, the correct answer is E: Previous breast cancer.'"""

In [None]:
#gpt-4 does not adhere to the answer format

"""content="Reasoning:\nThe question asks us to identify the risk factor responsible for the patient's condition,
 which is suspected to be non-Hodgkin's lymphoma (NHL)–diffuse large B cell variant. We need to consider the risk 
 factors mentioned in the question and determine which one is most likely to be responsible for the patient's condition
 .\n\nA: Travel to Africa - While travel to Africa may expose a person to various health risks, it is not a known risk 
 factor for NHL.\nB: Axillary lymph node involvement - This is a symptom of the patient's condition, not a risk factor.
 \nC: Previous radiation therapy - Radiation therapy is a known risk factor for developing NHL, as it can cause DNA 
 damage and increase the risk of developing cancer.\nD: Female gender - Gender is not a significant risk factor for NHL.
 \nE: Previous breast cancer - While having a history of cancer may increase the risk of developing another cancer, the 
 radiation therapy used to treat the breast cancer is a more direct risk factor for NHL.\n\nRule check:\n
 Rule 1: Accurate interpretation and breakdown of the question. (Pass)\n
 Rule 2: Consideration of all relevant information and exclusion of irrelevant information. (Pass)\n
 Rule 3: Sensitivity to the numerical information"
 """

In [15]:
import json  # NOTE(review): json is not used in any visible cell — confirm before removing
# Bundle the chat client and the combined prompt under the keys 'chat' and
# 'chat_prompt'; this dict is what gets passed to system_generate.
input_dict = {'chat':chat,'chat_prompt':chat_prompt}

In [16]:
# Run system_generate on the selected WorldTree sample with the bundled chat client
# and prompt (presumably this queries the chat model — see the cot package).
worldtree.system_generate(input_dict,name='worldtree', split='train')

Generating worldtree...


In [None]:
#chatgpt system template 3

"""The question asks which of the given characteristics is a learned behavior. 
Being tall and having curly hair are genetic traits and not learned behaviors. 
Having freckles is also a genetic trait and not a learned behavior. 
Being a good cook, on the other hand, is a skill that can be learned through practice and experience.\n\n
Rule check:\nRule 1: The question is straightforward and does not require any further breakdown.\n
Rule 2: All the given characteristics are considered, and only the relevant information is used to determine the answer.\n
Rule 3: The most logical answer based on common sense is C) being a good cook.\n\n
Final answer:\nC) being a good cook.
"""

In [None]:
#gpt-4 system template 4

"""A learned behavior is a behavior that is acquired through experience or learning, rather than being inherited 
genetically. In this case, we can analyze each characteristic to determine if it is a learned behavior or not:
\nA) Being tall - This is a genetic trait, not a learned behavior.\nB) Having curly hair - This is also a genetic trait,
 not a learned behavior.\nC) Being a good cook - This is a skill that can be learned and improved through practice,
   making it a learned behavior.\nD) Having freckles - This is a genetic trait, not a learned behavior.\n\n
   Rule check:\nRule 1: The question is interpreted accurately and broken down as necessary.\
    nRule 2: All relevant information is considered, and no irrelevant information is considered.\n
    Rule 3: The most logical answer option based on common sense is chosen.\n\n
    Final answer:\nC) being a good cook.
"""

In [None]:
# Same run on the selected MedQA sample; the string below is a pasted model answer
# kept as a note.
medqa.system_generate(input_dict)
""""
Betamethasone is a corticosteroid that is administered to pregnant women at risk of preterm labor to help 
accelerate fetal lung maturation. The primary target of betamethasone in the fetal lungs is the Type II pneumocytes.
 These cells are responsible for producing and secreting surfactant, a substance that reduces surface tension in 
 the alveoli and prevents lung collapse. By stimulating Type II pneumocytes, betamethasone helps to increase 
 surfactant production, which improves the chances of the preterm infant being able to breathe effectively 
 after birth.\n\nRule check:\nRule 1: The question is interpreted accurately and focuses on the fetal cells 
 stimulated by betamethasone.\nRule 2: The relevant information is considered, and no irrelevant information is 
 included.\nRule 3: The most logical answer option based on common sense and medical knowledge is chosen.
 \n\nFinal answer:\nD) Type II pneumocytes'


"""

## Experiment template

In [None]:
# Select a data sample: one randomly chosen item (seed=0) from the MedQA test split.
medqa = Collection(["med_qa"], verbose=False).select(
    split="test",
    number_samples=1,
    random_samples=True,
    seed=0,
)

In [None]:
# Choose a chat model: `model` is the OpenAI model name handed to the client below.
model = 'gpt-3.5-turbo'

# Chat client for this experiment; it replaces the notebook-level `chat` defined in
# the setup section. Settings are forwarded to ChatOpenAI as given.
chat = ChatOpenAI(
        temperature=0,
        model_name=model,
        request_timeout=600,
        max_retries=1,
    )

In [None]:
# Choose a system template: the file's full text becomes the system message template.
# The encoding is pinned so the prompt does not depend on the platform's default
# locale encoding.

with open('../reflection_messages/input_system/sytem_short_list.yaml', 'r', encoding='utf-8') as file:
    template = file.read()
system_message_prompt = SystemMessagePromptTemplate.from_template(template)
# Reuses human_message_prompt from the "Initial experimentation" templates cell.
chat_prompt = ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt])

In [None]:
# Run the experiment on the MedQA sample selected at the top of this template.
# (This cell previously generated on `worldtree`, which ignored that selection.)

import json
# Bundle the chat client and combined prompt under the keys system_generate is given.
input_dict = {'chat':chat,'chat_prompt':chat_prompt}
medqa.system_generate(input_dict)

In [32]:
# Helper function to call OpenAI model

def system_experiment(data, model: str, path_system_template: str):
    """Run ``data.system_generate`` with a chat model and a system prompt read from a file.

    Args:
        data: Object exposing ``system_generate(input_dict)``; in this notebook a
            selected ``Collection`` sample such as ``medqa``.
        model: Model name forwarded to ``ChatOpenAI`` as ``model_name``.
        path_system_template: Path of the text file whose full contents become the
            system message template.

    Returns:
        Whatever ``data.system_generate`` returns for the assembled input dict.

    Raises:
        OSError: If the template file cannot be opened.
        UnicodeDecodeError: If the template file is not valid UTF-8.
    """
    # Read the template first so a wrong path fails before a chat client is built.
    # The encoding is pinned so the prompt text does not depend on the platform's
    # default locale encoding.
    with open(path_system_template, 'r', encoding='utf-8') as file:
        system_template = file.read()

    chat = ChatOpenAI(
        temperature=0,
        model_name=model,
        request_timeout=600,
        max_retries=1,
    )

    # Human message with the {question} and {answer_options} placeholders.
    human_template = """Answer the question:{question}\nwith the following answer options:{answer_options}"""

    system_message_prompt = SystemMessagePromptTemplate.from_template(system_template)
    human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)

    # System message first, then the human message.
    chat_prompt = ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt])

    return data.system_generate({'chat': chat, 'chat_prompt': chat_prompt})



In [33]:
# Resolve the system template relative to the notebook's working directory (the same
# base the other template cells use) instead of a machine-specific home directory,
# so the cell runs on any checkout of the repository.
absolute_path = os.path.abspath("../reflection_messages/input_system/system_short_list.yaml")
system_experiment(medqa,'gpt-3.5-turbo',absolute_path)

test

Generating med_qa...
