In [1]:
def create_prompt(entity, text, category, reasoning_style):
    categories = {
        "simple": {
            "positive": f"Determine if the condition '{entity}' is currently affecting the patient as mentioned in the text: '{text}'.",
            "negated": f"Identify if the condition '{entity}' is not present in the patient as mentioned in the text: '{text}'.",
            "possible": f"Assess if there is a possibility of the condition '{entity}' affecting the patient as mentioned in the text: '{text}'.",
            "family": f"Verify if the condition '{entity}' is related to the patient's family member as mentioned in the text: '{text}'.",
            "historical": f"Check if the condition '{entity}' is part of the patient's historical medical records as mentioned in the text: '{text}'.",
            "hypothetical": f"Consider if the condition '{entity}' is mentioned hypothetically as it might affect the patient as mentioned in the text: '{text}'."
        },
        "cot": {
            "positive": f"Analyze step by step if '{entity}' is currently affecting the patient based on the sentence: '{text}'.",
            "negated": f"Sequentially examine if '{entity}' does not exist in the patient's condition as stated in the sentence: '{text}'.",
            "possible": f"Consider the possibility that '{entity}' could affect the patient, as suggested in the sentence: '{text}'.",
            "family": f"Determine if '{entity}' is associated with the patient's family history as mentioned in the sentence: '{text}'.",
            "historical": f"Investigate if '{entity}' is a part of the patient's past conditions as described in the sentence: '{text}'.",
            "hypothetical": f"Assess if '{entity}' is discussed in a hypothetical context in the sentence: '{text}'."
        },
        "tot": {
            "positive": f"Explore various reasons to confirm if '{entity}' is present in the patient as indicated in the sentence: '{text}'.",
            "negated": f"Explore various reasons to confirm if '{entity}' is absent in the patient as indicated in the sentence: '{text}'.",
            "possible": f"Explore various possibilities that '{entity}' might be affecting the patient as indicated in the sentence: '{text}'.",
            "family": f"Trace the family tree to see if '{entity}' is a condition shared with family members as mentioned in the sentence: '{text}'.",
            "historical": f"Trace back through medical history to confirm if '{entity}' is a historical condition as mentioned in the sentence: '{text}'.",
            "hypothetical": f"Consider different hypothetical scenarios where '{entity}' could potentially affect the patient as mentioned in the sentence: '{text}'."
        },
        "self_consistency": {
            "positive": f"Generate multiple reasoning paths to see if there is a consensus that '{entity}' is currently affecting the patient as stated in the sentence: '{text}'.",
            "negated": f"Generate multiple reasoning paths to see if there is a consensus that '{entity}' is not currently affecting the patient as stated in the sentence: '{text}'.",
            "possible": f"Generate multiple reasoning paths to see if there is a consensus on the possibility of '{entity}' affecting the patient as stated in the sentence: '{text}'.",
            "family": f"Generate multiple reasoning paths to check for consistency in references to '{entity}' within the patient's family history as mentioned in the sentence: '{text}'.",
            "historical": f"Generate multiple reasoning paths to verify if '{entity}' is consistently referred to as a historical condition in the sentence: '{text}'.",
            "hypothetical": f"Generate multiple reasoning paths to determine if the mention of '{entity}' is consistently hypothetical as stated in the sentence: '{text}'."
        }
    }

    if reasoning_style.lower() not in categories or category.lower() not in categories[reasoning_style.lower()]:
        raise ValueError(f"The reasoning style '{reasoning_style}' with category '{category}' is not defined.")
    
    reasoning_instructions = categories[reasoning_style.lower()][category.lower()]
    
    prompt = (
        f"Please assess the following statement and determine if it's true or false: '{reasoning_instructions}'"
        " Your response should only contain 'True' or 'False'."
    )
    
    return prompt

# Example usage for the "simple" reasoning style without specific reasoning methods:
entity = "sleep apnea"
text = "The patient has recently been diagnosed with sleep apnea."
category = "negated"
reasoning_style = "simple"

# Generate the prompt
simple_prompt = create_prompt(entity, text, category, reasoning_style)
print(simple_prompt)

# The output should be a prompt asking for a True/False response based on the category and entity.


Please assess the following statement and determine if it's true or false: 'Identify if the condition 'sleep apnea' is not present in the patient as mentioned in the text: 'The patient has recently been diagnosed with sleep apnea.'.' Your response should only contain 'True' or 'False'.


In [2]:
filename = "/home/yuj49/ConText/ConText_LLM/i2b2.json"
import json 
# get a token: https://replicate.com/account
from getpass import getpass
import os

REPLICATE_API_TOKEN = getpass()
os.environ["REPLICATE_API_TOKEN"] = "r8_O1DRLBbf3PpDdwQ06vIGXnttWboYGUt2bfYE4"
import replicate
# get a token: https://replicate.com/account
from getpass import getpass
import os
def generate(prompt):
    output = replicate.run(
      "meta/llama-2-13b-chat:f4e2de70d66816a838a89eeeb621910adffb0dd0baba3976c96980970978018d",
      input={
        "debug": False,
        "top_k": 50,
        "top_p": 1,
        "prompt": prompt,
        "temperature": 0.75,
        "system_prompt": "You are a helpful, respectful and honest assistant. Always answer True or False.",
        "max_new_tokens": 500,
        "min_new_tokens": -1
      }
    )
    res = []
    for j in output:
        res.append(j)  
    # print(" ".join(res))
    return res
    
def generate_and_store_output(file, reasoning_styles):
    new_res = []
    print(len(file))
    for item in file:
        new_i = {"label": item["label"], "entity": item["entity"], "file_name": item["file_name"], "text": item["text"]}
        for style in reasoning_styles:
            prompt = create_prompt(item["entity"], item["text"], item["label"], style)
            # This is where you would interface with your language model API
            output = generate(prompt)
            response = " ".join(output)
            print(response)
            prediction_key = f"pred_{style}_{item['label']}"
            new_i[prediction_key] = response
        new_res.append(new_i)
    return new_res

 ········


In [10]:
filename = "/home/yuj49/ConText/ConText_LLM/i2b2.json"
reasoning_styles = ["simple", "cot", "tot", "self_consistency"]
new_res = []
with open(filename, "r") as f:
    file = json.load(f)
    file = file[-1:]
    print(len(file))
    
    for idx, item in enumerate(file):
        new_i = {"label": item["label"], "entity": item["entity"], "file_name": item["file_name"], "text": item["text"]}
        for style in reasoning_styles:
            prompt = create_prompt(item["entity"], item["text"], item["label"], style)
            # This is where you would interface with your language model API
            output = generate(prompt)
            response = " ".join(output)
            print(response)
            prediction_key = f"pred_{style}_{item['label']}"
            new_i[prediction_key] = response
        print(idx)
        
        new_res.append(new_i)
    # results = generate_and_store_output(file, reasoning_styles)


1
  True .  Based  on  the  text  provided ,  the  patient  has  been  diagn osed  with  p neum onia .
  True .  Based  on  the  given  sentence ,  there  is  a  history  of  tra uma ,  con gest ive  heart  failure ,  fe ver ,  and  p neum onia ,  which  suggests  that  the  patient  may  be  experien cing  sympt oms  related  to  p neum onia .
  Sure !  Here ' s  my  assess ment : 
 
 True
  True .


In [11]:
new_res

[{'label': 'positive',
  'entity': 'pneumonia',
  'file_name': 'record-54',
  'text': 'HISTORY : Trauma , congestive heart failure , fever , pneumonia .\n',
  'pred_simple_positive': '  True .  Based  on  the  text  provided ,  the  patient  has  been  diagn osed  with  p neum onia .',
  'pred_cot_positive': '  True .  Based  on  the  given  sentence ,  there  is  a  history  of  tra uma ,  con gest ive  heart  failure ,  fe ver ,  and  p neum onia ,  which  suggests  that  the  patient  may  be  experien cing  sympt oms  related  to  p neum onia .',
  'pred_tot_positive': "  Sure !  Here ' s  my  assess ment : \n \n True",
  'pred_self_consistency_positive': '  True .'}]

In [8]:
i2b2_llama2 = "/home/yuj49/ConText/ConText_LLM/true_false.csv"
import pandas as pd
df = pd.read_csv(i2b2_llama2)
df_filtered = df[df['llama2'].str.contains('true|false', case=False, na=False)]
print(len(df_filtered))

5322


In [13]:
df_filtered.to_csv("true_false_1.csv", index=False)

In [16]:
new_df = pd.read_csv("/home/yuj49/ConText/ConText_LLM/true_false_1.csv")
# 预处理数据：合并因换行而分开的行
# 如果一行的最后有冒号':'，则将其与下一行合并
for index in new_df.index[:-1]:  # 遍历到倒数第二行
    if str(new_df.loc[index, 'llama2']).strip().endswith(':'):
        new_df.loc[index, 'llama2'] += ' ' + new_df.loc[index + 1, 'llama2']
        new_df.loc[index + 1, 'llama2'] = pd.NA  # 将下一行的内容设置为NaN

# 删除被设置为NaN的行
new_df.dropna(subset=['llama2'], inplace=True)

# 筛选包含"true"或"false"（不区分大小写）的行
df_filtered = new_df[new_df['llama2'].str.contains('true|false', case=False, na=False)]

# 打印筛选后的DataFrame的长度
print(len(df_filtered))


5299


In [5]:
with open("/home/yuj49/ConText/ConText_LLM/llama2_i2b2.json", "w") as f:
    json.dump(results, f)