In [None]:
from glob import glob
import random,os
import pandas as pd
import time

In [None]:
# MTSamples: evaluation corpus = every *.bio file of the test split.
dataset = 'MTSamples'  # read by run() when it builds the output directory name
# glob() yields paths in arbitrary, filesystem-dependent order. Sorting makes the
# processing order (and the optional seeded subsample below) reproducible.
files = sorted(glob('./data/MTSamples/test/*.bio'))
random.seed(42)
# Uncomment to evaluate on a fixed random subset of 100 files instead of the full split.
#files = random.sample(files,k=100)
len(files)

In [None]:
# Load the training split; the few-shot examples are sampled from these files.
# glob() returns paths in arbitrary order, so sort them first: otherwise the
# seeded random.sample() in get_few_shot_examples_merged would select different
# examples depending on the filesystem's listing order.
training_files = sorted(glob('./data/MTSamples/train/*.bio'))
random.seed(42)

# One entry per training file, holding that file's raw text.
all_sentences = []
for sample in training_files:
    with open(sample) as f_sample:
        all_sentences.append(f_sample.read())

def _split_bio_lines(sample):
    """Parse one raw BIO sample into a list of (token, tag) pairs, one per line."""
    pairs = []
    for line in sample.split('\n'):
        fields = line.split('\t')
        # Blank lines and lines without a tag column are treated as outside any entity.
        tag = fields[1] if len(fields) > 1 else 'O'
        pairs.append((fields[0], tag))
    return pairs


def _render_spans(pairs):
    """Join tokens with single spaces, wrapping each B-/I- run in a <span class="TYPE"> tag."""
    pieces = []
    i = 0
    while i < len(pairs):
        token, tag = pairs[i]
        if tag.startswith('B'):
            entity_type = tag.split('-')[-1]
            # Extend over the following I- tokens; stopping at the end of the
            # sample is fine, so an entity on the last line is still emitted.
            j = i + 1
            while j < len(pairs) and pairs[j][1].startswith('I'):
                j += 1
            entity_text = ' '.join(tok for tok, _ in pairs[i:j]).strip()
            pieces.append(f'<span class="{entity_type}">' + entity_text + '</span> ')
            i = j
        else:
            pieces.append(token + ' ')
            i += 1
    return ''.join(pieces)


def get_few_shot_examples_merged(all_sentences, k=5, seed=42, verbose=False):
    """Sample BIO-tagged sentences and render them as few-shot prompt examples.

    Each element of ``all_sentences`` is the raw text of one sample with one
    ``token<TAB>tag`` pair per line (tags such as ``B-problem``, ``I-problem``, ``O``).

    Entities are marked up by token position rather than by searching for the
    entity text in the joined sentence. Compared with a text search this
    (a) marks every occurrence of a repeated entity, (b) cannot hit a
    look-alike substring inside another token, (c) keeps an entity that ends
    on the last line, and (d) tolerates blank lines after an entity.

    Args:
        all_sentences: list of raw BIO strings to sample from.
        k: number of examples to draw (default 5).
        seed: passed to ``random.seed`` before sampling so repeated calls pick
            the same examples. This reseeds the module-level ``random`` generator.
        verbose: if true, print each plain sentence with its marked-up version.

    Returns:
        Tuple ``(one_shot_sentences, examples)`` of two parallel lists: the
        space-joined plain sentences, and the same sentences with entities
        wrapped in ``<span class="TYPE">...</span>``.

    Raises:
        ValueError: raised by ``random.sample`` when ``k`` exceeds ``len(all_sentences)``.
    """
    random.seed(seed)
    samples = random.sample(all_sentences, k=k)

    one_shot_sentences = []
    examples = []
    for sample in samples:
        pairs = _split_bio_lines(sample)
        # Every line (blank ones included) contributes "token + space"; this keeps
        # the plain sentence aligned character-for-character with the marked-up one.
        plain = ''.join(token + ' ' for token, _ in pairs)
        marked = _render_spans(pairs)
        if verbose:
            print(plain, marked)
        one_shot_sentences.append(plain)
        examples.append(marked)

    return one_shot_sentences,examples

In [None]:
# Keep only the samples whose raw text contains at least one 'B-' marker.
all_sentences = list(filter(lambda raw_text: 'B-' in raw_text, all_sentences))

In [None]:
# Draw the few-shot examples: plain sentences plus their <span>-annotated versions.
one_shot_sentences,examples = get_few_shot_examples_merged(all_sentences)

In [None]:
# Show the first sampled sentence, then its annotated counterpart.
for collection in (one_shot_sentences, examples):
    print(collection[0])

In [None]:
# Print every plain sentence, one blank separator line, then every annotated example.
for line in [*one_shot_sentences, '', *examples]:
    print(line)

# Official ChatGPT API

In [None]:
import openai
from glob import glob

In [None]:
def get_output(prompt,GPT):
    """Send ``prompt`` as a single user message and return the model's reply text.

    Args:
        prompt: the complete prompt string.
        GPT: model selector: ``3.5`` selects ``gpt-3.5-turbo-0301`` and ``4``
            selects ``gpt-4-0314``.

    Returns:
        The ``content`` string of the first choice of the chat completion.

    Raises:
        ValueError: if ``GPT`` is not a supported selector. Previously this
            surfaced as an ``UnboundLocalError`` on ``model``.
    """
    # Pinned model snapshots; alternative for 3.5: 'gpt-3.5-turbo'.
    models = {3.5: 'gpt-3.5-turbo-0301', 4: 'gpt-4-0314'}
    if GPT not in models:
        raise ValueError(f'Unsupported GPT selector {GPT!r}; expected one of {sorted(models)}')
    model = models[GPT]

    # Prefer the key from the environment so it never has to be committed with the
    # notebook; fall back to the original placeholder for backward compatibility.
    openai.api_key = os.environ.get('OPENAI_API_KEY', 'your api key')

    message = openai.ChatCompletion.create(
      model=model,
      temperature=0,
      messages=[
            {"role": "user", "content": prompt}
        ]
    )
    return message['choices'][0]['message']['content']

In [None]:
def run(GPT,prompt,prompt_type,max_retries=10,retry_delay=1.0):
    """Annotate every file in the global ``files`` list and save one HTML file per input.

    For each input file the first tab-separated column of every line is joined
    with spaces, substituted into ``prompt`` via ``str.format``, sent through
    ``get_output`` and the reply is written to
    ``./GPT{GPT}_output/{dataset}/temperature0/{prompt_type}/{file_name}.html``.

    Args:
        GPT: model selector forwarded to ``get_output``.
        prompt: template with one ``{}`` placeholder for the document text.
        prompt_type: name of the output sub-directory for this prompt variant.
        max_retries: how many times a failed request is retried before the file
            is skipped; ``None`` retries indefinitely.
        retry_delay: base delay in seconds between retries; doubled after each
            failure and capped at 60 seconds.

    Returns:
        None. Files that could not be processed are reported on stdout.
    """
    dir_path = f'./GPT{GPT}_output/{dataset}/temperature0/{prompt_type}/'
    os.makedirs(dir_path, exist_ok=True)

    failed = []
    for file in files:
        with open(file,'r') as f_read:
            text = ' '.join([line.split('\t')[0] for line in f_read.read().splitlines()])
        file_name = os.path.basename(file).split('.')[0]

        # Fill the template outside the retry loop: a malformed template is a
        # programming error and retrying it can never succeed.
        request = prompt.format(text)

        success = False
        attempt = 0
        while not success:
            try:
                output = get_output(request,GPT)
                success = True
            except Exception as e:
                attempt += 1
                print (e)
                if max_retries is not None and attempt > max_retries:
                    break
                # Back off so a rate-limited API is not hit in a tight loop.
                time.sleep(min(retry_delay * 2 ** min(attempt - 1, 6), 60.0))

        if not success:
            print (f'Giving up on {file} after {attempt} failed attempts')
            failed.append(file)
            continue

        # Write into the directory created above (the previous hard-coded absolute
        # path pointed somewhere else). The write stays outside the retry loop so
        # that a local I/O error surfaces instead of re-sending the request.
        with open(os.path.join(dir_path, f'{file_name}.html'),'w') as f_write:
            f_write.write(output)

    if failed:
        print (f'{len(failed)} file(s) were not processed: {failed}')

# GPT-3.5

### Zero-shot

In [None]:
# Zero-shot baseline on GPT-3.5: the prompt holds only the task description and
# the markup guide. The single '{}' is the slot that run() fills with the
# document text through prompt.format(text).
prompt = '''### Task
Your task is to generate an HTML version of an input text, marking up specific entities related to healthcare. The entities to be identified are: 'medical problems', 'treatments', and 'tests'. Use HTML <span> tags to highlight these entities. Each <span> should have a class attribute indicating the type of the entity.

### Entity Markup Guide
Use <span class="problem"> to denote a medical problem.
Use <span class="treatment"> to denote a treatment.
Use <span class="test"> to denote a test.
Leave the text as it is if no such entities are found.

### Input Text: {}
### Output Text:
'''
GPT = 3.5
# prompt_type becomes the name of the output sub-directory used by run().
prompt_type = 'merged_prompt'
run(GPT,prompt,prompt_type)

### Zero-shot with definition

In [None]:
# Zero-shot + entity definitions on GPT-3.5: task description, markup guide and
# a definition paragraph for each of the three entity types.
# NOTE(review): the definition text contains run-together words
# ("cellormolecular", "neoplasticprocess", "UMLScoverage", "abnormality,injury").
# They are left untouched here because editing the prompt changes the experiment;
# confirm whether they should be corrected.
prompt = '''### Task
Your task is to generate an HTML version of an input text, marking up specific entities related to healthcare. The entities to be identified are: 'medical problems', 'treatments', and 'tests'. Use HTML <span> tags to highlight these entities. Each <span> should have a class attribute indicating the type of the entity.

### Entity Markup Guide
Use <span class="problem"> to denote a medical problem.
Use <span class="treatment"> to denote a treatment.
Use <span class="test"> to denote a test.
Leave the text as it is if no such entities are found.

### Entity Definitions
Medical Problems are defined as: phrases that contain observations made by patients or clinicians about the patient’s body or mind that are thought to be abnormal or caused by a disease. They are loosely based on the UMLS semantic types of pathologic functions, disease or syndrome, mental or behavioral dysfunction, cellormolecular dysfunction, congenital abnormality, acquired abnormality,injury or poisoning, anatomic abnormality, neoplasticprocess, virus/bacterium, sign or symptom, but are not limited by UMLScoverage.
Treatments are defined as: phrases that describe procedures, interventions, and substances given to a patient in an effort to resolve a medical problem. They are loosely based on the UMLS semantic types therapeutic or preventive procedure, medical device, steroid, pharmacologic substance, biomedical or dental material, antibiotic, clinical drug, and drug delivery device.  Other concepts that are treatments but that may not be found in UMLS are also included. Treatments that a patient had, will have, may have in the future, or are explicitly mentioned that the patient will not have are all marked as treatments.
Tests are defined as: phrases that describe procedures, panels, and measures that are done to a patient or a body fluid or sample in order to discover, rule out, or find more information about a medical problem. They are loosely based on the UMLS semantic types laboratory procedure, diagnostic procedure, but also include instances not covered by UMLS.

### Input Text: {}
### Output Text:
'''
GPT = 3.5
# prompt_type becomes the name of the output sub-directory used by run().
prompt_type = 'merged_prompt_definition'
run(GPT,prompt,prompt_type)

### Zero-shot with definition and guidelines

In [None]:
# Zero-shot + entity definitions + annotation guidelines on GPT-3.5.
# The '{}' placeholder is filled with the document text by run().
prompt = '''### Task
Your task is to generate an HTML version of an input text, marking up specific entities related to healthcare. The entities to be identified are: 'medical problems', 'treatments', and 'tests'. Use HTML <span> tags to highlight these entities. Each <span> should have a class attribute indicating the type of the entity.

### Entity Markup Guide
Use <span class="problem"> to denote a medical problem.
Use <span class="treatment"> to denote a treatment.
Use <span class="test"> to denote a test.
Leave the text as it is if no such entities are found.

### Entity Definitions
Medical Problems are defined as: phrases that contain observations made by patients or clinicians about the patient’s body or mind that are thought to be abnormal or caused by a disease. They are loosely based on the UMLS semantic types of pathologic functions, disease or syndrome, mental or behavioral dysfunction, cellormolecular dysfunction, congenital abnormality, acquired abnormality,injury or poisoning, anatomic abnormality, neoplasticprocess, virus/bacterium, sign or symptom, but are not limited by UMLScoverage.
Treatments are defined as: phrases that describe procedures, interventions, and substances given to a patient in an effort to resolve a medical problem. They are loosely based on the UMLS semantic types therapeutic or preventive procedure, medical device, steroid, pharmacologic substance, biomedical or dental material, antibiotic, clinical drug, and drug delivery device.  Other concepts that are treatments but that may not be found in UMLS are also included. Treatments that a patient had, will have, may have in the future, or are explicitly mentioned that the patient will not have are all marked as treatments.
Tests are defined as: phrases that describe procedures, panels, and measures that are done to a patient or a body fluid or sample in order to discover, rule out, or find more information about a medical problem. They are loosely based on the UMLS semantic types laboratory procedure, diagnostic procedure, but also include instances not covered by UMLS.

### Annotation Guidelines
Only complete noun phrases (NPs) and adjective phrases (APs) should be marked. Terms that fit concept semantic rules, but that are only used as modifiers in a noun phrase should not be marked.
Include all modifiers with concepts when they appear in the same phrase except for assertion modifiers.
You can include up to one prepositional phrase (PP) following a markable concept if the PP does not contain a markable concept and either indicates an organ/body part or can be rearranged to eliminate the PP (we later call this the PP test).
Include articles and possessives.
Conjunctions and other syntax that denote lists should be included if they occur within the modifiers or are connected by a common set of modifiers. If the portions of the list are otherwise independent, they should not be included.  Similarly, when concepts are mentioned in more than one way in the same noun phrase (such as the definition of an acronym or where a generic and a brand name of a drug are used together), the concepts should be marked together.
Concepts should be mentioned in relation to the patient or someone else in the note. Section headers that provide formatting, but that are not specific to a person are not marked.

### Input Text: {}
### Output Text:
'''
GPT = 3.5
# prompt_type becomes the name of the output sub-directory used by run().
prompt_type = 'merged_prompt_definition_guidelines'
run(GPT,prompt,prompt_type)

### 1-shot

In [None]:
# 1-shot on GPT-3.5: task description, markup guide and one worked
# input/output example. The '{}' placeholder is filled with the document text
# by run().
prompt = '''### Task
Your task is to generate an HTML version of an input text, marking up specific entities related to healthcare. The entities to be identified are: 'medical problems', 'treatments', and 'tests'. Use HTML <span> tags to highlight these entities. Each <span> should have a class attribute indicating the type of the entity.

### Entity Markup Guide
Use <span class="problem"> to denote a medical problem.
Use <span class="treatment"> to denote a treatment.
Use <span class="test"> to denote a test.
Leave the text as it is if no such entities are found.

### Examples 
Example Input1: At the time of admission , he denied fever , diaphoresis , nausea , chest pain or other systemic symptoms .  
Example Output1: At the time of admission , he denied <span class="problem">fever</span> , <span class="problem">diaphoresis</span> , <span class="problem">nausea</span> , <span class="problem">chest pain</span> or other systemic symptoms .  

### Input Text: {}
### Output Text:
'''
GPT = 3.5
# prompt_type becomes the name of the output sub-directory used by run().
prompt_type = 'merged_prompt_oneshot'
run(GPT,prompt,prompt_type)

### 1-shot with definition

In [None]:
# 1-shot + entity definitions on GPT-3.5: task description, markup guide,
# definitions and one worked input/output example.
prompt = '''### Task
Your task is to generate an HTML version of an input text, marking up specific entities related to healthcare. The entities to be identified are: 'medical problems', 'treatments', and 'tests'. Use HTML <span> tags to highlight these entities. Each <span> should have a class attribute indicating the type of the entity.

### Entity Markup Guide
Use <span class="problem"> to denote a medical problem.
Use <span class="treatment"> to denote a treatment.
Use <span class="test"> to denote a test.
Leave the text as it is if no such entities are found.

### Entity Definitions
Medical Problems are defined as: phrases that contain observations made by patients or clinicians about the patient’s body or mind that are thought to be abnormal or caused by a disease. They are loosely based on the UMLS semantic types of pathologic functions, disease or syndrome, mental or behavioral dysfunction, cellormolecular dysfunction, congenital abnormality, acquired abnormality,injury or poisoning, anatomic abnormality, neoplasticprocess, virus/bacterium, sign or symptom, but are not limited by UMLScoverage.
Treatments are defined as: phrases that describe procedures, interventions, and substances given to a patient in an effort to resolve a medical problem. They are loosely based on the UMLS semantic types therapeutic or preventive procedure, medical device, steroid, pharmacologic substance, biomedical or dental material, antibiotic, clinical drug, and drug delivery device.  Other concepts that are treatments but that may not be found in UMLS are also included. Treatments that a patient had, will have, may have in the future, or are explicitly mentioned that the patient will not have are all marked as treatments.
Tests are defined as: phrases that describe procedures, panels, and measures that are done to a patient or a body fluid or sample in order to discover, rule out, or find more information about a medical problem. They are loosely based on the UMLS semantic types laboratory procedure, diagnostic procedure, but also include instances not covered by UMLS.

### Examples 
Example Input1: At the time of admission , he denied fever , diaphoresis , nausea , chest pain or other systemic symptoms .  
Example Output1: At the time of admission , he denied <span class="problem">fever</span> , <span class="problem">diaphoresis</span> , <span class="problem">nausea</span> , <span class="problem">chest pain</span> or other systemic symptoms .  

### Input Text: {}
### Output Text:
'''
GPT = 3.5
# prompt_type becomes the name of the output sub-directory used by run().
prompt_type = 'merged_prompt_definition_oneshot'
run(GPT,prompt,prompt_type)

### 1-shot with definition and guidelines

In [None]:
# 1-shot + entity definitions + annotation guidelines on GPT-3.5.
# The '{}' placeholder is filled with the document text by run().
prompt = '''### Task
Your task is to generate an HTML version of an input text, marking up specific entities related to healthcare. The entities to be identified are: 'medical problems', 'treatments', and 'tests'. Use HTML <span> tags to highlight these entities. Each <span> should have a class attribute indicating the type of the entity.

### Entity Markup Guide
Use <span class="problem"> to denote a medical problem.
Use <span class="treatment"> to denote a treatment.
Use <span class="test"> to denote a test.
Leave the text as it is if no such entities are found.

### Entity Definitions
Medical Problems are defined as: phrases that contain observations made by patients or clinicians about the patient’s body or mind that are thought to be abnormal or caused by a disease. They are loosely based on the UMLS semantic types of pathologic functions, disease or syndrome, mental or behavioral dysfunction, cellormolecular dysfunction, congenital abnormality, acquired abnormality,injury or poisoning, anatomic abnormality, neoplasticprocess, virus/bacterium, sign or symptom, but are not limited by UMLScoverage.
Treatments are defined as: phrases that describe procedures, interventions, and substances given to a patient in an effort to resolve a medical problem. They are loosely based on the UMLS semantic types therapeutic or preventive procedure, medical device, steroid, pharmacologic substance, biomedical or dental material, antibiotic, clinical drug, and drug delivery device.  Other concepts that are treatments but that may not be found in UMLS are also included. Treatments that a patient had, will have, may have in the future, or are explicitly mentioned that the patient will not have are all marked as treatments.
Tests are defined as: phrases that describe procedures, panels, and measures that are done to a patient or a body fluid or sample in order to discover, rule out, or find more information about a medical problem. They are loosely based on the UMLS semantic types laboratory procedure, diagnostic procedure, but also include instances not covered by UMLS.

### Annotation Guidelines
Only complete noun phrases (NPs) and adjective phrases (APs) should be marked. Terms that fit concept semantic rules, but that are only used as modifiers in a noun phrase should not be marked.
Include all modifiers with concepts when they appear in the same phrase except for assertion modifiers.
You can include up to one prepositional phrase (PP) following a markable concept if the PP does not contain a markable concept and either indicates an organ/body part or can be rearranged to eliminate the PP (we later call this the PP test).
Include articles and possessives.
Conjunctions and other syntax that denote lists should be included if they occur within the modifiers or are connected by a common set of modifiers. If the portions of the list are otherwise independent, they should not be included.  Similarly, when concepts are mentioned in more than one way in the same noun phrase (such as the definition of an acronym or where a generic and a brand name of a drug are used together), the concepts should be marked together.
Concepts should be mentioned in relation to the patient or someone else in the note. Section headers that provide formatting, but that are not specific to a person are not marked.

### Examples 
Example Input1: At the time of admission , he denied fever , diaphoresis , nausea , chest pain or other systemic symptoms .  
Example Output1: At the time of admission , he denied <span class="problem">fever</span> , <span class="problem">diaphoresis</span> , <span class="problem">nausea</span> , <span class="problem">chest pain</span> or other systemic symptoms .  

### Input Text: {}
### Output Text:
'''
GPT = 3.5
# prompt_type becomes the name of the output sub-directory used by run().
prompt_type = 'merged_prompt_definition_guidelines_oneshot'
run(GPT,prompt,prompt_type)

### 5-shot

In [None]:
# 5-shot on GPT-3.5: task description, markup guide and five worked
# input/output examples. The '{}' placeholder is filled with the document text
# by run().
prompt = '''### Task
Your task is to generate an HTML version of an input text, marking up specific entities related to healthcare. The entities to be identified are: 'medical problems', 'treatments', and 'tests'. Use HTML <span> tags to highlight these entities. Each <span> should have a class attribute indicating the type of the entity.

### Entity Markup Guide
Use <span class="problem"> to denote a medical problem.
Use <span class="treatment"> to denote a treatment.
Use <span class="test"> to denote a test.
Leave the text as it is if no such entities are found.

### Examples 
Example Input1: At the time of admission , he denied fever , diaphoresis , nausea , chest pain or other systemic symptoms .  
Example Output1: At the time of admission , he denied <span class="problem">fever</span> , <span class="problem">diaphoresis</span> , <span class="problem">nausea</span> , <span class="problem">chest pain</span> or other systemic symptoms .  
Example Input2: He had been diagnosed with osteoarthritis of the knees and had undergone arthroscopy years prior to admission .  
Example Output2: He had been diagnosed with <span class="problem">osteoarthritis of the knees</span> and had undergone <span class="test">arthroscopy</span> years prior to admission .  
Example Input3: After the patient was seen in the office on August 10 , she persisted with high fevers and was admitted on August 11 to Cottonwood Hospital .  
Example Output3: After the patient was seen in the office on August 10 , she persisted with <span class="problem">high fevers</span> and was admitted on August 11 to Cottonwood Hospital .  
Example Input4: HISTORY OF PRESENT ILLNESS : The patient is an 85 - year - old male who was brought in by EMS with a complaint of a decreased level of consciousness .  
Example Output4: HISTORY OF PRESENT ILLNESS : The patient is an 85 - year - old male who was brought in by EMS with a complaint of <span class="problem">a decreased level of consciousness</span> .  
Example Input5: Her lisinopril was increased to 40 mg daily .  
Example Output5: <span class="treatment">Her lisinopril</span> was increased to 40 mg daily .  

### Input Text: {}
### Output Text:
'''
GPT = 3.5
# prompt_type becomes the name of the output sub-directory used by run().
prompt_type = 'merged_prompt_fiveshot'
run(GPT,prompt,prompt_type)

### 5-shot with definition

In [None]:
# 5-shot + entity definitions on GPT-3.5: task description, markup guide,
# definitions and five worked input/output examples.
prompt = '''### Task
Your task is to generate an HTML version of an input text, marking up specific entities related to healthcare. The entities to be identified are: 'medical problems', 'treatments', and 'tests'. Use HTML <span> tags to highlight these entities. Each <span> should have a class attribute indicating the type of the entity.

### Entity Markup Guide
Use <span class="problem"> to denote a medical problem.
Use <span class="treatment"> to denote a treatment.
Use <span class="test"> to denote a test.
Leave the text as it is if no such entities are found.

### Entity Definitions
Medical Problems are defined as: phrases that contain observations made by patients or clinicians about the patient’s body or mind that are thought to be abnormal or caused by a disease. They are loosely based on the UMLS semantic types of pathologic functions, disease or syndrome, mental or behavioral dysfunction, cellormolecular dysfunction, congenital abnormality, acquired abnormality,injury or poisoning, anatomic abnormality, neoplasticprocess, virus/bacterium, sign or symptom, but are not limited by UMLScoverage.
Treatments are defined as: phrases that describe procedures, interventions, and substances given to a patient in an effort to resolve a medical problem. They are loosely based on the UMLS semantic types therapeutic or preventive procedure, medical device, steroid, pharmacologic substance, biomedical or dental material, antibiotic, clinical drug, and drug delivery device.  Other concepts that are treatments but that may not be found in UMLS are also included. Treatments that a patient had, will have, may have in the future, or are explicitly mentioned that the patient will not have are all marked as treatments.
Tests are defined as: phrases that describe procedures, panels, and measures that are done to a patient or a body fluid or sample in order to discover, rule out, or find more information about a medical problem. They are loosely based on the UMLS semantic types laboratory procedure, diagnostic procedure, but also include instances not covered by UMLS.

### Examples 
Example Input1: At the time of admission , he denied fever , diaphoresis , nausea , chest pain or other systemic symptoms .  
Example Output1: At the time of admission , he denied <span class="problem">fever</span> , <span class="problem">diaphoresis</span> , <span class="problem">nausea</span> , <span class="problem">chest pain</span> or other systemic symptoms .  
Example Input2: He had been diagnosed with osteoarthritis of the knees and had undergone arthroscopy years prior to admission .  
Example Output2: He had been diagnosed with <span class="problem">osteoarthritis of the knees</span> and had undergone <span class="test">arthroscopy</span> years prior to admission .  
Example Input3: After the patient was seen in the office on August 10 , she persisted with high fevers and was admitted on August 11 to Cottonwood Hospital .  
Example Output3: After the patient was seen in the office on August 10 , she persisted with <span class="problem">high fevers</span> and was admitted on August 11 to Cottonwood Hospital .  
Example Input4: HISTORY OF PRESENT ILLNESS : The patient is an 85 - year - old male who was brought in by EMS with a complaint of a decreased level of consciousness .  
Example Output4: HISTORY OF PRESENT ILLNESS : The patient is an 85 - year - old male who was brought in by EMS with a complaint of <span class="problem">a decreased level of consciousness</span> .  
Example Input5: Her lisinopril was increased to 40 mg daily .  
Example Output5: <span class="treatment">Her lisinopril</span> was increased to 40 mg daily .  

### Input Text: {}
### Output Text:
'''
GPT = 3.5
# prompt_type becomes the name of the output sub-directory used by run().
prompt_type = 'merged_prompt_definition_fiveshot'
run(GPT,prompt,prompt_type)

### 5-shot with definition and guidelines

In [None]:
# 5-shot + entity definitions + annotation guidelines on GPT-3.5.
# NOTE(review): this prompt's header reads '### Annotation Guidelines:' (with a
# trailing colon), unlike the other guideline prompts in this notebook, which
# use '### Annotation Guidelines'. Confirm whether the difference is intended.
prompt = '''### Task
Your task is to generate an HTML version of an input text, marking up specific entities related to healthcare. The entities to be identified are: 'medical problems', 'treatments', and 'tests'. Use HTML <span> tags to highlight these entities. Each <span> should have a class attribute indicating the type of the entity.

### Entity Markup Guide
Use <span class="problem"> to denote a medical problem.
Use <span class="treatment"> to denote a treatment.
Use <span class="test"> to denote a test.
Leave the text as it is if no such entities are found.

### Entity Definitions
Medical Problems are defined as: phrases that contain observations made by patients or clinicians about the patient’s body or mind that are thought to be abnormal or caused by a disease. They are loosely based on the UMLS semantic types of pathologic functions, disease or syndrome, mental or behavioral dysfunction, cellormolecular dysfunction, congenital abnormality, acquired abnormality,injury or poisoning, anatomic abnormality, neoplasticprocess, virus/bacterium, sign or symptom, but are not limited by UMLScoverage.
Treatments are defined as: phrases that describe procedures, interventions, and substances given to a patient in an effort to resolve a medical problem. They are loosely based on the UMLS semantic types therapeutic or preventive procedure, medical device, steroid, pharmacologic substance, biomedical or dental material, antibiotic, clinical drug, and drug delivery device.  Other concepts that are treatments but that may not be found in UMLS are also included. Treatments that a patient had, will have, may have in the future, or are explicitly mentioned that the patient will not have are all marked as treatments.
Tests are defined as: phrases that describe procedures, panels, and measures that are done to a patient or a body fluid or sample in order to discover, rule out, or find more information about a medical problem. They are loosely based on the UMLS semantic types laboratory procedure, diagnostic procedure, but also include instances not covered by UMLS.

### Annotation Guidelines:
Only complete noun phrases (NPs) and adjective phrases (APs) should be marked. Terms that fit concept semantic rules, but that are only used as modifiers in a noun phrase should not be marked.
Include all modifiers with concepts when they appear in the same phrase except for assertion modifiers.
You can include up to one prepositional phrase (PP) following a markable concept if the PP does not contain a markable concept and either indicates an organ/body part or can be rearranged to eliminate the PP (we later call this the PP test).
Include articles and possessives.
Conjunctions and other syntax that denote lists should be included if they occur within the modifiers or are connected by a common set of modifiers. If the portions of the list are otherwise independent, they should not be included.  Similarly, when concepts are mentioned in more than one way in the same noun phrase (such as the definition of an acronym or where a generic and a brand name of a drug are used together), the concepts should be marked together.
Concepts should be mentioned in relation to the patient or someone else in the note. Section headers that provide formatting, but that are not specific to a person are not marked.

### Examples 
Example Input1: At the time of admission , he denied fever , diaphoresis , nausea , chest pain or other systemic symptoms .  
Example Output1: At the time of admission , he denied <span class="problem">fever</span> , <span class="problem">diaphoresis</span> , <span class="problem">nausea</span> , <span class="problem">chest pain</span> or other systemic symptoms .  
Example Input2: He had been diagnosed with osteoarthritis of the knees and had undergone arthroscopy years prior to admission .  
Example Output2: He had been diagnosed with <span class="problem">osteoarthritis of the knees</span> and had undergone <span class="test">arthroscopy</span> years prior to admission .  
Example Input3: After the patient was seen in the office on August 10 , she persisted with high fevers and was admitted on August 11 to Cottonwood Hospital .  
Example Output3: After the patient was seen in the office on August 10 , she persisted with <span class="problem">high fevers</span> and was admitted on August 11 to Cottonwood Hospital .  
Example Input4: HISTORY OF PRESENT ILLNESS : The patient is an 85 - year - old male who was brought in by EMS with a complaint of a decreased level of consciousness .  
Example Output4: HISTORY OF PRESENT ILLNESS : The patient is an 85 - year - old male who was brought in by EMS with a complaint of <span class="problem">a decreased level of consciousness</span> .  
Example Input5: Her lisinopril was increased to 40 mg daily .  
Example Output5: <span class="treatment">Her lisinopril</span> was increased to 40 mg daily .  

### Input Text: {}
### Output Text:
'''
GPT = 3.5
# prompt_type becomes the name of the output sub-directory used by run().
prompt_type = 'merged_prompt_definition_guidelines_fiveshot'
run(GPT,prompt,prompt_type)

# GPT-4

### Zero-shot

In [None]:
# Zero-shot baseline on GPT-4: the prompt holds only the task description and
# the markup guide. The '{}' placeholder is filled with the document text by run().
prompt = '''### Task
Your task is to generate an HTML version of an input text, marking up specific entities related to healthcare. The entities to be identified are: 'medical problems', 'treatments', and 'tests'. Use HTML <span> tags to highlight these entities. Each <span> should have a class attribute indicating the type of the entity.

### Entity Markup Guide
Use <span class="problem"> to denote a medical problem.
Use <span class="treatment"> to denote a treatment.
Use <span class="test"> to denote a test.
Leave the text as it is if no such entities are found.

### Input Text: {}
### Output Text:
'''
GPT = 4
# prompt_type becomes the name of the output sub-directory used by run().
prompt_type = 'merged_prompt'
run(GPT,prompt,prompt_type)

### Zero-shot with definition

In [None]:
# Zero-shot + entity definitions on GPT-4: task description, markup guide and
# a definition paragraph for each of the three entity types.
prompt = '''### Task
Your task is to generate an HTML version of an input text, marking up specific entities related to healthcare. The entities to be identified are: 'medical problems', 'treatments', and 'tests'. Use HTML <span> tags to highlight these entities. Each <span> should have a class attribute indicating the type of the entity.

### Entity Markup Guide
Use <span class="problem"> to denote a medical problem.
Use <span class="treatment"> to denote a treatment.
Use <span class="test"> to denote a test.
Leave the text as it is if no such entities are found.

### Entity Definitions
Medical Problems are defined as: phrases that contain observations made by patients or clinicians about the patient’s body or mind that are thought to be abnormal or caused by a disease. They are loosely based on the UMLS semantic types of pathologic functions, disease or syndrome, mental or behavioral dysfunction, cellormolecular dysfunction, congenital abnormality, acquired abnormality,injury or poisoning, anatomic abnormality, neoplasticprocess, virus/bacterium, sign or symptom, but are not limited by UMLScoverage.
Treatments are defined as: phrases that describe procedures, interventions, and substances given to a patient in an effort to resolve a medical problem. They are loosely based on the UMLS semantic types therapeutic or preventive procedure, medical device, steroid, pharmacologic substance, biomedical or dental material, antibiotic, clinical drug, and drug delivery device.  Other concepts that are treatments but that may not be found in UMLS are also included. Treatments that a patient had, will have, may have in the future, or are explicitly mentioned that the patient will not have are all marked as treatments.
Tests are defined as: phrases that describe procedures, panels, and measures that are done to a patient or a body fluid or sample in order to discover, rule out, or find more information about a medical problem. They are loosely based on the UMLS semantic types laboratory procedure, diagnostic procedure, but also include instances not covered by UMLS.

### Input Text: {}
### Output Text:
'''
GPT = 4
# prompt_type becomes the name of the output sub-directory used by run().
prompt_type = 'merged_prompt_definition'
run(GPT,prompt,prompt_type)

### zero-shot with definition and guidelines

In [None]:
# Zero-shot variant: task description + markup guide + entity definitions +
# annotation guidelines, with no worked examples.
# The `{}` after "### Input Text:" is the slot for the text to annotate;
# presumably filled inside run() (defined outside this view) -- confirm.
# NOTE(review): the definitions contain run-together words ("cellormolecular",
# "neoplasticprocess", "UMLScoverage"). They are left untouched here because
# editing the prompt text would change the experimental condition.
prompt = '''### Task
Your task is to generate an HTML version of an input text, marking up specific entities related to healthcare. The entities to be identified are: 'medical problems', 'treatments', and 'tests'. Use HTML <span> tags to highlight these entities. Each <span> should have a class attribute indicating the type of the entity.

### Entity Markup Guide
Use <span class="problem"> to denote a medical problem.
Use <span class="treatment"> to denote a treatment.
Use <span class="test"> to denote a test.
Leave the text as it is if no such entities are found.

### Entity Definitions
Medical Problems are defined as: phrases that contain observations made by patients or clinicians about the patient’s body or mind that are thought to be abnormal or caused by a disease. They are loosely based on the UMLS semantic types of pathologic functions, disease or syndrome, mental or behavioral dysfunction, cellormolecular dysfunction, congenital abnormality, acquired abnormality,injury or poisoning, anatomic abnormality, neoplasticprocess, virus/bacterium, sign or symptom, but are not limited by UMLScoverage.
Treatments are defined as: phrases that describe procedures, interventions, and substances given to a patient in an effort to resolve a medical problem. They are loosely based on the UMLS semantic types therapeutic or preventive procedure, medical device, steroid, pharmacologic substance, biomedical or dental material, antibiotic, clinical drug, and drug delivery device.  Other concepts that are treatments but that may not be found in UMLS are also included. Treatments that a patient had, will have, may have in the future, or are explicitly mentioned that the patient will not have are all marked as treatments.
Tests are defined as: phrases that describe procedures, panels, and measures that are done to a patient or a body fluid or sample in order to discover, rule out, or find more information about a medical problem. They are loosely based on the UMLS semantic types laboratory procedure, diagnostic procedure, but also include instances not covered by UMLS.

### Annotation Guidelines:
Only complete noun phrases (NPs) and adjective phrases (APs) should be marked. Terms that fit concept semantic rules, but that are only used as modifiers in a noun phrase should not be marked.
Include all modifiers with concepts when they appear in the same phrase except for assertion modifiers.
You can include up to one prepositional phrase (PP) following a markable concept if the PP does not contain a markable concept and either indicates an organ/body part or can be rearranged to eliminate the PP (we later call this the PP test).
Include articles and possessives.
Conjunctions and other syntax that denote lists should be included if they occur within the modifiers or are connected by a common set of modifiers. If the portions of the list are otherwise independent, they should not be included.  Similarly, when concepts are mentioned in more than one way in the same noun phrase (such as the definition of an acronym or where a generic and a brand name of a drug are used together), the concepts should be marked together.
Concepts should be mentioned in relation to the patient or someone else in the note. Section headers that provide formatting, but that are not specific to a person are not marked.

### Input Text: {}
### Output Text:
'''
# prompt_type is the label for this variant; the same string is later passed
# to get_performance() as the prediction sub-directory name.
GPT = 4
prompt_type = 'merged_prompt_definition_guidelines'
run(GPT,prompt,prompt_type)

### 1-shot

In [None]:
# 1-shot variant: task description + markup guide + a single worked
# input/output example (no definitions, no guidelines).
# The `{}` after "### Input Text:" is the slot for the text to annotate;
# presumably filled inside run() (defined outside this view) -- confirm.
prompt = '''### Task
Your task is to generate an HTML version of an input text, marking up specific entities related to healthcare. The entities to be identified are: 'medical problems', 'treatments', and 'tests'. Use HTML <span> tags to highlight these entities. Each <span> should have a class attribute indicating the type of the entity.

### Entity Markup Guide
Use <span class="problem"> to denote a medical problem.
Use <span class="treatment"> to denote a treatment.
Use <span class="test"> to denote a test.
Leave the text as it is if no such entities are found.

### Examples 
Example Input1: At the time of admission , he denied fever , diaphoresis , nausea , chest pain or other systemic symptoms .  
Example Output1: At the time of admission , he denied <span class="problem">fever</span> , <span class="problem">diaphoresis</span> , <span class="problem">nausea</span> , <span class="problem">chest pain</span> or other systemic symptoms .  

### Input Text: {}
### Output Text:
'''
# prompt_type labels this variant for run(); presumably it also names the
# output directory that get_performance() reads from -- confirm against run().
GPT = 4
prompt_type = 'merged_prompt_oneshot'
run(GPT,prompt,prompt_type)

### 1-shot with definition

In [None]:
# 1-shot variant with entity definitions: task description + markup guide +
# definitions + a single worked input/output example (no guidelines).
# The `{}` after "### Input Text:" is the slot for the text to annotate;
# presumably filled inside run() (defined outside this view) -- confirm.
prompt = '''### Task
Your task is to generate an HTML version of an input text, marking up specific entities related to healthcare. The entities to be identified are: 'medical problems', 'treatments', and 'tests'. Use HTML <span> tags to highlight these entities. Each <span> should have a class attribute indicating the type of the entity.

### Entity Markup Guide
Use <span class="problem"> to denote a medical problem.
Use <span class="treatment"> to denote a treatment.
Use <span class="test"> to denote a test.
Leave the text as it is if no such entities are found.

### Entity Definitions
Medical Problems are defined as: phrases that contain observations made by patients or clinicians about the patient’s body or mind that are thought to be abnormal or caused by a disease. They are loosely based on the UMLS semantic types of pathologic functions, disease or syndrome, mental or behavioral dysfunction, cellormolecular dysfunction, congenital abnormality, acquired abnormality,injury or poisoning, anatomic abnormality, neoplasticprocess, virus/bacterium, sign or symptom, but are not limited by UMLScoverage.
Treatments are defined as: phrases that describe procedures, interventions, and substances given to a patient in an effort to resolve a medical problem. They are loosely based on the UMLS semantic types therapeutic or preventive procedure, medical device, steroid, pharmacologic substance, biomedical or dental material, antibiotic, clinical drug, and drug delivery device.  Other concepts that are treatments but that may not be found in UMLS are also included. Treatments that a patient had, will have, may have in the future, or are explicitly mentioned that the patient will not have are all marked as treatments.
Tests are defined as: phrases that describe procedures, panels, and measures that are done to a patient or a body fluid or sample in order to discover, rule out, or find more information about a medical problem. They are loosely based on the UMLS semantic types laboratory procedure, diagnostic procedure, but also include instances not covered by UMLS.

### Examples 
Example Input1: At the time of admission , he denied fever , diaphoresis , nausea , chest pain or other systemic symptoms .  
Example Output1: At the time of admission , he denied <span class="problem">fever</span> , <span class="problem">diaphoresis</span> , <span class="problem">nausea</span> , <span class="problem">chest pain</span> or other systemic symptoms .  

### Input Text: {}
### Output Text:
'''
# prompt_type labels this variant for run(); presumably it also names the
# output directory that get_performance() reads from -- confirm against run().
GPT = 4
prompt_type = 'merged_prompt_definition_oneshot'
run(GPT,prompt,prompt_type)

### 1-shot with definition and guidelines

In [None]:
# 1-shot variant with entity definitions and annotation guidelines:
# task description + markup guide + definitions + guidelines + one example.
# The `{}` after "### Input Text:" is the slot for the text to annotate;
# presumably filled inside run() (defined outside this view) -- confirm.
# NOTE(review): the example lines in this prompt carry a "#### " prefix, while
# the plain 1-shot prompt uses "Example Input1:" without it -- confirm the
# difference is intentional before comparing results across variants.
prompt = '''### Task
Your task is to generate an HTML version of an input text, marking up specific entities related to healthcare. The entities to be identified are: 'medical problems', 'treatments', and 'tests'. Use HTML <span> tags to highlight these entities. Each <span> should have a class attribute indicating the type of the entity.

### Entity Markup Guide
Use <span class="problem"> to denote a medical problem.
Use <span class="treatment"> to denote a treatment.
Use <span class="test"> to denote a test.
Leave the text as it is if no such entities are found.

### Entity Definitions
Medical Problems are defined as: phrases that contain observations made by patients or clinicians about the patient’s body or mind that are thought to be abnormal or caused by a disease. They are loosely based on the UMLS semantic types of pathologic functions, disease or syndrome, mental or behavioral dysfunction, cellormolecular dysfunction, congenital abnormality, acquired abnormality,injury or poisoning, anatomic abnormality, neoplasticprocess, virus/bacterium, sign or symptom, but are not limited by UMLScoverage.
Treatments are defined as: phrases that describe procedures, interventions, and substances given to a patient in an effort to resolve a medical problem. They are loosely based on the UMLS semantic types therapeutic or preventive procedure, medical device, steroid, pharmacologic substance, biomedical or dental material, antibiotic, clinical drug, and drug delivery device.  Other concepts that are treatments but that may not be found in UMLS are also included. Treatments that a patient had, will have, may have in the future, or are explicitly mentioned that the patient will not have are all marked as treatments.
Tests are defined as: phrases that describe procedures, panels, and measures that are done to a patient or a body fluid or sample in order to discover, rule out, or find more information about a medical problem. They are loosely based on the UMLS semantic types laboratory procedure, diagnostic procedure, but also include instances not covered by UMLS.

### Annotation Guidelines:
Only complete noun phrases (NPs) and adjective phrases (APs) should be marked. Terms that fit concept semantic rules, but that are only used as modifiers in a noun phrase should not be marked.
Include all modifiers with concepts when they appear in the same phrase except for assertion modifiers.
You can include up to one prepositional phrase (PP) following a markable concept if the PP does not contain a markable concept and either indicates an organ/body part or can be rearranged to eliminate the PP (we later call this the PP test).
Include articles and possessives.
Conjunctions and other syntax that denote lists should be included if they occur within the modifiers or are connected by a common set of modifiers. If the portions of the list are otherwise independent, they should not be included.  Similarly, when concepts are mentioned in more than one way in the same noun phrase (such as the definition of an acronym or where a generic and a brand name of a drug are used together), the concepts should be marked together.
Concepts should be mentioned in relation to the patient or someone else in the note. Section headers that provide formatting, but that are not specific to a person are not marked.

### Examples 
#### Example Input1: At the time of admission , he denied fever , diaphoresis , nausea , chest pain or other systemic symptoms .  
#### Example Output1: At the time of admission , he denied <span class="problem">fever</span> , <span class="problem">diaphoresis</span> , <span class="problem">nausea</span> , <span class="problem">chest pain</span> or other systemic symptoms .  

### Input Text: {}
### Output Text:
'''
# prompt_type labels this variant for run(); presumably it also names the
# output directory that get_performance() reads from -- confirm against run().
GPT = 4
prompt_type = 'merged_prompt_definition_guidelines_oneshot'
run(GPT,prompt,prompt_type)

### 5-shot

In [None]:
# 5-shot variant: task description + markup guide + five worked
# input/output examples (no definitions, no guidelines).
# The `{}` after "### Input Text:" is the slot for the text to annotate;
# presumably filled inside run() (defined outside this view) -- confirm.
prompt = '''### Task
Your task is to generate an HTML version of an input text, marking up specific entities related to healthcare. The entities to be identified are: 'medical problems', 'treatments', and 'tests'. Use HTML <span> tags to highlight these entities. Each <span> should have a class attribute indicating the type of the entity.

### Entity Markup Guide
Use <span class="problem"> to denote a medical problem.
Use <span class="treatment"> to denote a treatment.
Use <span class="test"> to denote a test.
Leave the text as it is if no such entities are found.

### Examples 
Example Input1: At the time of admission , he denied fever , diaphoresis , nausea , chest pain or other systemic symptoms .  
Example Output1: At the time of admission , he denied <span class="problem">fever</span> , <span class="problem">diaphoresis</span> , <span class="problem">nausea</span> , <span class="problem">chest pain</span> or other systemic symptoms .  
Example Input2: He had been diagnosed with osteoarthritis of the knees and had undergone arthroscopy years prior to admission .  
Example Output2: He had been diagnosed with <span class="problem">osteoarthritis of the knees</span> and had undergone <span class="test">arthroscopy</span> years prior to admission .  
Example Input3: After the patient was seen in the office on August 10 , she persisted with high fevers and was admitted on August 11 to Cottonwood Hospital .  
Example Output3: After the patient was seen in the office on August 10 , she persisted with <span class="problem">high fevers</span> and was admitted on August 11 to Cottonwood Hospital .  
Example Input4: HISTORY OF PRESENT ILLNESS : The patient is an 85 - year - old male who was brought in by EMS with a complaint of a decreased level of consciousness .  
Example Output4: HISTORY OF PRESENT ILLNESS : The patient is an 85 - year - old male who was brought in by EMS with a complaint of <span class="problem">a decreased level of consciousness</span> .  
Example Input5: Her lisinopril was increased to 40 mg daily .  
Example Output5: <span class="treatment">Her lisinopril</span> was increased to 40 mg daily .  

### Input Text: {}
### Output Text:
'''
# prompt_type labels this variant for run(); presumably it also names the
# output directory that get_performance() reads from -- confirm against run().
GPT = 4
prompt_type = 'merged_prompt_fiveshot'
run(GPT,prompt,prompt_type)

### 5-shot with definition

In [None]:
# 5-shot variant with entity definitions: task description + markup guide +
# definitions + five worked input/output examples (no guidelines).
# The `{}` after "### Input Text:" is the slot for the text to annotate;
# presumably filled inside run() (defined outside this view) -- confirm.
# NOTE(review): the example lines in this prompt carry a "#### " prefix, while
# the plain 5-shot prompt uses "Example InputN:" without it -- confirm the
# difference is intentional before comparing results across variants.
prompt = '''### Task
Your task is to generate an HTML version of an input text, marking up specific entities related to healthcare. The entities to be identified are: 'medical problems', 'treatments', and 'tests'. Use HTML <span> tags to highlight these entities. Each <span> should have a class attribute indicating the type of the entity.

### Entity Markup Guide
Use <span class="problem"> to denote a medical problem.
Use <span class="treatment"> to denote a treatment.
Use <span class="test"> to denote a test.
Leave the text as it is if no such entities are found.

### Entity Definitions
Medical Problems are defined as: phrases that contain observations made by patients or clinicians about the patient’s body or mind that are thought to be abnormal or caused by a disease. They are loosely based on the UMLS semantic types of pathologic functions, disease or syndrome, mental or behavioral dysfunction, cellormolecular dysfunction, congenital abnormality, acquired abnormality,injury or poisoning, anatomic abnormality, neoplasticprocess, virus/bacterium, sign or symptom, but are not limited by UMLScoverage.
Treatments are defined as: phrases that describe procedures, interventions, and substances given to a patient in an effort to resolve a medical problem. They are loosely based on the UMLS semantic types therapeutic or preventive procedure, medical device, steroid, pharmacologic substance, biomedical or dental material, antibiotic, clinical drug, and drug delivery device.  Other concepts that are treatments but that may not be found in UMLS are also included. Treatments that a patient had, will have, may have in the future, or are explicitly mentioned that the patient will not have are all marked as treatments.
Tests are defined as: phrases that describe procedures, panels, and measures that are done to a patient or a body fluid or sample in order to discover, rule out, or find more information about a medical problem. They are loosely based on the UMLS semantic types laboratory procedure, diagnostic procedure, but also include instances not covered by UMLS.

### Examples 
#### Example Input1: At the time of admission , he denied fever , diaphoresis , nausea , chest pain or other systemic symptoms .  
#### Example Output1: At the time of admission , he denied <span class="problem">fever</span> , <span class="problem">diaphoresis</span> , <span class="problem">nausea</span> , <span class="problem">chest pain</span> or other systemic symptoms .  
#### Example Input2: He had been diagnosed with osteoarthritis of the knees and had undergone arthroscopy years prior to admission .  
#### Example Output2: He had been diagnosed with <span class="problem">osteoarthritis of the knees</span> and had undergone <span class="test">arthroscopy</span> years prior to admission .  
#### Example Input3: After the patient was seen in the office on August 10 , she persisted with high fevers and was admitted on August 11 to Cottonwood Hospital .  
#### Example Output3: After the patient was seen in the office on August 10 , she persisted with <span class="problem">high fevers</span> and was admitted on August 11 to Cottonwood Hospital .  
#### Example Input4: HISTORY OF PRESENT ILLNESS : The patient is an 85 - year - old male who was brought in by EMS with a complaint of a decreased level of consciousness .  
#### Example Output4: HISTORY OF PRESENT ILLNESS : The patient is an 85 - year - old male who was brought in by EMS with a complaint of <span class="problem">a decreased level of consciousness</span> .  
#### Example Input5: Her lisinopril was increased to 40 mg daily .  
#### Example Output5: <span class="treatment">Her lisinopril</span> was increased to 40 mg daily .  

### Input Text: {}
### Output Text:
'''
# prompt_type labels this variant for run(); presumably it also names the
# output directory that get_performance() reads from -- confirm against run().
GPT = 4
prompt_type = 'merged_prompt_definition_fiveshot'
run(GPT,prompt,prompt_type)

### 5-shot with definition and guidelines

In [None]:
# 5-shot variant with entity definitions and annotation guidelines:
# task description + markup guide + definitions + guidelines + five examples.
# The `{}` after "### Input Text:" is the slot for the text to annotate;
# presumably filled inside run() (defined outside this view) -- confirm.
prompt = '''### Task
Your task is to generate an HTML version of an input text, marking up specific entities related to healthcare. The entities to be identified are: 'medical problems', 'treatments', and 'tests'. Use HTML <span> tags to highlight these entities. Each <span> should have a class attribute indicating the type of the entity.

### Entity Markup Guide
Use <span class="problem"> to denote a medical problem.
Use <span class="treatment"> to denote a treatment.
Use <span class="test"> to denote a test.
Leave the text as it is if no such entities are found.

### Entity Definitions
Medical Problems are defined as: phrases that contain observations made by patients or clinicians about the patient’s body or mind that are thought to be abnormal or caused by a disease. They are loosely based on the UMLS semantic types of pathologic functions, disease or syndrome, mental or behavioral dysfunction, cellormolecular dysfunction, congenital abnormality, acquired abnormality,injury or poisoning, anatomic abnormality, neoplasticprocess, virus/bacterium, sign or symptom, but are not limited by UMLScoverage.
Treatments are defined as: phrases that describe procedures, interventions, and substances given to a patient in an effort to resolve a medical problem. They are loosely based on the UMLS semantic types therapeutic or preventive procedure, medical device, steroid, pharmacologic substance, biomedical or dental material, antibiotic, clinical drug, and drug delivery device.  Other concepts that are treatments but that may not be found in UMLS are also included. Treatments that a patient had, will have, may have in the future, or are explicitly mentioned that the patient will not have are all marked as treatments.
Tests are defined as: phrases that describe procedures, panels, and measures that are done to a patient or a body fluid or sample in order to discover, rule out, or find more information about a medical problem. They are loosely based on the UMLS semantic types laboratory procedure, diagnostic procedure, but also include instances not covered by UMLS.

### Annotation Guidelines:
Only complete noun phrases (NPs) and adjective phrases (APs) should be marked. Terms that fit concept semantic rules, but that are only used as modifiers in a noun phrase should not be marked.
Include all modifiers with concepts when they appear in the same phrase except for assertion modifiers.
You can include up to one prepositional phrase (PP) following a markable concept if the PP does not contain a markable concept and either indicates an organ/body part or can be rearranged to eliminate the PP (we later call this the PP test).
Include articles and possessives.
Conjunctions and other syntax that denote lists should be included if they occur within the modifiers or are connected by a common set of modifiers. If the portions of the list are otherwise independent, they should not be included.  Similarly, when concepts are mentioned in more than one way in the same noun phrase (such as the definition of an acronym or where a generic and a brand name of a drug are used together), the concepts should be marked together.
Concepts should be mentioned in relation to the patient or someone else in the note. Section headers that provide formatting, but that are not specific to a person are not marked.

### Examples 
#### Example Input1: At the time of admission , he denied fever , diaphoresis , nausea , chest pain or other systemic symptoms .  
#### Example Output1: At the time of admission , he denied <span class="problem">fever</span> , <span class="problem">diaphoresis</span> , <span class="problem">nausea</span> , <span class="problem">chest pain</span> or other systemic symptoms .  
#### Example Input2: He had been diagnosed with osteoarthritis of the knees and had undergone arthroscopy years prior to admission .  
#### Example Output2: He had been diagnosed with <span class="problem">osteoarthritis of the knees</span> and had undergone <span class="test">arthroscopy</span> years prior to admission .  
#### Example Input3: After the patient was seen in the office on August 10 , she persisted with high fevers and was admitted on August 11 to Cottonwood Hospital .  
#### Example Output3: After the patient was seen in the office on August 10 , she persisted with <span class="problem">high fevers</span> and was admitted on August 11 to Cottonwood Hospital .  
#### Example Input4: HISTORY OF PRESENT ILLNESS : The patient is an 85 - year - old male who was brought in by EMS with a complaint of a decreased level of consciousness .  
#### Example Output4: HISTORY OF PRESENT ILLNESS : The patient is an 85 - year - old male who was brought in by EMS with a complaint of <span class="problem">a decreased level of consciousness</span> .  
#### Example Input5: Her lisinopril was increased to 40 mg daily .  
#### Example Output5: <span class="treatment">Her lisinopril</span> was increased to 40 mg daily .  

### Input Text: {}
### Output Text:
'''
# prompt_type labels this variant for run(); presumably it also names the
# output directory that get_performance() reads from -- confirm against run().
GPT = 4
prompt_type = 'merged_prompt_definition_guidelines_fiveshot'
run(GPT,prompt,prompt_type)

### after error analysis

In [None]:
# Final variant after error analysis: the 5-shot + definitions + guidelines
# prompt extended with three extra guideline sentences (vital signs are tests;
# specialists/services/facilities are not annotated; consultations are not
# tests).
# The `{}` after "### Input Text:" is the slot for the text to annotate;
# presumably filled inside run() (defined outside this view) -- confirm.
prompt = '''### Task
Your task is to generate an HTML version of an input text, marking up specific entities related to healthcare. The entities to be identified are: 'medical problems', 'treatments', and 'tests'. Use HTML <span> tags to highlight these entities. Each <span> should have a class attribute indicating the type of the entity.

### Entity Markup Guide
Use <span class="problem"> to denote a medical problem.
Use <span class="treatment"> to denote a treatment.
Use <span class="test"> to denote a test.
Leave the text as it is if no such entities are found.

### Entity Definitions
Medical Problems are defined as: phrases that contain observations made by patients or clinicians about the patient’s body or mind that are thought to be abnormal or caused by a disease. They are loosely based on the UMLS semantic types of pathologic functions, disease or syndrome, mental or behavioral dysfunction, cellormolecular dysfunction, congenital abnormality, acquired abnormality,injury or poisoning, anatomic abnormality, neoplasticprocess, virus/bacterium, sign or symptom, but are not limited by UMLScoverage.
Treatments are defined as: phrases that describe procedures, interventions, and substances given to a patient in an effort to resolve a medical problem. They are loosely based on the UMLS semantic types therapeutic or preventive procedure, medical device, steroid, pharmacologic substance, biomedical or dental material, antibiotic, clinical drug, and drug delivery device.  Other concepts that are treatments but that may not be found in UMLS are also included. Treatments that a patient had, will have, may have in the future, or are explicitly mentioned that the patient will not have are all marked as treatments.
Tests are defined as: phrases that describe procedures, panels, and measures that are done to a patient or a body fluid or sample in order to discover, rule out, or find more information about a medical problem. They are loosely based on the UMLS semantic types laboratory procedure, diagnostic procedure, but also include instances not covered by UMLS.

### Annotation Guidelines
Only complete noun phrases (NPs) and adjective phrases (APs) should be marked. Terms that fit concept semantic rules, but that are only used as modifiers in a noun phrase should not be marked.
Include all modifiers with concepts when they appear in the same phrase except for assertion modifiers.
You can include up to one prepositional phrase (PP) following a markable concept if the PP does not contain a markable concept and either indicates an organ/body part or can be rearranged to eliminate the PP (we later call this the PP test).
Include articles and possessives.
Conjunctions and other syntax that denote lists should be included if they occur within the modifiers or are connected by a common set of modifiers. If the portions of the list are otherwise independent, they should not be included.  Similarly, when concepts are mentioned in more than one way in the same noun phrase (such as the definition of an acronym or where a generic and a brand name of a drug are used together), the concepts should be marked together.
Concepts should be mentioned in relation to the patient or someone else in the note. Section headers that provide formatting, but that are not specific to a person are not marked.
Vital signs or vital signs with abnormal readings should be annotated as tests.
Medical specialists, services, or healthcare facilities should not be annotated, even if they might seem to fit into the categories of 'tests', 'treatments', or 'medical problems'. These entities are part of the healthcare delivery system and do not directly denote a test, treatment, or medical problem.
Consultation procedures should not be considered as tests.

### Examples 
Example Input1: At the time of admission , he denied fever , diaphoresis , nausea , chest pain or other systemic symptoms .  
Example Output1: At the time of admission , he denied <span class="problem">fever</span> , <span class="problem">diaphoresis</span> , <span class="problem">nausea</span> , <span class="problem">chest pain</span> or other systemic symptoms .  
Example Input2: He had been diagnosed with osteoarthritis of the knees and had undergone arthroscopy years prior to admission .  
Example Output2: He had been diagnosed with <span class="problem">osteoarthritis of the knees</span> and had undergone <span class="test">arthroscopy</span> years prior to admission .  
Example Input3: After the patient was seen in the office on August 10 , she persisted with high fevers and was admitted on August 11 to Cottonwood Hospital .  
Example Output3: After the patient was seen in the office on August 10 , she persisted with <span class="problem">high fevers</span> and was admitted on August 11 to Cottonwood Hospital .  
Example Input4: HISTORY OF PRESENT ILLNESS : The patient is an 85 - year - old male who was brought in by EMS with a complaint of a decreased level of consciousness .  
Example Output4: HISTORY OF PRESENT ILLNESS : The patient is an 85 - year - old male who was brought in by EMS with a complaint of <span class="problem">a decreased level of consciousness</span> .  
Example Input5: Her lisinopril was increased to 40 mg daily .  
Example Output5: <span class="treatment">Her lisinopril</span> was increased to 40 mg daily .  

### Input Text: {}
### Output Text:
'''
# This cell runs the prompt with GPT 3.5 (every earlier prompt cell in this
# section sets GPT = 4); the next cell reuses the same `prompt` with GPT = 4.
GPT = 3.5
prompt_type = 'merged_prompt_definition_guidelines_fiveshot_after_error_analysis'
run(GPT,prompt,prompt_type)

In [None]:
# Re-run the after-error-analysis prompt with GPT = 4. This cell does not
# assign `prompt`; it relies on the value left in the kernel by the previous
# cell, so that cell must have been executed first.
GPT = 4
prompt_type = 'merged_prompt_definition_guidelines_fiveshot_after_error_analysis'
run(GPT,prompt,prompt_type)

# evaluation

In [None]:
from bs4 import BeautifulSoup as bs
from bs4 import NavigableString, Tag
from glob import glob
import spacy

# Load the spaCy "en_core_web_lg" pipeline once at module level; html2bio()
# calls py_nlp to tokenize the text found inside predicted HTML tags.
py_nlp = spacy.load ("en_core_web_lg")

In [None]:
def html2bio(html_path):
    """Convert a model-generated HTML prediction file into BIO-tagged lines.

    The file is read, any preamble up to a recognised answer marker is
    discarded, an optional ``<body>`` / ``<p>`` wrapper is unwrapped, and the
    remaining markup is parsed with BeautifulSoup.  Only the top-level children
    of the parsed fragment are visited:

    * plain text is split on whitespace and every word is tagged ``O``;
    * a tag whose first ``class`` value is ``problem``, ``treatment`` or
      ``test`` is tokenized with ``py_nlp``; its first token is tagged
      ``B-<class>`` and the remaining tokens ``I-<class>``;
    * any other tag is tokenized with ``py_nlp`` and every token is tagged
      ``O``.

    Args:
        html_path: Path of the prediction file to read.

    Returns:
        A list of strings, each of the form ``"<token>\\t<tag>\\n"``.
    """
    entity_types = ('problem', 'treatment', 'test')

    # Read the whole file up front so the handle is closed before parsing.
    with open(html_path) as f:
        html = f.read()

    # Strip everything up to (and one character past) each answer marker.
    # The search is case-insensitive, so the markers must be lowercase: the
    # original compared '***Highlighted Text***' against the lowercased text,
    # which could never match.
    for marker in ('***output***', 'output:', 'output text', '***highlighted text***'):
        lowered = html.lower()
        if marker in lowered:
            html = html[lowered.index(marker) + len(marker) + 1:]

    # Unwrap <body>...</body> and then <p>...</p>.  If the closing tag is
    # missing (e.g. a truncated generation) keep everything after the opening
    # tag instead of raising ValueError.
    for open_tag, close_tag in (('<body>', '</body>'), ('<p>', '</p>')):
        if open_tag in html:
            start = html.index(open_tag) + len(open_tag)
            end = html.find(close_tag, start)
            html = html[start:end] if end != -1 else html[start:]

    soup = bs(html, "html.parser")

    bio_format = []
    for child in soup.children:
        if isinstance(child, NavigableString):
            # Text outside any tag: whitespace tokens, all outside an entity.
            for word in child.split():
                bio_format.append(f"{word}\tO\n")
        elif isinstance(child, Tag):
            words = py_nlp(child.get_text())
            if len(words) == 0:
                continue

            # A missing or empty class attribute means "not an entity".
            classes = child.attrs.get('class') or []
            entity = classes[0] if classes else 'O'

            if entity in entity_types:
                for position, word in enumerate(words):
                    prefix = 'B' if position == 0 else 'I'
                    bio_format.append(f"{word}\t{prefix}-{entity}\n")
            else:
                for word in words:
                    bio_format.append(f"{word}\tO\n")
    return bio_format

In [None]:
def get_performance(GPT,prompt):
    all_tags = []
    all_tokens = []
    gold_tags = []

    for file in files:
        file_name = file.split('/')[-1].split('.')[0]
        with open(file) as f_gold:
            lines = f_gold.readlines()
            tokens = [line.strip().split('\t')[0] for line in lines]
            tags = [line.strip().split('\t')[-1] for line in lines]

            prediction = f'/data/yhu5/chatgpt/GPT{GPT}_output/{dataset}/temperature0/{prompt}/{file_name}.html'

            bio_2 = html2bio(prediction)
            all_tokens += tokens

            for i,token in enumerate(tokens):
                if token != '':
                    match = False
                    for i2 in range(i,-1,-1):
                        try:
                            token_2,tag_2 = bio_2[i2].strip().split('\t')
                        except:
                            token_2,tag_2 = None, None
                        
                        if token_2!=None:
                            if token in token_2 or token_2 in token:
                                match = True
                                break

                    if not match:
                        tag_2 = 'O'
                else:
                    tag_2 = ''
                #print (token,tags[i],tag_2)
                gold_tags.append(tags[i])
                all_tags.append(tag_2)
    with open('./merged_gold_pre.bio','w') as fg:
        for i,(token,gold_tag,all_tag) in enumerate(zip(all_tokens,gold_tags,all_tags)):
            if token!='':

                fg.write(f'{token}\t{all_tag}\t{gold_tag}\n')
            else:
                fg.write(f'\n')
    !python /data/yhu5/CLAMP/melaxdev-deepmeddocker_tf-28b7a60e460b/dockersimple/evaluate_jianfu_new.py -lf /data/yhu5/chatgpt/merged_gold_pre.bio

In [None]:
# GPT-3.5: score the three prompt variants that carry no shot-count suffix.
GPT = 3.5
for prompt in (
    'merged_prompt',
    'merged_prompt_definition',
    'merged_prompt_definition_guidelines',
):
    get_performance(GPT, prompt)

In [None]:
# GPT-3.5: score the three "_oneshot" prompt variants.
GPT = 3.5
for prompt in (
    'merged_prompt_oneshot',
    'merged_prompt_definition_oneshot',
    'merged_prompt_definition_guidelines_oneshot',
):
    get_performance(GPT, prompt)

In [None]:
# GPT-3.5: score the three "_fiveshot" prompt variants.
GPT = 3.5
for prompt in (
    'merged_prompt_fiveshot',
    'merged_prompt_definition_fiveshot',
    'merged_prompt_definition_guidelines_fiveshot',
):
    get_performance(GPT, prompt)

In [None]:
# GPT-4: score the three prompt variants that carry no shot-count suffix.
GPT = 4
for prompt in (
    'merged_prompt',
    'merged_prompt_definition',
    'merged_prompt_definition_guidelines',
):
    get_performance(GPT, prompt)

In [None]:
# GPT-4: score the three "_oneshot" prompt variants.
GPT = 4
for prompt in (
    'merged_prompt_oneshot',
    'merged_prompt_definition_oneshot',
    'merged_prompt_definition_guidelines_oneshot',
):
    get_performance(GPT, prompt)

In [None]:
# GPT-4: score the three "_fiveshot" prompt variants.
GPT = 4
for prompt in (
    'merged_prompt_fiveshot',
    'merged_prompt_definition_fiveshot',
    'merged_prompt_definition_guidelines_fiveshot',
):
    get_performance(GPT, prompt)

In [None]:
# Score the "after_error_analysis" prompt for both model versions, printing
# the prompt name ahead of each evaluator report.
prompt = 'merged_prompt_definition_guidelines_fiveshot_after_error_analysis'
for GPT in (3.5, 4):
    print ('\n',prompt)
    get_performance(GPT, prompt)