In [1]:
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import pandas as pd
import torch
import sys

In [2]:
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")
model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf")
#model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf")
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [5]:
PROMPTS = {
    "after_reading_what_question": """<<SYS>>\nYou're a doctor and you were given the following EMR by another doctor. You have some questions for the doctor who gave that EMR to you to get more details about the patient. \n<</SYS>>\n\nGiven the EMR: \"{text}\"\nAfter reading the above EMR, what question do you have about "{trigger}"?\nQuestion:""",
}
template = PROMPTS["after_reading_what_question"]

In [6]:
device = torch.device("cuda")
model = model.to(device)
output_path = "results/llama2_generated.txt"
results = []
baseline = pd.read_csv("baselines/baseline_with_context.csv", sep='\t')
prompts = []

for index, row in baseline.iterrows():
    context = row["context"]
    trigger = row["trigger"]
    context = context.strip().rstrip(".")
    prompt = "<s>[INST] " + template.format(text=context.strip(), trigger=trigger) + "\n[/INST]\n"
    prompts.append(prompt)

In [8]:
print(prompts[4])

<s>[INST] <<SYS>>
You're a doctor and you were given the following EMR by another doctor. You have some questions for the doctor who gave that EMR to you to get more details about the patient. 
<</SYS>>

Given the EMR: "He was given Zosyn for empiric treatment . Gram stain and culture of the ascites failed to identify any organism . He was weaned from oxygen requirement and was continued on normal saline boluses and. intravenous albumin for treatment of prerenal azotemia . Somatostatin and midodrine were also given for this condition and he also received packed red blood cells . On postoperative day number ten , the patient returned to the floor with improved renal function and without oxygen requirement . The somatostatin and the midodrine were discontinued and the regimen of Lasix and spironolactone was started for diuresis"
After reading the above EMR, what question do you have about "prerenal azotemia"?
Question:
[/INST]



In [13]:
model.eval()
text_gen = pipeline(
    task="text-generation", 
    model=model, 
    tokenizer=tokenizer, 
    max_length=500,
    device=device,
    do_sample=True,
    temperature=0.5,
    top_p=0.5,
)

KeyError: "Unknown task text-completion, available tasks are ['audio-classification', 'automatic-speech-recognition', 'conversational', 'depth-estimation', 'document-question-answering', 'feature-extraction', 'fill-mask', 'image-classification', 'image-segmentation', 'image-to-image', 'image-to-text', 'mask-generation', 'ner', 'object-detection', 'question-answering', 'sentiment-analysis', 'summarization', 'table-question-answering', 'text-classification', 'text-generation', 'text-to-audio', 'text-to-speech', 'text2text-generation', 'token-classification', 'translation', 'video-classification', 'visual-question-answering', 'vqa', 'zero-shot-audio-classification', 'zero-shot-classification', 'zero-shot-image-classification', 'zero-shot-object-detection', 'translation_XX_to_YY']"

In [10]:
outputs = text_gen(prompts)

In [12]:
print(outputs[0][0]['generated_text'])

<s>[INST] <<SYS>>
You're a doctor and you were given the following EMR by another doctor. You have some questions for the doctor who gave that EMR to you to get more details about the patient. 
<</SYS>>

Given the EMR: "Service :. MEDICINE. Allergies :. Patient recorded as having No Known Allergies to Drugs. Attending : Sarah Q. George , M.D. Chief Complaint :. Hip fracture. Major Surgical or Invasive Procedure :. OPERATIVE PROCEDURE :. 1. Closed reduction of right hip with open placement of dynamic hip screw , right hip . 2. Examination under anesthesia with placement of splint , right elbow . PROCEDURE #2 :. 1. Exploratory laparotomy with drainage of intra-abdominal abscess and peritonitis with gross soilage with bile and gastric contents"
After reading the above EMR, what question do you have about "Hip fracture"?
Question:
[/INST]

[INST] <<SYS>>
You're a doctor and you were given the following EMR by another doctor. You have some questions for the doctor who gave that EMR to you t

In [45]:
q_list = []
for i in range(1200):
    txt_file = f"results/llama2/7b/generated/{i}.txt"
    with open(txt_file, "r", encoding="utf-8") as f:
        txt = f.read()
        parts = txt.split('[/INST]')
        if len(parts) > 1:
            q = parts[1].strip()
            a = re.match("(.*?)\?",q)
            if a is None:
                q == ""
            else:
                q = a.group()
        q_list.append(q.replace("\n"," "))

In [48]:
with open(output_path, "w", encoding="utf-8") as f:
    for i, q in enumerate(q_list):
        f.write(q)
        f.write("\n")

In [1]:
from transformers import AutoTokenizer, LongT5ForConditionalGeneration
import pandas as pd
import torch
import sys

model_name = "longt5"
print(model_name)
device = torch.device("cuda")
model = None
tokenizer = None

longt5


In [2]:
if model_name == "longt5":
    tokenizer = AutoTokenizer.from_pretrained("google/long-t5-tglobal-large")
    model = LongT5ForConditionalGeneration.from_pretrained("results/long-t5/checkpoint-2610").to(device)
    output_path = "results/long-t5/longt5_finetuned_generated.txt"

elif model_name == "bart":
    from transformers import BartForConditionalGeneration, BartTokenizer
    model = BartForConditionalGeneration.from_pretrained("discq/model_outputs/bart_triggers/results/checkpoint-1000", forced_bos_token_id=0).to(device)
    tokenizer = BartTokenizer.from_pretrained("facebook/bart-large")
    output_path = "results/bart/bart_finetuned_generated.txt"

else:
    print("Wrong model name")

In [7]:
from tqdm import tqdm
results = []
baseline = pd.read_csv("baselines/baseline_with_context.csv", sep='\t')
for index, row in baseline.iterrows():
    context = row["context"]
    trigger = row["trigger"]
    context = context.strip().rstrip(".")
    prompt = f"{context}\nAfter reading the above EMR, what question do you have about \"{trigger}\"?\nQuestion:"
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    input_ids = inputs.input_ids
    outputs = model.generate(input_ids,
                             max_length=500,
                             num_beams=3,
                             temperature=0.5,
                             top_p=0.5,
                             do_sample=True,
                             num_return_sequences=1,)
    results.append(tokenizer.decode(outputs[0], skip_special_tokens=True))




KeyboardInterrupt: 

In [None]:
with open(output_path, "w", encoding="utf-8") as f:
    for txt in results:
        f.write(txt.replace("\n", ". ").strip())
        f.write("\n")