In [21]:
from openai import OpenAI

def format_paragraphs_numbered(l):
    """Render (index, text) pairs as prompt lines.

    Each pair becomes one line of the form ``#<index> - "<text>"`` terminated
    by a newline; an empty iterable yields an empty string.
    """
    return "".join(f"#{index} - \"{paragraph}\"\n" for index, paragraph in l)


# --- The Refined Prompt Template ---
# Placeholders filled with str.format: {patient_question}, {question},
# {formatted_relevant_sentences}. The template must not contain any other
# curly braces, otherwise .format() raises.
#
# The few-shot "Output Answer" obeys the output rules stated in the prompt:
# every sentence ends with a citation and nothing is stated that is not in
# the example paragraphs.
CONSOLIDATED_USER_PROMPT_TEMPLATE = """**Goal:** Create a concise (~70 words) summary answering the **Clinical Question** using *only* the information present in the **Relevant paragraphs**.

The questions are asked through a patient portal.

**Patient Question:**
{patient_question}

**Clinical Question (Derived from patient question):**
{question}

**Relevant paragraphs (with 1-based indices):**
{formatted_relevant_sentences}

**Citation style**: Paragraph's index between | | symbols. For example: |1| or |2,7| or |1,2,3,4| or |5,7,9|. Citations must be comma separated.

**Output rules**:
 - All sentences MUST HAVE citations at the end of the sentence.
 - Use your space wisely, avoid repeating citations unless necessary for clarity and fluency.
 - Try to use all relevant paragraphs given.
 - Your output MUST BE a summary of the information in the relevant paragraphs, given as an answer to the patient question/clinical question.

**Example for reference:**

*Internal Chain of Thought (Example - you don't output this):*
1.  *Understand Question:* Why was the emergency salvage repair surgery done? Need the reason for the surgery.
2.  *Analyze Paragraphs:*
    *   |1| Mentions rupture of thoracoabdominal aortic aneurysm requiring emergent repair. -> Reason for surgery.
    *   |2| Details the type of repair: salvage repair, 34-mm Dacron tube graft, deep hypothermic circulatory arrest. -> How surgery was done.
    *   |3| Mentions immediate transfer to OR upon arrival. -> Urgency context.
    *   |8| Describes wound healing post-op, notes a small open area but generally healing well. -> Outcome/Recovery context, explains lengthy stay indirectly.
3.  *Synthesize & Plan:*
    *   Point 1: The cause was a ruptured aneurysm needing immediate surgery (from |1|).
    *   Point 2: The surgery was a complex salvage repair with specific techniques (from |2|, |3| supports urgency).
    *   Point 3: Recovery involved hospitalization and wound healing (from |8|, implies complexity/severity).
    *   Plan: Sentence 1 combines cause/need (|1|). Sentence 2 describes the surgery (|2|, |3| context implied). Sentence 3 describes recovery/reason for long stay (|8|).
4.  *Draft (Iterative):*
    *   "His aortic aneurysm required emergent surgical intervention because it ruptured." Add |1|. -> "His aortic aneurysm required emergent surgical intervention because it ruptured. |1|" (Word count: 12) - *Refinement: Make it more specific based on |1|.* -> "His surgery was necessary because of a ruptured thoracoabdominal aortic aneurysm, requiring emergent intervention. |1|" (Word count: 16)
    *   "He had a salvage repair with a tube graft and circulatory arrest." Add |2|, |3|. -> "He underwent an emergent salvage repair using a 34-mm Dacron tube graft and deep hypothermic circulatory arrest. |2,3|" (Word count: 20. Total: 36)
    *   "His long hospital stay was due to the surgery's complexity, but the wound is healing." Add |8|. -> "The extended hospital stay reflects the surgery's severity, although his wound is now healing well with only a small open area remaining. |8|" (Word count: 26. Total: 62)
5.  *Final Review:* Answers question (why surgery: rupture). Uses only provided text. All sentences cited correctly. ~62 words. Cites paragraphs 1, 2, 3, 8. Seems good.

**Direct example:**

*Patient Question:*
Took my 59 yo father to ER ultrasound discovered he had an aortic aneurysm. He had a salvage repair (tube graft). Long surgery / recovery for couple hours then removed packs. why did they do this surgery????? After this time he spent 1 month in hospital now sent home. Why did they perform the emergency salvage repair on him?\n\nHe was transferred to the hospital on 2025-1-20 for emergent repair of his ruptured thoracoabdominal aortic aneurysm. He was immediately taken to the operating room where he underwent an emergent salvage repair of ruptured thoracoabdominal aortic aneurysm with a 34-mm Dacron tube graft using deep hypothermic circulatory arrest. Thoracoabdominal wound healing well with exception of very small open area mid wound that is @1cm around and 1/2cm deep, no surrounding erythema.

*Output Answer:*
His aortic aneurysm was caused by the rupture of a thoracoabdominal aortic aneurysm, which required emergent surgical intervention. |1|\nHe underwent a complex salvage repair using a 34-mm Dacron tube graft and deep hypothermic circulatory arrest to address the rupture. |2,3|\nThe extended recovery time and hospital stay were necessary due to the severity of the rupture and the complexity of the surgery, though his wound is now healing well with only a small open area noted. |8|

Do not add any additional information beside the answer such as "Your summary: ".
**Your answer should be a summary of the information in the relevant paragraphs you found.**
"""

# Variant used when the full clinical note is supplied instead of a
# pre-filtered set of relevant paragraphs. It is identical to the template
# above except that the paragraph section is labelled "Clinical Note
# Paragraphs" (in the goal line and in the section header), so it is derived
# from it rather than maintained as a second copy.
CONSOLIDATED_USER_PROMPT_TEMPLATE_NO_RELEVANT = CONSOLIDATED_USER_PROMPT_TEMPLATE.replace(
    "**Relevant paragraphs", "**Clinical Note Paragraphs"
)

# OpenAI-compatible endpoint served on localhost. The api_key value is a
# placeholder string, not a real credential.
_LOCAL_BASE_URL = "http://localhost:8888/v1"
client = OpenAI(api_key="anything", base_url=_LOCAL_BASE_URL)

def predict(question, patient_question, sentences, messages=None, relevant_few=None,
            model="loaded-model", temperature=0.1, max_tokens=350):
    """Ask the chat model for a cited summary and return the updated conversation.

    Args:
        question: Clinical question inserted into the prompt.
        patient_question: Patient's own wording of the question.
        sentences: Iterable of (index, text) pairs rendered as the numbered
            paragraphs of the prompt. Ignored when ``messages`` is given.
        messages: Existing conversation to continue. When provided, no new
            prompt is built and the list is extended **in place** with the
            assistant reply.
        relevant_few: ``None`` when ``sentences`` is already the filtered set
            of relevant paragraphs. Otherwise ``sentences`` is treated as the
            full note (the "Clinical Note Paragraphs" template is used) and,
            if ``relevant_few`` is non-empty, a follow-up user message asks
            the model to refer to these (index, text) pairs.
        model: Model name sent to the server.
        temperature: Sampling temperature for the completion.
        max_tokens: Completion token limit.

    Returns:
        ``(messages, answer)`` where ``messages`` includes the new assistant
        turn and ``answer`` is its text (``""`` if the server returned no
        content).
    """
    if messages is None:
        template = (
            CONSOLIDATED_USER_PROMPT_TEMPLATE
            if relevant_few is None
            else CONSOLIDATED_USER_PROMPT_TEMPLATE_NO_RELEVANT
        )
        messages = [
            {
                "role": "user",
                "content": template.format(
                    question=question,
                    patient_question=patient_question,
                    formatted_relevant_sentences=format_paragraphs_numbered(sentences),
                ),
            }
        ]

        if relevant_few:
            messages.append(
                {
                    "role": "user",
                    "content": f"If you can, refer to these paragraphs in your answer:\n {format_paragraphs_numbered(relevant_few)}"
                }
            )

    response = client.chat.completions.create(
        messages=messages,
        stream=False,
        model=model,
        temperature=temperature,
        max_tokens=max_tokens,
    )

    # `content` is optional in the chat-completions response; fall back to an
    # empty string so the regex-based post-processing never receives None.
    answer = response.choices[0].message.content or ""

    messages.append({"role": "assistant", "content": answer})

    return messages, answer

In [22]:
import re

def extract_citations(text):
    """Collect every integer cited inside ``|...|`` blocks of ``text``.

    Each block's content is split on commas; blank pieces are skipped and
    pieces that are not integers are reported on stdout and ignored.

    Returns:
        A set of the cited integers (empty when nothing is cited).
    """
    cited = set()
    for block in re.finditer(r'\|(.*?)\|', text):
        for token in block.group(1).split(','):
            token = token.strip()
            if not token:
                continue
            try:
                cited.add(int(token))
            except ValueError:
                print(f"Error converting citation to int: {token}")
    return cited


In [23]:
import re

# Citation block written as a range, e.g. |2-5| or |2 - 5|.
_CITATION_RANGE = re.compile(r'\|(\d+)\s*-\s*(\d+)\|')


def expand_range(m):
    """Turn a range-citation match (groups: start, end) into an explicit list.

    ``|2-5|`` becomes ``|2,3,4,5|``. A descending range produces an empty
    block ``||`` because ``range(start, end + 1)`` is empty.
    """
    start = int(m.group(1))
    end = int(m.group(2))
    return f"|{','.join(str(i) for i in range(start, end + 1))}|"


def move_citations_at_the_end(answer):
    """Give every sentence a single, sorted citation block at its end.

    ``answer`` is split into sentences at each newline that is followed by an
    uppercase letter. Within a sentence, citation blocks that sit mid-text in
    the form ``. |1,2|\\n`` are removed and merged with the trailing block (if
    any); the merged, de-duplicated, ascending list is appended as ``|...|``.

    Sentences that cite nothing are returned without a citation block, and
    empty sentences are dropped.

    Returns:
        The rewritten sentences joined by newlines.
    """
    # In-text citation followed by a continuation of the same sentence.
    inline_pattern = r"\.\s\|((?:\d+,?)+)\|\n"
    # Citation block closing the sentence.
    end_pattern = r"\|((?:\d+,?)+)\|$"

    new_sentences = []
    for sentence in re.split(r'\n(?=[A-Z])', answer):
        sentence = _CITATION_RANGE.sub(expand_range, sentence)

        inline_citations = {
            int(num)
            for group in re.findall(inline_pattern, sentence)
            for num in group.split(',')
            if num
        }
        # Replace with a space (not nothing) so the words on either side of
        # the removed citation are not glued together.
        cleaned_text = re.sub(inline_pattern, ' ', sentence)

        end_citations = set()
        end_match = re.search(end_pattern, cleaned_text)
        if end_match:
            end_citations = {int(num) for num in end_match.group(1).split(',') if num}
            cleaned_text = re.sub(end_pattern, '', cleaned_text)

        updated_text = cleaned_text.strip()
        all_citations = sorted(inline_citations | end_citations)
        if all_citations:
            # Only emit a block when there is something to cite; an empty
            # "||" is not a valid citation.
            updated_text = f"{updated_text} |{','.join(map(str, all_citations))}|".strip()

        if updated_text:
            new_sentences.append(updated_text)
    return "\n".join(new_sentences)


def normalize_citations(text):
    """Normalize a model answer to one ``Sentence. |i,j|`` per line.

    Steps, in order:
      1. Collapse all whitespace (including newlines) to single spaces.
      2. Expand range citations such as ``|1-3|`` so the later steps see
         them as ordinary comma-separated blocks.
      3. Strip spaces inside citation blocks (``| 1 , 2 |`` -> ``|1,2|``).
      4. Move the sentence period in front of the citation and end the line
         after the citation.
      5. Merge all citations of a sentence into one trailing block via
         ``move_citations_at_the_end``.
    """
    text = re.sub(r'\s+', ' ', text)  # Newlines and runs of whitespace -> one space

    text = _CITATION_RANGE.sub(expand_range, text)

    # Normalize spacing inside citation blocks like |123, 456 ,789 |
    text = re.sub(r'\|[\s]*(\d+(?:\s*,\s*\d+)*?)[\s]*\|',
                  lambda m: f"|{','.join(num.strip() for num in m.group(1).split(','))}|",
                  text)

    # Step 1: Remove any dot directly after a citation
    text = re.sub(r'(\|\d+(?:,\d+)*\|)\s*\.', r'\1', text)

    # Step 2: Put ". " before every citation and a newline after it. When the
    # text already had "xxx. |1|" this yields "xxx. . |1|"; the stray " . "
    # is removed below.
    text = re.sub(r'(?<!\.)\s*(\|\d+(?:,\d+)*\|)', r'. \1\n', text)

    text = re.sub(r'\n\s+', '\n', text)  # No leading whitespace on a line

    text = text.strip("\n")

    # Remove the stray " . " left by step 2. The dot is escaped on purpose:
    # an unescaped '.' would also delete every one-character word
    # ("a", "I", "5", ...) from the answer.
    text = re.sub(r' \. ', ' ', text)

    return move_citations_at_the_end(text)

In [24]:
import json

# Test cases; later indexed as data[case_id] to read "clinical_question" and
# "patient_narrative".
with open("../v1_test.json", encoding="utf-8") as test_file:
    data = json.load(test_file)

In [25]:
# Per-case sentence predictions: case id -> {sentence number -> entry with
# "prediction" and "sentence" keys}, as consumed by the generation loop.
with open("../2-merge/merged_predictions.json", encoding="utf-8") as predictions_file:
    predictions = json.load(predictions_file)

In [26]:
# Generate one cited answer per case and collect them in `submission`.
WORD_LIMIT = 75              # retry while the answer is longer than this
MAX_RETRIES = 3              # shortening attempts per case
MIN_RELEVANT_SENTENCES = 3   # below this, give the model the whole note

submission = []

# Counts relevant sentences the final answers did not cite (printed at the end).
total_citations_removed = 0

for i, case in predictions.items():
    print("-"*20)
    official_case = data[i]
    question = official_case["clinical_question"]
    patient_question = official_case["patient_narrative"]

    # Keep only sentences predicted relevant. Keys are shifted by one because
    # the prompt asks for 1-based paragraph indices.
    filtered_sentences = [
        (int(sentence_number) + 1, sentence["sentence"])
        for sentence_number, sentence in case.items()
        if sentence["prediction"] == 1
    ]

    all_citations = {sentence_number for sentence_number, _ in filtered_sentences}

    if len(all_citations) < MIN_RELEVANT_SENTENCES:
        print(f"Case {i} has too few relevant sentences: {all_citations}")
        few_relevant = filtered_sentences
        # Fall back to every sentence of the case. Use the sentence *text*
        # (entry["sentence"]), not the whole prediction entry, so the prompt
        # does not contain a dict repr.
        filtered_sentences = [
            (int(sentence_number) + 1, sentence["sentence"])
            for sentence_number, sentence in case.items()
        ]
    else:
        few_relevant = None

    # Indices actually shown to the model; in the fallback this is the whole
    # case, otherwise it equals `all_citations`.
    offered_citations = {sentence_number for sentence_number, _ in filtered_sentences}

    messages, answer = predict(question, patient_question, filtered_sentences, relevant_few=few_relevant)

    answer = normalize_citations(answer)

    # Recalculate word count on potentially multi-line string
    word_count = len(re.sub(r"\|[\d,]+\|", "", answer).split())
    print(f"Approximate word count (excluding citations): {word_count}")
    max_retries = MAX_RETRIES
    while word_count > WORD_LIMIT and max_retries > 0:
        max_retries -= 1
        print(f"Warning: Word count ({word_count}) may exceed the target of ~70 words. Trying again")
        messages.append(
            {
                "role": "user",
                "content": "Please try making it more compact, the answer is too long.\nOutput the answer directly."
            }
        )
        messages, answer = predict(question, patient_question, filtered_sentences, messages=messages)
        answer = normalize_citations(answer)
        word_count = len(re.sub(r"\|[\d,]+\|", "", answer).split())
        print(f"New word count: {word_count}")

    used_citations = extract_citations(answer)

    not_used_citations = all_citations - used_citations
    if len(not_used_citations) > 0:
        print(f"Case {i} has unused citations: {not_used_citations}")
        total_citations_removed += len(not_used_citations)

    # A citation is hallucinated only if it points at a paragraph the model
    # was never shown.
    hallucination = used_citations - offered_citations
    if len(hallucination) > 0:
        print(f"Case {i} hallucinated cites: {hallucination}")

    submission.append({
        "case_id": i,
        "answer": answer,
    })

print(f"\n\nTotal citations removed: {total_citations_removed}")

--------------------
Approximate word count (excluding citations): 77
New word count: 53
Case 21 has unused citations: {2, 10, 12, 18, 24}
--------------------
Approximate word count (excluding citations): 53
--------------------
Case 23 has too few relevant sentences: {5, 6}
Approximate word count (excluding citations): 41
--------------------
Approximate word count (excluding citations): 65
--------------------
Case 25 has too few relevant sentences: {2, 3}
Approximate word count (excluding citations): 76
New word count: 46
--------------------
Case 26 has too few relevant sentences: {3, 4}
Approximate word count (excluding citations): 46
--------------------
Case 27 has too few relevant sentences: {25, 12}
Approximate word count (excluding citations): 66
--------------------
Approximate word count (excluding citations): 66
Case 28 has unused citations: {11, 12}
--------------------
Approximate word count (excluding citations): 61
Case 29 has unused citations: {4}
-------------------

In [29]:
# Persist the answers; ensure_ascii=False keeps non-ASCII characters readable.
with open("submission.json", "w", encoding="utf-8") as submission_file:
    json.dump(submission, submission_file, indent=4, ensure_ascii=False)