# Import Neccessary Libraries


In [4]:
import langextract as lx
import json
import pandas as pd


In [6]:
#Example 1 — straightforward outpatient note

example = "A 45-year-old man visited the Outpatient department complaining of persistent lower back pain that had lasted for three weeks." \
"He was prescribed ibuprofen 400 mg taken orally twice a day to relieve the discomfort."

#🩺 Example 2 — surgical ward note

example1 = "A 62-year-old woman was admitted to the Surgery department for postoperative wound infection that had been present for five days." \
"The physician started her on cefazolin 1 g intravenous every 8 hours for infection control."

In [7]:

# Define extraction prompt
prompt_description = (
    "Extract patient age, department, problem, How long problem Occured, medication, dosage, route, frequency. Use exact text for extractions. "
    "Do not paraphrase or overlap entities. Do not mix up the examples with input text provided. If you cannot find an entity, leave it blank." \
    "Make sure to only extract what is explicitly mentioned in the text. Make sure to not infer any information that is not directly stated in the text." \
    "Provide the extractions in the order specified. Do not duplicate extractions even if there are multiple answers for an entity just choose one." \
    "Emit each unique span once. If you already output the exact same text for a class, do not repeat it."
)


# Define example data with entities in order of appearance
examples = [
    lx.data.ExampleData(
        text= example,
        extractions=[
            lx.data.Extraction(extraction_class="patient age", extraction_text="45-year-old"),
            lx.data.Extraction(extraction_class="department", extraction_text="Outpatient department"),
            lx.data.Extraction(extraction_class="problem", extraction_text="complaining of persistent lower back pain"),
            lx.data.Extraction(extraction_class="How long problem Occured", extraction_text="for three weeks"),
            lx.data.Extraction(extraction_class="medication", extraction_text="ibuprofen"),
            lx.data.Extraction(extraction_class="dosage", extraction_text="400 mg"),
            lx.data.Extraction(extraction_class="route", extraction_text="orally"),
            lx.data.Extraction(extraction_class="frequency", extraction_text="twice a day"),

        ]
    )
]




In [8]:
# get result using ollaama model llama3.2:latest already download

# increase ollama api timeout
input_text = example

result = lx.extract(
    text_or_documents=input_text,
    prompt_description=prompt_description,
    examples=examples,
    model_id="gemma2:2b",
        model_url="http://localhost:11434",
    fence_output=False,
    use_schema_constraints=False,     extraction_passes=2,  # Multiple passes for better accuracy    
    max_workers=20,         # Parallel processing for speed
                language_model_params={
                "keep_alive": 10*60,   # 10 minutes
                "timeout": 10*60       # 10 minutes
            }
)




In [9]:
# Display entities with positions
print(f"Input: {input_text}\n")
print("Extracted entities:")
for entity in result.extractions:
    position_info = ""
    if entity.char_interval:
        start, end = entity.char_interval.start_pos, entity.char_interval.end_pos
        position_info = f" (pos: {start}-{end})"
    print(f"• {entity.extraction_class.capitalize()}: {entity.extraction_text}{position_info}")



Input: A 45-year-old man visited the Outpatient department complaining of persistent lower back pain that had lasted for three weeks.He was prescribed ibuprofen 400 mg taken orally twice a day to relieve the discomfort.

Extracted entities:
• Patient age: 45-year-old (pos: 2-13)
• Department: Outpatient department (pos: 30-51)
• Problem: complaining of persistent lower back pain (pos: 52-93)
• How long problem occured: for three weeks (pos: 110-125)
• Medication: ibuprofen (pos: 144-153)
• Dosage: 400 mg (pos: 154-160)
• Route: orally (pos: 167-173)
• Frequency: twice a day (pos: 174-185)


Test The Model on a different Example to see the result

In [10]:
# Try on another example to test model results
input_text1 = example1

print(input_text1)

A 62-year-old woman was admitted to the Surgery department for postoperative wound infection that had been present for five days.The physician started her on cefazolin 1 g intravenous every 8 hours for infection control.


In [11]:
result1 = lx.extract(
    text_or_documents=input_text1,
    prompt_description=prompt_description,
    examples=examples,
    model_id="gemma2:2b",
        model_url="http://localhost:11434",
    fence_output=False,
    use_schema_constraints=False,     extraction_passes=2,  # Multiple passes for better accuracy
                language_model_params={
                "keep_alive": 10*60,   # 10 minutes
                "timeout": 10*60       # 10 minutes
            }
)

In [12]:
# Display entities with positions
print(f"Input: {input_text1}\n")
print("Extracted entities:")
for entity in result1.extractions:
    position_info = ""
    if entity.char_interval:
        start, end = entity.char_interval.start_pos, entity.char_interval.end_pos
        position_info = f" (pos: {start}-{end})"
    print(f"• {entity.extraction_class.capitalize()}: {entity.extraction_text}{position_info}")



Input: A 62-year-old woman was admitted to the Surgery department for postoperative wound infection that had been present for five days.The physician started her on cefazolin 1 g intravenous every 8 hours for infection control.

Extracted entities:
• Patient age: 62-year-old (pos: 2-13)
• Department: Surgery department (pos: 40-58)
• Problem: postoperative wound infection (pos: 63-92)
• How long problem occured: five days (pos: 119-128)
• Medication: cefazolin (pos: 158-167)
• Dosage: 1 g (pos: 168-171)
• Route: intravenous (pos: 172-183)
• Frequency: every 8 hours (pos: 184-197)


In [13]:
# Save the results to a JSON file
lx.io.save_annotated_documents([result1], output_name="extraction_results.jsonl", output_dir=".")

html_content = lx.visualize("extraction_results.jsonl")


[94m[1mLangExtract[0m: Saving to [92mextraction_results.jsonl[0m: 1 docs [00:00, 25.02 docs/s]

[92m✓[0m Saved [1m1[0m documents to [92mextraction_results.jsonl[0m



[94m[1mLangExtract[0m: Loading [92mextraction_results.jsonl[0m: 100%|█████████▉| 2.19k/2.20k [00:00<00:00, 104kB/s]

[92m✓[0m Loaded [1m1[0m documents from [92mextraction_results.jsonl[0m





In [14]:
with open("visualization.html", "w", encoding="utf-8") as f:
    f.write(html_content.data)

In [15]:
html_content