In [58]:
!pip install dotenv langchain  langchain-openai

Collecting langchain-openai
  Downloading langchain_openai-1.1.10-py3-none-any.whl.metadata (3.1 kB)
Collecting openai<3.0.0,>=2.20.0 (from langchain-openai)
  Downloading openai-2.21.0-py3-none-any.whl.metadata (29 kB)
Collecting tiktoken<1.0.0,>=0.7.0 (from langchain-openai)
  Downloading tiktoken-0.12.0-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (6.7 kB)
Collecting distro<2,>=1.7.0 (from openai<3.0.0,>=2.20.0->langchain-openai)
  Downloading distro-1.9.0-py3-none-any.whl.metadata (6.8 kB)
Collecting jiter<1,>=0.10.0 (from openai<3.0.0,>=2.20.0->langchain-openai)
  Downloading jiter-0.13.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.2 kB)
Collecting sniffio (from openai<3.0.0,>=2.20.0->langchain-openai)
  Downloading sniffio-1.3.1-py3-none-any.whl.metadata (3.9 kB)
Downloading langchain_openai-1.1.10-py3-none-any.whl (87 kB)
Downloading openai-2.21.0-py3-none-any.whl (1.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m 

In [59]:
%env PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True

env: PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True


In [68]:
import torch
import gc
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import time
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
import accelerate
import transformers
import bitsandbytes as bnb

import os

import json

from datasets import load_dataset
import pandas as pd

import dotenv

from langchain_core.messages import HumanMessage
from langchain_core.output_parsers import StrOutputParser, PydanticOutputParser
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate

In [61]:
dotenv.load_dotenv('.env')

True

In [14]:
model_name = 'BioMistral/BioMistral-Safetensors'
results = {}

In [15]:
def clean_memory():
    for var in ['model', 'tokenizer', 'inputs', 'llama', 'inputs', 'output']:
        if var in globals():
            del globals()[var]

    if torch.cuda.is_available():
        torch.cuda.ipc_collect()
        torch.cuda.reset_peak_memory_stats()
        torch.cuda.empty_cache()

    gc.collect()
    print('Memory is cleaned')

In [16]:
def print_memory():
    if torch.cuda.is_available():
        allocated = torch.cuda.memory_allocated(0) / 1024**3
        reserved = torch.cuda.memory_reserved(0) / 1024**3
        print(f'VRAM allocated {allocated}gb, reserved {reserved}gb')
    else:
        print('No cuda')

In [17]:
clean_memory()
print_memory()

Memory is cleaned
VRAM allocated 0.0079345703125gb, reserved 0.0390625gb


In [20]:
clean_memory()
model_name = 'BioMistral/BioMistral-Safetensors'
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16, device_map="auto", attn_implementation="flash_attention_2") #Необходимо явно задать параметры, иначе будет другой размер и может не загрузится в видеокарту
model.eval()
tokenizer = AutoTokenizer.from_pretrained(model_name)
print_memory()

with torch.inference_mode():
    input_text = 'My head is sick. Which pill should I drink?'
    inputs = tokenizer(input_text, return_tensors='pt').to(model.device)
    
    start = time.time()
    output = model.generate(**inputs, repetition_penalty=1.2, max_new_tokens = 100)
    end = time.time()
    
    new_tokens = len(output[0]) - len(inputs.input_ids[0])
    tps = new_tokens / (end - start)
    print(f'BF16: new Tokens: {new_tokens}, tokens per second: {tps}')
    
    decoded_text = tokenizer.decode(output[0], skip_special_tokens=True)
    print(decoded_text)
    
    results['BioMistral-7B-BF16'] = {'tps': tps, 'memory': torch.cuda.memory_allocated(0)/1024**3}

Memory is cleaned


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


VRAM allocated 13.49671220779419gb, reserved 13.517578125gb
BF16: new Tokens: 49, tokens per second: 25.190632145812824
My head is sick. Which pill should I drink? (A) 10 mg of amoxicillin, (B) 250 mg of metronidazole, and (C) 400 mg of ciprofloxacin.


In [21]:
clean_memory()
print_memory()

Memory is cleaned
VRAM allocated 0.0079345703125gb, reserved 0.0390625gb


Парадоксально, но при bnb 4-bit пик памяти при загрузке может быть выше из-за временных буферов/конвертаций и особенностей размещения. Поэтому BF16 может “влезать”, а 4-bit — падать именно на этапе from_pretrained.
Заливаем модель в видеопамять через квантование на ОЗУ.

In [22]:
clean_memory()
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_compute_dtype=torch.float16,
)
model_name = 'BioMistral/BioMistral-Safetensors'

model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=bnb_config, device_map='auto', attn_implementation="flash_attention_2")
#model = model.to('cuda')
tokenizer = AutoTokenizer.from_pretrained(model_name)
print_memory()

model.eval()

with torch.inference_mode():

    input_text = 'My head is sick. Which pill should I drink?'
    inputs = tokenizer(input_text, return_tensors='pt').to(model.device)
    
    start = time.time()
    output = model.generate(**inputs, repetition_penalty=1.2, max_new_tokens = 100)
    end = time.time()
    
    new_tokens = len(output[0]) - len(inputs.input_ids[0])
    tps = new_tokens / (end - start)
    print(f'4BIT: new Tokens: {new_tokens}, tokens per second: {tps}')
    
    decoded_text = tokenizer.decode(output[0], skip_special_tokens=True)
    print(decoded_text)
    
    results['BioMistral-7B-4BIT'] = {'tps': tps, 'memory': torch.cuda.memory_allocated(0)/1024**3}

Memory is cleaned


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


VRAM allocated 4.153069019317627gb, reserved 4.240234375gb
4BIT: new Tokens: 57, tokens per second: 55.86251962458195
My head is sick. Which pill should I drink? (A) 10 mg of amitriptyline, (B) 25 mg of carbamazepine, (C) 300 mg of valproate, or (D) 400 mg of lamotrigine.


In [23]:
clean_memory()

model_name_gguf = 'BioMistral/BioMistral-7B-GGUF'

model_path = hf_hub_download(
    repo_id=model_name_gguf,
    filename='ggml-model-Q4_K_M.gguf',
    local_dir="./models"
)

print('File downloaded')

llama = Llama(
    model_path=model_path,
    n_gpu_layers=-1,
    n_ctx=32768,
    verbose=False
)

start = time.time()
output = llama.create_chat_completion(
    messages=[{'role': 'user', 'content': 'My head is sick. Which pill should I drink?'}],
    max_tokens=100,
    repeat_penalty=1.2,
)
end = time.time()
print(output)

new_tokens = output['usage']['completion_tokens']
tps = new_tokens / (end - start)
print(f'LLAMACPP: new Tokens: {new_tokens}, tokens per second: {tps}')

results['BioMistral-7B-LLAMACPP (GPU)'] = {'tps': tps, 'memory': 'uncountable'}

Memory is cleaned
File downloaded
{'id': 'chatcmpl-4107ecc9-5b9c-4d56-ae68-16467b1df80e', 'object': 'chat.completion', 'created': 1771678617, 'model': 'models/ggml-model-Q4_K_M.gguf', 'choices': [{'index': 0, 'message': {'role': 'assistant', 'content': ' I’m sorry to hear that. Can you please tell me more about how you are feeling? Are you experiencing a headache, fever or chills, nausea, vomiting, or any other symptoms?'}, 'logprobs': None, 'finish_reason': 'stop'}], 'usage': {'prompt_tokens': 19, 'completion_tokens': 44, 'total_tokens': 63}}
LLAMACPP: new Tokens: 44, tokens per second: 50.4270743618105


In [24]:
results

{'BioMistral-7B-BF16': {'tps': 25.190632145812824,
  'memory': 13.496713638305664},
 'BioMistral-7B-4BIT': {'tps': 55.86251962458195, 'memory': 4.15307092666626},
 'BioMistral-7B-LLAMACPP (GPU)': {'tps': 50.4270743618105,
  'memory': 'uncountable'}}

In [25]:
dataset = load_dataset('omi-health/medical-dialogue-to-soap-summary')

In [26]:
dataset['train'][45]['soap']

"S: The patient is a 79-year-old male with a history of hypertension, valvular heart disease, diabetes mellitus, and stage 4 chronic kidney disease, presenting with lower back pain. He reports previous treatments for bilateral large renal stones, including bilateral double J stent insertion, left extracorporeal shockwave lithotripsy, and FURS with laser stone fragmentation, which did not provide significant relief.\nO: Recent non-contrast CT scan revealed multiple bilateral renal stones with a total stone burden of 3.0 cm in the left kidney and 3.2 cm in the right kidney. The patient underwent simultaneous bilateral FURS and holmium laser lithotripsy under general anesthesia, which lasted 125 minutes. Surgical details include the use of a hydrophilic tip guidewire, a ureteral access sheath, and a Karl Storz flexible ureterorenoscope. Laser settings were 1.0–1.2 joules at 8-12 Hz, with 4200 and 4066 pulses for the left and right sides, respectively.\nA: The primary diagnosis is bilatera

In [27]:
examples = [d['soap'] for i, d in enumerate(dataset['train'].take(12))]

In [28]:
example = examples[1]
example

"S: The patient, a 21-month-old male, presented with weakness in his lower extremities and lumbar pain following a mild upper respiratory tract infection. Initial treatment with anti-inflammatory therapy for suspected transient hips arthritis was ineffective, leading to worsening pain and inability to walk.\nO: Hip ultrasound showed no joint effusion. Spine radiograph revealed slight reduction in the thickness of the L5 soma. MRI indicated increased T1 post-enhancement signals in the L4-L5 anulus, opposite end-plates of L4 and L5, adjacent soft tissues, and osteolytic area of the L5 pedicle. Blood tests showed elevated erythrocyte sedimentation rate (77 mm/h) and C-reactive protein (2.17 mg/dL), with normal white blood cell count, procalcitonin serum concentration, and Quantiferon TB-gold test. After 3 weeks of therapy, the patient developed leukopenia with severe neutropenia (white blood cell count at 5410/mm3).\nA: Primary diagnosis is Spondylodiscitis with associated osteomyelitis. 

In [29]:
def get_ner_prompt(text: str) -> str:
    return """
You are NER model. Find all named entities in text (between tags <text> and </text).

Allowed types ONLY:
- SYMPTOM: patient-reported symptoms or clinical manifestations (e.g., pain, weakness, fever, inability to walk).
- DIAGNOSIS: diseases/conditions (including suspected/differential diagnoses and infections).
- MEDICATION: specific drug names (e.g., meropenem, vancomycin). (Do NOT label tests or procedures.)
- DOSAGE: medication dose/regimen ONLY (e.g., "100 mg/kg/day", "in three doses", "IV", "once daily"). Must be tied to a MEDICATION in the same sentence/phrase.
- DURATION: time spans or timing of therapy/symptoms (e.g., "for 3 weeks", "after 3 weeks", "first week").
- SIDE_EFFECT: adverse effects/complications of treatment (e.g., leukopenia, neutropenia) when presented as developed during/after therapy.

STRICT EXCLUSIONS (do NOT extract):
- Imaging/procedures/tests: MRI, ultrasound, radiograph, CT, blood tests, Quantiferon, procalcitonin, etc.
- Lab values/measurements: ESR 77 mm/h, CRP 2.17 mg/dL, WBC 5410/mm3, normal ranges.
- Radiology/lab findings that are not symptoms (e.g., "increased T1 signals", "osteolytic area").
- Negated findings (e.g., "no joint effusion", "normal WBC") — do not extract them.

Output rules:
- Use exact text spans.
- Do not invent entities.
- Do not output duplicates.
- Split doses and medication titles (this is bad example of medication: meropenem (100 mg/kg/day in three doses))

Coverage checklist BEFORE finalizing the JSON:
1) Did you extract ALL SYMPTOM mentions (including worsening/functional inability)?
2) Did you extract ALL DIAGNOSIS mentions (including suspected/differential)?
3) For EACH antibiotic/drug: did you extract MEDICATION and its DOSAGE separately?
4) Did you extract ALL DURATION mentions related to therapy timing?
5) Did you avoid tests, imaging, lab values, and negated findings?
6) Didn't you mixed up dosage and duration?
7) Don't json have trailing comma after the last list element

<example>
<input>
 Doctor: Hello, what brings you in today?\nPatient: Hi, my 21-month-old son has been experiencing weakness in his lower extremities and lumbar pain after a mild upper respiratory tract infection.\nDoctor: I see. Did you consult any medical professional for this issue?\nPatient: Yes, we took him to the pediatric emergency department. They did a hip ultrasound, but they didn't find any joint effusion. They dismissed it as transient hips arthritis and treated him with anti-inflammatory therapy.\nDoctor: Did the therapy help with his condition?\nPatient: Unfortunately, no. His pain got worse, and now he's unable to walk. That's why we came back here.\nDoctor: We should definitely investigate further. We'll start by admitting him to the hospital. We'll need to perform a spine radiograph and an MRI to determine what's causing his condition. \nPatient: Okay, thank you.\n[After the tests]\nDoctor: I've reviewed the test results. The spine radiograph showed a slight reduction in the thickness of the L5 soma. The MRI revealed increased T1 post-enhancement signals in the L4-L5 anulus, the opposite end-plates of L4 and L5, the adjacent soft tissues, and the osteolytic area of the L5 pedicle. These findings suggest that your son has Spondylodiscitis with associated osteomyelitis.\nPatient: Oh no! What about his blood tests?\nDoctor: The blood tests showed an increase in the erythrocyte sedimentation rate (77 mm/h) and C-reactive protein (2.17 mg/dL). However, his white blood cell count, procalcitonin serum concentration, and Quantiferon TB-gold test are all within normal ranges. \nPatient: What's the treatment plan for his condition?\nDoctor: We'll start him on broad-spectrum intravenous therapy, which includes meropenem (100 mg/kg/day in three doses) and vancomycin (40 mg/kg/day in three doses). We'll continue the anti-inflammatory treatment for the first week and then stop it once his symptoms have resolved and he can walk normally again.\nPatient: How long will the treatment last?\nDoctor: The treatment may last for a few weeks. We'll closely monitor his progress and adjust the treatment as needed.\nPatient: Alright, thank you, doctor.\n[After 3 weeks of therapy]\nDoctor: I wanted to update you on your son's condition. Unfortunately, he has developed leukopenia with severe neutropenia. His white blood cell count dropped to its lowest at 5410/mm3.\nPatient: Oh no, is there anything we can do?\nDoctor: We'll continue to closely monitor his condition and tailor his treatment as necessary. In the meantime, it's important for you to keep an eye on any changes in his symptoms and report them to us immediately. We'll do our best to help him recover.\nPatient: Thank you, doctor. We appreciate your help. 
Use meropenem (100 mg/kg/day in three doses)
</input>
<output>
```json
[
  {{"entity":"weakness in his lower extremities","type":"SYMPTOM"}},
  {{"entity":"lumbar pain","type":"SYMPTOM"}},
  {{"entity":"upper respiratory tract infection","type":"DIAGNOSIS"}},
  {{"entity":"anti-inflammatory therapy","type":"MEDICATION"}},
  {{"entity":"meropenem","type":"MEDICATION"}},
  {{"entity":"100 mg/kg/day in three doses","type":"DOSAGE"}}
]
```
</output>
</example>

Think step by step.

Text:
<input>
{}
</input>

Your answer in json:
""".format(text)

In [30]:
df = pd.DataFrame({'soap': examples})
df['ner_prompt'] = df['soap'].apply(get_ner_prompt)

df

Unnamed: 0,soap,ner_prompt
0,S: The patient's mother reports that her 13-ye...,\nYou are NER model. Find all named entities i...
1,"S: The patient, a 21-month-old male, presented...",\nYou are NER model. Find all named entities i...
2,"S: Patient reports experiencing fatigue, night...",\nYou are NER model. Find all named entities i...
3,"S: Patient D, a 60-year-old African American m...",\nYou are NER model. Find all named entities i...
4,"S: The patient, a married woman with a 7-year ...",\nYou are NER model. Find all named entities i...
5,"S: The patient, a long-term sufferer of acrome...",\nYou are NER model. Find all named entities i...
6,S: The patient reports a history of abdominal ...,\nYou are NER model. Find all named entities i...
7,"S: The patient, who has been experiencing righ...",\nYou are NER model. Find all named entities i...
8,"S: The patient, with a past history of sigmoid...",\nYou are NER model. Find all named entities i...
9,"S: The patient, with a history of asthma, was ...",\nYou are NER model. Find all named entities i...


In [31]:
def get_ner_model():
    model_name = 'mistralai/Mistral-7B-Instruct-v0.3'

    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type='nf4',
        bnb_4bit_compute_dtype=torch.float16,
    )
    
    model = AutoModelForCausalLM.from_pretrained(
                                            model_name,
                                            torch_dtype=torch.float16,
                                            device_map='auto',
                                            quantization_config=bnb_config,
                                            attn_implementation="flash_attention_2"
                                            ) #Необходимо явно задать параметры, иначе будет другой размер и может не загрузится в видеокарту
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    tokenizer.pad_token = tokenizer.eos_token
    model.config.pad_token_id = model.config.eos_token_id
    tokenizer.padding_side = "left"
    
    model.eval()

    

    return model, tokenizer

In [32]:
def get_ner_answer(model_output: str):
    try:
        index = model_output.rfind("```json")
        if index == -1:
            print(model_output)
            raise Exception("Cannot find ```json tag")
        model_output = model_output[index+7:-4]
        return json.loads(model_output)
    except Exception as e:
        print(model_output)
        print(f'cannot parse json output: {e}')
        return []

In [33]:
#uncomment it on the first run
clean_memory()
ner_model, ner_tokenizer = get_ner_model()

Memory is cleaned


Downloading shards:   0%|          | 0/3 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [34]:
clean_memory()

input_text = get_ner_prompt(example)

print_memory()

with torch.inference_mode():

    inputs = ner_tokenizer(input_text, return_tensors='pt').to(ner_model.device)
    
    start = time.time()
    output = ner_model.generate(**inputs, max_new_tokens = 4096, temperature=0.0,)
    end = time.time()
    
    new_tokens = len(output[0]) - len(inputs.input_ids[0])
    tps = new_tokens / (end - start)
    print(f'BF16: new Tokens: {new_tokens}, tokens per second: {tps}')
    
    decoded_text = ner_tokenizer.decode(output[0], skip_special_tokens=True)
    print(decoded_text)
    print(get_ner_answer(decoded_text))

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Memory is cleaned
VRAM allocated 4.164787769317627gb, reserved 4.240234375gb
BF16: new Tokens: 422, tokens per second: 46.61262543793246

You are NER model. Find all named entities in text (between tags <text> and </text).

Allowed types ONLY:
- SYMPTOM: patient-reported symptoms or clinical manifestations (e.g., pain, weakness, fever, inability to walk).
- DIAGNOSIS: diseases/conditions (including suspected/differential diagnoses and infections).
- MEDICATION: specific drug names (e.g., meropenem, vancomycin). (Do NOT label tests or procedures.)
- DOSAGE: medication dose/regimen ONLY (e.g., "100 mg/kg/day", "in three doses", "IV", "once daily"). Must be tied to a MEDICATION in the same sentence/phrase.
- DURATION: time spans or timing of therapy/symptoms (e.g., "for 3 weeks", "after 3 weeks", "first week").
- SIDE_EFFECT: adverse effects/complications of treatment (e.g., leukopenia, neutropenia) when presented as developed during/after therapy.

STRICT EXCLUSIONS (do NOT extract):
- I

In [35]:
clean_memory()

Memory is cleaned


In [36]:
print_memory()

VRAM allocated 4.164787769317627gb, reserved 4.240234375gb


In [37]:
def batch_ner(prompts, model, tokenizer):
    inputs = tokenizer(
        prompts,
        return_tensors='pt',
        padding=True,
        max_length=10240,
        truncation=True
    ).to('cuda')

    out = model.generate(
        **inputs,
        max_new_tokens=1024,
        do_sample=False,
    )

    texts = tokenizer.batch_decode(out, skip_special_tokens=True)
    return texts
    

In [38]:
#SMART BATCHING
def batch_process_dataframe(df, batch_size_max, size_diff_to_batch, model, tokenizer):
    i = 0
    while i < len(df):
        j = i
        idx = []

        batch_size = 0

        while j < len(df):
            if df['ner_prompt_len'][df.index[i]] / float(df['ner_prompt_len'][df.index[j]]) >= size_diff_to_batch:
                batch_size = batch_size + 1
                idx.append(df.index[j])
                j = j + 1
                if batch_size >= batch_size_max:
                    break
            else:
                break

        batch = df.loc[idx]
        sizes = batch['ner_prompt_len'].tolist()
        print(sizes)

        prompts = batch['ner_prompt'].tolist()
        ner_by_local_model = batch_ner(prompts, model, tokenizer)
        ner_by_local_model_obj = [get_ner_answer(i) for i in ner_by_local_model]

        df.loc[idx, "ner_result"] = ner_by_local_model
        df.loc[idx, "ner_parsed"] = pd.Series(ner_by_local_model_obj, index=idx, dtype="object")#ner_by_local_model_obj

        i = j

In [39]:
def get_total_tokens_in_prompt(tokenizer):
    def get_total_tokens_in_prompt(prompt):
        inputs = tokenizer(prompt)
        return len(inputs['input_ids'])
    return get_total_tokens_in_prompt

In [40]:
df['ner_prompt_len'] = df['ner_prompt'].apply(get_total_tokens_in_prompt(ner_tokenizer))
df = df.sort_values(by='ner_prompt_len')
df

Unnamed: 0,soap,ner_prompt,ner_prompt_len
3,"S: Patient D, a 60-year-old African American m...",\nYou are NER model. Find all named entities i...,1744
10,"S: The patient, a postpartum woman with a hist...",\nYou are NER model. Find all named entities i...,1919
0,S: The patient's mother reports that her 13-ye...,\nYou are NER model. Find all named entities i...,1924
1,"S: The patient, a 21-month-old male, presented...",\nYou are NER model. Find all named entities i...,1965
6,S: The patient reports a history of abdominal ...,\nYou are NER model. Find all named entities i...,1973
4,"S: The patient, a married woman with a 7-year ...",\nYou are NER model. Find all named entities i...,1973
7,"S: The patient, who has been experiencing righ...",\nYou are NER model. Find all named entities i...,1989
11,"S: The patient, diagnosed with infantile-onset...",\nYou are NER model. Find all named entities i...,2002
9,"S: The patient, with a history of asthma, was ...",\nYou are NER model. Find all named entities i...,2013
2,"S: Patient reports experiencing fatigue, night...",\nYou are NER model. Find all named entities i...,2020


In [42]:
time_batch_logs = []

In [43]:
start = time.time()
batch_size=1
size_diff_to_batch=0.99
batch_process_dataframe(df, batch_size_max=batch_size, size_diff_to_batch=size_diff_to_batch, model=ner_model, tokenizer=ner_tokenizer)
end = time.time()
log = f'Time with batch {batch_size} and max diff {size_diff_to_batch} for df len {len(df)}: {end-start} seconds.'
print(log)
time_batch_logs.append(log)

start = time.time()
batch_size=2
size_diff_to_batch=0.99
batch_process_dataframe(df, batch_size_max=batch_size, size_diff_to_batch=size_diff_to_batch, model=ner_model, tokenizer=ner_tokenizer)
end = time.time()
log = f'Time with batch {batch_size} and max diff {size_diff_to_batch} for df len {len(df)}: {end-start} seconds.'
print(log)
time_batch_logs.append(log)

start = time.time()
batch_size=5
size_diff_to_batch=0.99
batch_process_dataframe(df, batch_size_max=batch_size, size_diff_to_batch=size_diff_to_batch, model=ner_model, tokenizer=ner_tokenizer)
end = time.time()
log = f'Time with batch {batch_size} and max diff {size_diff_to_batch} for df len {len(df)}: {end-start} seconds.'
print(log)
time_batch_logs.append(log)

start = time.time()
batch_size=7
size_diff_to_batch=0.99
batch_process_dataframe(df, batch_size_max=batch_size, size_diff_to_batch=size_diff_to_batch, model=ner_model, tokenizer=ner_tokenizer)
end = time.time()
log = f'Time with batch {batch_size} and max diff {size_diff_to_batch} for df len {len(df)}: {end-start} seconds.'
print(log)
time_batch_logs.append(log)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[1744]


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[1919]


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[1924]


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[1965]


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[1973]


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[1973]


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[1989]


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[2002]


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[2013]


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[2020]


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[2024]


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[2035]


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Time with batch 1 and max diff 0.99 for df len 12: 102.31229543685913 seconds.
[1744]


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[1919, 1924]


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[1965, 1973]


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[1973, 1989]


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[2002, 2013]


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[2020, 2024]


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[2035]


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Time with batch 2 and max diff 0.99 for df len 12: 223.7220959663391 seconds.
[1744]


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[1919, 1924]


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[1965, 1973, 1973]


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[1989, 2002]


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[2013, 2020, 2024]


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[2035]


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Time with batch 5 and max diff 0.99 for df len 12: 185.73879194259644 seconds.
Time with batch 5 and max diff 0.99 for df len 12: 185.73879194259644 seconds.
[1744]


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[1919, 1924]


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[1965, 1973, 1973]


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[1989, 2002]


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[2013, 2020, 2024]


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[2035]
Time with batch 7 and max diff 0.99 for df len 12: 184.47819900512695 seconds.


In [45]:
for log in time_batch_logs:
    print(log)

Time with batch 1 and max diff 0.99 for df len 12: 102.31229543685913 seconds.
Time with batch 2 and max diff 0.99 for df len 12: 223.7220959663391 seconds.
Time with batch 5 and max diff 0.99 for df len 12: 185.73879194259644 seconds.
Time with batch 7 and max diff 0.99 for df len 12: 184.47819900512695 seconds.


In [46]:
df

Unnamed: 0,soap,ner_prompt,ner_prompt_len,ner_result,ner_parsed
3,"S: Patient D, a 60-year-old African American m...",\nYou are NER model. Find all named entities i...,1744,\nYou are NER model. Find all named entities i...,"[{'entity': 'Patient D', 'type': 'PATIENT'}, {..."
10,"S: The patient, a postpartum woman with a hist...",\nYou are NER model. Find all named entities i...,1919,\nYou are NER model. Find all named entities i...,"[{'entity': 'postpartum woman', 'type': 'PATIE..."
0,S: The patient's mother reports that her 13-ye...,\nYou are NER model. Find all named entities i...,1924,\nYou are NER model. Find all named entities i...,"[{'entity': 'attention deficit disorder', 'typ..."
1,"S: The patient, a 21-month-old male, presented...",\nYou are NER model. Find all named entities i...,1965,\nYou are NER model. Find all named entities i...,"[{'entity': '21-month-old male', 'type': 'PATI..."
6,S: The patient reports a history of abdominal ...,\nYou are NER model. Find all named entities i...,1973,\nYou are NER model. Find all named entities i...,"[{'entity': 'abdominal pain', 'type': 'SYMPTOM..."
4,"S: The patient, a married woman with a 7-year ...",\nYou are NER model. Find all named entities i...,1973,\nYou are NER model. Find all named entities i...,"[{'entity': '7-year history of infertility', '..."
7,"S: The patient, who has been experiencing righ...",\nYou are NER model. Find all named entities i...,1989,\nYou are NER model. Find all named entities i...,"[{'entity': 'right ankle swelling', 'type': 'S..."
11,"S: The patient, diagnosed with infantile-onset...",\nYou are NER model. Find all named entities i...,2002,\nYou are NER model. Find all named entities i...,[{'entity': 'infantile-onset hypophosphatasia'...
9,"S: The patient, with a history of asthma, was ...",\nYou are NER model. Find all named entities i...,2013,\nYou are NER model. Find all named entities i...,"[{'entity': 'asthma', 'type': 'DIAGNOSIS'}, {'..."
2,"S: Patient reports experiencing fatigue, night...",\nYou are NER model. Find all named entities i...,2020,\nYou are NER model. Find all named entities i...,"[{'entity': 'fatigue', 'type': 'SYMPTOM'}, {'e..."


In [47]:
print(df['ner_parsed'][3])

[{'entity': 'Patient D', 'type': 'PATIENT'}, {'entity': '60-year-old African American male', 'type': 'PATIENT'}, {'entity': 'prostate cancer', 'type': 'DIAGNOSIS'}, {'entity': 'family history significant for prostate cancer', 'type': 'DIAGNOSIS'}, {'entity': '62-year-old brother', 'type': 'PATIENT'}, {'entity': 'radiation', 'type': 'TREATMENT'}, {'entity': 'increased risk for prostate cancer', 'type': 'RISK'}, {'entity': 'ethnicity and family history (first-degree relative diagnosed before age 65)', 'type': 'RISK'}, {'entity': 'PSA testing', 'type': 'TEST'}, {'entity': 'varying recommendations from different health organizations', 'type': 'DIAGNOSIS'}, {'entity': 'detailed conversation about PSA testing', 'type': 'PLAN'}, {'entity': 'research further and prepare any questions for a follow-up discussion', 'type': 'PLAN'}, {'entity': 'report any new symptoms or concerns in the interim', 'type': 'PLAN'}]


In [85]:
llm = ChatOpenAI(
    api_key=os.environ['API_KEY'],
    base_url=os.environ['API_BASE_URL'],
    temperature=0.0,
    model='qwen-3-32b'
)
golden_parse_iter = 0

In [89]:
def get_golden_ner_value(query:str):
    global golden_parse_iter
    golden_parse_iter = golden_parse_iter + 1
    start = time.time()
    llm_resp = (ChatPromptTemplate.from_messages([HumanMessage(query)]) | llm | StrOutputParser()).invoke({})
    obj = get_ner_answer(llm_resp)
    end = time.time()
    print(f'Calculating golden result iteration {golden_parse_iter} for time {end - start}')
    return obj

In [90]:
get_golden_ner_value(df['ner_prompt'][3])

Calculating golden result iteration 2 for time 7.536638259887695


[{'entity': 'prostate cancer', 'type': 'DIAGNOSIS'},
 {'entity': 'PSA testing', 'type': 'MEDICATION'}]

In [91]:
golden_parse_iter = 0
df["ner_golden_parsed"] = df['ner_prompt'].apply(get_golden_ner_value)


Calculating golden result iteration 1 for time 0.7592453956604004
Calculating golden result iteration 2 for time 3.228048086166382
Calculating golden result iteration 3 for time 2.8805975914001465
Calculating golden result iteration 4 for time 2.850613594055176
Calculating golden result iteration 5 for time 1.23984694480896
Calculating golden result iteration 6 for time 4.726486682891846
Calculating golden result iteration 7 for time 3.495180606842041
Calculating golden result iteration 8 for time 5.304432153701782
Calculating golden result iteration 9 for time 4.653918743133545
Calculating golden result iteration 10 for time 4.041796445846558
Calculating golden result iteration 11 for time 3.014960527420044
Calculating golden result iteration 12 for time 3.007314920425415


In [93]:
df

Unnamed: 0,soap,ner_prompt,ner_prompt_len,ner_result,ner_parsed,ner_golden_parsed
3,"S: Patient D, a 60-year-old African American m...",\nYou are NER model. Find all named entities i...,1744,\nYou are NER model. Find all named entities i...,"[{'entity': 'Patient D', 'type': 'PATIENT'}, {...","[{'entity': 'prostate cancer', 'type': 'DIAGNO..."
10,"S: The patient, a postpartum woman with a hist...",\nYou are NER model. Find all named entities i...,1919,\nYou are NER model. Find all named entities i...,"[{'entity': 'postpartum woman', 'type': 'PATIE...","[{'entity': 'postpartum woman', 'type': 'DIAGN..."
0,S: The patient's mother reports that her 13-ye...,\nYou are NER model. Find all named entities i...,1924,\nYou are NER model. Find all named entities i...,"[{'entity': 'attention deficit disorder', 'typ...","[{'entity': 'speech and developmental delays',..."
1,"S: The patient, a 21-month-old male, presented...",\nYou are NER model. Find all named entities i...,1965,\nYou are NER model. Find all named entities i...,"[{'entity': '21-month-old male', 'type': 'PATI...",[{'entity': 'weakness in his lower extremities...
6,S: The patient reports a history of abdominal ...,\nYou are NER model. Find all named entities i...,1973,\nYou are NER model. Find all named entities i...,"[{'entity': 'abdominal pain', 'type': 'SYMPTOM...","[{'entity': 'abdominal pain', 'type': 'SYMPTOM..."
4,"S: The patient, a married woman with a 7-year ...",\nYou are NER model. Find all named entities i...,1973,\nYou are NER model. Find all named entities i...,"[{'entity': '7-year history of infertility', '...","[{'entity': 'irregular menstruation', 'type': ..."
7,"S: The patient, who has been experiencing righ...",\nYou are NER model. Find all named entities i...,1989,\nYou are NER model. Find all named entities i...,"[{'entity': 'right ankle swelling', 'type': 'S...","[{'entity': 'right ankle swelling', 'type': 'S..."
11,"S: The patient, diagnosed with infantile-onset...",\nYou are NER model. Find all named entities i...,2002,\nYou are NER model. Find all named entities i...,[{'entity': 'infantile-onset hypophosphatasia'...,"[{'entity': 'recurrent pneumonia', 'type': 'SY..."
9,"S: The patient, with a history of asthma, was ...",\nYou are NER model. Find all named entities i...,2013,\nYou are NER model. Find all named entities i...,"[{'entity': 'asthma', 'type': 'DIAGNOSIS'}, {'...","[{'entity': 'unresponsive', 'type': 'SYMPTOM'}..."
2,"S: Patient reports experiencing fatigue, night...",\nYou are NER model. Find all named entities i...,2020,\nYou are NER model. Find all named entities i...,"[{'entity': 'fatigue', 'type': 'SYMPTOM'}, {'e...","[{'entity': 'fatigue', 'type': 'SYMPTOM'}, {'e..."
