<a href="https://colab.research.google.com/github/Gutzu-Encoder/Anemia-guideline-based-using-Tool-calling-LLM/blob/main/anemia_assistant.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Notebook shell escape: quietly (-q) install the Hugging Face stack used below.
!pip install -q transformers accelerate bitsandbytes sentencepiece
import torch
# Report the name of CUDA device 0, or "No GPU" when torch sees no CUDA device.
print(torch.cuda.get_device_name(0) if torch.cuda.is_available() else "No GPU")
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

# Hugging Face model id shared by the tokenizer and the model.
MODEL_NAME = "Qwen/Qwen2.5-1.5B-Instruct"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="auto",        # automatically uses GPU
    torch_dtype=torch.float16 # faster + lower memory
)

# Text-generation pipeline used by llm_generate.
# NOTE(review): temperature presumably only matters when sampling is enabled
# in the model's generation config -- confirm; extraction may want
# deterministic decoding instead.
llm = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=256,
    temperature=0.3
)
def llm_generate(prompt: str) -> str:
    """Run *prompt* through the ``llm`` pipeline and return the continuation.

    Args:
        prompt: Text handed to the pipeline as-is.

    Returns:
        The first generation's ``generated_text`` with its first
        ``len(prompt)`` characters removed and surrounding whitespace stripped.
    """
    generations = llm(prompt)
    full_text = generations[0]["generated_text"]
    # Assumes the pipeline echoes the prompt at the start of generated_text,
    # so slicing by its length leaves only the newly generated part.
    continuation = full_text[len(prompt):]
    return continuation.strip()
# Smoke test: send a tiny prompt through llm_generate and print what comes back.
test = llm_generate("Return JSON: {\"ok\": true}")
print(test)



In [None]:
# System instructions for the extraction step: which fields to pull out of the
# free-text findings and how to encode them. The field names listed here must
# match the keys in Router_scheme and in its few-shot examples.
router_system = """
you are a clinical assistant. Your task is to do lab and history extraction.
Extract values exactly as written in text.
If value is not explicitly written in text, return null. Do NOT guess.
Do NOT modify numbers.
Do NOT correct abnormal or impossible values.
Copy numbers EXACTLY as written. Even if medically impossible.
Extract these extract values from patient findings:
- mcv(mean corpuscular volume): look for mcv or MCV with numbers behind it, output it in float
- reticulocyte_count: look for "Reticulocyte Count", 'reticulocyte count'. 'retic' followed by a number.
- megaloblastic: 1 if mentioned "megaloblastic" cells present in the finding of microscopic. 2 if there is a micropscipic infomation but not shown "megaloblastic" cell. 0 if there's no microscopic infomation.
- CD_history:  1 if mentioned past chonic disease is present in the finding. 2 if finding tell patient "Do not have chronic disease" . 0 if there's no infomation about past chronic disease.
- lead_history: 1 if mentioned "working in factory" is present in the finding. 2 if finding tell patient "Do not have experiecne near factory" . 0 if there's no infomation about patient having close contact in factory.
Return only in JSON matching this exact structure. No explanation. """

# Output schema plus few-shot examples appended after router_system.
# The schema uses the same "lead_history" key as the examples and has no
# trailing comma, so the template the model copies is itself valid JSON.
# NOTE(review): router_system asks for JSON only, while the closing line here
# asks for step-by-step thinking before the JSON -- decide which is intended.
Router_scheme = """ REQUIRED OUTPUT FORMAT( copy this structure exactly):
{
  "mcv": <number or null>,
  "reticulocyte_count": <number or null>,
  "megaloblastic": <number or null>,
  "CD_history": <number or null>,
  "lead_history": <number or null>
}
Example:
EXAMPLES (study these carefully):

Input: "patient retic 2, mcv 500, no history of chronic disease"
Think: I see "retic 2" so reticulocyte_count = 2 (exact copy)
Think: I see "mcv 500" so mcv = 500 (exact copy, even though medically impossible)
Think: I see "no history of chronic disease" so CD_history = 2
Think: No microscopic info mentioned so megaloblastic = 0
Think: No factory info mentioned so lead_history = 0
Output: {"mcv": 500, "reticulocyte_count": 2, "megaloblastic": 0, "CD_history": 2, "lead_history": 0}

Input: "MCV 78.5, reticulocyte count 3.2, microscopy shows megaloblastic cells"
Think: I see "MCV 78.5" so mcv = 78.5 (exact copy including decimal)
Think: I see "reticulocyte count 3.2" so reticulocyte_count = 3.2 (exact copy)
Think: I see "megaloblastic cells" so megaloblastic = 1
Think: No chronic disease info so CD_history = 0
Think: No factory info so lead_history = 0
Output: {"mcv": 78.5, "reticulocyte_count": 3.2, "megaloblastic": 1, "CD_history": 0, "lead_history": 0}

Input: "patient works in factory, mcv 1000, retic 15"
Think: I see "mcv 1000" so mcv = 1000 (exact copy, even though impossible)
Think: I see "retic 15" so reticulocyte_count = 15 (exact copy)
Think: I see "works in factory" so lead_history = 1
Think: No microscopic info so megaloblastic = 0
Think: No chronic disease info so CD_history = 0
Output: {"mcv": 1000, "reticulocyte_count": 15, "megaloblastic": 0, "CD_history": 0, "lead_history": 1}

Input: "hemoglobin 8.2, platelets normal"
Think: I don't see mcv mentioned so mcv = null
Think: I don't see reticulocyte mentioned so reticulocyte_count = null
Think: No microscopic info so megaloblastic = 0
Think: No chronic disease info so CD_history = 0
Think: No factory info so lead_history = 0
Output: {"mcv": null, "reticulocyte_count": null, "megaloblastic": 0, "CD_history": 0, "lead_history": 0}

Now extract from this input. Show your thinking step by step, then output JSON.
"""

In [None]:
import json
def anemia_router(finding: str) -> dict:
    """Extract structured lab/history fields from free text using the LLM.

    Builds a prompt from ``router_system``, ``Router_scheme`` and *finding*,
    sends it through ``llm_generate``, strips Markdown code fences from the
    reply, and parses the span from the last ``{`` to the last ``}`` as JSON.

    Args:
        finding: Free-text patient findings.

    Returns:
        The parsed JSON object as a dict. When the reply is not valid JSON, or
        parses to something other than an object, returns
        ``{"error": "invalid_json", "raw_output": <text that failed to parse>}``.
    """
    # The interior lines of this literal are prompt text, so their two-space
    # indentation is part of what the model sees; only the outer whitespace is
    # removed by strip().
    prompt = f"""
  {router_system}
  {Router_scheme}
  Input: {finding}
  Output:
  """.strip()
    response = llm_generate(prompt)
    response = response.replace("```json", "").replace("```", "").strip()

    # Keep only the last {...} span so any text the model emits before the
    # final object does not break parsing.
    start = response.rfind("{")
    end = response.rfind("}")
    if start != -1 and end > start:
        response = response[start:end + 1]

    try:
        parsed = json.loads(response)
    except json.JSONDecodeError:
        parsed = None

    # Callers use dict methods on the result, so a bare number/string/list
    # that happens to be valid JSON is reported as a parse failure too.
    if isinstance(parsed, dict):
        return parsed
    return {
        "error": "invalid_json",
        "raw_output": response,
    }

In [None]:
# Single-example check of the extractor. The call is the cell's last
# expression, so its return value is what the notebook shows as cell output.
x = "patient retic 2, mcv 500, no history of chronic disease"
anemia_router(x)

In [None]:
# Sample findings for a manual check of anemia_router.
# NOTE(review): the first three strings are the same inputs used as few-shot
# examples inside Router_scheme, so only the last one is unseen by the prompt.
test_cases = [
    "patient retic 2, mcv 500, no history of chronic disease",
    "MCV 78.5, reticulocyte count 3.2, microscopy shows megaloblastic cells",
    "patient works in factory, mcv 1000, retic 15",
    "mcv 92.3, patient has diabetes",
]

# Print a banner with the 1-based case number and input, then the extractor's
# result pretty-printed as JSON.
for i, test in enumerate(test_cases, 1):
    print(f"\n{'='*60}")
    print(f"Test {i}: {test}")
    print(f"{'='*60}")
    result = anemia_router(test)
    print(f"Result: {json.dumps(result, indent=2)}")

In [None]:
def _as_number(value):
    """Return *value* as a number, or None when it is missing or not numeric."""
    if isinstance(value, (int, float)):
        return value
    if isinstance(value, str):
        # The extractor's JSON may carry numbers as strings such as "78.5".
        try:
            return float(value.strip())
        except ValueError:
            return None
    return None


def anemia_logic_tool(data: dict) -> dict:
    """Classify an anemia pattern from extracted lab values using fixed rules.

    Reads ``mcv``, ``reticulocyte_count`` and ``megaloblastic`` from *data*.
    Numeric strings are accepted; anything missing or non-numeric is treated
    as absent.

    Rules, in order:
        * no mcv -> ``need_more_data``
        * mcv < 80 -> by reticulocyte count: > 2.5 ``thalassemia``,
          == 2.5 ``sideroblastic_or_anemia_of_chronic_disease``,
          < 2.5 ``iron_deficiency``, missing ``microcytic_need_retic``
        * 80 <= mcv <= 100 -> by reticulocyte count: > 2.5
          ``hemolysis_or_acute_blood_loss``, == 2.5 ``mixed_or_borderline``,
          < 2.5 ``anemia_of_chronic_disease_or_renal_or_bone_marrow``,
          missing ``normocytic_need_retic``
        * mcv > 100 -> by megaloblastic flag: 1 ``b12_or_folate_deficiency``,
          2 ``non_megaloblastic_macrocytic``, otherwise ``macrocytic_need_smear``

    Args:
        data: Mapping with the extracted fields.

    Returns:
        A dict with a ``label`` and a human-readable ``reason``.
    """
    mcv = _as_number(data.get("mcv"))
    retic = _as_number(data.get("reticulocyte_count"))
    meg = _as_number(data.get("megaloblastic"))  # 0/1/2

    if mcv is None:
        return {"label": "need_more_data", "reason": "missing mcv"}

    if mcv < 80:
        if retic is None:
            return {"label": "microcytic_need_retic", "reason": "mcv < 80 but missing reticulocyte_count"}
        if retic > 2.5:
            return {"label": "thalassemia", "reason": "mcv < 80 and retic > 2.5"}
        if retic == 2.5:
            return {"label": "sideroblastic_or_anemia_of_chronic_disease", "reason": "mcv < 80 and retic == 2.5"}
        return {"label": "iron_deficiency", "reason": "mcv < 80 and retic < 2.5"}

    if 80 <= mcv <= 100:
        if retic is None:
            return {"label": "normocytic_need_retic", "reason": "80<=mcv<=100 but missing reticulocyte_count"}
        if retic > 2.5:
            return {"label": "hemolysis_or_acute_blood_loss", "reason": "normocytic and retic > 2.5"}
        if retic == 2.5:
            return {"label": "mixed_or_borderline", "reason": "normocytic and retic == 2.5"}
        return {"label": "anemia_of_chronic_disease_or_renal_or_bone_marrow", "reason": "normocytic and retic < 2.5"}

    if mcv > 100:
        if meg == 1:
            return {"label": "b12_or_folate_deficiency", "reason": "mcv > 100 and megaloblastic present"}
        if meg == 2:
            return {"label": "non_megaloblastic_macrocytic", "reason": "mcv > 100 and not megaloblastic"}
        return {"label": "macrocytic_need_smear", "reason": "mcv > 100 but no microscopic info"}

    # Only reachable when mcv compares false against every range (e.g. NaN).
    return {"label": "unclassified", "reason": "no rule matched"}


In [None]:
# Direct check of the rule engine with hand-written values (no LLM involved).
print(anemia_logic_tool({"mcv": 72, "reticulocyte_count": 1.5, "megaloblastic": 0}))


In [None]:
# End-to-end check: LLM extraction followed by rule-based classification.
patient_text = "patient reticulocyte 2, mcv 75, no microscopic info"

final_result = anemia_logic_tool(anemia_router(patient_text))

print(final_result)


In [None]:
import gradio as gr

def run_anemia(text: str) -> dict:
    """Gradio callback: extract fields from *text* and classify the anemia.

    Args:
        text: Free-text patient findings typed into the UI.

    Returns:
        The extractor's error payload, unchanged, when the LLM reply could not
        be parsed; otherwise the result of ``anemia_logic_tool``.
    """
    extracted = anemia_router(text)
    # anemia_router signals a parse failure with an "error" key. Passing that
    # payload to the classifier would report it as "missing mcv" and hide the
    # real cause, so surface it directly instead.
    if isinstance(extracted, dict) and "error" in extracted:
        return extracted
    return anemia_logic_tool(extracted)

# Build a Gradio UI (text box in, JSON out) wired to run_anemia and launch it.
gr.Interface(
    fn=run_anemia,
    inputs="textbox",
    outputs="json",
    title="Anemia Helper"
).launch()


In [None]:
# Notebook shell escape: list the files in the current working directory.
!ls
