In [23]:
import pandas as pd
import spacy
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import os
import json
import logging
from torch.nn.functional import cosine_similarity
import math
import random
from tqdm import tqdm
import time
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s | %(levelname)s | %(message)s",
    datefmt="%H:%M:%S"
)

In [4]:
icd_df = pd.read_csv("icd_codes.csv")

In [5]:
cpt_df = pd.read_csv("cpt_codes.csv")

In [6]:
print("ICD Codes Shape:", icd_df.shape)
print("CPT Codes Shape:", cpt_df.shape)

ICD Codes Shape: (73427, 4)
CPT Codes Shape: (18535, 4)


In [7]:
display(icd_df.head())

Unnamed: 0,CODE,SHORT DESCRIPTION (VALID ICD-10 FY2025),LONG DESCRIPTION (VALID ICD-10 FY2025),NF EXCL
0,A000,"Cholera due to Vibrio cholerae 01, biovar chol...","Cholera due to Vibrio cholerae 01, biovar chol...",
1,A001,"Cholera due to Vibrio cholerae 01, biovar eltor","Cholera due to Vibrio cholerae 01, biovar eltor",
2,A009,"Cholera, unspecified","Cholera, unspecified",
3,A0100,"Typhoid fever, unspecified","Typhoid fever, unspecified",
4,A0101,Typhoid meningitis,Typhoid meningitis,


In [8]:
display(cpt_df.head())

Unnamed: 0,HCPCS,MOD,DESCRIPTION,STATUS
0,A0021,,Outside state ambulance serv,I
1,A0080,,Noninterest escort in non er,I
2,A0090,,Interest escort in non er,I
3,A0100,,Nonemergency transport taxi,I
4,A0110,,Nonemergency transport bus,I


In [9]:
# Clean ICD codes
icd_df_clean = icd_df[['CODE', 'LONG DESCRIPTION (VALID ICD-10 FY2025)']].copy()
icd_df_clean.columns = ['code', 'description']

In [10]:
# Clean CPT codes
cpt_df_clean = cpt_df[['HCPCS', 'DESCRIPTION']].copy()
cpt_df_clean.columns = ['code', 'description']

In [11]:
print("ICD Clean:", icd_df_clean.shape)
print("CPT Clean:", cpt_df_clean.shape)

ICD Clean: (73427, 2)
CPT Clean: (18535, 2)


In [12]:
display(icd_df_clean.head())
display(cpt_df_clean.head())

Unnamed: 0,code,description
0,A000,"Cholera due to Vibrio cholerae 01, biovar chol..."
1,A001,"Cholera due to Vibrio cholerae 01, biovar eltor"
2,A009,"Cholera, unspecified"
3,A0100,"Typhoid fever, unspecified"
4,A0101,Typhoid meningitis


Unnamed: 0,code,description
0,A0021,Outside state ambulance serv
1,A0080,Noninterest escort in non er
2,A0090,Interest escort in non er
3,A0100,Nonemergency transport taxi
4,A0110,Nonemergency transport bus


In [13]:
# Load the medium-size SciSpaCy model with word vectors
nlp = spacy.load("en_core_sci_md")

  deserializers["tokenizer"] = lambda p: self.tokenizer.from_disk(  # type: ignore[union-attr]


In [14]:
def embed_texts(texts, nlp):
    return np.array([nlp(str(desc)).vector for desc in texts])

# Generate embeddings
icd_embeddings = embed_texts(icd_df_clean["description"], nlp)
cpt_embeddings = embed_texts(cpt_df_clean["description"], nlp)

print("ICD embeddings shape:", icd_embeddings.shape)
print("CPT embeddings shape:", cpt_embeddings.shape)

ICD embeddings shape: (73427, 200)
CPT embeddings shape: (18535, 200)


In [15]:
sample_text = "Patient was admitted with chest pain and diagnosed with diabetes mellitus and hypertension. Patient underwent insulin therapy and diabetic management program."
doc = nlp(sample_text)

# Extract entity strings
entities = [ent.text for ent in doc.ents]
print("Entities:", entities)

# Compute embeddings for each entity
entity_vectors = [nlp(ent).vector for ent in entities]

# Fallback: if no entities detected, use the full doc vector
if len(entity_vectors) == 0:
    summary_vector = doc.vector
else:
    summary_vector = np.mean(entity_vectors, axis=0)

Entities: ['Patient', 'admitted with', 'chest pain', 'diagnosed', 'diabetes mellitus', 'hypertension', 'Patient', 'insulin therapy', 'diabetic management program']


In [17]:
from sklearn.metrics.pairwise import cosine_similarity
# ------------------------------
# Compute similarities for ICD using max similarity over entities
# ------------------------------
icd_sim = []
for icd_emb in icd_embeddings:
    score = max(cosine_similarity([ent_vec], [icd_emb])[0][0] for ent_vec in entity_vectors)
    icd_sim.append(score)
icd_sim = np.array(icd_sim)

# --- CPT Similarity (retrieval) ---
cpt_sim = []
for cpt_emb in cpt_embeddings:
    score = max(cosine_similarity([ent_vec], [cpt_emb])[0][0] for ent_vec in entity_vectors)
    cpt_sim.append(score)
cpt_sim = np.array(cpt_sim)

# --- Retrieve Top 50 matches (RAG retrieval base) ---
top_icd_idx = np.argsort(icd_sim)[-50:][::-1]
top_cpt_idx = np.argsort(cpt_sim)[-50:][::-1]

print("\nTop ICD matches (retrieved):")
print(icd_df_clean.iloc[top_icd_idx][["code", "description"]].head(10))

print("\nTop CPT matches (retrieved):")
print(cpt_df_clean.iloc[top_cpt_idx][["code", "description"]].head(10))


Top ICD matches (retrieved):
        code                                        description
23040   P702                         Neonatal diabetes mellitus
3575   E1165  Type 2 diabetes mellitus with hyperglycemia   ...
3478   E1065  Type 1 diabetes mellitus with hyperglycemia   ...
24053  R0789                                   Other chest pain
3669   E1365  Other specified diabetes mellitus with hypergl...
3489   E1121  Type 2 diabetes mellitus with diabetic nephrop...
3578    E119  Type 2 diabetes mellitus without complications...
3387   E1021  Type 1 diabetes mellitus with diabetic nephrop...
3481    E109  Type 1 diabetes mellitus without complications...
3559   E1143  Type 2 diabetes mellitus with diabetic autonom...

Top CPT matches (retrieved):
        code                   description
9189   32110          Explore/repair chest
5431   S9460  Diabetic management program,
5430   S9455  Diabetic management program,
5357   S9141  Diabetic management program,
5432   S9465  Diabeti

In [18]:
local_model_path = r"C:\Models\biomistral-7b"  

print("Loading model from local path...")

tokenizer = AutoTokenizer.from_pretrained(local_model_path)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
model = AutoModelForCausalLM.from_pretrained(
    local_model_path,
    device_map="auto",            # automatically use GPU if available
    offload_folder="offload_dir", # optional, helps with CPU/GPU memory
    torch_dtype=torch.float16
)

print("Model loaded!")

Loading model from local path...


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Model loaded!


In [28]:
def generate_one_line_justifications_batched(summary, icd_df, cpt_df, icd_idx, cpt_idx,
                                             N_icd=1, N_cpt=1, max_new_tokens=200):
    """
    Generates justifications in a single batch to significantly speed up
    the process by avoiding sequential LLM calls.
    """
    start_batch_time = time.time()
    
    # 1. Get all codes and descriptions
    top_icd_indices = list(icd_idx[:N_icd])
    top_cpt_indices = list(cpt_idx[:N_cpt])

    top_icds = icd_df.iloc[top_icd_indices][["code", "description"]].to_records(index=False)
    top_cpts = cpt_df.iloc[top_cpt_indices][["code", "description"]].to_records(index=False)

    prompts = []
    code_mapping = [] # To map results back: [(type, code), ...]
    results = {"ICD": {}, "CPT": {}}

    # Helper to create the standard prompt
    def create_prompt(code, desc):
        return f"""Patient Summary:
{summary}

Code: {code} — {desc}

Task:
Write one short justification (one sentence) explaining why this code might apply to the patient.
Do not include anything else.
Justification:
"""

    # 2. Build lists of prompts and their corresponding codes
    for code, desc in top_icds:
        prompts.append(create_prompt(code, desc))
        code_mapping.append(("ICD", code))

    for code, desc in top_cpts:
        prompts.append(create_prompt(code, desc))
        code_mapping.append(("CPT", code))
    
    if not prompts:
        print("No codes provided to generate justifications for.")
        return results # Nothing to do
        
    print(f"Generating {len(prompts)} justifications in one batch...")

    # 3. Tokenize the entire batch
    # Ensure the tokenizer has a pad token; this is crucial for batching
    if tokenizer.pad_token is None:
        print("Tokenizer pad token is None. Setting to eos_token.")
        tokenizer.pad_token = tokenizer.eos_token
        
    inputs = tokenizer(
        prompts,
        return_tensors="pt",
        padding=True, # Pad all sequences to the same length
        truncation=True,
        max_length=1024 # Set a reasonable max_length
    ).to(model.device)

    # 4. Generate in one batch
    with torch.no_grad(): # Disable gradient calculation for inference
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            temperature=0.0,
            do_sample=False,
            pad_token_id=tokenizer.pad_token_id # Use the tokenizer's pad token ID
        )

    # 5. Decode *only* the newly generated tokens
    # This is more robust than splitting the string
    input_length = inputs['input_ids'].shape[1]
    generated_tokens = outputs[:, input_length:] # Slice to get only new tokens
    justifications = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)

    # 6. Process and populate results
    for i, justification_text in enumerate(justifications):
        code_type, code = code_mapping[i]
        
        # Clean up the justification (same as your original logic)
        cleaned_just = justification_text.split("\n")[0].strip()
        
        results[code_type][code] = cleaned_just

    print(f"Batch generation complete in {time.time() - start_batch_time:.2f}s")
    return results

In [31]:
justifications = generate_one_line_justifications_batched( # <--- RENAMED
    sample_text, icd_df_clean, cpt_df_clean, top_icd_idx, top_cpt_idx,N_icd=15,N_cpt=15 
)

print(json.dumps(justifications, indent=2))


Generating 30 justifications in one batch...




Batch generation complete in 7847.80s
{
  "ICD": {
    "P702": "Neonatal diabetes mellitus is a rare form of diabetes mellitus that typically presents in the first few months of life. It can be caused by a number of genetic and non-genetic factors, and treatment typically involves insulin therapy and other management strategies.",
    "E1165": "Patient was diagnosed with type 2 diabetes mellitus and underwent insulin therapy.",
    "E1065": "Patient was diagnosed with type 1 diabetes mellitus and underwent insulin therapy.",
    "R0789": "Patient was admitted with chest pain and diagnosed with diabetes mellitus and hypertension.",
    "E1365": "Patient was diagnosed with diabetes mellitus and underwent insulin therapy.",
    "E1121": "Patient was diagnosed with type 2 diabetes mellitus and diabetic nephropathy.",
    "E119": "Patient was diagnosed with type 2 diabetes mellitus without complications.",
    "E1021": "Patient was diagnosed with type 1 diabetes mellitus and diabetic nephro

In [49]:
import torch
import json

# (embed_texts function is fine as it was)
def embed_texts(texts, model, tokenizer):
    inputs = tokenizer(texts, return_tensors="pt", padding=True, truncation=True).to(model.device)
    with torch.no_grad():
        outputs = model(**inputs, output_hidden_states=True)
    embeddings = outputs.hidden_states[-1].mean(dim=1)
    return embeddings

def compute_similarity_json(summary, justifications_json, model, tokenizer):
    """
    Computes similarity. THIS VERSION IS CORRECT.
    It includes the local import fix AND the original, correct text extraction.
    """

    # --- FIX 1: Explicitly import the PyTorch version *inside* the function ---
    from torch.nn.functional import cosine_similarity 

    def compute_section_similarity(section_dict):
        if not section_dict:
            return {}

        codes = list(section_dict.keys())

        # --- THIS IS THE ORIGINAL, CORRECT LINE ---
        # It correctly gets the list of justification strings
        texts = list(section_dict.values())

        if not texts:
            return {}

        summary_emb = embed_texts([summary], model, tokenizer)   # shape [1, dim]
        text_embs   = embed_texts(texts, model, tokenizer)       # shape [num_codes, dim]

        # This will now work
        sims = cosine_similarity(summary_emb, text_embs).cpu().numpy().flatten()

        # Return the new structure
        return {code: {"justification": text, "similarity": float(sim)}
                for code, text, sim in zip(codes, texts, sims)}

    icd_with_sims = compute_section_similarity(justifications_json.get("ICD", {}))
    cpt_with_sims = compute_section_similarity(justifications_json.get("CPT", {}))

    return {"ICD": icd_with_sims, "CPT": cpt_with_sims}

print("✅ compute_similarity_json function has been CORRECTED and updated in memory.")

✅ compute_similarity_json function has been CORRECTED and updated in memory.


In [44]:
def compute_pseudo_ndcg_fixed(sim_json, top_n=None):
    def ndcg_section(section_dict):
        if not section_dict:
            return 0.0
        codes = list(section_dict.keys())
        sims = [section_dict[code]["similarity"] for code in codes]

        if top_n:
            sims = sims[:top_n]

        # DCG in the original order
        dcg = sum((2**rel - 1) / math.log2(i+2) for i, rel in enumerate(sims))
        # IDCG = ideal DCG (sorted descending)
        sorted_sims = sorted(sims, reverse=True)
        idcg = sum((2**rel - 1) / math.log2(i+2) for i, rel in enumerate(sorted_sims))
        if idcg == 0:
            return 0.0
        return dcg / idcg

    ndcg_icd = ndcg_section(sim_json.get("ICD", {}))
    ndcg_cpt = ndcg_section(sim_json.get("CPT", {}))
    overall_ndcg = (ndcg_icd + ndcg_cpt) / 2
    return {"ndcg_icd": ndcg_icd, "ndcg_cpt": ndcg_cpt, "overall_ndcg": overall_ndcg}

# Usage
ndcg_scores = compute_pseudo_ndcg_fixed(similarity_json, top_n=10)
print(ndcg_scores)


{'ndcg_icd': 0.9150828977218969, 'ndcg_cpt': 0.9414293379612056, 'overall_ndcg': 0.9282561178415513}


In [45]:
with open("synthetic_dataset.json") as f:
    synthetic_data = json.load(f)

# unwrap if there is an extra outer list
if isinstance(synthetic_data[0], list):
    synthetic_data = synthetic_data[0]

# check
print(f"Total records: {len(synthetic_data)}")
print(f"First record keys: {list(synthetic_data[0].keys())}")

def ndcg_for_section(codes_with_scores, top_n=None):
    if not codes_with_scores:
        return 0.0
    items_sorted = sorted(codes_with_scores.items(), key=lambda x: x[1], reverse=True)
    if top_n:
        items_sorted = items_sorted[:top_n]
    dcg = sum((2 ** rel - 1) / math.log2(i + 2) for i, (_, rel) in enumerate(items_sorted))
    idcg = dcg if dcg != 0 else 1.0
    return dcg / idcg

def simulate_similarity(code_list):
    return {code: random.uniform(0.2, 0.9) for code in code_list}

ndcg_icd_list = []
ndcg_cpt_list = []
overall_ndcg_list = []

for record in synthetic_data:
    icd_codes = record["icd_codes"]
    cpt_codes = record["cpt_codes"]

    icd_scores = simulate_similarity(icd_codes)
    cpt_scores = simulate_similarity(cpt_codes)

    ndcg_icd = ndcg_for_section(icd_scores)
    ndcg_cpt = ndcg_for_section(cpt_scores)
    overall_ndcg = (ndcg_icd + ndcg_cpt) / 2

    ndcg_icd_list.append(ndcg_icd)
    ndcg_cpt_list.append(ndcg_cpt)
    overall_ndcg_list.append(overall_ndcg)

results = {
    "mean_ndcg_icd": float(np.mean(ndcg_icd_list)),
    "std_ndcg_icd": float(np.std(ndcg_icd_list)),
    "mean_ndcg_cpt": float(np.mean(ndcg_cpt_list)),
    "std_ndcg_cpt": float(np.std(ndcg_cpt_list)),
    "mean_overall_ndcg": float(np.mean(overall_ndcg_list)),
    "std_overall_ndcg": float(np.std(overall_ndcg_list))
}

print(json.dumps(results, indent=2))


Total records: 100
First record keys: ['patient_id', 'discharge_summary', 'icd_codes', 'cpt_codes']
{
  "mean_ndcg_icd": 1.0,
  "std_ndcg_icd": 0.0,
  "mean_ndcg_cpt": 1.0,
  "std_ndcg_cpt": 0.0,
  "mean_overall_ndcg": 1.0,
  "std_overall_ndcg": 0.0
}


In [51]:
from sklearn.metrics.pairwise import cosine_similarity
import logging
# --- Setup logging ---
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s | %(levelname)s | %(message)s",
    datefmt="%H:%M:%S"
)

first_5_summaries = synthetic_data[:5]

ndcg_icd_list = []
ndcg_cpt_list = []
overall_ndcg_list = []
examples = []

# Ensure embeddings are NumPy arrays
icd_embeddings = [np.array(e) for e in icd_embeddings]
cpt_embeddings = [np.array(e) for e in cpt_embeddings]

print(f"Starting evaluation for {len(first_5_summaries)} summaries...")

for idx, summary_record in enumerate(tqdm(first_5_summaries, desc="Summaries")):
    start_time = time.time()
    print(f"\n=== Processing Summary {idx + 1}/{len(first_5_summaries)} ===")

    summary_text = summary_record["discharge_summary"]
    print(f"Summary snippet: {summary_text[:100]}...")

    # --- Step 1: Compute top code indices using embeddings ---
    print("Step 1: Extracting entities and computing similarity embeddings...")
    doc = nlp(summary_text)
    entities = [ent.text for ent in doc.ents]
    print(f"Entities: {entities}")
    entity_vectors = [np.array(nlp(ent).vector) for ent in entities]

    if not entity_vectors:
        summary_vector = np.array(doc.vector)
        entity_vectors = [summary_vector]  # fallback
    else:
        summary_vector = np.mean(entity_vectors, axis=0)

    # --- ICD similarity using per-entity cosine similarity ---
    icd_sim = []
    for icd_emb in icd_embeddings:
        score = max(cosine_similarity(np.array([ent_vec]), np.array([icd_emb]))[0][0]
                    for ent_vec in entity_vectors)
        icd_sim.append(score)
    icd_sim = np.array(icd_sim)
    top_icd_idx = np.argsort(icd_sim)[-50:][::-1]

    # --- CPT similarity using per-entity cosine similarity ---
    cpt_sim = []
    for cpt_emb in cpt_embeddings:
        score = max(cosine_similarity(np.array([ent_vec]), np.array([cpt_emb]))[0][0]
                    for ent_vec in entity_vectors)
        cpt_sim.append(score)
    cpt_sim = np.array(cpt_sim)
    top_cpt_idx = np.argsort(cpt_sim)[-50:][::-1]

    # --- Optional: print top codes like before ---
    print("\nTop ICD matches (retrieved):")
    print(icd_df_clean.iloc[top_icd_idx][["code", "description"]].head(30))

    print("\nTop CPT matches (retrieved):")
    print(cpt_df_clean.iloc[top_cpt_idx][["code", "description"]].head(30))

    # --- Step 2: Generate justifications ---
    print("Step 2: Generating justifications (this might take time)...")
    just_start = time.time()
    justifications = generate_one_line_justifications_batched( # <--- RENAMED
    summary_text, icd_df_clean, cpt_df_clean, top_icd_idx, top_cpt_idx,N_icd=15,N_cpt=15 
)
    print(f"Justifications generated in {time.time() - just_start:.2f}s")

    # --- Step 3: Compute similarity JSON ---
    print("Step 3: Computing similarity JSON (embedding comparison)...")
    sim_start = time.time()
    similarity_json = compute_similarity_json(summary_text, justifications, model, tokenizer)
    print(f"Similarity computed in {time.time() - sim_start:.2f}s")

    # --- Step 4: Compute pseudo-NDCG ---
    print("Step 4: Computing NDCG scores...")
    ndcg_start = time.time()
    ndcg_scores = compute_pseudo_ndcg_fixed(similarity_json, top_n=10)
    print(f"NDCG computed in {time.time() - ndcg_start:.2f}s")

    ndcg_icd_list.append(ndcg_scores["ndcg_icd"])
    ndcg_cpt_list.append(ndcg_scores["ndcg_cpt"])
    overall_ndcg_list.append(ndcg_scores["overall_ndcg"])

    examples.append({
        "summary": summary_text,
        "top_icd": list(similarity_json["ICD"].items())[:10],
        "top_cpt": list(similarity_json["CPT"].items())[:10],
        "ndcg": ndcg_scores
    })

    total_elapsed = time.time() - start_time
    print(f"✅ Completed summary {idx + 1}/{len(first_5_summaries)} in {total_elapsed:.2f}s")
    print(f"NDCG: {ndcg_scores}")

# --- Step 5: Aggregate results ---
results = {
    "mean_ndcg_icd": float(np.mean(ndcg_icd_list)),
    "std_ndcg_icd": float(np.std(ndcg_icd_list)),
    "mean_ndcg_cpt": float(np.mean(ndcg_cpt_list)),
    "std_overall_ndcg": float(np.std(overall_ndcg_list)),
    "mean_overall_ndcg": float(np.mean(overall_ndcg_list)),
    "std_ndcg_cpt": float(np.std(ndcg_cpt_list)),
    "examples": examples
}

print("\n=== Final Results ===")
print(json.dumps(results, indent=2))


Starting evaluation for 5 summaries...


Summaries:   0%|                                                                                 | 0/5 [00:00<?, ?it/s]


=== Processing Summary 1/5 ===
Summary snippet: 58-year-old male with acute chest pain diagnosed with inferior wall myocardial infarction. Underwent...
Step 1: Extracting entities and computing similarity embeddings...
Entities: ['male', 'acute chest pain', 'diagnosed', 'inferior wall myocardial infarction', 'coronary angioplasty', 'stent placement', 'stabilized', 'dual antiplatelet therapy']

Top ICD matches (retrieved):
          code                                        description
9152      I221  Subsequent ST elevation (STEMI) myocardial inf...
9151      I220  Subsequent ST elevation (STEMI) myocardial inf...
9142     I2119  ST elevation (STEMI) myocardial infarction inv...
9140     I2109  ST elevation (STEMI) myocardial infarction inv...
24053    R0789                                   Other chest pain
9139     I2102  ST elevation (STEMI) myocardial infarction inv...
9141     I2111  ST elevation (STEMI) myocardial infarction inv...
64828  T82855S         Stenosis of coronary a



Batch generation complete in 5932.75s
Justifications generated in 5932.76s
Step 3: Computing similarity JSON (embedding comparison)...


Summaries:  20%|█████████████▍                                                     | 1/5 [2:11:37<8:46:28, 7897.18s/it]

Similarity computed in 1798.67s
Step 4: Computing NDCG scores...
NDCG computed in 0.00s
✅ Completed summary 1/5 in 7897.18s
NDCG: {'ndcg_icd': 0.9000368282656539, 'ndcg_cpt': 0.928981499990445, 'overall_ndcg': 0.9145091641280494}

=== Processing Summary 2/5 ===
Summary snippet: 72-year-old female with type 2 diabetes and dysuria found to have E. coli urinary tract infection. T...
Step 1: Extracting entities and computing similarity embeddings...
Entities: ['female', 'type 2 diabetes', 'dysuria', 'E. coli', 'urinary tract infection', 'Treated with', 'intravenous ceftriaxone', 'discharged', 'symptom resolution']

Top ICD matches (retrieved):
         code                                        description
24250    R300                                            Dysuria
33       A042          Enteroinvasive Escherichia coli infection
22967    P393                   Neonatal urinary tract infection
32       A041         Enterotoxigenic Escherichia coli infection
34       A043       Enteroh



Batch generation complete in 4645.81s
Justifications generated in 4645.81s
Step 3: Computing similarity JSON (embedding comparison)...


Summaries:  40%|██████████████████████████▊                                        | 2/5 [3:52:26<5:40:29, 6809.95s/it]

Similarity computed in 1203.68s
Step 4: Computing NDCG scores...
NDCG computed in 0.00s
✅ Completed summary 2/5 in 6048.89s
NDCG: {'ndcg_icd': 0.8568982846198836, 'ndcg_cpt': 0.7450808555733915, 'overall_ndcg': 0.8009895700966376}

=== Processing Summary 3/5 ===
Summary snippet: 45-year-old male admitted for acute appendicitis underwent successful laparoscopic appendectomy with...
Step 1: Extracting entities and computing similarity embeddings...
Entities: ['male', 'admitted', 'acute appendicitis', 'laparoscopic appendectomy', 'complications']

Top ICD matches (retrieved):
          code                                        description
11222    K3580                     Unspecified acute appendicitis
11223    K3589                           Other acute appendicitis
11217     K353      Acute appendicitis with localized peritonitis
11208     K352    Acute appendicitis with generalized peritonitis
10560    J0511                Acute epiglottitis with obstruction
10559    J0510          



Batch generation complete in 3439.43s
Justifications generated in 3439.43s
Step 3: Computing similarity JSON (embedding comparison)...


Summaries:  60%|████████████████████████████████████████▏                          | 3/5 [5:12:17<3:16:16, 5888.40s/it]

Similarity computed in 1241.64s
Step 4: Computing NDCG scores...
NDCG computed in 0.00s
✅ Completed summary 3/5 in 4791.75s
NDCG: {'ndcg_icd': 0.9722501377276835, 'ndcg_cpt': 0.9083390679975236, 'overall_ndcg': 0.9402946028626036}

=== Processing Summary 4/5 ===
Summary snippet: 62-year-old female with progressive shortness of breath diagnosed with congestive heart failure exac...
Step 1: Extracting entities and computing similarity embeddings...
Entities: ['female', 'progressive', 'breath diagnosed', 'congestive heart failure', 'exacerbation', 'Echocardiogram', 'ejection fraction']

Top ICD matches (retrieved):
         code                                        description
9370    I5021          Acute systolic (congestive) heart failure
9374    I5031         Acute diastolic (congestive) heart failure
9371    I5022        Chronic systolic (congestive) heart failure
9375    I5032       Chronic diastolic (congestive) heart failure
9369    I5020    Unspecified systolic (congestive) hear



Batch generation complete in 3676.85s
Justifications generated in 3676.85s
Step 3: Computing similarity JSON (embedding comparison)...


Summaries:  80%|█████████████████████████████████████████████████████▌             | 4/5 [6:33:46<1:31:33, 5493.62s/it]

Similarity computed in 1067.27s
Step 4: Computing NDCG scores...
NDCG computed in 0.00s
✅ Completed summary 4/5 in 4888.43s
NDCG: {'ndcg_icd': 0.8737369899862643, 'ndcg_cpt': 0.9826511126401619, 'overall_ndcg': 0.9281940513132131}

=== Processing Summary 5/5 ===
Summary snippet: 29-year-old female with severe right lower quadrant pain diagnosed with ruptured ovarian cyst confir...
Step 1: Extracting entities and computing similarity embeddings...
Entities: ['female', 'right lower quadrant', 'diagnosed', 'ruptured', 'ovarian cyst', 'confirmed by', 'pelvic ultrasound']

Top ICD matches (retrieved):
         code                                        description
24074   R1031                          Right lower quadrant pain
24075   R1032                           Left lower quadrant pain
24069   R1011                          Right upper quadrant pain
24142   R1933            Right lower quadrant abdominal rigidity
24070   R1012                           Left upper quadrant pain
23486 



Batch generation complete in 4554.72s
Justifications generated in 4554.72s
Step 3: Computing similarity JSON (embedding comparison)...


Summaries: 100%|█████████████████████████████████████████████████████████████████████| 5/5 [8:21:28<00:00, 6017.62s/it]

Similarity computed in 1765.00s
Step 4: Computing NDCG scores...
NDCG computed in 0.00s
✅ Completed summary 5/5 in 6461.82s
NDCG: {'ndcg_icd': 0.9237742271305603, 'ndcg_cpt': 0.861601276095013, 'overall_ndcg': 0.8926877516127867}

=== Final Results ===
{
  "mean_ndcg_icd": 0.9053392935460091,
  "std_ndcg_icd": 0.04047285737252495,
  "mean_ndcg_cpt": 0.885330762459307,
  "std_overall_ndcg": 0.04975298941862032,
  "mean_overall_ndcg": 0.895335028002658,
  "std_ndcg_cpt": 0.08017374192925965,
  "examples": [
    {
      "summary": "58-year-old male with acute chest pain diagnosed with inferior wall myocardial infarction. Underwent coronary angioplasty with stent placement and was stabilized on dual antiplatelet therapy.",
      "top_icd": [
        [
          "I221",
          {
            "justification": "This patient had a subsequent inferior wall STEMI 10 days after the initial STEMI.",
            "similarity": 0.31494140625
          }
        ],
        [
          "I220",
      




In [None]:
|