#### Use actual notes and questions from the eConsult_QA table to test LLM performance on ICD-10 code extraction
### Check the description table for the chief complaints


In [27]:
# Import the API and other necessary libraries
import sys
sys.path.append('..')  # Add parent directory to path to import the API

# Data manipulation and display
import pandas as pd
pd.set_option('display.max_columns', None)  # Show all columns in the output
from IPython.display import Image, display
import random
import numpy as np
import matplotlib.pyplot as plt
import json
import os
from dotenv import load_dotenv

# Google BigQuery related imports
from google.cloud import bigquery
from google.cloud.bigquery import dbapi
%load_ext google.cloud.bigquery

# Local API imports
from api.bigquery_api import BigQueryAPI
from importlib import reload
from api import bigquery_api
reload(bigquery_api)

# Initialize BigQuery client
client = bigquery.Client("som-nero-phi-jonc101")

# import LLM API: use langgraph as of now
# from groq import Groq
from typing import TypedDict, Annotated, Sequence
from langgraph.graph import StateGraph, END
from langchain_core.messages import HumanMessage, AIMessage
from langchain_groq import ChatGroq
from langchain_openai import ChatOpenAI
from langchain_core.runnables import RunnableLambda
import operator
import json
import re
load_dotenv()





The google.cloud.bigquery extension is already loaded. To reload it, use:
  %reload_ext google.cloud.bigquery




True

# Query

In [28]:
%%bigquery --use_rest_api eConsult_QA
-- Pull a 100-row sample of the eConsult_QA table; the %%bigquery line above names the destination DataFrame (eConsult_QA).
select * from som-nero-phi-jonc101.Digital_Medical_Con.eConsult_QA limit 100

Query is running:   0%|          |

Downloading:   0%|          |

In [54]:
# Show one randomly chosen row. random.randrange excludes the upper bound, so the position is
# always a valid row; random.randint includes it and could ask for row len(eConsult_QA),
# which makes iloc raise IndexError.
eConsult_QA.iloc[[random.randrange(len(eConsult_QA))]]

Unnamed: 0,anon_id,jittered_note_date,question,answer,prov_type,dept_specialty,dept_name
2,JC701846,2024-04-11 17:30:00,My Clinical Question (condition: growth/lesion...,- based on chart review only Recommendatio...,PHYSICIAN,DERMATOLOGY,DERMATOLOGY HOOVER


In [30]:
%%bigquery --use_rest_api top_200_icd10_codes
-- Count diagnosis rows per (icd10, dx_name, specialty) for three specialties, most frequent first.
-- The result is stored in the DataFrame named on the %%bigquery line (top_200_icd10_codes).
-- NOTE(review): LIMIT is 1000 although the destination is named "top_200" -- confirm the intended size.
-- NOTE(review): dx_name is part of the grouping, so one icd10 can appear on several rows.
select distinct icd10, dx_name, dm.specialty, count(icd10) as count from som-nero-phi-jonc101.shc_core_2024.diagnosis as dx
JOIN `som-nero-phi-jonc101.shc_core_2024.dep_map` dm
  ON dx.dept_id = dm.department_id
where dm.specialty IN ('Infectious Diseases', 'Endocrinology', 'Hematology')
group by icd10,dx_name,dm.specialty
order by count desc
limit 1000

Query is running:   0%|          |

Downloading:   0%|          |

In [31]:
top_200_icd10_codes

Unnamed: 0,icd10,dx_name,specialty,count
0,M81.0,Age-related osteoporosis without current patho...,Endocrinology,133009
1,Z79.4,Long term (current) use of insulin,Endocrinology,90259
2,E11.65,Type 2 diabetes mellitus with hyperglycemia,Endocrinology,88002
3,E03.9,"Hypothyroidism, unspecified type",Endocrinology,85715
4,E03.9,"Hypothyroidism, unspecified",Endocrinology,80150
...,...,...,...,...
995,E88.09,"Other disorders of plasma-protein metabolism, ...",Hematology,808
996,D35.02,Benign neoplasm of left adrenal gland,Endocrinology,807
997,R74.01,Transaminitis,Infectious Diseases,806
998,E66.9,"Obesity, unspecified",Infectious Diseases,806


In [32]:
# Candidate code list handed to the LLM workflow: distinct ICD-10 codes in query order, capped at 400.
# Null codes are dropped first: a missing code is not a usable candidate, and a None entry in the
# list would compare equal to an unset state['icd10_code'] when the code is validated.
# NOTE(review): the name says "top_200" but the cap is 400 -- confirm which is intended.
top_200_icd10_codes_cleaned = (
    top_200_icd10_codes[['icd10']]
    .dropna(subset=['icd10'])
    .drop_duplicates()
    .head(400)
)

# Logging Setup

In [33]:
import logging
import datetime

# Set up logging
# Root logger: INFO and above goes to a timestamped file in the working directory
# and to the default stream handler.
logging.basicConfig(
    handlers=[
        logging.FileHandler(
            f'clinical_workflow_{datetime.datetime.now().strftime("%Y%m%d_%H%M%S")}.log'
        ),
        logging.StreamHandler(),
    ],
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)


In [34]:
def log_stage(stage_name: str, input_data: dict, output_data: dict):
    """Log the input and output of one workflow stage as pretty-printed JSON.

    Args:
        stage_name: Label written on the "Stage:" line.
        input_data: State mapping as it was before the stage ran.
        output_data: State mapping after the stage ran.

    Neither mapping is modified; loggable copies are built internally.
    """
    def _loggable(data: dict) -> dict:
        """Return a shallow copy with every DataFrame value rendered via to_string()."""
        return {
            key: value.to_string() if isinstance(value, pd.DataFrame) else value
            for key, value in data.items()
        }

    logging.info(f"\n{'='*50}")
    logging.info(f"Stage: {stage_name}")
    # default=str: a value json cannot encode (numpy scalar, datetime, ...) is logged as
    # its string form instead of raising TypeError in the middle of a workflow node.
    logging.info(f"Input: {json.dumps(_loggable(input_data), indent=2, default=str)}")
    logging.info(f"Output: {json.dumps(_loggable(output_data), indent=2, default=str)}")
    logging.info(f"{'='*50}\n")

# Build a LangGraph workflow to parse patient info and match an ICD-10 code

In [35]:
# api_key = os.getenv("GROQ_API_KEY")
# Read the OpenAI key from the environment; os.getenv returns None when the variable is unset.
# NOTE(review): the ChatOpenAI(...) calls in this notebook do not pass api_key explicitly.
api_key = os.getenv("OPENAI_API_KEY")

In [36]:
def clean_output(output):
    """
    Reduce a raw LLM reply to the payload the caller wants to parse.

    Steps applied to string input:
      1. remove every <think>...</think> section,
      2. trim surrounding whitespace,
      3. if the whole remaining reply is a single Markdown code fence
         (``` or ```json ...), return only the fenced content.

    Args:
        output: Reply text, or a DataFrame (returned unchanged).

    Returns:
        The cleaned string, or the same DataFrame object that was passed in.
    """
    # A DataFrame has no text to strip
    if isinstance(output, pd.DataFrame):
        return output

    # Remove all content between <think> tags, then surrounding whitespace
    cleaned_output = re.sub(r'<think>.*?</think>', '', output, flags=re.DOTALL).strip()

    # Models often wrap JSON in a fence even when told not to; json.loads cannot parse the
    # fence markers. An optional language tag counts only when followed by a newline.
    fenced = re.fullmatch(
        r'```(?:[\w+-]*[ \t]*\r?\n)?(.*?)```',
        cleaned_output,
        flags=re.DOTALL,
    )
    if fenced:
        cleaned_output = fenced.group(1).strip()

    return cleaned_output

In [37]:
# Define the state type
# NOTE(review): create_clinical_graph builds StateGraph(dict), not StateGraph(ClinicalState),
# so this class documents the expected keys rather than being enforced there.
class ClinicalState(TypedDict):
    """Keys of the state dict that the workflow nodes read and write."""
    clinical_question: str  # inserted into the matching and validation prompts
    clinical_notes: str  # free text; age and gender are extracted from it
    icd10_codes: pd.DataFrame  # candidate codes; validation reads its 'icd10' column
    patient_age: int | None  # filled by extract_patient_info
    patient_gender: str | None  # filled by extract_patient_info
    icd10_code: str | None  # filled by match_icd10_code; reset to None when validation fails
    rationale: str | None  # filled by match_icd10_code; reset to None when validation fails
    error: str | None  # message set by a failing stage; None when the last stage passed

In [38]:
def extract_patient_info(state: dict) -> dict:
    """Extract patient age and gender from the clinical notes with one LLM call.

    Args:
        state: Workflow state dict; ``clinical_notes`` is read from it.

    Returns:
        The same ``state`` object, updated in place. On success ``patient_age`` and
        ``patient_gender`` hold the ``age`` and ``gender`` values of the JSON reply.
        On any parsing failure ``error`` is set and the two fields are left unchanged.
    """
    # Shallow snapshot taken before mutation so the stage log can show before/after
    input_state = state.copy()

    # Groq alternative kept for reference:
    # llm = ChatGroq(
    #     model_name="Deepseek-R1-Distill-Llama-70b",
    #     temperature=0.3,
    #     api_key=api_key
    # )
    llm = ChatOpenAI(model="gpt-4", temperature=0.3)
    prompt = f"""
    Extract the patient's age and gender from the following clinical notes.
    Return ONLY a JSON object with 'age' and 'gender' fields.
    DO NOT include any other text, thinking process, or explanation.
    The response should start with {{ and end with }}.
    
    Example of expected format:
    {{"age": 55, "gender": "male"}}
    
    Clinical Notes: {state.get('clinical_notes')}
    """
    logging.info(f"LLM Prompt for extract_patient_info:\n{prompt}")

    response = llm.invoke([HumanMessage(content=prompt)])
    logging.info(f"LLM Response for extract_patient_info:\n{response.content}")

    try:
        content = clean_output(response.content)
        # Keep only the outermost {...} span so a code fence or stray sentence around
        # the object does not make json.loads fail.
        start, end = content.find('{'), content.rfind('}')
        if start != -1 and end > start:
            content = content[start:end + 1]
        info = json.loads(content)
        state['patient_age'] = info['age']
        state['patient_gender'] = info['gender']
    except Exception as e:
        # Best effort: record the failure and let the workflow continue
        logging.error(f"extract_patient_info failed: {e}")
        state['error'] = f"Failed to extract patient information: {str(e)}"

    log_stage("extract_patient_info", input_state, state)
    return state


In [39]:
def match_icd10_code(state: dict) -> dict:
    """Ask the LLM to pick the best ICD-10 code from the candidate list.

    Args:
        state: Workflow state dict. Reads ``clinical_question``, ``clinical_notes``,
            ``patient_age``, ``patient_gender`` and ``icd10_codes``.

    Returns:
        The same ``state`` object, updated in place. On success ``icd10_code`` and
        ``rationale`` come from the JSON reply; on failure ``error`` is set.
    """
    # An error left by an earlier stage (or an earlier attempt) is cleared so that
    # this attempt's outcome alone decides the routing.
    state['error'] = None

    input_state = state.copy()

    # Groq alternative kept for reference:
    # llm = ChatGroq(
    #     model="Deepseek-R1-Distill-Llama-70b",
    #     api_key= api_key
    # )
    llm = ChatOpenAI(model="gpt-4", temperature=0.3)

    # Interpolating a DataFrame directly uses str(), which abbreviates long frames to their
    # first and last rows; to_string() renders every candidate (same form the clinical
    # validation prompt uses).
    codes = state.get('icd10_codes')
    codes_text = codes.to_string() if isinstance(codes, pd.DataFrame) else codes

    prompt = f"""
    Match the clinical information to the most appropriate ICD-10 code from the provided list.
    Return ONLY a JSON object with exactly two fields: 'icd10_code' and 'rationale'.
    DO NOT include any other text, thinking process, or explanation.
    The response should start with {{ and end with }}.

    Example of expected format:
    {{"icd10_code": "xxx", "rationale": "xxxxx"}}

    
    Clinical Question: {state.get('clinical_question')}
    Clinical Notes: {state.get('clinical_notes')}
    Patient Age: {state.get('patient_age')}
    Patient Gender: {state.get('patient_gender')}
    
    Available ICD-10 Codes:
    {codes_text}
    """
    logging.info(f"LLM Prompt for match_icd10_code:\n{prompt}")
    response = llm.invoke([HumanMessage(content=prompt)])
    logging.info(f"LLM Response for match_icd10_code:\n{response.content}")
    try:
        output = clean_output(response.content)
        # Keep only the outermost {...} span so surrounding text does not break parsing
        start, end = output.find('{'), output.rfind('}')
        if start != -1 and end > start:
            output = output[start:end + 1]
        match = json.loads(output)
        state['icd10_code'] = match['icd10_code']
        state['rationale'] = match['rationale']
    except Exception as e:
        # Exception (not a bare except) so KeyboardInterrupt still stops a retry loop
        logging.error(f"match_icd10_code failed: {e}")
        state['error'] = f"Failed to match ICD-10 code: {str(e)}"
    log_stage("match_icd10_code", input_state, state)
    return state

In [40]:
def validate_icd10_code_exists(state: dict) -> dict:
    """Check that the matched ICD-10 code is one of the candidate codes.

    Args:
        state: Workflow state dict. Reads ``icd10_code`` and the ``icd10`` column of
            ``icd10_codes``.

    Returns:
        The same ``state`` object, updated in place. When the code is missing or not
        in the list, ``error`` is set and ``icd10_code``/``rationale`` are reset to
        None; otherwise ``error`` is cleared and the whitespace-trimmed code is stored.
    """
    input_state = state.copy()

    code = state.get('icd10_code')
    if isinstance(code, str):
        # Surrounding whitespace from the model should not fail an otherwise exact match
        code = code.strip()

    # Nulls are excluded so that a missing code can never "match" a null row in the list
    valid_codes = state['icd10_codes']['icd10'].dropna().tolist()

    if not code or code not in valid_codes:
        logging.warning(f"Invalid code {state.get('icd10_code')}, will rerun matching...")
        print(f"Invalid code {state.get('icd10_code')}, will rerun matching...")
        state['error'] = f"Invalid code {state.get('icd10_code')}, not in provided list"
        state['icd10_code'] = None
        state['rationale'] = None
    else:
        state['icd10_code'] = code
        # Clear any previous errors if validation passes
        state['error'] = None
    log_stage("validate_icd10_code_exists", input_state, state)
    return state

In [41]:
def validate_icd10_clinical_match(state: dict) -> dict:
    """Ask the LLM whether the matched ICD-10 code is clinically appropriate.

    Args:
        state: Workflow state dict. Reads the matched ``icd10_code``/``rationale``, the
            clinical question and notes, patient age and gender, and ``icd10_codes``.

    Returns:
        The same ``state`` object, updated in place:
        - valid match: ``error`` is cleared;
        - invalid match: ``error`` is set and ``icd10_code``/``rationale`` are reset to None;
        - unparseable reply: ``error`` is set and the match is left as it was.
        The stage is logged on every path.
    """
    input_state = state.copy()

    # Groq alternative kept for reference:
    # llm = ChatGroq(
    #     model_name="Deepseek-R1-Distill-Llama-70b",
    #     temperature=0.3,
    #     api_key= api_key
    # )
    llm = ChatOpenAI(model="gpt-4", temperature=0.3)

    prompt = f"""
    Validate if the matched ICD-10 code is appropriate for the clinical case.
    Return ONLY a JSON object with exactly two fields: 'is_valid' (boolean) and 'reason' (string).
    DO NOT include any other text, thinking process, or explanation.

    Example of expected format:
    {{"is_valid": true, "reason": "The code I10 matches the patient's hypertension diagnosis"}}
    or
    {{"is_valid": false, "reason": "The code I10 is too general for this specific case"}}

    Current Match:
    ICD-10 Code: {state.get('icd10_code')}
    Rationale: {state.get('rationale')}

    Clinical Question: {state.get('clinical_question')}
    Clinical Notes: {state.get('clinical_notes')}
    Patient Age: {state.get('patient_age')}
    Patient Gender: {state.get('patient_gender')}
    
    Available ICD-10 Codes:
    {state['icd10_codes'].to_string()}
    """
    logging.info(f"LLM Prompt for validate_icd10_clinical_match:\n{prompt}")

    response = llm.invoke([HumanMessage(content=prompt)])
    logging.info(f"LLM Response for validate_icd10_clinical_match:\n{response.content}")
    try:
        output = clean_output(response.content)
        # Keep only the outermost {...} span so surrounding text does not break parsing
        start, end = output.find('{'), output.rfind('}')
        if start != -1 and end > start:
            output = output[start:end + 1]
        validation = json.loads(output)
        logging.info(f"Validation result: {validation}")

        is_valid = validation['is_valid']
        if isinstance(is_valid, str):
            # A quoted "false" is a non-empty string and would otherwise count as valid
            is_valid = is_valid.strip().lower() == 'true'

        if is_valid:
            # Clear any previous errors if validation passes
            state['error'] = None
        else:
            print("Invalid match, will rerun matching...")
            state['error'] = f"Invalid match: {validation.get('reason')}"
            state['icd10_code'] = None
            state['rationale'] = None
    except Exception as e:
        logging.error(f"Validation error: {str(e)}")
        state['error'] = f"Failed to validate ICD-10 code: {str(e)}"

    # Single exit: the stage is logged for valid, invalid and failed validations alike
    log_stage("validate_icd10_clinical_match", input_state, state)
    return state

In [42]:
def create_clinical_graph() -> StateGraph:
    """Assemble and compile the ICD-10 matching workflow.

    Flow: extract_patient_info -> match_icd10_code -> validate_icd10_code_exists
    -> validate_icd10_clinical_match -> END. After either validation node, a truthy
    ``error`` in the state routes back to match_icd10_code.

    Returns:
        The object produced by ``StateGraph.compile()``.
    """
    builder = StateGraph(dict)

    # Register every stage under its own function name
    for stage in (
        extract_patient_info,
        match_icd10_code,
        validate_icd10_code_exists,
        validate_icd10_clinical_match,
    ):
        builder.add_node(stage.__name__, RunnableLambda(stage))

    # Unconditional hops
    for source, target in (
        ("extract_patient_info", "match_icd10_code"),
        ("match_icd10_code", "validate_icd10_code_exists"),
    ):
        builder.add_edge(source, target)

    # Code-existence check: continue to the clinical check unless an error was recorded
    builder.add_conditional_edges(
        "validate_icd10_code_exists",
        lambda s: "validate_icd10_clinical_match" if not s.get("error") else "match_icd10_code",
        {
            "match_icd10_code": "match_icd10_code",
            "validate_icd10_clinical_match": "validate_icd10_clinical_match",
        },
    )

    # Clinical check: finish unless an error was recorded, in which case match again
    builder.add_conditional_edges(
        "validate_icd10_clinical_match",
        lambda s: END if not s.get("error") else "match_icd10_code",
        {
            "match_icd10_code": "match_icd10_code",
            END: END,
        },
    )

    builder.set_entry_point("extract_patient_info")

    return builder.compile()

In [43]:
# Example usage
def process_clinical_case(clinical_question: str, clinical_notes: str, icd10_codes_df: pd.DataFrame) -> dict:
    """Process a clinical case through the workflow.

    Args:
        clinical_question: Question text for the case.
        clinical_notes: Free-text notes for the case.
        icd10_codes_df: Candidate codes; the workflow reads its ``icd10`` column.

    Returns:
        A dict with ``patient_age``, ``patient_gender``, ``icd10_code``, ``rationale``
        and ``error`` taken from the final workflow state.
    """
    banner = '=' * 50
    logging.info(f"\n{banner}")
    logging.info("Starting new clinical case processing")
    logging.info(f"Clinical Question: {clinical_question}")
    logging.info(f"Clinical Notes: {clinical_notes}")
    logging.info(f"{banner}\n")

    graph = create_clinical_graph()

    initial_state = {
        "clinical_question": clinical_question,
        "clinical_notes": clinical_notes,
        "icd10_codes": icd10_codes_df,
        "patient_age": None,
        "patient_gender": None,
        "icd10_code": None,
        "rationale": None,
        "error": None
    }

    # Matching is retried whenever a validation step fails, so allow more steps than
    # the default limit of 25.
    config = {"recursion_limit": 100}
    # Invoke exactly once: a second invoke repeats every LLM call, and without config it
    # runs under the default recursion limit instead of the one set here.
    result = graph.invoke(initial_state, config=config)

    # Only the derived fields are returned; the inputs (including the DataFrame) are left out
    clean_result = {
        "patient_age": result.get("patient_age"),
        "patient_gender": result.get("patient_gender"),
        "icd10_code": result.get("icd10_code"),
        "rationale": result.get("rationale"),
        "error": result.get("error")
    }

    logging.info(f"\n{banner}")
    logging.info("Final Result:")
    logging.info(json.dumps(clean_result, indent=2))
    logging.info(f"{banner}\n")

    return clean_result


In [44]:
# clinical_question ="Could this patient's chronic upper abdominal discomfort and iron deficiency anemia indicate a peptic ulcer or upper GI malignancy, and is EGD indicated?"

# clinical_notes = "47-year-old male with no significant past medical history presents with 3-month history of epigastric discomfort, early satiety, and unintentional 10 lb weight loss. Denies NSAID use, alcohol, or overt GI bleeding. Labs show iron deficiency anemia (Hgb 10.5, MCV 74, ferritin 12). Physical exam unremarkable. Concern for peptic ulcer disease or less likely gastric cancer. Seeking input on need for upper endoscopy."
# Process the case
clinical_question = "In a patient with persistent fever, night sweats, and weight loss despite broad-spectrum antibiotics, could this represent disseminated mycobacterial infection or an atypical fungal process, and what diagnostic workup is indicated?"

clinical_notes = (
    "52‑year‑old female with rheumatoid arthritis on methotrexate and low‑dose prednisone presents with "
    "6‑week history of daily fevers up to 102°F, drenching night sweats, and a 12‑lb unintentional weight loss. "
    "Initial blood cultures and chest X‑ray were unrevealing. She denies cough, dyspnea, or focal pain. "
    "Lab results show mild anemia (Hgb 11.2), elevated ESR (85 mm/hr), and CRP (12 mg/dL). "
    "CT chest/abdomen reveals multiple small pulmonary nodules and hepatosplenic lesions. "
    "Concern for disseminated non‑tuberculous mycobacteria vs. histoplasmosis. Input on biopsy site selection and empiric therapy is requested."
)
# Run the example case against the cleaned candidate code list and print the result as JSON.
# NOTE(review): the run recorded in this notebook returned Z79.4 with a rationale about
# methotrexate/prednisone, while the diagnosis query output lists Z79.4 as
# "Long term (current) use of insulin". The candidate list passed here holds only the
# icd10 column (no descriptions) -- worth checking whether the model needs dx_name too.
result = process_clinical_case(clinical_question, clinical_notes, top_200_icd10_codes_cleaned)
print(json.dumps(result, indent=2))

2025-05-12 01:47:36,386 - INFO - 
2025-05-12 01:47:36,387 - INFO - Starting new clinical case processing
2025-05-12 01:47:36,388 - INFO - Clinical Question: In a patient with persistent fever, night sweats, and weight loss despite broad-spectrum antibiotics, could this represent disseminated mycobacterial infection or an atypical fungal process, and what diagnostic workup is indicated?
2025-05-12 01:47:36,388 - INFO - Clinical Notes: 52‑year‑old female with rheumatoid arthritis on methotrexate and low‑dose prednisone presents with 6‑week history of daily fevers up to 102°F, drenching night sweats, and a 12‑lb unintentional weight loss. Initial blood cultures and chest X‑ray were unrevealing. She denies cough, dyspnea, or focal pain. Lab results show mild anemia (Hgb 11.2), elevated ESR (85 mm/hr), and CRP (12 mg/dL). CT chest/abdomen reveals multiple small pulmonary nodules and hepatosplenic lesions. Concern for disseminated non‑tuberculous mycobacteria vs. histoplasmosis. Input on bio

{
  "patient_age": 52,
  "patient_gender": "female",
  "icd10_code": "Z79.4",
  "rationale": "The patient is on long-term (current) use of methotrexate and low-dose prednisone, which is represented by the ICD-10 code Z79.4.",
  "error": null
}


In [45]:
# # Example usage
# clinical_question = "Could this patient's recurrent, exertional chest pain with recent ECG abnormalities suggest underlying ischemic heart disease, and would further cardiac workup (e.g., stress testing or angiography) be appropriate at this time?"

# clinical_notes = """55-year-old male with a history of hypertension and hyperlipidemia presents with 2-month history of intermittent chest discomfort described as a pressure-like sensation localized to the left chest, occasionally radiating to the jaw, occurring primarily during brisk walking or stair climbing. Denies associated nausea, diaphoresis, or syncope. Symptoms improve with rest. No prior cardiac history. Vital signs stable. Physical exam unremarkable. Recent resting ECG showed nonspecific ST changes. Lipid panel elevated; LDL 145 mg/dL. Concerned about possible stable angina. Requesting input on next steps for diagnostic evaluation and whether referral to cardiology is appropriate."""

# # print("Clinical notes before processing:", clinical_notes)  # Debug print

# # Process the case
# result = process_clinical_case(clinical_question, clinical_notes, top_200_icd10_codes_cleaned)
# print(json.dumps(result, indent=2))

In [46]:
# Initialize the API
api = BigQueryAPI()

2025-05-12 01:47:52,472 - INFO - Successfully initialized BigQuery client for project som-nero-phi-jonc101


In [47]:
result

{'patient_age': 52,
 'patient_gender': 'female',
 'icd10_code': 'Z79.4',
 'rationale': 'The patient is on long-term (current) use of methotrexate and low-dose prednisone, which is represented by the ICD-10 code Z79.4.',
 'error': None}

NameError: name 'llm' is not defined

In [48]:
# Query orders for the matched case, passing the whole workflow result dict as params.
# result_type='proc': presumably procedure/lab orders (the recorded rows are mostly LAB* items) --
# TODO confirm against BigQueryAPI.get_orders.
api.get_orders(
    params=result,
    result_type='proc',
    limit=100
)


2025-05-12 01:47:52,510 - INFO - Building query for params={'patient_age': 52, 'patient_gender': 'female', 'icd10_code': 'Z79.4', 'rationale': 'The patient is on long-term (current) use of methotrexate and low-dose prednisone, which is represented by the ICD-10 code Z79.4.', 'error': None}, type=proc, year=2024
2025-05-12 01:47:52,531 - INFO - Executing BigQuery query...
2025-05-12 01:47:55,968 - INFO - Query completed successfully. Returned 75 rows.


Unnamed: 0,itemId,description,order_procdepartment,encounter_department,patientRate,encounterRate,nPatientscohortItem,nEncounterscohortItem,nPatientsCohortTotal,nEncountersCohortTotal
0,LABUALB,"ALBUMIN WITH CREATININE, URINE (RANDOM)",Endocrinology,Endocrinology,27.11,10.63,810,1990,2988,18715
1,LABA1C,HEMOGLOBIN A1C,Endocrinology,Endocrinology,26.77,16.12,800,3017,2988,18715
2,LABMETB,"METABOLIC PANEL, BASIC",Endocrinology,Endocrinology,22.49,10.12,672,1894,2988,18715
3,LABMETC,"METABOLIC PANEL, COMPREHENSIVE",Endocrinology,Endocrinology,19.44,6.66,581,1247,2988,18715
4,LABLPDC,LIPID PANEL WITH CALCULATED LDL,Endocrinology,Endocrinology,15.76,5.75,471,1076,2988,18715
...,...,...,...,...,...,...,...,...,...,...
70,LABGLNF,"GLUCOSE NON-FASTING, SERUM/PLASMA",Endocrinology,Endocrinology,0.37,0.06,11,11,2988,18715
71,LABANTITG,THYROGLOBULIN AB ULTRA-SENSITIVE,Endocrinology,Endocrinology,0.37,0.06,11,12,2988,18715
72,EXTK,POTASSIUM (MANUAL ENTRY),Endocrinology,Endocrinology,0.37,0.06,11,11,2988,18715
73,LABT3,"T3, TOTAL",Endocrinology,Endocrinology,0.37,0.07,11,14,2988,18715


In [55]:
# Same lookup with result_type='med' and a limit of 10.
# Presumably medication orders (the recorded rows are drug products) -- TODO confirm against BigQueryAPI.get_orders.
api.get_orders(
    params=result,
    result_type='med',
    limit=10
)

2025-05-12 11:31:22,483 - INFO - Building query for params={'patient_age': 52, 'patient_gender': 'female', 'icd10_code': 'Z79.4', 'rationale': 'The patient is on long-term (current) use of methotrexate and low-dose prednisone, which is represented by the ICD-10 code Z79.4.', 'error': None}, type=med, year=2024
2025-05-12 11:31:22,512 - INFO - Executing BigQuery query...
2025-05-12 11:31:30,606 - INFO - Query completed successfully. Returned 10 rows.


Unnamed: 0,itemId,description,department,patientRate,encounterRate,nPatientscohortItem,nEncounterscohortItem,nPatientsCohortTotal,nEncountersCohortTotal
0,28995,METFORMIN 500 MG PO TB24,Endocrinology,5.79,1.46,173,274,2988,18715
1,201231,LANTUS SOLOSTAR U-100 INSULIN 100 UNIT/ML (3 M...,Endocrinology,5.66,1.38,169,258,2988,18715
2,233242,TRULICITY 1.5 MG/0.5 ML SC PNIJ,Endocrinology,4.45,0.95,133,177,2988,18715
3,203639,HUMALOG KWIKPEN INSULIN 100 UNIT/ML SC INPN,Endocrinology,4.45,1.13,133,212,2988,18715
4,24398,"METFORMIN 1,000 MG PO TABS",Endocrinology,4.02,1.2,120,224,2988,18715
5,233241,TRULICITY 0.75 MG/0.5 ML SC PNIJ,Endocrinology,4.02,0.75,120,140,2988,18715
6,232958,JARDIANCE 10 MG PO TABS,Endocrinology,4.02,0.84,120,157,2988,18715
7,232959,JARDIANCE 25 MG PO TABS,Endocrinology,3.25,0.82,97,154,2988,18715
8,239231,BASAGLAR KWIKPEN U-100 INSULIN 100 UNIT/ML (3 ...,Endocrinology,2.91,0.99,87,186,2988,18715
9,247917,TRULICITY 3 MG/0.5 ML SC PNIJ,Endocrinology,2.74,0.61,82,115,2988,18715


In [50]:
# def find_closest_icd10_code(description, icd10_codes_df):
#     """Find the closest matching ICD-10 code based on description similarity."""
#     from sklearn.feature_extraction.text import TfidfVectorizer
#     from sklearn.metrics.pairwise import cosine_similarity
#     import numpy as np
    
#     # Create TF-IDF vectors
#     vectorizer = TfidfVectorizer()
#     descriptions = icd10_codes_df['description'].tolist()
#     tfidf_matrix = vectorizer.fit_transform(descriptions + [description])
    
#     # Calculate similarity
#     similarity_scores = cosine_similarity(tfidf_matrix[-1:], tfidf_matrix[:-1])[0]
#     best_match_idx = np.argmax(similarity_scores)
    
#     return icd10_codes_df.iloc[best_match_idx]['icd10_code']

# def parse_clinical_info(clinical_question, clinical_notes, icd10_codes_df, max_retries=3):
#     # ... (previous code) ...
    
#     try:
#         result = json.loads(response)
        
#         # Validate the ICD-10 code
#         if validate_icd10_code(result['icd10_code'], icd10_codes_df):
#             return result
#         else:
#             print(f"Attempt {attempt + 1}: Invalid ICD-10 code {result['icd10_code']}. Finding closest match...")
#             # Find the closest matching code
#             closest_code = find_closest_icd10_code(result['rationale'], icd10_codes_df)
#             result['icd10_code'] = closest_code
#             result['rationale'] += f"\nNote: Original code {result['icd10_code']} was not in the list. Using closest match {closest_code} instead."
#             return result
                
#     except json.JSONDecodeError:
#         print(f"Attempt {attempt + 1}: Failed to parse JSON response. Retrying...")
#         continue

In [56]:
pwd

'/Users/wenyuanchen/Desktop/Stanford/HealthRex/CDSS_aim4/scripts/antibiotic-susceptibility/sql/queries/aim_4/AIM4/Recommender_API/Notebook'