In [1]:
import anthropic
import os
from dotenv import load_dotenv
import openai

# Read variables from the local ".env" file into the process environment.
# The call is left as the cell's last expression so the notebook echoes the
# boolean it returns.
load_dotenv(dotenv_path=".env")

False

In [2]:
def get_step_1_text(topic):
    """Build the first-stage prompt that asks for yes/no checklists.

    Args:
        topic: Specialty name interpolated into the opening persona sentence.

    Returns:
        The prompt string. It starts with a newline, every line carries a
        four-space indent, and it ends with a newline plus four spaces.
    """
    # The wording (including the missing space in "requirements.Do") is kept
    # exactly as-is: this text is sent to the model verbatim.
    step_1_prompt = f"""
    You are an expert medical doctor with a specialty in {topic}. You have specifically been the final arbiter for managing prior authorizations for decades.
    I have a medical guideline from an insurance provider, written in markdown. The guidelines lay out all of the criteria by which a procedure or often multiple procedures might be considered "medically necessary". There are usually multiple paths that will yield this result.
    You will be hiring a team of medical interns to help you perform more prior authorizations accurately. Your task is to make these medical guidelines as simple and easy to follow for your new team as possible.
    To do this, you'll want to:
    1. Read the guideline all the way through, carefully
    2. Read the rationale for context, but do not create any questions based on it.
    3. Break the guidelines out into sub guidelines based on the procedure being requested.
    4. For each sub guideline, identify the questions that will comprise the checklists.
    5. Guidelines sometimes highlight when a procedure is particularly useful. These are NOT requirements.Do not include questions in these cases.
    6. Formulate a series of checklists, at least 1 for each procedure
    7. Make sure that the questions in the checklists are broken down into their most fundamental parts. For example: 'Is this a follow-up of an equivocal post-treatment PET scan, no sooner than 4 weeks after the study?' should be split into 'Is this a follow up of an equivocal post-treatment PET scan?' and 'Was the previous scan conducted at least 4 weeks ago?'.
    8. Because these are checklists, you'll want to phrase your questions such that the desired answer is "Yes" or "True".
    9. Label the questions with an alpha ID, starting with A
    If your medical interns can answer "Yes" or "True" to all of the questions in any of your checklists, it means that the procedure is medically necessary, and they should approve it.
    """
    return step_1_prompt

def get_step_3_text():
    """Return the follow-up prompt that asks for the checklists as boolean logic.

    Returns:
        The fixed prompt string. It starts with a newline, every line carries
        a two-space indent, and it ends with a newline plus two spaces.
    """
    # NOTE(review): the text is reproduced verbatim because it is sent to the
    # model and is part of the hashed call arguments used for caching. It
    # still contains typos ("logicial", "opperator", "they keys"), and the
    # Rule 7 example uses a lowercase "and" although Rule 4 names only AND/OR.
    step_3_prompt = """
  Now your job will be to create a logicial representation of the checklists. But there are a few rules you must follow:
  Rule 1: Reference the checklist items by their alpha ids in the logic statement.
  Rule 2: Ensure that all questions are broken down into their most fundamental parts.
  Rule 3: Avoid duplicating logic where possible.
  Rule 4: Only use the logical operators AND or OR
  Rule 5: Organize the logical statement using parentheses.
  Rule 6: DO NOT use the NOT opperator (or ~). If there is the need for negating a checklist item, an additional item must be added to the list that phrases the question such that the answer is affirmative.
  Rule 7: Return only the logic statement with no additional text or explanation. For example: "(A AND (B OR C OR D) and E)" is a valid response.
  Rule 8: The logic statement must be all one line and all connected by logical opperators.
  Rule 9: Your full response should be a json. The first key should be "logic_statement" with your logic statement as the value. The second key should be "questions". This value should be a dictionary where they keys are the alpha ids and they values are the questions. 
  """
    return step_3_prompt

In [3]:
# Module-level Anthropic client; its API key is looked up in the CLAUDE_KEY
# environment variable (None is passed when the variable is not set).
client = anthropic.Anthropic(api_key=os.environ.get("CLAUDE_KEY"))

In [4]:
import json
import hashlib
import diskcache as dc

# On-disk cache rooted at the relative "data" directory; used by the
# `cached` decorator defined below.
cache = dc.Cache(directory="data")

def hash_parameters(*args, **kwargs):
    """Return an MD5 hex digest that identifies a set of call arguments.

    The digest is taken over the JSON encoding of ``(args, kwargs)``.

    * The ``response_format`` keyword is ignored, so calls that differ only
      in that argument share a digest.
    * Keyword arguments are ordered by name before encoding, so
      ``f(a=1, b=2)`` and ``f(b=2, a=1)`` produce the same digest. Only this
      top level is reordered; nested values are encoded exactly as given, so
      digests of positional-only calls are unchanged.

    Raises:
        TypeError: if any argument cannot be encoded by ``json.dumps``.
    """
    hashed_kwargs = {
        name: value
        for name, value in sorted(kwargs.items())
        if name != "response_format"
    }
    params = json.dumps((args, hashed_kwargs))
    # MD5 is used purely as a compact fingerprint here, not for security.
    return hashlib.md5(params.encode()).hexdigest()

def cached(func):
    """Decorator that memoises ``func`` in the module-level ``cache``.

    The cache key is ``"<function name>:<hash of the arguments>"``. A
    ``client`` keyword argument, if present, is left out of the hash but is
    still passed through to ``func``.

    Whatever ``func`` returns is stored as-is; exceptions propagate and
    nothing is stored for that call.
    """
    # Local import keeps this block self-contained.
    import functools

    # functools.wraps keeps func's __name__/__doc__ on the wrapper, so
    # decorators stacked on top and introspection see the real function
    # instead of a generic "wrapper".
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        cache_kwargs = kwargs.copy()
        cache_kwargs.pop("client", None)
        key = f"{func.__name__}:{hash_parameters(*args, **cache_kwargs)}"
        if key in cache:
            return cache[key]
        result = func(*args, **kwargs)
        cache[key] = result
        return result

    return wrapper

In [5]:
@cached
def get_claude_response(messages):
    """Send ``messages`` to the Anthropic Messages API and return the response.

    Uses the module-level ``client``. Because of ``@cached``, a repeated call
    with the same ``messages`` returns the stored response instead of calling
    the API again.

    Args:
        messages: Conversation passed straight through as the ``messages``
            argument of ``client.messages.create``.

    Returns:
        The object returned by ``client.messages.create``, unmodified.
    """
    request = {
        "model": "claude-3-5-sonnet-20240620",
        "max_tokens": 1024,
        "messages": messages,
        "temperature": 0.0,
        "top_p": 0.95,
    }
    return client.messages.create(**request)

In [6]:
import backoff
import requests


# Retry rate-limited (HTTP 429) requests, waiting as long as the server asks.
# `cached` is deliberately the OUTER decorator: it then stores only the
# response that the retry loop finally returns. With backoff outermost, a 429
# response would be stored by `cached` and handed back on every retry, so the
# loop could never observe a fresh response.
@cached
@backoff.on_predicate(
    backoff.runtime,  # wait time is taken from the response, not exponential
    predicate=lambda r: r.status_code == 429,
    # Fall back to 1 second when Retry-After is absent; int(None) would raise.
    value=lambda r: int(r.headers.get("Retry-After", 1)),
    # No jitter: the default full jitter would shorten the server-mandated wait.
    jitter=None,
)
def get_gpt_response_api(prompt, model_name, endpoint, key, temperature):
    """POST a chat request to ``endpoint`` and return the HTTP response.

    Args:
        prompt: Value sent as the ``messages`` field of the request body.
        model_name: Value sent as the ``model`` field.
        endpoint: URL the request is posted to (also printed for tracing).
        key: Credential sent in the ``api-key`` header.
        temperature: Value sent as the ``temperature`` field.

    Returns:
        The ``requests.Response`` from the final attempt. The status code is
        not checked here beyond the 429 retry, so callers must inspect it.
    """
    headers = {
        # "authorization": f"Bearer {key}",
        "api-key": key,
        "Content-Type": "application/json"
    }

    # Only referenced by the disabled "response_format" entry below; kept so
    # structured output can be re-enabled without rebuilding the schema.
    modified_schema = {
        "type": "object",
        "required": ["results"],
        "additionalProperties": False,
        "properties": {
            "results": {
                "type": "object",
                "required": ["summary", "discussion", "answers"],
                "additionalProperties": False,
                "properties": {
                    "summary": {
                        "type": "string",
                        "description": "Summary of the patient information focusing on aspects that are relevant to the question(s)"
                    },
                    "discussion": {
                        "type": "string",
                        "description": "How does the provided information help or not help to answer the questions?"
                    },
                    "answers": {
                        "type": "array",
                        "items": {
                            "type": "object",
                            "required": ["answer", "supporting evidence"],
                            "additionalProperties": False,
                            "properties": {
                                "answer": {
                                    "type": "string",
                                    "description": "The correct answer choice among the multiple choices"
                                },
                                "supporting evidence": {
                                    "type": "string",
                                    "description": "A direct quote from the patient information that supports the answer if one exists."
                                }
                            }
                        }
                    }
                }
            }
        }
    }

    # The data being sent in the request, including the schema and prompt
    data = {
        "model": model_name,
        "messages": prompt,
        "temperature": temperature,
        # "logprobs": True,
        # "top_logprobs": 5,
        # "response_format": {
        #     "type": "json_schema",
        #     "json_schema": {
        #         "name": "QuestionAnswers",
        #         "strict": True,
        #         "schema": modified_schema
        #     }
        # }
    }

    # Send the request
    print(f"ENDPOINT: {endpoint}")
    response = requests.post(endpoint, headers=headers, json=data)
    return response

@cached
def get_gpt_response(messages, model_name, endpoint, key):
    """Run an Azure OpenAI chat completion via ``openai.ChatCompletion``.

    Args:
        messages: Chat messages forwarded unchanged.
        model_name: Passed as ``engine``.
        endpoint: Passed as ``api_base``.
        key: Passed as ``api_key``.

    Returns:
        The completion returned by ``openai.ChatCompletion.create``. When the
        call raises ``openai.InvalidRequestError``, a plain dict with the same
        ``["choices"][0]["message"]["content"]`` path and a fixed explanatory
        message is returned instead. Either result is stored by ``@cached``.
    """
    completion_kwargs = dict(
        api_key=key,
        api_base=endpoint,
        api_version=os.getenv("AZURE_OPENAI_API_VERSION"),
        api_type="azure",
        engine=model_name,
        messages=messages,
        temperature=0.0,
        top_p=0.95,
        frequency_penalty=0,
        presence_penalty=0,
        stop=None,
    )
    try:
        return openai.ChatCompletion.create(**completion_kwargs)
    except openai.InvalidRequestError:
        # Stand-in reply so callers can index the result like a real completion.
        fallback_message = {
            "content": "Question 1: The request was deemed invalid and likely violated the content filters."
        }
        return {"choices": [{"message": fallback_message}]}


In [7]:

# Sample inputs for the quick timing run at the end of this cell.
# `url` is not referenced by the code in this cell; presumably it is the page
# the excerpt below was taken from.
url = "https://guidelines.carelonmedicalbenefitsmanagement.com/imaging-of-the-chest-2024-04-14/"
# Raw string: the excerpt contains literal "\*" sequences, which are invalid
# escape sequences in a normal string literal (SyntaxWarning on Python 3.12+).
# The resulting value is identical, backslashes included.
markdown = r"#Other thoracic mass lesions Advanced imaging is considered medically necessary for diagnosis and management of **ANY** of the following findings or conditions: - Mediastinal mass (see separate indication for lymphadenopathy) - Pancoast tumor - Pleural mass - Thymoma - Benign tumors (pediatric only) **IMAGING STUDY** \*\*_ADULT_ \*\* - CT chest - MRI chest for evaluation of mediastinal and hilar masses when CT is insufficient for problem solving or for evaluation of chest wall extension in Pancoast tumor \*\*_PEDIATRIC_ \*\* - CT or MRI chest"
topic = "Other thoracic mass lesions"
def get_mermaid_from_gpt(topic, markdown):
    """Run the two-step checklist-then-logic conversation against GPT.

    Step 1 sends the checklist instructions as the system message and the
    guideline as the user message. Step 2 replays the model's reply and asks
    for the logic statement using the step-3 prompt.

    Args:
        topic: Specialty name passed to ``get_step_1_text``.
        markdown: Guideline text sent as the first user message.

    Returns:
        The text content of the model's second reply.
    """
    # Same deployment and credentials for both turns. The keyword order
    # (model_name, endpoint, key) is kept as it was so previously cached
    # first-turn calls still match.
    request_kwargs = dict(
        model_name="gpt-4o-latest",
        endpoint=os.getenv("AZURE_OPENAI_ENDPOINT_EAST2"),
        key=os.getenv("AZURE_OPENAI_API_KEY_EAST2"),
    )
    message = [
        {"role": "system", "content": get_step_1_text(topic)},
        {"role": "user", "content": markdown},
    ]
    response = get_gpt_response(message, **request_kwargs)
    # The model's own output is replayed with the "assistant" role. It was
    # previously sent as "system", which presents the checklist as
    # instructions rather than as the model's earlier turn.
    message.append({"role": "assistant",
                    "content": response["choices"][0]["message"]["content"]})
    message.append({"role": "user",
                    "content": get_step_3_text()})
    second_response = get_gpt_response(message, **request_kwargs)
    return second_response["choices"][0]["message"]["content"]

def get_mermaid_from_claude(topic, markdown):
    """Run the two-step checklist-then-logic conversation against Claude.

    Args:
        topic: Specialty name passed to ``get_step_1_text``.
        markdown: Guideline text sent as the second user message.

    Returns:
        The text of the first content block of Claude's second reply.
    """
    # Both opening turns use the "user" role: instructions first, guideline second.
    conversation = [
        {"role": "user", "content": get_step_1_text(topic)},
        {"role": "user", "content": markdown},
    ]
    checklist_reply = get_claude_response(conversation)
    # Replay the checklist answer, then ask for the logic statement.
    conversation += [
        {"role": "assistant", "content": checklist_reply.content[0].text},
        {"role": "user", "content": get_step_3_text()},
    ]
    logic_reply = get_claude_response(conversation)
    return logic_reply.content[0].text

import time

# Time one complete two-step Claude run on the sample guideline. The reply
# itself is discarded; only the elapsed wall-clock time is reported.
start_time = time.time()
get_mermaid_from_claude(topic, markdown)
elapsed_seconds = time.time() - start_time
print(f"Finished in {elapsed_seconds:.4f} seconds")


Finished in 0.0003 seconds


  markdown = "#Other thoracic mass lesions Advanced imaging is considered medically necessary for diagnosis and management of **ANY** of the following findings or conditions: - Mediastinal mass (see separate indication for lymphadenopathy) - Pancoast tumor - Pleural mass - Thymoma - Benign tumors (pediatric only) **IMAGING STUDY** \*\*_ADULT_ \*\* - CT chest - MRI chest for evaluation of mediastinal and hilar masses when CT is insufficient for problem solving or for evaluation of chest wall extension in Pancoast tumor \*\*_PEDIATRIC_ \*\* - CT or MRI chest"


In [8]:
import scorcery
import scorcery.create_local_overrides

# Path of the local overrides database used by the scoring cell.
override_path = "overrides.db"
# Build the file only when it is absent, via scorcery's
# migrate_postgres_to_sqlite helper (presumably a Postgres export, per its name).
overrides_missing = not os.path.isfile(override_path)
if overrides_missing:
    scorcery.create_local_overrides.migrate_postgres_to_sqlite(override_path)

In [17]:
import scorcery.final_db
from peewee import SqliteDatabase
import scorcery.final_db.guideline_repo
import scorcery.flows
import graph_tools


target_db = SqliteDatabase(override_path)
count = 0  # guidelines attempted
match_count = 0  # guidelines that were parsed and scored without error
score_total = 0  # sum of the scores of the guidelines counted in match_count
target_title = "Penile, Vaginal, and Vulvar Cancers"  # not referenced in this cell
for target in scorcery.final_db.guideline_repo.GuidelineRepo.all(target_db):
    count += 1
    # NOTE(review): target.id is passed in the position where
    # get_mermaid_from_claude expects the guideline markdown - confirm that
    # this is the intended attribute.
    response = get_mermaid_from_claude(target.title, target.id)
    try:
        # Parsing happens inside the try so that one malformed reply is
        # reported and skipped instead of aborting the whole run.
        response = json.loads(response)
        print(target.title)
        g1 = graph_tools.graph.EMGraph.from_mermaid(target.flow)
        g2 = graph_tools.graph.EMGraph.from_logic(response["logic_statement"], response["questions"])
        g2.as_logic_graph()
        similarity = g1.is_similar_to(g2)
        # Score is the mean of the two overlap measures.
        score = (similarity["node overlap"] + similarity["outcome overlap"]) / 2.0
        print(f"SCORE: {score}")
        score_total += score
        match_count += 1
        # Dump full details only for scores in the [0.4, 0.5) band.
        if 0.4 <= score < 0.5:
            print("&"*100)
            print(target.flow)
            print("*"*50)
            print(g2.get_mermaid_text())
            print(response["logic_statement"])
            for key, value in response["questions"].items():
                print(f"\t{key} - {value}")
            print("&"*100)

    except Exception as e:
        # Best effort: report the failing guideline and continue with the next.
        # `response` is the raw reply text here if JSON parsing failed.
        print("&"*100)
        print(f"Error: {target.title} failed because: {e}")
        print("*"*50)
        print(target.flow)
        print("*"*50)
        print(response)
        print("&"*100)

print(count)
print(match_count)


Head and Neck Cancer
SCORE: 0.4499875
&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&
graph TD
    A["Start"] --> B{"Is CT Primary Site and Neck Requested?"}
    A --> C{"Is CT Chest / CT Abdomen and Pelvis Requested?"}
    A --> D{"Is MRI Primary Site and Neck Requested?"}
    A --> E{"Is FDG-PET/CT Requested?"}

    B --> F{"Is there documented head and neck cancer?"}
    F --> G{"Is this for initial diagnostic workup?"}
    F --> H{"Is this to assess response to neoadjuvant treatment or after concurrent chemoradiotherapy?"}
    F --> I{"Is this for surveillance imaging?"}
    G --> J["Procedure Medically Necessary"]
    H --> J
    I --> J

    C --> K{"Is there documented head and neck cancer?"}
    K --> L{"Is this for initial diagnostic workup?"}
    K --> M{"Is this part of the management strategy?"}
    K --> N{"Is this for surveillance imaging?"}
    L --> J
    M --> J
    N --> J

    D --> O{"Is there documented head and 

In [13]:

# Summary of the most recent scoring-loop run, labelled as the Claude run.
print("CLAUDE")
print(score_total)
# Guard both averages: count is 0 when no guideline was processed and
# match_count is 0 when every guideline failed, which would otherwise raise
# ZeroDivisionError.
print(score_total / count if count else "n/a (no guidelines processed)")
print(score_total / match_count if match_count else "n/a (no guidelines scored)")

CLAUDE
28.86850231447986
0.6873452932019014
0.6873452932019014


In [14]:

# NOTE(review): these totals come from whichever scoring-loop run happened
# last. The loop as saved calls get_mermaid_from_claude, and the saved output
# below is identical to the CLAUDE figures, so this label only holds if the
# loop was re-run with get_mermaid_from_gpt first - confirm before quoting.
print("gpt-4o-latest")
print(score_total)
# Guard both averages: count is 0 when no guideline was processed and
# match_count is 0 when every guideline failed, which would otherwise raise
# ZeroDivisionError.
print(score_total / count if count else "n/a (no guidelines processed)")
print(score_total / match_count if match_count else "n/a (no guidelines scored)")

gpt-4o-latest
28.86850231447986
0.6873452932019014
0.6873452932019014
