## Setup

In [None]:
import warnings
from typing import *
import os
from dotenv import load_dotenv
from langgraph.checkpoint.memory import MemorySaver
from langchain_openai import ChatOpenAI
from transformers import logging
import sys

# Placeholder: replace `...` with the project root as a string before running.
# The value is interpolated into the dataset/log paths defined further down.
ROOT = ... # path to the root of the project
# Extend the import search path so the project-local imports that follow can be resolved.
sys.path.append(ROOT)


from medDerm.agent import *
from medDerm.tools import *
from medDerm.utils import *
from experiments.Ham10k.experiment_utils import *


import json
import os
import glob
import logging
from datetime import datetime
import tqdm
import base64

# Suppress all Python warnings from this point on.
warnings.filterwarnings("ignore")
# Load environment variables from a default .env file; the return value is discarded.
_ = load_dotenv()

In [None]:
# --- Input locations, all resolved relative to ROOT ---
PROMPT_FILE = f"{ROOT}/medDerm/docs/system_prompts.txt"
DATASET_PATH = f"{ROOT}/datasets/ISIC_ImageNet"
BENCHMARK_DIR = f"{ROOT}/datasets/ISIC2018_Task3_Test_Input"
BENCHMARK_GT_FILE = f"{ROOT}/datasets/ISIC2018_Task3_Test_GroundTruth/ISIC2018_Task3_Test_GroundTruth.csv"

# --- Run settings ---
device = "cuda:3" # Adjust based on your GPU availability
temperature = 0.2  # sampling temperature passed to the chat model in get_agent
model_name = "medDerm"  # only used here to name the log file; later cells rebind it

# --- Logging: one timestamped file per session, raw messages only ---
medDerm_logs = f"{ROOT}/experiments/medDerm_logs"
os.makedirs(medDerm_logs, exist_ok=True)

log_filename = f"{medDerm_logs}/{model_name}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
logging.basicConfig(
    filename=log_filename,
    level=logging.INFO,
    format="%(message)s",
    force=True,  # replace any handlers configured earlier in the session
)

Class explanation:
- MEL: Melanoma is a malignant neoplasm derived from melanocytes that may appear in different variants.

- NV: Melanocytic nevi are benign neoplasms of melanocytes and appear in a myriad of variants, which all are included in our series.

- BCC: Basal cell carcinoma is a common variant of epithelial skin cancer that rarely metastasizes but grows destructively if untreated.

- AKIEC: Actinic Keratoses (Solar Keratoses) and Intraepithelial Carcinoma (Bowen’s disease) are common, non-invasive variants of squamous cell carcinoma that can be treated locally without surgery.

- BKL: "Benign keratosis" is a generic class that includes seborrheic keratoses ("senile wart") and solar lentigo.

- DF: Dermatofibroma is a benign skin lesion regarded as either a benign proliferation or an inflammatory reaction to minimal trauma

- VASC: Vascular skin lesions in the dataset range from cherry angiomas to angiokeratomas and pyogenic granulomas. Hemorrhage is also included in this category.

## Utility functions

In [None]:
def get_tools():
    """
    Builds the list of tools handed to the agent.

    Returns:
        A one-element list containing a MuteClassifierTool created with
        pretrained=False, the module-level `device`, the "All" output head
        and the YAML config stored under `{ROOT}/checkpoints`.
    """
    checkpoint_config = f"{ROOT}/checkpoints/exp-HAM+Derm7pt-all+BCN+HAM-bin+DermNet+Fitzpatrick.yaml"
    classifier_options = {
        "pretrained": False,
        "device": device,
        "config_path": checkpoint_config,
        "output_head": "All",
    }
    return [MuteClassifierTool(**classifier_options)]

def get_agent(openai_kwargs, model_name="gpt-4o", tools=None):
    """
    Creates a fresh agent together with the thread configuration used to run it.

    Args:
        openai_kwargs: Extra keyword arguments forwarded to ChatOpenAI
            (the cells below fill in api_key / base_url). None is treated
            as "no extra arguments".
        model_name: Name of the chat model driving the agent.
        tools: Tools made available to the agent. None (the default) means
            no tools.

    Returns:
        A tuple (agent, thread) where thread is the config dict expected by
        the agent workflow; the thread id is always "1".
    """
    # `None` instead of a mutable `[]` default, so one list is never shared
    # between calls; and `**None` would raise a TypeError below.
    tools = [] if tools is None else tools
    openai_kwargs = openai_kwargs or {}

    prompts = load_prompts_from_file(PROMPT_FILE)
    prompt = prompts["ISIC_CLASSIFICATION_GPT_BINARY"]
    checkpointer = MemorySaver()
    # `temperature` is the module-level setting defined in the configuration cell.
    model = ChatOpenAI(model=model_name, temperature=temperature, top_p=0.95, **openai_kwargs)
    agent = Agent(
        model,
        tools=tools,
        log_tools=True,
        log_dir=f"{ROOT}/logs",
        system_prompt=prompt,
        checkpointer=checkpointer,
    )
    thread = {"configurable": {"thread_id": "1"}}
    return agent, thread

def run_medrax(agent, thread, prompt, image_path, use_tools=False):
    """
    Sends one image plus a text prompt through the agent workflow and returns the last reply.

    The image is embedded in the conversation as a base64 data URL. When tools
    are enabled, the file path is also sent as plain text so that the agent can
    forward it to the classification tool.

    Args:
        agent: The medDerm agent; its `workflow` is streamed and queried for state.
        thread: The thread configuration passed to the workflow.
        prompt: The text prompt sent after the image.
        image_path: Path of the image file to read and embed.
        use_tools: If True, prepend a message stating where the image is located.

    Returns:
        A tuple (reply, state): the stripped content of the last message of the
        last streamed update, and the workflow state converted to a string.
    """
    with open(image_path, "rb") as image_file:
        encoded_image = base64.b64encode(image_file.read()).decode("utf-8")

    image_message = {
        "role": "user",
        "content": [
            {
                "type": "image_url",
                "image_url": {"url": f"data:image/jpeg;base64,{encoded_image}"},
            }
        ],
    }
    text_message = {"role": "user", "content": [{"type": "text", "text": prompt}]}

    conversation = []
    if use_tools:
        # The tool receives a path, not the inline image, so spell the path out.
        conversation.append({"role": "user", "content": f"the image is located at: {image_path}"})
    conversation.extend([image_message, text_message])

    # Keep only the most recent update emitted by the workflow.
    latest_update = None
    for event in agent.workflow.stream({"messages": conversation}, thread):
        updates = list(event.values())
        if updates:
            latest_update = updates[-1]

    reply = latest_update["messages"][-1].content.strip()
    snapshot = agent.workflow.get_state(thread)
    return reply, str(snapshot)


## Evaluate the model

In [None]:
def _processed_image_names(results):
    """Returns the set of image file names that already have an entry in `results`."""
    names = set()
    for entry in results:
        if "image_name" in entry:
            # Tool runs store the file name under an explicit key.
            names.add(entry["image_name"])
        else:
            # Tool-free runs and error records have the shape {file_name: value}.
            names.update(entry)
    return names


def _tool_logged_image(log_dir, image_name):
    """Returns True if the first call of any JSON tool log in `log_dir` refers to `image_name`."""
    for log_path in glob.glob(os.path.join(log_dir, "*.json")):
        with open(log_path, "r") as log_file:
            first_call = json.load(log_file)[0]
        logged_name = first_call['args']['image_path'].split("/")[-1]
        # NOTE(review): substring match kept from the original; it is looser
        # than equality — confirm whether an exact match is intended.
        if logged_name in image_name:
            return True
    return False


def evaluate_medDerm(model_name, image_dir, n_samples=10, output_file=None, classification_type="multi-class", openai_kwargs=None, use_tools=False):
    """
    Evaluates medDerm on a set of images and records one result per image.

    Args:
        model_name: Name of the model used as central agent. None lets
            get_agent fall back to its own default.
        image_dir: Directory containing the images (*.jpg and *.JPEG) to classify.
        n_samples: Maximum number of not-yet-processed images to classify in this call.
        output_file: JSON file the results are written to after every image.
            If it already exists, its entries are loaded and the images they
            name are skipped. None disables both loading and saving.
        classification_type: "multi-class" (seven lesion classes) or "binary"
            (lesion / not lesion).
        openai_kwargs: Extra keyword arguments forwarded to the chat model; None means none.
        use_tools: If True the agent gets the classification tool and each
            result also records whether a tool log mentions the image.

    Returns:
        The list of result dicts (previously saved ones followed by the new
        ones). Without tools each entry is {file_name: answer}; with tools it
        is {"image_name", "predicted_class", "use_tools"}; a failed image is
        recorded as {file_name: "Error"}.

    Raises:
        ValueError: If classification_type is not one of the supported values.
    """
    # Pick the prompt first so that an unsupported mode fails before any work
    # is done (previously `prompt` stayed unbound and every image became "Error").
    # The pieces are now separated by a space: implicit literal concatenation
    # used to glue them together as "VASC.output" / "NOoutput".
    if classification_type == "multi-class":
        prompt = (
            "Classify this image into one of the seven classes: MEL, NV, BCC, AKIEC, BKL, DF, VASC. "
            "output the class name only, without any additional text or explanation."
        )
    elif classification_type == "binary":
        if use_tools:
            prompt = (
                "If the image refer to skin lesions, "
                "respond 'Yes' and perform the classification with the tools "
                "and use this information added to the initial text to answer the question in detail "
                "and explain the characteristics observed in the image."
            )
        else:
            prompt = (
                "If the image contain a skin leasion answer YES, otherwise NO. "
                "output [YES,NO] only, without any additional text or explanation."
            )
    else:
        raise ValueError(
            f"Unsupported classification_type {classification_type!r}; expected 'multi-class' or 'binary'"
        )

    openai_kwargs = {} if openai_kwargs is None else openai_kwargs

    results = []
    if output_file is not None:
        # Create the target directory up front: a failing write used to be
        # swallowed per image, after the model call had already been made.
        output_dir = os.path.dirname(output_file)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)
        if os.path.exists(output_file):
            with open(output_file, "r") as f:
                results = json.load(f)
            print(f"Already processed {len(results)} images, starting from {len(results)}")

    # Resume by file name, not by position: glob returns files in arbitrary
    # order, so slicing by count could skip or repeat images between runs.
    already_done = _processed_image_names(results)
    image_paths = sorted(
        glob.glob(os.path.join(image_dir, "*.jpg")) + glob.glob(os.path.join(image_dir, "*.JPEG"))
    )
    image_paths = [path for path in image_paths if os.path.basename(path) not in already_done]
    image_paths = image_paths[:max(n_samples, 0)]
    print (f"number of samples to execute: {len(image_paths)}")

    tools = get_tools() if use_tools else []

    # Always forward the tools; only override get_agent's default model when a name is given.
    agent_options = {"tools": tools}
    if model_name is not None:
        agent_options["model_name"] = model_name

    for image_path in tqdm.tqdm(image_paths, desc="Processing images"):
        image_name = os.path.basename(image_path)

        # re-initialize the agent for each image in order to avoid too long prompt using the agent as a single chatbot
        agent, thread = get_agent(openai_kwargs, **agent_options)

        try:
            response, _ = run_medrax(agent, thread, prompt, image_path=image_path, use_tools=use_tools)
            classification = response.strip()
            if use_tools:
                used = _tool_logged_image(os.path.join(ROOT, agent.log_path), image_name)
                result = {
                    "image_name": image_name,
                    "predicted_class": classification,
                    "use_tools": used
                }
            else:
                result = {
                    image_name: classification
                }
        except Exception as e:
            print(f"Error processing {image_path}: {e}")
            result = {image_name: "Error"}

        results.append(result)
        if output_file is not None:
            # Persist after every image, failures included, so an interrupted
            # run can resume. A write error is deliberately not swallowed.
            with open(output_file, "w") as f:
                json.dump(results, f, indent=4)

    if output_file is not None:
        print(f"Classification results saved to {output_file}")
    return results

## HAM10K Classification

In [None]:

# Multi-class run: load the provider-specific environment file, collect the
# OpenAI client settings from it, classify the images and post-process the results.
model = "gpt"
if not load_dotenv(f"{ROOT}/{model}_env.env"):
    print(f"Error loading environment variables from {model}_env.env")
    # `exit()` is the interactive-shell helper; raise SystemExit directly so
    # execution stops here however this code is run.
    raise SystemExit(1)

openai_kwargs = {}
if api_key := os.getenv("OPENAI_API_KEY"):
    openai_kwargs["api_key"] = api_key

if base_url := os.getenv("OPENAI_BASE_URL"):
    openai_kwargs["base_url"] = base_url

model_name = os.getenv("OPENAI_MODEL_NAME")
classification_type = "multi-class"

output_file = os.path.join(ROOT, "experiments", "results", "medDerm_classification_results_gpt4o.json")
# Make sure the results directory exists before anything is written to it.
os.makedirs(os.path.dirname(output_file), exist_ok=True)

# NOTE(review): this multi-class run reads DATASET_PATH while BENCHMARK_DIR is
# defined but never used — confirm which directory is intended.
# `openai_kwargs` was built above but never passed on; without it the
# evaluation received None and failed when the chat model was created.
evaluate_medDerm(
    model_name,
    DATASET_PATH,
    output_file=output_file,
    n_samples=10000,
    classification_type=classification_type,
    openai_kwargs=openai_kwargs,
)
postProcessingResults(output_file, benchmark_gt_file_path=BENCHMARK_GT_FILE, benchmark_binary_directory=DATASET_PATH, classification_type=classification_type)


## Binary Classification (No Tool)
Lesion/not lesion

In [None]:

# Binary (lesion / not lesion) run without tools: load the provider-specific
# environment file, collect the OpenAI client settings, classify and post-process.
model = "gpt"
if not load_dotenv(f"{ROOT}/{model}_env.env"):
    print(f"Error loading environment variables from {model}_env.env")
    # `exit()` is the interactive-shell helper; raise SystemExit directly so
    # execution stops here however this code is run.
    raise SystemExit(1)

openai_kwargs = {}
if api_key := os.getenv("OPENAI_API_KEY"):
    openai_kwargs["api_key"] = api_key

if base_url := os.getenv("OPENAI_BASE_URL"):
    openai_kwargs["base_url"] = base_url

model_name = os.getenv("OPENAI_MODEL_NAME")
classification_type = "binary"
use_tools = False

output_file = os.path.join(ROOT, "experiments", "results", "medDerm_classification_results_gpt4o_binary.json")
# Make sure the results directory exists before anything is written to it.
os.makedirs(os.path.dirname(output_file), exist_ok=True)

#Evaluate medDerm on the benchmark dataset
evaluate_medDerm(
    model_name,
    DATASET_PATH,
    output_file=output_file,
    n_samples=10000,
    classification_type=classification_type,
    openai_kwargs=openai_kwargs,
    use_tools=use_tools,
)
postProcessingResults(output_file, benchmark_gt_file_path=BENCHMARK_GT_FILE, benchmark_binary_directory=DATASET_PATH, classification_type=classification_type)


## Binary Classification (Tool)
Lesion/not lesion

In [None]:
# Binary (lesion / not lesion) run with the classification tool enabled: load
# the provider-specific environment file, collect the OpenAI client settings,
# classify and post-process.
model = "gpt"
if not load_dotenv(f"{ROOT}/{model}_env.env"):
    print(f"Error loading environment variables from {model}_env.env")
    # `exit()` is the interactive-shell helper; raise SystemExit directly so
    # execution stops here however this code is run.
    raise SystemExit(1)

openai_kwargs = {}
if api_key := os.getenv("OPENAI_API_KEY"):
    openai_kwargs["api_key"] = api_key

if base_url := os.getenv("OPENAI_BASE_URL"):
    openai_kwargs["base_url"] = base_url

model_name = os.getenv("OPENAI_MODEL_NAME")
classification_type = "binary"
use_tools = True

output_file = os.path.join(ROOT, "experiments", "results", "medDerm_classification_results_gpt4o_binary_tool.json")
# Make sure the results directory exists before anything is written to it.
os.makedirs(os.path.dirname(output_file), exist_ok=True)

# #Evaluate medDerm on the benchmark dataset
evaluate_medDerm(
    model_name,
    DATASET_PATH,
    output_file=output_file,
    n_samples=200000,
    classification_type=classification_type,
    openai_kwargs=openai_kwargs,
    use_tools=use_tools,
)
postProcessingResults(output_file, benchmark_gt_file_path=BENCHMARK_GT_FILE, benchmark_binary_directory=DATASET_PATH, classification_type=classification_type)
