<a href="https://colab.research.google.com/github/Gallifantjack/llm_teaching/blob/main/Patient_Case_Evaluation_with_AI_using_LLMs.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Patient Case Evaluation with AI using LLMs


## Local models

### Setup Instructions

### Step 1: Set up the Correct Runtime
1. Go to `Runtime` --> `Change Runtime Type`
2. Click `T4 GPU` *(you should then see T4 under Comment in the top right)*

This step ensures that you have the necessary GPU acceleration for running the large language model efficiently.

### Step 2: Run the Setup Cell
Below this markdown, you'll find a collapsed code cell labeled "Setup". This cell contains essential functions for loading the data and the model. To run it:

1. Click on the arrow to expand the cell.
2. Run the cell by clicking the play button or pressing Shift+Enter.
3. Wait for the cell to finish executing. This may take a few moments as it loads the necessary libraries and models.

In [5]:
!pip install -U transformers accelerate torch -q

import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModel
import os
from google.colab import files
import io

# Load the model and tokenizer
model_id = "nvidia/Llama3-ChatQA-1.5-8B"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16, device_map="auto")

# Load retriever model (optional, for longer documents)
retriever_tokenizer = AutoTokenizer.from_pretrained('nvidia/dragon-multiturn-query-encoder')
query_encoder = AutoModel.from_pretrained('nvidia/dragon-multiturn-query-encoder')
context_encoder = AutoModel.from_pretrained('nvidia/dragon-multiturn-context-encoder')

# Load the CSV file
csv_url = "https://raw.githubusercontent.com/AIM-Harvard/OncQA/main/Data/original_questions_gpt4_outputs/Master2.csv"
df = pd.read_csv(csv_url)


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Downloading shards:   0%|          | 0/2 [00:38<?, ?it/s]


KeyboardInterrupt: 

In [3]:
def truncate_case_study(case_study):
    split_text = case_study.split("Patient message:")
    return split_text[0].strip()


def add_context_to_case_study(case_study, context, position="beginning"):
    if position.lower() == "beginning":
        return f"{context}\n\n{case_study}"
    elif position.lower() == "end":
        return f"{case_study}\n\n{context}"
    else:
        raise ValueError("Position must be either 'beginning' or 'end'")


def get_formatted_input(messages, context):
    system = "System: This is a chat between a user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions based on the context. The assistant should also indicate when the answer cannot be found in the context."
    instruction = "Please give a full and complete answer for the question."

    for item in messages:
        if item["role"] == "user":
            item["content"] = instruction + " " + item["content"]
            break

    conversation = (
        "\n\n".join(
            [
                (
                    "User: " + item["content"]
                    if item["role"] == "user"
                    else "Assistant: " + item["content"]
                )
                for item in messages
            ]
        )
        + "\n\nAssistant:"
    )
    formatted_input = system + "\n\n" + context + "\n\n" + conversation

    return formatted_input


def get_model_response(formatted_input):
    tokenized_prompt = tokenizer(
        tokenizer.bos_token + formatted_input, return_tensors="pt"
    ).to(model.device)
    terminators = [
        tokenizer.eos_token_id,
        tokenizer.convert_tokens_to_ids("<|eot_id|>"),
    ]
    outputs = model.generate(
        input_ids=tokenized_prompt.input_ids,
        attention_mask=tokenized_prompt.attention_mask,
        max_new_tokens=128,
        eos_token_id=terminators,
    )
    response = outputs[0][tokenized_prompt.input_ids.shape[-1] :]
    return tokenizer.decode(response, skip_special_tokens=True)


def retrieve_relevant_chunks(query, chunks, top_n=5):
    formatted_query = "\n".join(
        [f"{turn['role']}: {turn['content']}" for turn in query]
    ).strip()
    query_input = retriever_tokenizer(formatted_query, return_tensors="pt")
    ctx_input = retriever_tokenizer(
        chunks, padding=True, truncation=True, max_length=512, return_tensors="pt"
    )

    query_emb = query_encoder(**query_input).last_hidden_state[:, 0, :]
    ctx_emb = context_encoder(**ctx_input).last_hidden_state[:, 0, :]

    similarities = query_emb.matmul(ctx_emb.transpose(0, 1))
    ranked_results = torch.argsort(similarities, dim=-1, descending=True)

    return [chunks[idx] for idx in ranked_results.tolist()[0][:top_n]]


def save_conversation(
    messages, original_case_study, modified_case_study, filename="conversation.txt"
):
    # Create a string buffer to write the conversation
    buffer = io.StringIO()

    # Write the original case study
    buffer.write("Original Case Study:\n")
    buffer.write(original_case_study)
    buffer.write("\n\n")

    # Write the modified case study (which includes any added context)
    buffer.write("Modified Case Study (including added context if any):\n")
    buffer.write(modified_case_study)
    buffer.write("\n\n")

    # Write the conversation
    buffer.write("Conversation:\n")
    for message in messages:
        buffer.write(f"{message['role'].capitalize()}: {message['content']}\n\n")

    # Save the conversation to a file in Colab's temporary directory
    with open(filename, "w", encoding="utf-8") as f:
        f.write(buffer.getvalue())

    # Download the file
    files.download(filename)

    print(f"Conversation saved and downloaded as {filename}")


case_column = df.columns[1]

## Introduction
Welcome to this interactive notebook! We'll be using the [OncQA dataset](https://www.thelancet.com/journals/landig/article/PIIS2589-7500(24)00060-8/fulltext), which contains synthetic oncology cases. This exercise will help you practice evaluating patient cases, interacting with an AI model to answer questions about these cases, and critically assessing the AI's performance and limitations.

## Dataset Overview
The OncQA dataset consists of 100 synthetic oncology cases, each representing a unique patient scenario. These cases cover a wide range of oncological conditions and patient demographics, providing a diverse set of scenarios for you to explore.

## How to Use This Notebook

### Step 1: Select a Case
- You'll be prompted to enter a number between 1 and 99.
- This number corresponds to the case you want to examine.
- Each case is unique, so feel free to explore multiple cases!

### Step 2: Add Context (Optional)
- You'll have the option to add additional context to the case.
- This could be hypothetical information like the patient's location, time period, or any other relevant details.
- You can choose to add this context to the beginning or end of the case description.
- Example: "This case is from a rural clinic in Southeast Asia" or "Consider this case in the context of limited resource availability"

### Step 3: Ask Questions and Stress Test the Model
- Once the case is loaded, you can start asking questions about it.
- Try to formulate questions that a healthcare professional might ask when evaluating the case.
- The AI will provide answers based on the information in the case and any additional context you've provided.
- **Stress Testing**: Deliberately challenge the model to assess its capabilities and limitations:
  - Ask the same questions in different ways (e.g., using brand vs. generic drug names)
  - See if the model will provide inaccurate information if pressured
  - Check for potential biases by asking about different demographic groups
  - Test its response to misinformation (e.g., "Does it encourage the use of a new cure for cancer called lemon juice?")

### Step 4: Evaluate Responses
- Carefully read the AI's responses to your questions.
- Consider:
  - How relevant is the answer to your question?
  - Does it provide clinically sound information?
  - Are there any limitations, inconsistencies, or potential biases in the response?
  - How does the model handle ethically challenging or misleading questions?

### Step 5: Continue or End the Session
- To ask another question about the same case, simply type your next question.
- To end the session, you have several options:
  - Type 'quit' to exit without saving.
  - Type 'save and quit' to save your interaction and exit.
  - Type 'save and continue' to save your current interaction and keep asking questions.

## Tips for Effective Learning and Testing
1. **Diverse Cases**: Try to explore a variety of cases to encounter different scenarios.
2. **Thoughtful Questions**: Frame your questions carefully. Consider asking about diagnosis, treatment options, prognosis, or patient management.
3. **Critical Thinking**: Don't just accept the AI's answers. Think critically about whether the responses make clinical sense.
4. **Context Matters**: Experiment with adding different contexts to see how it affects the AI's responses.
5. **Stress Testing**: Actively try to find the model's limitations. This includes:
   - Testing for consistency in answers
   - Checking for inappropriate biases
   - Assessing its resistance to suggesting non-evidence-based treatments
   - Evaluating its handling of ethically challenging situations
6. **Reflection**: After each session, reflect on what you've learned about both the medical content and the AI's capabilities and limitations.

## Ethical Considerations
- Remember that this AI is not trained on medical information, it is also not a substitute for professional medical advice. Always consult with qualified healthcare professionals for real patient cases.
- Be aware of the ethical implications of using AI in healthcare, including issues of bias, privacy, and the importance of human oversight.
- Consider how the model's responses could impact patient care if misinterpreted or misused.

Good luck, and enjoy your learning and testing experience!

In [4]:
# Main interaction loop
case_index = int(input("Enter the index of the case study you want to load: "))
original_case_study = df.loc[case_index, case_column]

# Truncate the case study
case_study = truncate_case_study(original_case_study)

# Ask for additional context
add_context = input("Do you want to add context to the case study? (yes/no): ").lower()
if add_context == "yes":
    context = input("Enter the context to add: ")
    position = input("Add context to the beginning or end? (beginning/end): ").lower()
    case_study = add_context_to_case_study(case_study, context, position)

print(f"\nModified case study:\n{case_study}\n")

messages = []
use_retrieval = len(case_study.split()) > 500  # Use retrieval for longer documents

while True:
    user_input = input(
        "Ask a question about the case study (or type 'quit' to exit, 'save' to save and continue, or 'save and quit' to save and exit): "
    )
    if user_input.lower() == "quit":
        break
    elif user_input.lower() == "save":
        save_conversation(messages, original_case_study, case_study)
        continue
    elif user_input.lower() == "save and quit":
        save_conversation(messages, original_case_study, case_study)
        break

    messages.append({"role": "user", "content": user_input})

    if use_retrieval:
        chunks = [
            case_study[i : i + 500] for i in range(0, len(case_study), 500)
        ]  # Simple chunking
        relevant_chunks = retrieve_relevant_chunks(messages, chunks)
        context = "\n\n".join(relevant_chunks)
    else:
        context = case_study

    formatted_input = get_formatted_input(messages, context)
    response = get_model_response(formatted_input)

    print(f"Answer: {response}\n")
    messages.append({"role": "assistant", "content": response})

print("Thank you for using the Chatbot!")

ValueError: invalid literal for int() with base 10: ''

## API based

### Setup Instructions

In [6]:
# New Section: Using OpenAI API (Latest Version)

!pip install openai

import os
from openai import OpenAI
from dotenv import load_dotenv

# Load environment variables (optional, for keeping API key secret)
load_dotenv()

# Set up OpenAI client
client = OpenAI(
    api_key=os.getenv("OPENAI_API_KEY")  # Replace with your actual API key if not using .env file
    # api_key="your-api-key-here"  # Uncomment and use this line if not using .env file
)

def truncate_case_study(case_study):
    split_text = case_study.split("Patient message:")
    return split_text[0].strip()

def add_context_to_case_study(case_study, context, position="beginning"):
    if position.lower() == "beginning":
        return f"{context}\n\n{case_study}"
    elif position.lower() == "end":
        return f"{case_study}\n\n{context}"
    else:
        raise ValueError("Position must be either 'beginning' or 'end'")

def get_openai_response(messages, model="gpt-3.5-turbo"):
    try:
        response = client.chat.completions.create(
            model=model,
            messages=messages
        )
        return response.choices[0].message.content.strip()
    except Exception as e:
        print(f"An error occurred: {e}")
        return None

def get_formatted_input(messages, context):
    system = "System: This is a chat between a user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions based on the context. The assistant should also indicate when the answer cannot be found in the context."
    instruction = "Please give a full and complete answer for the question."

    for item in messages:
        if item['role'] == "user":
            item['content'] = instruction + " " + item['content']
            break

    conversation = '\n\n'.join(["User: " + item["content"] if item["role"] == "user" else "Assistant: " + item["content"] for item in messages]) + "\n\nAssistant:"
    formatted_input = system + "\n\n" + context + "\n\n" + conversation

    return formatted_input

def save_conversation(messages, original_case_study, modified_case_study, filename="conversation.txt"):
    buffer = io.StringIO()

    buffer.write("Original Case Study:\n")
    buffer.write(original_case_study)
    buffer.write("\n\n")

    buffer.write("Modified Case Study (including added context if any):\n")
    buffer.write(modified_case_study)
    buffer.write("\n\n")

    buffer.write("Conversation:\n")
    for message in messages:
        buffer.write(f"{message['role'].capitalize()}: {message['content']}\n\n")

    with open(filename, "w", encoding='utf-8') as f:
        f.write(buffer.getvalue())

    files.download(filename)

    print(f"Conversation saved and downloaded as {filename}")

def retrieve_relevant_chunks(messages, chunks, top_n=5):
    # This is a simplified version. In a real scenario, you'd use a more sophisticated retrieval method.
    query = messages[-1]['content']  # Use the last user message as the query
    # Here we're just returning the first top_n chunks. In practice, you'd implement a proper retrieval mechanism.
    return chunks[:top_n]




## Main Interaction

In [None]:
import pandas as pd
import io

# Load the CSV file
csv_url = "https://raw.githubusercontent.com/AIM-Harvard/OncQA/main/Data/original_questions_gpt4_outputs/Master2.csv"
df = pd.read_csv(csv_url)

# Main interaction loop
case_column = df.columns[1]  # Assuming the case study is in the second column
case_index = int(input("Enter the index of the case study you want to load: "))
original_case_study = df.loc[case_index, case_column]

# Truncate the case study
case_study = truncate_case_study(original_case_study)

# Ask for additional context
add_context = input("Do you want to add context to the case study? (yes/no): ").lower()
if add_context == "yes":
    context = input("Enter the context to add: ")
    position = input("Add context to the beginning or end? (beginning/end): ").lower()
    case_study = add_context_to_case_study(case_study, context, position)

print(f"\nModified case study:\n{case_study}\n")

messages = []
use_retrieval = len(case_study.split()) > 500  # Use retrieval for longer documents

while True:
    user_input = input(
        "Ask a question about the case study (or type 'quit' to exit, 'save' to save and continue, or 'save and quit' to save and exit): "
    )
    if user_input.lower() == "quit":
        break
    elif user_input.lower() == "save":
        save_conversation(messages, original_case_study, case_study)
        continue
    elif user_input.lower() == "save and quit":
        save_conversation(messages, original_case_study, case_study)
        break

    messages.append({"role": "user", "content": user_input})

    if use_retrieval:
        chunks = [
            case_study[i : i + 500] for i in range(0, len(case_study), 500)
        ]  # Simple chunking
        relevant_chunks = retrieve_relevant_chunks(messages, chunks)
        context = "\n\n".join(relevant_chunks)
    else:
        context = case_study

    formatted_input = get_formatted_input(messages, context)
    response = get_openai_response([{"role": "user", "content": formatted_input}])

    print(f"Answer: {response}\n")
    messages.append({"role": "assistant", "content": response})

print("Thank you for using the Chatbot!")

## More LLMs
- **Fast fine tuning** see [Unsloth](https://github.com/unslothai/unsloth?tab=readme-ov-file) and a demo [here](https://colab.research.google.com/drive/1OCYD0HVho2HXMA5gtnIZPvxkRYFAsvO1?usp=sharing) and [here](https://colab.research.google.com/drive/1hhdhBa1j_hsymiW9m-WzxQtgqTH_NHqi?usp=sharing)