In [17]:
import os
import io
from typing import List, Dict, Any

import PyPDF2
from PIL import Image
import pytesseract
import docx
from dotenv import load_dotenv

# LangChain public interfaces
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.chains import LLMChain
from langchain.callbacks import get_openai_callback

# Import your persona prompts explicitly
from prompts import HEOR_modeler, Clinician, Health_policy_maker, Market_access_professional, Statistician, Layman

# Load environment variables
load_dotenv()
api_key = os.getenv("OPENAI_API_KEY")

In [18]:

class ChatHistory:
    """In-memory record of a conversation plus the generation settings used for it.

    Messages are kept in insertion order as ``{"role": ..., "content": ...}``
    dictionaries. The generation settings are plain attributes that start out
    as ``None`` and are filled in by ``set_parameters``.
    """

    def __init__(self):
        self.history: List[Dict[str, str]] = []
        # Define the settings up front so that reading one before
        # set_parameters() has been called yields None instead of raising
        # AttributeError.
        self.profession = None
        self.max_words = None
        self.engine = None
        self.temperature = None
        self.max_tokens = None

    def add_message(self, role: str, content: str):
        """Append one message with the given role and content to the history."""
        self.history.append({"role": role, "content": content})

    def get_messages(self) -> List[Dict[str, str]]:
        """Return the stored message list itself (not a copy)."""
        return self.history

    def clear_history(self):
        """Discard all stored messages; the generation settings are left as they are."""
        # Rebind instead of calling .clear() so that a list previously handed
        # out by get_messages() is not emptied behind the caller's back.
        self.history = []

    def set_parameters(self, profession: str, max_words: int, engine: str, temperature: float, max_tokens: int):
        """Store the generation settings on this instance."""
        self.profession = profession
        self.max_words = max_words
        self.engine = engine
        self.temperature = temperature
        self.max_tokens = max_tokens

In [19]:
def generate_response(
    question: str,
    profession: str,
    max_words: int,
    api_key: str,
    engine: str,
    temperature: float,
    max_tokens: int,
    chat_history: ChatHistory = None
) -> tuple:
    """Ask the chat model to respond to ``question`` under a persona system prompt.

    The prompt is: the persona system prompt, then every message stored in
    ``chat_history`` (if given), then a final user message
    ``"Statement: <question>"``.

    Args:
        question: Text substituted into the final user message.
        profession: Persona name. Unrecognised values use the ``Layman`` prompt.
        max_words: Not used by this function; kept so existing callers keep working.
        api_key: OpenAI API key handed to ``ChatOpenAI``.
        engine: Model name handed to ``ChatOpenAI``.
        temperature: Sampling temperature handed to ``ChatOpenAI``.
        max_tokens: Completion token limit handed to ``ChatOpenAI``.
        chat_history: Optional conversation record. When given, the model's
            answer is appended to it as an ``"assistant"`` message. The
            question itself is NOT appended by this function.

    Returns:
        A ``(answer, cb)`` tuple: the model's text and the callback object
        yielded by ``get_openai_callback()`` (token usage counters).
    """
    # Persona name -> system prompt; anything else falls back to Layman.
    persona_prompts = {
        'HEOR modeler': HEOR_modeler,
        'Clinician': Clinician,
        'Health policy maker': Health_policy_maker,
        'Market access professional': Market_access_professional,
        'Statistician': Statistician,
    }
    system_prompt = persona_prompts.get(profession, Layman)

    # Assemble messages
    messages = [("system", system_prompt)]
    stored_messages = chat_history.history if chat_history else []
    for msg in stored_messages:
        # Every string given to ChatPromptTemplate is parsed as an f-string
        # template. Stored content (file text, earlier answers) is literal
        # text, so its braces are doubled; otherwise "{...}" in a document
        # would be taken for a template variable and the call would fail.
        literal_content = msg["content"].replace("{", "{{").replace("}", "}}")
        messages.append((msg["role"], literal_content))
    # NOTE(review): callers in this notebook add the question to the history
    # before calling, so it reaches the model twice (once from the history,
    # once here) - confirm that this is intended.
    messages.append(("user", "Statement: {question}"))

    # Build prompt template
    prompt = ChatPromptTemplate.from_messages(messages)

    # Initialize the LLM
    llm = ChatOpenAI(
        model_name=engine,
        temperature=temperature,
        max_tokens=max_tokens,
        openai_api_key=api_key
    )

    # Chain prompt + LLM
    chain = LLMChain(llm=llm, prompt=prompt)

    # Invoke and capture usage
    with get_openai_callback() as cb:
        answer = chain.predict(question=question)
        if chat_history:
            chat_history.add_message("assistant", answer)
        return answer, cb


In [20]:
# File-processing helpers: extract plain text from supported document types
def extract_text_from_pdf(file_path):
    """Return the text of every page of the PDF at ``file_path``, concatenated.

    Page texts are joined with no separator. A page whose ``extract_text()``
    result is falsy contributes an empty string.
    """
    with open(file_path, 'rb') as pdf_stream:
        pdf = PyPDF2.PdfReader(pdf_stream)
        page_texts = []
        for page in pdf.pages:
            page_text = page.extract_text()
            page_texts.append(page_text if page_text else "")
        return "".join(page_texts)


def extract_text_from_image(file_path):
    """Return the text that Tesseract OCR recognises in the image at ``file_path``."""
    # process_file() sends .png/.jpg/.jpeg paths here, so this helper has to be
    # defined; while it was commented out those paths raised NameError.
    # The context manager closes the underlying image file once OCR is done.
    with Image.open(file_path) as img:
        return pytesseract.image_to_string(img)


def extract_text_from_docx(file_path):
    """Return the paragraph texts of the .docx at ``file_path``, one per line.

    Only ``doc.paragraphs`` is read; the paragraph texts are joined with
    newline characters.
    """
    document = docx.Document(file_path)
    paragraph_texts = [paragraph.text for paragraph in document.paragraphs]
    return "\n".join(paragraph_texts)


def process_file(file_path):
    """Extract text from ``file_path`` based on its (case-insensitive) extension.

    ``.pdf`` goes to ``extract_text_from_pdf``, ``.png``/``.jpg``/``.jpeg`` to
    ``extract_text_from_image`` and ``.docx`` to ``extract_text_from_docx``.
    Any other extension yields ``None``.

    NOTE(review): the image branch needs ``extract_text_from_image`` to be
    defined in the notebook - confirm before passing image files.
    """
    lowered_path = file_path.lower()
    extracted = None
    if lowered_path.endswith('.pdf'):
        extracted = extract_text_from_pdf(file_path)
    elif lowered_path.endswith(('.png', '.jpg', '.jpeg')):
        extracted = extract_text_from_image(file_path)
    elif lowered_path.endswith('.docx'):
        extracted = extract_text_from_docx(file_path)
    return extracted

In [47]:
# Example usage: ask one question under a persona and report token usage.
# The names assigned here (profession, engine, history, ...) are reused by the
# cells that follow.
if __name__ == "__main__":
    question = "What is cost-effectiveness analysis?"
    profession = "Market access professional"
    max_words = 2000
    engine = "gpt-4.1"
    temperature = 0.7
    # Completion token limit is set to five times the word budget.
    max_tokens = int(max_words * 5)

    history = ChatHistory()
    history.set_parameters(profession, max_words, engine, temperature, max_tokens)

    if not api_key:
        print("Please set your OpenAI API key in the .env file.")
        # Raise SystemExit directly rather than calling exit(): exit() is the
        # interactive helper and is not meant for use inside program code.
        raise SystemExit(1)

    # First query
    resp, cb = generate_response(
        question, profession, max_words, api_key,
        engine, temperature, max_tokens, history
    )
    print("Response:", resp)
    print(f"Tokens used: prompt={cb.prompt_tokens}, completion={cb.completion_tokens}, total={cb.total_tokens}")

Response: **Interpretation for Market Access Professionals (without HEOR background):**

Cost-effectiveness analysis (CEA) is a tool used to help decision-makers—like payers and health authorities—figure out if a new medicine or health technology provides good value for the money compared to what’s already available.

**Think about CEA as a way to answer the question:**  
*“If we spend money on this new treatment, what do we get in return, and is it worth it compared to other options?”*

Here’s how it works in practical terms:

- **Comparing Costs and Benefits:**  
  CEA compares the extra cost of a new product (such as a new cancer drug) to the extra health benefits it provides. The health benefits are usually measured in terms of how much it improves patients’ lives—like helping them live longer or have a better quality of life.

- **Common Outcome – The “Cost per QALY”:**  
  One of the most common ways results are reported is as a “cost per QALY” (Quality-Adjusted Life Year). This 

In [27]:
# Record the follow-up question in the history, then ask it with the same
# settings as the first query.
follow_up = "Can you explain that in more detail?"
history.add_message("user", follow_up)
resp, cb = generate_response(
    question=follow_up,
    profession=profession,
    max_words=max_words,
    api_key=api_key,
    engine=engine,
    temperature=temperature,
    max_tokens=max_tokens,
    chat_history=history,
)
print("Follow-up:", resp)


Follow-up: Absolutely! Let’s go into more detail, still using simple language and analogies.

---

### What is Cost-Effectiveness Analysis (CEA)?

**Cost-effectiveness analysis** is a way to compare different healthcare treatments or programs to see which one gives us the most health benefit for the money we spend. It helps people who make decisions in healthcare (like governments, insurance companies, or hospitals) decide where to spend their limited budget.

---

### A Simple Analogy: Fixing Your Car

Imagine you have a car with a broken part. You go to two mechanics:

- **Mechanic A** says they can fix your car for $100, and it will last for 1 year.
- **Mechanic B** says they can fix your car for $150, and it will last for 3 years.

Which repair should you choose?

You want to know not just the price, but also how long the fix will last. So you compare the cost for each year your car will run:

- Mechanic A: $100 ÷ 1 year = $100 per year
- Mechanic B: $150 ÷ 3 years = $50 per year



In [35]:
  # File processing example
# Extract the text of a local PDF and send it to the model as the question.
file_path = r"C:\Users\HananIrfan\OneDrive - ConnectHEOR\Desktop\Learning and Development\Fw_ Readings_ Basics of HEOR\What_are_health_util.pdf"
text = process_file(file_path)
if not text:
    print("No text extracted from file.")
else:
    # Add the file's extracted text as a user message
    history.add_message("user", text)
    resp, cb = generate_response(
        question=text,
        profession=profession,
        max_words=max_words,
        api_key=api_key,
        engine=engine,
        temperature=temperature,
        max_tokens=max_tokens,
        chat_history=history,
    )
    print("From file:", resp)


From file: Absolutely! I’ll explain the statement in simple, layman’s terms using analogies and examples.

---

## What Are Health Utilities?

**Imagine you’re rating how good or bad different health situations are, a bit like giving a movie a score out of 10.**  
But here, instead of 1 to 10, we use a scale from 0 to 1:
- **1** means you feel as good as possible (perfect health).
- **0** means the worst possible health (equivalent to being dead).

These scores are called **utilities**. They help us put a number on how “good” or “bad” a particular health state is, based on what people prefer.

---

## Step 1: Define Health States

First, you have to describe different health situations (“health states”).  
For example:
- Living with mild pain
- Needing help to walk
- Having trouble breathing

Each health state is like describing a different “scene” in a movie about health. You need to be specific, like: “Can walk, but with pain; needs help with daily tasks; feels tired most days.”

---

In [36]:
# Record the follow-up question in the history, then ask it with the same
# settings as the earlier queries.
follow_up = "I didn't understand the scene in a movie analogy. Please elaborate."
history.add_message("user", follow_up)
resp, cb = generate_response(
    question=follow_up,
    profession=profession,
    max_words=max_words,
    api_key=api_key,
    engine=engine,
    temperature=temperature,
    max_tokens=max_tokens,
    chat_history=history,
)
print("Follow-up:", resp)

Follow-up: Of course! Let’s look at the “scene in a movie” analogy in more detail, step by step.

---

### Step 1: Imagine a Movie About Health

Think about your favorite movie. Every movie is made up of different scenes, and each scene shows the characters in a certain situation—maybe they’re happy, sad, in danger, or relaxing.

Now, **imagine a movie where each scene shows a person in a different health situation**. For example:
- In one scene, the character is running and playing with friends (perfect health).
- In another scene, the character is walking slowly because of knee pain.
- In another, the character needs help getting dressed and feels tired all the time.

Each of these scenes is like a different “health state” in real life.

---

### Step 2: Rating the Scenes

Suppose you’re a movie critic, and your job is to rate each scene based on how enjoyable or good you think it is for the character.  
- The scene where the character is running and playing might get a **10 out of 1

In [42]:
 #to print entire chat history
import pprint

# pprint lays the message dictionaries out over several lines, which keeps the
# notebook from showing the history as one very long line.
print("Chat History:")
pprint.pprint(history.get_messages())


Chat History:
[{'content': 'Sure! Let’s break down “cost-effectiveness analysis” (often '
             'called CEA) in simple terms, using a real-life analogy:\n'
             '\n'
             '**Imagine you’re at the grocery store.**  \n'
             'You want to buy apples, but you also want to get the best deal '
             'for your money. There are two bags of apples:\n'
             '\n'
             '- **Bag A:** Costs $5 and has 5 apples.\n'
             '- **Bag B:** Costs $8 and has 10 apples.\n'
             '\n'
             'Which one is the better buy?  \n'
             'You divide the price by the number of apples:\n'
             '\n'
             '- Bag A: $5 ÷ 5 = $1 per apple\n'
             '- Bag B: $8 ÷ 10 = $0.80 per apple\n'
             '\n'
             'So, **Bag B gives you more apples for your money**; it’s more '
             '“cost-effective.”\n'
             '\n'
             '---\n'
             '\n'
             '### How does this relate to healthc

In [45]:
    # If you want to clear the chat history:
# Drop every stored message, then print the history to show what is left.
history.clear_history()
print("Chat History:")
# Relies on pprint having been imported by an earlier cell.
pprint.pprint(history.get_messages())

Chat History:
[]


In [46]:
# File processing example: extract the text of a local .docx report and send
# it to the model as the question.
file_path = r"C:\Users\HananIrfan\Downloads\AMVUTTRA-CM_Core Cost-Effectiveness Analysis_TechnicalReport_VF_23April2025.docx"
text = process_file(file_path)
if not text:
    print("No text extracted from file.")
else:
    # Add the file's extracted text as a user message
    history.add_message("user", text)
    resp, cb = generate_response(
        question=text,
        profession=profession,
        max_words=max_words,
        api_key=api_key,
        engine=engine,
        temperature=temperature,
        max_tokens=max_tokens,
        chat_history=history,
    )
    print("From file:", resp)

From file: Certainly! The statement you provided is a detailed technical table of contents and executive summary from a health economic evaluation (HEOR) report about the cost-effectiveness of **vutrisiran** compared to other treatments for **transthyretin amyloid cardiomyopathy (ATTR-CM)**. 

As a layperson with some HEOR knowledge, let me break this down in plain English, using analogies and everyday examples:

---

### 1. **What is this report about?**
Imagine you’re part of a government or insurance company deciding whether to pay for a new medicine. You have to decide: “Is this new medicine worth the money, compared to what we already have, or just regular care?”

This report is like a big, detailed **comparison shopping guide** for a new drug called **vutrisiran**, used to treat a rare heart condition called ATTR-CM. The goal is to see if this drug gives enough extra health benefit to justify its cost.

---

### 2. **What is a cost-effectiveness model?**
Think of this as a **gian