# DATA

In [None]:
import pandas as pd

"""
FILE FORMAT: csv
Required columns: question, context
Optional columns: history
"""
# Location of the test-data CSV (a Google Drive path under /content/drive).
file_path = '/content/drive/MyDrive/Colab Notebooks/data/llm_analysis/expt1/iehp_test_data.csv'

# Load the whole CSV into `df`; every row is one test question for the models.
df = pd.read_csv(file_path)

# Print the first rows as a quick sanity check of the loaded columns.
print(df.head())

                       file_name  \
0  iehp_additional_test_data.csv   
1  iehp_additional_test_data.csv   
2  iehp_additional_test_data.csv   
3  iehp_additional_test_data.csv   
4  iehp_additional_test_data.csv   

                                            question  valid_answer  \
0    What is the overall deductible for Gold 80 HMO?          True   
1  What is the overall deductible for Platinum 90...          True   
2                     What is my overall deductible?          True   
3           Is cosmetic surgery included in my plan?          True   
4  How much do I need to pay for child birth unde...          True   

                                              answer  \
0  <p>The overall deductible for the Gold 80 HMO ...   
1  <p>The overall deductible for the Platinum 90 ...   
2  <p>Your overall deductible varies depending on...   
3  <p><strong>Cosmetic surgery is not covered</st...   
4  <p>Under the Platinum 90 HMO plan, if you are ...   

                         

# PROMPTS

In [None]:
# System instruction passed unchanged to every provider call. It asks the model
# to answer only from the supplied context/history, to format the answer as HTML,
# and to reply with a JSON object holding "valid_answer", "answer" and
# "source_of_information".
system_prompt = """You are an AI assistant designed to answer Frequently Asked Questions for users.
**Instructions:**
- **Answer Generation**:
  - Use the context and conversation history to answer the user's query.
  - Answer in the first person or in the voice of the enterprise/company, as appropriate.
  - Do not use any external information or personal beliefs.
  - Make logical inferences if the answer is not explicitly stated but can be concluded from the context.
  - If the question is vague or missing some information for you to answer, you should ask the user a question to get more information and set "valid_answer": true.
  - If you pick information from the conversation history, make sure to refer to who said it.
  - Set `"valid_answer": true` if your answer is based on or similar to or can be concluded from the given context.
  - Set `"valid_answer": false` and `"answer": ""` if unable to answer from the provided context.
  - Don't generate URLs if they're not present in the context.
  - Contact numbers cannot be considered as hyperlinks.

- **HTML Formatting**:
  - Present the entire answer in HTML format.
  - Use appropriate HTML tags to structure the content (e.g., `<p>`, `<br>`, `<ul>`, `<li>`, `<strong>`, `<em>`).
  - Do not create hyperlinks for incomplete URLs, file names, relative paths, or any href attributes with empty or # values. for example don't generate `href=\\"#\\"`
  - If a URL is incomplete or invalid, present it as plain text without making it a hyperlink.
  - Ensure all hyperlinks use `<a>` tags with `target="_blank"` to open links in a new tab.

- **Visual Appeal**:
  - Structure the answer for readability and visual appeal.
  - Use headings, bullet points, and emphasis where appropriate.

- **JSON Output**:
  - Output a well-formed JSON object with the following structure:
    ```json
    {
      "valid_answer": boolean, // true if an answer is provided/can be concluded from context, false otherwise
      "answer": string, // the answer in HTML format
      "source_of_information": [string] // list of sources used from the context
    }
    ```
  - Ensure proper escaping of characters to maintain valid JSON.

- **Source Citation**:
  - List all sources from the context that were used to generate the answer.
  - Add the page url if present and suggest the user to click on it for further information or to learn more.
  - If more than one citation is in the answer, present them in bullets
"""

# Per-question user message template. The {context}, {history} and {query}
# placeholders are filled with str.format() inside each `<provider>_call` function.
user_prompt = """
########
Context:
{context}
########
Conversation history:
{history}
########
User Query:
{query}
########
Output:
"""

# CLIENTS

In [None]:
import os

# API key per provider. Each key is read from the `<PROVIDER>_API_KEY`
# environment variable (GEMINI_API_KEY, ANTHROPIC_API_KEY, GROQ_API_KEY) so
# real secrets never have to be pasted into, or saved with, the notebook.
# When a variable is not set the original "<key>" placeholder is kept, so the
# dictionary always has the same three entries as before.
api_keys = {
    provider: os.environ.get(f"{provider.upper()}_API_KEY", "<key>")
    for provider in ("gemini", "anthropic", "groq")
}

In [None]:
!pip install anthropic
!pip install groq



In [None]:
from google import genai
import anthropic
from groq import Groq

# One SDK client per provider, keyed by the same provider name that is used in
# `api_keys` and looked up by the `<provider>_call` functions.
clients = dict(
    groq=Groq(api_key=api_keys.get("groq")),
    gemini=genai.Client(api_key=api_keys.get("gemini")),
    anthropic=anthropic.Anthropic(api_key=api_keys.get("anthropic")),
)


In [None]:
import functools
import inspect
import json
import os
import re
import time
import traceback

def post_process_llm_answer(string):
    """Extract and parse the JSON object contained in a raw LLM reply.

    Anything before the first ``{`` and after the last ``}`` (for example a
    Markdown code fence or surrounding chatter) is discarded before parsing.

    Args:
        string: Raw text returned by the model, or ``None``.

    Returns:
        The decoded JSON value (a dict for a well-formed reply), or an empty
        dict when ``string`` is ``None``.

    Raises:
        json.JSONDecodeError: If the remaining text is not valid JSON.
    """
    if string is None:
        return {}
    # Drop everything before the first "{" ...
    start = string.find("{")
    if start != -1:
        string = string[start:]
    # ... and everything after the last "}".
    end = string.rfind("}")
    if end != -1:
        string = string[:end + 1]
    # strict=False accepts literal control characters (newlines, tabs) inside
    # string values; models frequently emit them in multi-line HTML answers and
    # the default strict parser would reject an otherwise usable reply.
    return json.loads(string, strict=False)

def analyse_func(func):
    """Decorate a ``<provider>_call`` function to add timing metadata.

    The wrapped function must return a dict. The wrapper returns a new dict
    containing ``provider`` (the part of the function name before the first
    underscore), ``model`` (the effective value of the ``model`` argument) and
    ``time`` (elapsed seconds, rounded to 4 decimals), updated with the keys
    of the wrapped function's own result.

    Args:
        func: The function to wrap.

    Returns:
        The wrapping function, carrying ``func``'s name and docstring.
    """
    provider = func.__name__.split("_")[0]
    signature = inspect.signature(func)

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        # perf_counter is monotonic, so the measurement cannot be skewed by
        # system clock adjustments the way time.time() can.
        start_time = time.perf_counter()
        result = func(*args, **kwargs)
        time_taken = time.perf_counter() - start_time

        # Resolve `model` through the signature so that a positional argument
        # or the function's default is reported instead of None.
        bound = signature.bind(*args, **kwargs)
        bound.apply_defaults()
        model = bound.arguments.get("model", kwargs.get("model"))

        analysis = {
            "provider": provider,
            "model": model,
            "time": round(time_taken, 4),
        }
        analysis.update(result)
        return analysis

    return wrapper

@analyse_func
def gemini_call(system_prompt, user_prompt, context, history, query, model="gemini-2.0-flash"):
    """Ask a Gemini model one question and return its parsed JSON answer.

    Args:
        system_prompt: System instruction sent with the request.
        user_prompt: Template with ``{context}``, ``{history}`` and ``{query}``
            placeholders.
        context: Text substituted for ``{context}``.
        history: Text substituted for ``{history}``.
        query: Text substituted for ``{query}``.
        model: Gemini model name.

    Returns:
        The parsed answer dict with ``total_tokens`` added (the
        ``analyse_func`` decorator then adds provider, model and time).
    """
    filled_prompt = user_prompt.format(context=context, history=history, query=query)
    # Sampling is pinned (temperature 0, top_p 0, top_k 1), presumably to keep
    # the output as repeatable as possible across runs.
    generation_config = genai.types.GenerateContentConfig(
        system_instruction=system_prompt,
        temperature=0,
        max_output_tokens=10000,
        top_p=0,
        top_k=1,
    )
    response = clients["gemini"].models.generate_content(
        model=model,
        contents=filled_prompt,
        config=generation_config,
    )
    parsed = post_process_llm_answer(response.text)
    parsed["total_tokens"] = response.usage_metadata.total_token_count
    return parsed

@analyse_func
def anthropic_call(system_prompt, user_prompt, context, history, query, model="claude-3-7-sonnet-20250219"):
    """Ask an Anthropic model one question and return its parsed JSON answer.

    Args:
        system_prompt: System instruction sent with the request.
        user_prompt: Template with ``{context}``, ``{history}`` and ``{query}``
            placeholders.
        context: Text substituted for ``{context}``.
        history: Text substituted for ``{history}``.
        query: Text substituted for ``{query}``.
        model: Anthropic model name.

    Returns:
        The parsed answer dict with ``total_tokens`` added (the
        ``analyse_func`` decorator then adds provider, model and time).
    """
    user_prompt = user_prompt.format(context=context, history=history, query=query)
    response = clients["anthropic"].messages.create(
        model=model,
        max_tokens=10000,
        temperature=0,
        system=system_prompt,
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": user_prompt
                    }
                ]
            }
        ]
    )
    result = post_process_llm_answer(response.content[0].text)
    # Record token usage like the Gemini and Groq calls do, so the per-model
    # result tables share the same `total_tokens` column. The Messages API
    # reports input and output tokens separately, so they are summed here.
    result["total_tokens"] = response.usage.input_tokens + response.usage.output_tokens
    return result

@analyse_func
def groq_call(system_prompt, user_prompt, context, history, query, model="qwen-2.5-32b"):
    """Ask a Groq-hosted model one question and return its parsed JSON answer.

    Args:
        system_prompt: Content of the system message.
        user_prompt: Template with ``{context}``, ``{history}`` and ``{query}``
            placeholders; the filled template becomes the user message.
        context: Text substituted for ``{context}``.
        history: Text substituted for ``{history}``.
        query: Text substituted for ``{query}``.
        model: Groq model name.

    Returns:
        The parsed answer dict with ``total_tokens``, ``queue_time`` and
        ``gen_time`` added (the ``analyse_func`` decorator then adds provider,
        model and time).
    """
    filled_prompt = user_prompt.format(context=context, history=history, query=query)
    chat_messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": filled_prompt},
    ]
    completion = clients["groq"].chat.completions.create(
        model=model,
        messages=chat_messages,
        temperature=0,
        max_completion_tokens=10000,
        top_p=0,
        stream=False,
        # Request a JSON object as the response format.
        response_format={"type": "json_object"},
        stop=None,
    )
    parsed = post_process_llm_answer(completion.choices[0].message.content)
    # Copy the usage figures reported by the API next to the answer.
    parsed.update(
        {
            "total_tokens": completion.usage.total_tokens,
            "queue_time": round(completion.usage.queue_time, 4),
            "gen_time": round(completion.usage.total_time, 4),
        }
    )
    return parsed

def _blank_if_missing(value):
    """Return "" for a missing cell (None or float NaN), else the value unchanged."""
    if value is None or (isinstance(value, float) and value != value):
        return ""
    return value


def test_llms(test_df, llms):
    """Run every row of ``test_df`` through every requested provider/model.

    Args:
        test_df: DataFrame whose rows may provide ``question``, ``context``
            and ``history``; a missing column or empty cell is treated as "".
        llms: Mapping of provider name to a list of model names. For each
            provider a module-level ``<provider>_call`` function is looked up.

    Returns:
        Dict mapping ``"<provider>_<model>"`` to the list of result dicts, one
        per successfully answered row. A failing call is printed with its
        traceback and skipped.
    """
    results = {}
    for _, row in test_df.iterrows():
        # Empty CSV cells are read as NaN; without this they would be
        # formatted into the prompt as the literal text "nan".
        context = _blank_if_missing(row.get("context", ""))
        history = _blank_if_missing(row.get("history", ""))
        query = _blank_if_missing(row.get("question", ""))
        for provider, models in llms.items():
            for model in models:
                try:
                    # Looked up inside the try so an unknown provider is
                    # reported and skipped like any other failure.
                    func = globals()[f"{provider}_call"]
                    result = func(system_prompt, user_prompt, context, history, query, model=model)
                    result["question"] = query
                    result["context"] = context
                    result["history"] = history
                    results.setdefault(f"{provider}_{model}", []).append(result)
                except Exception as e:
                    print(f"Exception occured for ({provider}, {model}): {e}")
                    print(traceback.format_exc())
                    continue
    return results

def process_results(results, save=False, dir_path="/content/drive/MyDrive/Colab Notebooks/data/llm_results"):
    """Convert per-model result lists into DataFrames and optionally save them.

    Args:
        results: Dict mapping a ``"<provider>_<model>"`` key to a list of
            result dicts.
        save: When true, write one ``<key>.csv`` per entry into ``dir_path``.
        dir_path: Output directory; created (including parents) if missing.

    Returns:
        List of DataFrames, in the iteration order of ``results``.
    """
    if save:
        # exist_ok avoids the separate exists-then-create check.
        os.makedirs(dir_path, exist_ok=True)
    results_dfs = []
    for model, result in results.items():
        frame = pd.DataFrame(result)
        results_dfs.append(frame)
        if save:
            # Model ids may contain path separators (e.g. "org/model"), which
            # would point into a non-existent sub-directory; flatten them.
            safe_name = model.replace("/", "_").replace(os.sep, "_")
            frame.to_csv(os.path.join(dir_path, f"{safe_name}.csv"), index=False)
    return results_dfs

# INFERENCE

In [None]:
# Providers and models to evaluate. Each key must match a `<provider>_call`
# function (and a `clients` entry); each value lists the model names to run for
# that provider. Add models here and comment out the ones you don't want to test.
llms_to_test = {
    "gemini": [
        # "gemini-2.0-flash",
        "gemini-2.0-flash-lite"
    ],
    # "anthropic": ["claude-3-7-sonnet-20250219"],
    # "groq": [
    #     "qwen-2.5-32b",
    #     "llama-3.3-70b-versatile",
    # ]
}

In [None]:
# Run every row of `df` through each selected model; failed calls are printed and skipped.
results = test_llms(df, llms_to_test)


In [None]:
# Build one DataFrame per "<provider>_<model>" entry and save each as a CSV in the experiment folder.
dir_path="/content/drive/MyDrive/Colab Notebooks/data/llm_analysis/expt1"
results_dfs = process_results(results=results, save=True, dir_path=dir_path)

In [None]:
# Display the results table of the first provider/model combination.
results_dfs[0]

Unnamed: 0,provider,model,time,valid_answer,answer,source_of_information,total_tokens,question,context,history
0,gemini,gemini-2.0-flash-lite,1.1076,True,The overall deductible for the Gold 80 HMO pla...,[Gold 80 HMO.pdf],3838,What is the overall deductible for Gold 80 HMO?,1. Source of information:Gold 80 HMO.pdf \n Bo...,
1,gemini,gemini-2.0-flash-lite,1.0693,True,The overall deductible for Platinum 90 HMO is ...,[Platinum 90 HMO.pdf],3919,What is the overall deductible for Platinum 90...,1. Source of information:Platinum 90 HMO.pdf \...,
2,gemini,gemini-2.0-flash-lite,0.6192,False,,[],4577,What is my overall deductible?,1. Source of information:Bronze 60 HMO.pdf \n ...,
3,gemini,gemini-2.0-flash-lite,1.179,True,<em>Cosmetic surgery</em> is generally not cov...,"[IEHP DualChoice Member Handbook.pdf, IEHP Dua...",3910,Is cosmetic surgery included in my plan?,1. Source of information:IEHP Covered HMO Plan...,
4,gemini,gemini-2.0-flash-lite,1.5246,True,"<p>If you are pregnant, the childbirth/deliver...","[Platinum 90 HMO.pdf, Platinum 90 HMO AI AN.pdf]",4806,How much do I need to pay for child birth unde...,1. Source of information:Platinum 90 HMO.pdf \...,
5,gemini,gemini-2.0-flash-lite,1.4343,True,You will pay $8 copayment/visit for an X-ray i...,[Silver 94 HMO.pdf],4773,How much do I need to pay to get an X-ray in S...,1. Source of information:Silver 94 HMO.pdf \n ...,
6,gemini,gemini-2.0-flash-lite,3.1194,True,You can access your health information through...,"[IEHP DualChoice Member Handbook.pdf, IEHP Dua...",3883,How can I access my health information?,1. Source of information:IEHP DualChoice Membe...,
7,gemini,gemini-2.0-flash-lite,0.8526,True,You can fax [LIBERTY] at (877) 831-6019.,[Liberty Dental Plan.pdf],3779,What's your fax number?,1. Source of information:Liberty Dental Plan.p...,
8,gemini,gemini-2.0-flash-lite,2.2693,True,Ciertos servicios dentales están disponibles a...,"[IEHP DualChoice Member Handbook.pdf, IEHP Dua...",3646,Cuentame sobre el Programa Dental de Medi-Cal,1. Source of information:IEHP DualChoice Membe...,


In [None]:
# Display the raw results dictionary (keyed by "<provider>_<model>").
results