## Setup

In [None]:
from tqdm import tqdm
import pandas as pd

import os 

from dotenv import load_dotenv

load_dotenv()

from llama_index.core import Settings, load_index_from_storage, StorageContext 

from llama_index.embeddings.openai import OpenAIEmbedding

from llama_index.llms.openai import OpenAI
from llama_index.core.llms import ChatMessage

In [None]:
# Open the index store persisted under ./data/indices.
storage_context = StorageContext.from_defaults(persist_dir="./data/indices")
# Load the index and pin its embedding model explicitly. Settings.embed_model
# has not been configured at this point, so without the explicit argument the
# index would resolve the library's default embedding model when it is
# constructed. The index id suggests it was built with OpenAI's large
# embedding model -- TODO confirm against the indexing script.
vsi = load_index_from_storage(
    storage_context,
    index_id="openai_large_1224",
    embed_model=OpenAIEmbedding(model="text-embedding-3-large"),
)

In [None]:
# Make text-embedding-3-large the global default embedding model.
# NOTE(review): this runs after the index has already been loaded; confirm
# whether components created earlier pick up this setting.
Settings.embed_model = OpenAIEmbedding(model="text-embedding-3-large")

In [None]:
# Retriever over the loaded index with default arguments.
# NOTE(review): `retr` is not referenced again in the visible cells.
retr = vsi.as_retriever()

In [None]:
# Load the questions from plain-text files (each file has one question per line).
STROKE_FRAGEN_PATH = 'xxx'
CAROTIS_FRAGEN_PATH = 'xxx'


def _read_questions(path: str) -> list[str]:
    """Return the non-empty, whitespace-stripped lines of the text file at *path*."""
    # Explicit UTF-8 so non-ASCII characters decode the same on every platform
    # instead of depending on the locale's default encoding.
    with open(path, 'r', encoding='utf-8') as file:
        stripped = (line.strip() for line in file)
        # Skip blank lines so they are not sent to the models as empty questions.
        return [line for line in stripped if line]


stroke_fragen = _read_questions(STROKE_FRAGEN_PATH)
carotis_fragen = _read_questions(CAROTIS_FRAGEN_PATH)

# Combined question list: stroke questions first, then carotis questions.
fragen = stroke_fragen + carotis_fragen

In [None]:
def evaluate_questions(questions: list[str], model):
    """Answer every question through a query engine built on the global index.

    The engine is created from ``vsi`` with ``model`` as the LLM and
    ``text-embedding-3-large`` as the embedding model. Returns the
    ``.response`` of each query result, in the order of ``questions``.
    """
    engine = vsi.as_query_engine(
        llm=model,
        embed_model=OpenAIEmbedding(model="text-embedding-3-large"),
    )
    answers = []
    # tqdm only wraps the iterable to show a progress bar.
    for question in tqdm(questions):
        answers.append(engine.query(question).response)
    return answers

In [None]:
# Result table: one row per question; each later run adds one answer column.
df = pd.DataFrame({"frage": fragen})

## GPT-4o mini (no RAG)

In [None]:
# gpt-4o-mini client with temperature 0, called directly (without the index)
# in the loop below.
llm = OpenAI(model="gpt-4o-mini", temperature=0)

In [None]:
# Baseline without retrieval: send each question to the LLM as a single user
# message and keep only the text of the reply.
results = []
for frage in tqdm(fragen):
    reply = llm.chat([ChatMessage(role="user", content=frage)])
    results.append(reply.message.content)

df["gpt-4o-mini-no-rag"] = results

## GPT-4o mini (RAG)

In [None]:
# Same model as the baseline, but answered through the index-backed query engine.
llm = OpenAI(
    model="gpt-4o-mini",
    temperature=0,
)
results = evaluate_questions(questions=fragen, model=llm)
df["gpt-4o-mini"] = results

## Llama 3.1 405B

In [None]:
from llama_index.llms.together import TogetherLLM

In [None]:
# Llama 3.1 405B via Together; the API key is read from the environment.
llm = TogetherLLM(
    model="meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo",
    api_key=os.getenv("TOGETHER_API_KEY"),
    temperature=0,
)
results = evaluate_questions(questions=fragen, model=llm)
df["llama3.1_405B"] = results

## Mixtral

In [None]:
# Mixtral 8x22B via Together; the API key is read from the environment.
llm = TogetherLLM(
    model="mistralai/Mixtral-8x22B-Instruct-v0.1",
    api_key=os.getenv("TOGETHER_API_KEY"),
    temperature=0,
)
results = evaluate_questions(questions=fragen, model=llm)
df["mixtral"] = results

## Claude 3.5 Sonnet

In [None]:
from llama_index.llms.anthropic import Anthropic

In [None]:
# Claude 3.5 Sonnet answered through the index-backed query engine.
llm = Anthropic(
    model="claude-3-5-sonnet-20240620",
    temperature=0,
)
results = evaluate_questions(questions=fragen, model=llm)
df["claude_3_5_sonnet"] = results

In [None]:
# Write the question/answer table to CSV; index=True also writes the row index
# as the first column.
df.to_csv("xxx", index=True)