In [1]:
import yaml
import logging

import pandas as pd 

from LM import LLM,JudgeLLM
from utils import load_environment,dump_in_jsonl

# Run the project's environment loader before any model client is created.
# NOTE(review): presumably this loads API keys / environment variables used by
# the LLM providers -- confirm against utils.load_environment.
load_environment()

In [2]:
# Configure root logging for this notebook session.
#
# force=True (Python 3.8+) removes any handlers that are already attached to
# the root logger before installing the new one. Without it, basicConfig() is
# a silent no-op whenever the root logger has been configured earlier (by the
# kernel or by an imported module), and the format below is never applied --
# the recorded outputs in this notebook show the default "LEVEL:name:message"
# layout instead of the timestamped one requested here.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    force=True,
)

# Logger for messages emitted by the notebook itself.
logger = logging.getLogger(__name__)

In [3]:
# Read the model roster from the YAML config that sits next to this notebook.
with open("models.yaml", mode="r") as f:
    models = yaml.safe_load(f)

# Split the roster into the models under test and the models acting as judges.
test_models, judge_models = models['TEST_MODELS'], models['JUDGE_MODELS']

In [4]:
# Maximum number of example rows to keep for this run.
LIMIT_SAMPLE_SIZE = 1

# Load the examples and keep only the leading LIMIT_SAMPLE_SIZE rows in one
# step, so re-running the cell always starts from the file on disk.
df = pd.read_csv("../data/examples.csv").iloc[:LIMIT_SAMPLE_SIZE]

# Location of the evaluation prompts handed to the judges further down.
evaluation_prompts_file_path = "../data/evaluation_prompts.csv"

In [5]:
# Wrap the first configured test model.
model_instance = LLM(test_models[0])

# Send the first sample's full prompt to that model, tagged as a "test" call.
first_prompt = df['full_prompt'].iloc[0]
test_model_output = model_instance.call_llm(first_prompt, intention="test", sample_id=0)

# Display the returned record.
test_model_output

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:LM:Successfully called : openai:gpt-3.5-turbo | Intention : test.


{'sample_id': 0,
 'test model': 'openai:gpt-3.5-turbo',
 'Encountered Problems': False,
 'test model response': 'Based on the information provided in the context document, your risk factors for dementia include high blood pressure, diabetes, obesity, lack of physical activity, poor diet, high alcohol consumption, low cognitive engagement, depression, traumatic brain injury, hearing loss, and social isolation. \n\nCognitive engagement is important because it is thought to support the development of a "cognitive reserve," which may protect against brain cell damage caused by dementia. Engaging in activities that challenge and stimulate the brain throughout life may help reduce the risk of developing dementia.'}

In [6]:
# Hand the test-model record to the project's dump_in_jsonl helper, targeting
# "test_responses.jsonl" (relative to the notebook's working directory).
# NOTE(review): whether the helper appends or overwrites is defined in utils -- verify.
dump_in_jsonl(test_model_output,"test_responses.jsonl")


# Judge: evaluate the test-model response with the judge models

In [7]:
# Create the judge wrapper from the judge-model entries read from models.yaml.
judges = JudgeLLM(judge_models)

# Show the instance to confirm it was constructed.
judges

<LM.JudgeLLM at 0x121942f90>

In [8]:
# Collect everything the judges need for the first sample: the original
# request and context from the dataframe, plus the test model's answer and
# identifier from the earlier call.
judge_kwargs = {
    "user_request": df['user_request'].iloc[0],
    "context_document": df['context_document'].iloc[0],
    "test_model_response": test_model_output['test model response'],
    "test_model": test_model_output['test model'],
    "sample_id": 0,
    "evaluation_prompt_file_path": evaluation_prompts_file_path,
}

# Run the judges on that sample and keep their results.
output = judges.calling_judges(**judge_kwargs)

INFO:httpx:HTTP Request: POST https://api.anthropic.com/v1/messages "HTTP/1.1 200 OK"
INFO:LM:Successfully called : anthropic:claude-3-5-sonnet-20240620 | Intention : judge.
INFO:LM:Accurate
INFO:LM:Successfully called : google_vertexai:gemini-1.5-pro | Intention : judge.
INFO:LM:Accurate


In [9]:
# Display the judges' results; the recorded output below is a list of dicts,
# each carrying the judged model, the judge model, its full response and a verdict.
output

[{'judged_model': 'openai:gpt-3.5-turbo',
  'sample_id': 0,
  'judge model': 'anthropic:claude-3-5-sonnet-20240620',
  'Encountered Problems': False,
  'judge model response': 'Sentence 1: Based on the information provided in the context document, your risk factors for dementia include high blood pressure, diabetes, obesity, lack of physical activity, poor diet, high alcohol consumption, low cognitive engagement, depression, traumatic brain injury, hearing loss, and social isolation.\nSentence 1 label: Accurate\n\nSentence 2: Cognitive engagement is important because it is thought to support the development of a "cognitive reserve," which may protect against brain cell damage caused by dementia.\nSentence 2 label: Accurate\n\nSentence 3: Engaging in activities that challenge and stimulate the brain throughout life may help reduce the risk of developing dementia.\nSentence 3 label: Accurate\n\nFinal Answer: Accurate',
  'verdict': 'Accurate'},
 {'judged_model': 'openai:gpt-3.5-turbo',
 