In [1]:
import os

from dotenv import load_dotenv
from posthog.ai.anthropic import Anthropic

# Load environment variables from openai.env file
load_dotenv("../openai.env")

# Read the OPENAI_API_KEY from the environment
api_key = os.getenv("OPENAI_API_KEY")
serpapi_key = os.getenv("SERPAPI_API_KEY")
deepseek_api_key = os.getenv("DEEPSEEK_API_KEY")
Anthropic_api_key = os.getenv("ANTHROPIC_API_KEY")

os.environ["OPENAI_API_KEY"] = api_key
os.environ["SERPAPI_API_KEY"] = serpapi_key
os.environ["DEEPSEEK_API_KEY"] = deepseek_api_key
os.environ["ANTHROPIC_API_KEY"] = Anthropic_api_key

In [2]:
import json
import random
from langchain.chat_models import ChatOpenAI
from langchain.schema import SystemMessage, HumanMessage
from langchain_anthropic import ChatAnthropic

In [3]:
# write jsonl
def dump_jsonl(obj, path, append=True):
    mode = "w"
    with open(path, mode, encoding="utf-8") as fw:
        fw.write(json.dumps(obj, ensure_ascii=False) + "\n")

In [4]:
# load dataset
filepath = '../data/qa_data.json'
data = []
with open(filepath, 'r', encoding='utf-8') as f:
    for line in f:
        line = line.strip()
        if not line:
            continue
        obj = json.loads(line)
        data.append(obj)

data

[{'knowledge': "Arthur's Magazine (1844–1846) was an American literary periodical published in Philadelphia in the 19th century.First for Women is a woman's magazine published by Bauer Media Group in the USA.",
  'question': "Which magazine was started first Arthur's Magazine or First for Women?",
  'right_answer': "Arthur's Magazine",
  'hallucinated_answer': 'First for Women was started first.'},
 {'knowledge': 'The Oberoi family is an Indian family that is famous for its involvement in hotels, namely through The Oberoi Group.The Oberoi Group is a hotel company with its head office in Delhi.',
  'question': 'The Oberoi family is part of a hotel company that has a head office in what city?',
  'right_answer': 'Delhi',
  'hallucinated_answer': "The Oberoi family's hotel company is based in Mumbai."},
 {'knowledge': 'Allison Beth "Allie" Goertz (born March 2, 1991) is an American musician. Goertz is known for her satirical songs based on various pop culture topics. Her videos are posted

In [5]:
#call different models
def evaluate_single_model(model_name, test_data,output_path):
    """
    Perform hallucination detection on a single sample (directly call ChatOpenAI without going through the Agent)
    Write the result to output_path. Return accuracy.
    """
    if "claude" in model_name:
        chatmodel = ChatAnthropic(
            model="claude-3-haiku-20240307",
            temperature=0,
            max_tokens=1024,
            timeout=None,
            max_retries=2,
        )

    elif "gpt" in model_name:
        api_key = os.getenv("OPENAI_API_KEY")
        chatmodel = ChatOpenAI(
            model=model_name,
            temperature=0,
            streaming=False,
            openai_api_key=api_key
        )

    SYSTEM_PROMPT = """
    You are a hallucination detector. Given a question and an answer, your objective is to determine if the provided answer contains non-factual or hallucinated information. You SHOULD give your judgement based on the following hallucination types and the world knowledge.

    Based on the provided knowledge, question, and answer, determine whether the answer contains any non-factual or hallucinated information.

    **REQUIREMENTS:**
    1. Summarize the answer. If it contains non-factual information, return **No**
    2. Summarize the answer. If it is factual information, return **Yes**
    3. You must return your final judgement in the format of "**Yes**" or "**No**".
    """

    correct = 0
    incorrect = 0

    for i, sample in enumerate(test_data):
        knowledge = sample["knowledge"]
        question = sample["question"]

        if random.random() > 0.5:
            answer = sample["hallucinated_answer"]
            ground_truth = "No"
        else:
            answer = sample["right_answer"]
            ground_truth = "Yes"

        human_content = (
            f"#Question#: {question}\n"
            f"#Answer#: {answer}\n"
            f"#Your Judgement#:"
        )
        messages = [
            SystemMessage(content=SYSTEM_PROMPT),
            HumanMessage(content=human_content),
        ]

        response = chatmodel(messages)
        text = response.content.strip()

        if "Yes" in text and "No" not in text:
            pred = "Yes"
        else:
            pred = "No"

        if pred == ground_truth:
            correct += 1
        else:
            incorrect += 1

        result = {
            "knowledge": knowledge,
            "question": question,
            "answer": answer,
            "ground_truth": ground_truth,
            "prediction": pred,
        }
        dump_jsonl(result, output_path, append=True)
        print(f"[{i+1}/{len(test_data)}], {knowledge}, {question}, {answer} gt={ground_truth} pred={pred}")

    total = len(test_data)
    accuracy = correct / total if total else 0.0
    print(f"\nTotal: {total}, Correct: {correct}, Incorrect: {incorrect}, Accuracy: {accuracy:.4f}")
    return accuracy

In [6]:
random.seed(12)
sampled_data = random.sample(data, k=200)

In [7]:
sampled_data

[{'knowledge': 'Mark Pulemau Tuinei (March 31, 1960 – May 6, 1999) was an American football offensive tackle in the National Football League for the Dallas Cowboys. Known as a "gentle giant", his career lasted for 15 years (1983–1997) and his ability to protect quarterback Troy Aikman and to run-block for running back Emmitt Smith helped them win Super Bowls in 1992, 1993, and 1995 and the NFC East Division in 1985 and 1992-96.Troy Kenneth Aikman (born November 21, 1966) is a former American football quarterback who played for the Dallas Cowboys in the National Football League (NFL).',
  'question': 'What is the birthdate of this former American football quarterback who played for the Dallas Cowboys and who was protected by his offensive tackle Mark Tuinei?',
  'right_answer': 'November 21, 1966',
  'hallucinated_answer': 'Tuinei and Aikman were teammates.'},
 {'knowledge': ' He attended Merrimack High School in Merrimack, New Hampshire.Merrimack High School (MHS) is the public seconda

In [9]:
# test
evaluate_single_model("gpt-3.5-turbo", sampled_data, "../outputs/gpt_output_200_1.json")

  chatmodel = ChatOpenAI(
  response = chatmodel(messages)


[1/200], Mark Pulemau Tuinei (March 31, 1960 – May 6, 1999) was an American football offensive tackle in the National Football League for the Dallas Cowboys. Known as a "gentle giant", his career lasted for 15 years (1983–1997) and his ability to protect quarterback Troy Aikman and to run-block for running back Emmitt Smith helped them win Super Bowls in 1992, 1993, and 1995 and the NFC East Division in 1985 and 1992-96.Troy Kenneth Aikman (born November 21, 1966) is a former American football quarterback who played for the Dallas Cowboys in the National Football League (NFL)., What is the birthdate of this former American football quarterback who played for the Dallas Cowboys and who was protected by his offensive tackle Mark Tuinei?, November 21, 1966 gt=Yes pred=Yes
[2/200],  He attended Merrimack High School in Merrimack, New Hampshire.Merrimack High School (MHS) is the public secondary school of the town of Merrimack, New Hampshire. About 1,400 students from grades 9 through 12 at

0.645

In [8]:
evaluate_single_model("gpt-3.5-turbo", sampled_data, "../outputs/gpt_output_200_2.json")

  chatmodel = ChatOpenAI(
  response = chatmodel(messages)


[1/200], Mark Pulemau Tuinei (March 31, 1960 – May 6, 1999) was an American football offensive tackle in the National Football League for the Dallas Cowboys. Known as a "gentle giant", his career lasted for 15 years (1983–1997) and his ability to protect quarterback Troy Aikman and to run-block for running back Emmitt Smith helped them win Super Bowls in 1992, 1993, and 1995 and the NFC East Division in 1985 and 1992-96.Troy Kenneth Aikman (born November 21, 1966) is a former American football quarterback who played for the Dallas Cowboys in the National Football League (NFL)., What is the birthdate of this former American football quarterback who played for the Dallas Cowboys and who was protected by his offensive tackle Mark Tuinei?, November 21, 1966 gt=Yes pred=Yes
[2/200],  He attended Merrimack High School in Merrimack, New Hampshire.Merrimack High School (MHS) is the public secondary school of the town of Merrimack, New Hampshire. About 1,400 students from grades 9 through 12 at

0.625

In [9]:
evaluate_single_model("gpt-3.5-turbo", sampled_data, "../outputs/gpt_output_200_3.json")

[1/200], Mark Pulemau Tuinei (March 31, 1960 – May 6, 1999) was an American football offensive tackle in the National Football League for the Dallas Cowboys. Known as a "gentle giant", his career lasted for 15 years (1983–1997) and his ability to protect quarterback Troy Aikman and to run-block for running back Emmitt Smith helped them win Super Bowls in 1992, 1993, and 1995 and the NFC East Division in 1985 and 1992-96.Troy Kenneth Aikman (born November 21, 1966) is a former American football quarterback who played for the Dallas Cowboys in the National Football League (NFL)., What is the birthdate of this former American football quarterback who played for the Dallas Cowboys and who was protected by his offensive tackle Mark Tuinei?, Tuinei and Aikman were teammates. gt=No pred=No
[2/200],  He attended Merrimack High School in Merrimack, New Hampshire.Merrimack High School (MHS) is the public secondary school of the town of Merrimack, New Hampshire. About 1,400 students from grades 9

0.61

In [10]:
evaluate_single_model("gpt-3.5-turbo", sampled_data, "../outputs/gpt_output_200_4.json")

[1/200], Mark Pulemau Tuinei (March 31, 1960 – May 6, 1999) was an American football offensive tackle in the National Football League for the Dallas Cowboys. Known as a "gentle giant", his career lasted for 15 years (1983–1997) and his ability to protect quarterback Troy Aikman and to run-block for running back Emmitt Smith helped them win Super Bowls in 1992, 1993, and 1995 and the NFC East Division in 1985 and 1992-96.Troy Kenneth Aikman (born November 21, 1966) is a former American football quarterback who played for the Dallas Cowboys in the National Football League (NFL)., What is the birthdate of this former American football quarterback who played for the Dallas Cowboys and who was protected by his offensive tackle Mark Tuinei?, Tuinei and Aikman were teammates. gt=No pred=No
[2/200],  He attended Merrimack High School in Merrimack, New Hampshire.Merrimack High School (MHS) is the public secondary school of the town of Merrimack, New Hampshire. About 1,400 students from grades 9

0.63

In [11]:
evaluate_single_model("gpt-3.5-turbo", sampled_data, "../outputs/gpt_output_200_5.json")

[1/200], Mark Pulemau Tuinei (March 31, 1960 – May 6, 1999) was an American football offensive tackle in the National Football League for the Dallas Cowboys. Known as a "gentle giant", his career lasted for 15 years (1983–1997) and his ability to protect quarterback Troy Aikman and to run-block for running back Emmitt Smith helped them win Super Bowls in 1992, 1993, and 1995 and the NFC East Division in 1985 and 1992-96.Troy Kenneth Aikman (born November 21, 1966) is a former American football quarterback who played for the Dallas Cowboys in the National Football League (NFL)., What is the birthdate of this former American football quarterback who played for the Dallas Cowboys and who was protected by his offensive tackle Mark Tuinei?, November 21, 1966 gt=Yes pred=Yes
[2/200],  He attended Merrimack High School in Merrimack, New Hampshire.Merrimack High School (MHS) is the public secondary school of the town of Merrimack, New Hampshire. About 1,400 students from grades 9 through 12 at

0.625

In [12]:
evaluate_single_model("claude-3-haiku-20240307", sampled_data, output_path="../outputs/claude3_output_200_1.json")

[1/200], Mark Pulemau Tuinei (March 31, 1960 – May 6, 1999) was an American football offensive tackle in the National Football League for the Dallas Cowboys. Known as a "gentle giant", his career lasted for 15 years (1983–1997) and his ability to protect quarterback Troy Aikman and to run-block for running back Emmitt Smith helped them win Super Bowls in 1992, 1993, and 1995 and the NFC East Division in 1985 and 1992-96.Troy Kenneth Aikman (born November 21, 1966) is a former American football quarterback who played for the Dallas Cowboys in the National Football League (NFL)., What is the birthdate of this former American football quarterback who played for the Dallas Cowboys and who was protected by his offensive tackle Mark Tuinei?, November 21, 1966 gt=Yes pred=No
[2/200],  He attended Merrimack High School in Merrimack, New Hampshire.Merrimack High School (MHS) is the public secondary school of the town of Merrimack, New Hampshire. About 1,400 students from grades 9 through 12 att

0.65

In [13]:
evaluate_single_model("claude-3-haiku-20240307", sampled_data, output_path="../outputs/claude3_output_200_2.json")

[1/200], Mark Pulemau Tuinei (March 31, 1960 – May 6, 1999) was an American football offensive tackle in the National Football League for the Dallas Cowboys. Known as a "gentle giant", his career lasted for 15 years (1983–1997) and his ability to protect quarterback Troy Aikman and to run-block for running back Emmitt Smith helped them win Super Bowls in 1992, 1993, and 1995 and the NFC East Division in 1985 and 1992-96.Troy Kenneth Aikman (born November 21, 1966) is a former American football quarterback who played for the Dallas Cowboys in the National Football League (NFL)., What is the birthdate of this former American football quarterback who played for the Dallas Cowboys and who was protected by his offensive tackle Mark Tuinei?, Tuinei and Aikman were teammates. gt=No pred=No
[2/200],  He attended Merrimack High School in Merrimack, New Hampshire.Merrimack High School (MHS) is the public secondary school of the town of Merrimack, New Hampshire. About 1,400 students from grades 9

0.6

In [14]:
evaluate_single_model("claude-3-haiku-20240307", sampled_data, output_path="../outputs/claude3_output_200_3.json")

[1/200], Mark Pulemau Tuinei (March 31, 1960 – May 6, 1999) was an American football offensive tackle in the National Football League for the Dallas Cowboys. Known as a "gentle giant", his career lasted for 15 years (1983–1997) and his ability to protect quarterback Troy Aikman and to run-block for running back Emmitt Smith helped them win Super Bowls in 1992, 1993, and 1995 and the NFC East Division in 1985 and 1992-96.Troy Kenneth Aikman (born November 21, 1966) is a former American football quarterback who played for the Dallas Cowboys in the National Football League (NFL)., What is the birthdate of this former American football quarterback who played for the Dallas Cowboys and who was protected by his offensive tackle Mark Tuinei?, Tuinei and Aikman were teammates. gt=No pred=No
[2/200],  He attended Merrimack High School in Merrimack, New Hampshire.Merrimack High School (MHS) is the public secondary school of the town of Merrimack, New Hampshire. About 1,400 students from grades 9

0.635

In [15]:
evaluate_single_model("claude-3-haiku-20240307", sampled_data, output_path="../outputs/claude3_output_200_4.json")

[1/200], Mark Pulemau Tuinei (March 31, 1960 – May 6, 1999) was an American football offensive tackle in the National Football League for the Dallas Cowboys. Known as a "gentle giant", his career lasted for 15 years (1983–1997) and his ability to protect quarterback Troy Aikman and to run-block for running back Emmitt Smith helped them win Super Bowls in 1992, 1993, and 1995 and the NFC East Division in 1985 and 1992-96.Troy Kenneth Aikman (born November 21, 1966) is a former American football quarterback who played for the Dallas Cowboys in the National Football League (NFL)., What is the birthdate of this former American football quarterback who played for the Dallas Cowboys and who was protected by his offensive tackle Mark Tuinei?, November 21, 1966 gt=Yes pred=No
[2/200],  He attended Merrimack High School in Merrimack, New Hampshire.Merrimack High School (MHS) is the public secondary school of the town of Merrimack, New Hampshire. About 1,400 students from grades 9 through 12 att

0.615

In [16]:
evaluate_single_model("claude-3-haiku-20240307", sampled_data, output_path="../outputs/claude3_output_200_5.json")

[1/200], Mark Pulemau Tuinei (March 31, 1960 – May 6, 1999) was an American football offensive tackle in the National Football League for the Dallas Cowboys. Known as a "gentle giant", his career lasted for 15 years (1983–1997) and his ability to protect quarterback Troy Aikman and to run-block for running back Emmitt Smith helped them win Super Bowls in 1992, 1993, and 1995 and the NFC East Division in 1985 and 1992-96.Troy Kenneth Aikman (born November 21, 1966) is a former American football quarterback who played for the Dallas Cowboys in the National Football League (NFL)., What is the birthdate of this former American football quarterback who played for the Dallas Cowboys and who was protected by his offensive tackle Mark Tuinei?, November 21, 1966 gt=Yes pred=No
[2/200],  He attended Merrimack High School in Merrimack, New Hampshire.Merrimack High School (MHS) is the public secondary school of the town of Merrimack, New Hampshire. About 1,400 students from grades 9 through 12 att

0.59

In [18]:
# avg accuracy
gpt_acc = (0.645 + 0.625 + 0.61 + 0.63 + 0.625) / 5
claude_acc = (0.65 + 0.6 + 0.635 + 0.615 + 0.59) / 5
print(gpt_acc)
print(claude_acc)

0.627
0.618
