In [1]:
from sentence_transformers import SentenceTransformer
from qdrant_client import QdrantClient
from qdrant_client.http.models import PointStruct
import uuid

In [2]:
# Qdrant client pointed at the server URL http://localhost:6333.
client = QdrantClient("http://localhost:6333")
# Sentence-embedding model; its encode() output is what gets stored in and searched against Qdrant.
model = SentenceTransformer("all-MiniLM-L6-v2")

In [3]:
def insert_doc(doc_id, text, metadata):
    """Embed ``text`` and upsert it into the ``documents`` collection.

    The point id is derived deterministically from ``doc_id`` (UUIDv5), so
    re-running the ingestion cell overwrites the existing point instead of
    storing a duplicate under a fresh random id.

    Args:
        doc_id: Caller-side identifier of the document. When ``None`` a random
            UUID is used as the point id.
        text: Text to embed; also stored in the payload under ``"text"``.
        metadata: Mapping of extra payload fields (e.g. ``{"source": ...}``);
            ``None`` is treated as an empty mapping.
    """
    if doc_id is None:
        point_id = str(uuid.uuid4())
    else:
        # Same doc_id -> same point id, which makes the upsert idempotent.
        point_id = str(uuid.uuid5(uuid.NAMESPACE_URL, str(doc_id)))

    vector = model.encode(text).tolist()
    client.upsert(
        collection_name="documents",
        points=[{
            "id": point_id,
            "vector": vector,
            # Keep the original identifier retrievable; "text" always wins over metadata.
            "payload": {"doc_id": doc_id, **(metadata or {}), "text": text},
        }],
    )

In [4]:
def search_context(query, top_k=3):
    """Return the ``top_k`` points of the ``documents`` collection closest to ``query``.

    The query text is embedded with the same sentence-embedding model used at
    insertion time. ``client.search`` is deprecated in recent qdrant-client
    releases (it emits a warning on every call), so ``query_points`` is used
    when the installed client provides it; older clients fall back to
    ``search``.

    Args:
        query: Natural-language query string.
        top_k: Maximum number of hits to return.

    Returns:
        A list of scored points; each exposes the stored fields via ``.payload``.
    """
    vector = model.encode(query).tolist()
    if hasattr(client, "query_points"):
        response = client.query_points(
            collection_name="documents",
            query=vector,
            limit=top_k,
        )
        # query_points wraps the hits in a response object; callers expect the bare list.
        return response.points
    return client.search("documents", query_vector=vector, limit=top_k)


In [None]:
# insert_doc("doc1", "Tesla stated in Q2 2023 that margins were under pressure.", {"source": "10Q_Q2_2023"})

In [None]:
import json

# Load the parsed script entries that the ingestion loop inserts into Qdrant.
# The encoding is explicit because open() otherwise uses the platform's locale
# encoding, which can mis-decode non-ASCII dialogue text on some systems.
with open("./parsed_script.json", "r", encoding="utf-8") as file:
    movie_dialogues = json.load(file)

#print(movie_dialogues)


In [6]:
from tqdm import tqdm

# Embed and upsert every parsed script entry while showing a progress bar.
progress = tqdm(movie_dialogues, desc="Inserting documents")
for row in progress:
    insert_doc(row["doc_id"], row["text"], {"source": row["source"]})


Inserting documents: 100%|███████████████████████████████████████████████████████████| 155/155 [00:11<00:00, 13.64it/s]


In [7]:
def build_prompt(query, contexts):
    """Assemble the grounded prompt sent to the LLM.

    Args:
        query: The user's question.
        contexts: Iterable of hits whose ``.payload`` holds ``"text"`` and
            ``"source"`` entries; each becomes one line of the facts section.

    Returns:
        A prompt string consisting of the system instruction, the facts (one
        per line, each tagged with its source) and the user question. The
        string starts and ends with a newline.
    """
    facts = []
    for hit in contexts:
        payload = hit.payload
        facts.append(f"{payload['text']} [source: {payload['source']}]")

    # Leading/trailing empty strings reproduce the newline at both ends of the prompt.
    sections = [
        "",
        "System: Use only the facts below to answer. If unsure, say 'I don't know.'",
        "Facts:",
        "\n".join(facts),
        f"User: {query}",
        "",
    ]
    return "\n".join(sections)


In [8]:
from openai import OpenAI

# The API key is taken from the OPENAI_API_KEY environment variable (the
# client's default lookup) rather than being written into the notebook, so the
# secret never ends up in saved or shared copies of this file.
openai_client = OpenAI()
def query_llm(prompt, model_name="gpt-3.5-turbo", temperature=0.3, max_tokens=256):
    """Send ``prompt`` as a single user message and return the model's reply text.

    Args:
        prompt: Full prompt text (see ``build_prompt`` for the grounded variant).
        model_name: Chat model to query; defaults to the model this notebook
            was written against.
        temperature: Sampling temperature passed to the API.
        max_tokens: Upper bound on the length of the generated reply.

    Returns:
        The ``content`` of the first choice's message.
    """
    response = openai_client.chat.completions.create(
        model=model_name,
        messages=[{"role": "user", "content": prompt}],
        temperature=temperature,
        max_tokens=max_tokens,
    )
    return response.choices[0].message.content


In [9]:
# End-to-end check: retrieve context for the question, build the grounded prompt, ask the LLM.
llm_input = "What did Tesla say about Q2?"
contexts = search_context(llm_input)
prompt = build_prompt(llm_input, contexts) # this is a RAG query
# prompt = build_prompt(llm_input, []) # this is a non RAG query
print(query_llm(prompt))


  return client.search("documents", query_vector=vector, limit=top_k)


Tesla stated in Q2 2023 that margins were under pressure.


In [None]:
# print(query_llm("Tell me a good joke"))

In [10]:
from transformers import pipeline

# Text-classification pipeline loaded from the microsoft/deberta-large-mnli checkpoint;
# verify_claim() below returns the label it predicts.
nli = pipeline("text-classification", model="microsoft/deberta-large-mnli")

def verify_claim(claim, context):
    """Classify whether ``context`` supports ``claim`` with the NLI pipeline.

    The pair is passed as premise (``context``) and hypothesis (``claim``) via
    the pipeline's ``text`` / ``text_pair`` inputs, so the tokenizer inserts
    the model's own separator tokens. Hand-joining the two strings with a
    literal ``</s>`` in claim-first order feeds the model a malformed,
    reversed pair.

    Args:
        claim: Statement to check (the hypothesis).
        context: Evidence text the claim is checked against (the premise).

    Returns:
        The predicted label string, e.g. ``'ENTAILMENT'``, ``'NEUTRAL'`` or
        ``'CONTRADICTION'``.
    """
    result = nli({"text": context, "text_pair": claim})
    # Depending on the transformers version a single input yields a dict or a one-item list.
    if isinstance(result, list):
        result = result[0]
    return result["label"]


Some weights of the model checkpoint at microsoft/deberta-large-mnli were not used when initializing DebertaForSequenceClassification: ['config']
- This IS expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Device set to use cpu


In [11]:
# Sanity check of the verifier: the claim asserts something the context does not state.
claim = "Tesla promised 30% margin in Q2"
context = "Tesla said margins were under pressure in Q2."
print(verify_claim(claim, context))  # EXPECTED: 'CONTRADICTION' or 'NEUTRAL'


CONTRADICTION


In [36]:
def score_response(claim, contexts):
    """Check ``claim`` against every retrieved context and summarise the verdicts.

    Args:
        claim: Text to verify (typically the LLM answer).
        contexts: Iterable of hits whose ``.payload["text"]`` is the evidence.

    Returns:
        A dict with the number of ``"ENTAILMENT"`` verdicts (``"supported"``),
        the number of ``"CONTRADICTION"`` verdicts (``"contradicted"``) and
        ``"hallucination_score"`` = contradicted / (supported + 1).
    """
    labels = []
    for hit in contexts:
        labels.append(verify_claim(claim, hit.payload["text"]))

    n_supported = labels.count("ENTAILMENT")
    n_contradicted = labels.count("CONTRADICTION")

    summary = {"supported": n_supported, "contradicted": n_contradicted}
    # The +1 keeps the ratio defined when nothing supports the claim.
    summary["hallucination_score"] = n_contradicted / (n_supported + 1)
    return summary


In [13]:
def fallback_handler(score_data):
    """Turn a score dict into a user-facing status message.

    Args:
        score_data: Mapping with a numeric ``'hallucination_score'`` entry.

    Returns:
        The warning message when the score is strictly above 0.5, otherwise
        the "looks good" message.
    """
    flagged = score_data['hallucination_score'] > 0.5
    return (
        "⚠️ Potential hallucination. Would you like to rephrase?"
        if flagged
        else "✅ Looks good."
    )


In [14]:
def tag_response(response, contexts, model_version="gpt-3.5-turbo", verified=True):
    """Attach provenance metadata to an answer.

    Args:
        response: The answer text.
        contexts: Iterable of hits whose ``.payload["source"]`` names the
            document each fact came from.
        model_version: Identifier of the model that produced the answer.
            Defaults to the model used elsewhere in this notebook.
        verified: Whether the answer passed verification. Defaults to ``True``
            for backward compatibility; callers that have a score should pass
            the real outcome.

    Returns:
        A dict with ``"answer"``, ``"sources"``, ``"model_version"`` and
        ``"verified"`` entries.
    """
    return {
        "answer": response,
        "sources": [c.payload["source"] for c in contexts],
        "model_version": model_version,
        "verified": verified,
    }


In [15]:
# Retrieve the passages closest to the question; they serve as evidence for the scoring cell further down.
# Note: `prompt` here is the bare question, not a prompt produced by build_prompt().
prompt = "Who played finger football with Tony Stark?"
contexts_list = search_context(prompt) 

  return client.search("documents", query_vector=vector, limit=top_k)


In [16]:

# Ask the LLM with the bare question only: the retrieved contexts are not included in this prompt.
gpt_claim = query_llm(prompt)


In [17]:
# Verify the LLM's answer against the retrieved passages, then map the score to a status message.
score = score_response(gpt_claim, contexts_list)
print(score)
fallback_handler(score)

{'supported': 0, 'contradicted': 3, 'hallucination_score': 3.0}


'⚠️ Potential hallucination. Would you like to rephrase?'

In [38]:
def truthnet_query(user_input):
    """Run the full retrieve -> generate -> verify pipeline for one question.

    Args:
        user_input: The user's question.

    Returns:
        A dict with the LLM ``"answer"``, the verification ``"score"``, a
        provenance ``"trace"`` and the ``"fallback"`` status message. The
        trace's ``"verified"`` flag reflects the score: it is ``False``
        whenever the hallucination score exceeds the 0.5 threshold that also
        triggers the fallback warning.
    """
    # For a non-RAG variant, skip the retrieval and pass [] as contexts to build_prompt.
    contexts = search_context(user_input)
    prompt = build_prompt(user_input, contexts)
    answer = query_llm(prompt)
    score = score_response(answer, contexts)
    fallback_msg = fallback_handler(score)
    trace = tag_response(answer, contexts)
    # tag_response stamps "verified": True by default; an answer flagged as a
    # potential hallucination must not be reported as verified.
    trace["verified"] = score["hallucination_score"] <= 0.5
    return {
        "answer": answer,
        "score": score,
        "trace": trace,
        "fallback": fallback_msg
    }


In [24]:
import json

# Load the evaluation questions. The encoding is explicit because open()
# otherwise uses the platform's locale encoding.
with open("questions.json", "r", encoding="utf-8") as file:
    questions = json.load(file)
print(type(questions))
print(questions)

<class 'list'>
[{'question_1': 'What game are Tony Stark and Nebula playing aboard the Benatar?'}, {'question_2': 'What advice does Clint Barton give Lila about using three fingers while shooting an arrow?'}, {'question_3': 'What does Clint Barton say just before Lila disappears?'}, {'question_4': 'What does Tony Stark record in his helmet for Pepper while stranded in space?'}, {'question_5': 'What line does Tony say after finishing his recording and preparing to drift off?'}, {'question_6': 'How does Tony react when he sees Rocket for the first time?'}, {'question_7': "What is Thanos' answer when Nebula asks what he would do after completing his plan?"}, {'question_8': 'Who suggests using the Infinity Stones to bring everyone back?'}, {'question_9': 'What does Thor say after beheading Thanos?'}, {'question_10': "What does Natasha say when asked how she's handling an underwater earthquake?"}, {'question_11': 'What does Steve Rogers say is still out there, despite the state of the world

In [None]:
results = {}

# Every entry of `questions` is a dict keyed "question_<n>", with n counted from 1.
numbered_rows = enumerate(questions, start=1)
for number, row in tqdm(numbered_rows, total=len(questions)):
    key = f"question_{number}"
    results[key] = truthnet_query(row[key])

  return client.search("documents", query_vector=vector, limit=top_k)
 32%|██████████████████████████▌                                                        | 8/25 [05:55<16:54, 59.66s/it]

In [None]:
# save results
# Serialise the collected answers, scores and traces, then write them out in one go.
with open("results.json", "w") as f:
    f.write(json.dumps(results, indent=2))