<a href="https://colab.research.google.com/github/Sai-Keerthi338/Lie_detector_for_AI/blob/main/Lie_detection_core_logic.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Import required libraries after installing libraries locally
from sentence_transformers import SentenceTransformer, util
import torch
import faiss
import pdfplumber
import os
import re
import numpy as np

# Load embedding model
embed_model = SentenceTransformer('all-MiniLM-L6-v2')

# File paths for storage
embeddings_file = "trusted_embeddings.pt"
texts_file = "trusted_chunks.pt"

# FAISS index over the trusted embeddings. It stays None until
# build_faiss_index() creates it, so code that reads `index` before the first
# build sees "not built" instead of hitting a NameError.
index = None

# Load previous embeddings if they exist
# NOTE(review): torch.load unpickles the file contents; only point these paths
# at files this script wrote itself.
if os.path.exists(embeddings_file) and os.path.exists(texts_file):
    # map_location keeps the load working when the file was saved on a GPU
    # runtime and is reopened on a CPU-only one, and places the stored
    # embeddings on the same device the model encodes new ones on.
    trusted_embeddings = torch.load(embeddings_file, map_location=embed_model.device)
    trusted_chunks = torch.load(texts_file)
else:
    trusted_embeddings = torch.tensor([])
    trusted_chunks = []

# Build FAISS index (after embeddings are loaded)
def build_faiss_index():
    """Rebuild the global FAISS index from ``trusted_embeddings``.

    Sets the module-level ``index`` to a flat L2 index holding every stored
    embedding, or to ``None`` when there are no embeddings. Prints a short
    status line either way.
    """
    global index
    has_embeddings = trusted_embeddings.nelement() != 0
    if has_embeddings:
        # The second dimension of the embedding matrix is the vector size.
        index = faiss.IndexFlatL2(trusted_embeddings.shape[1])
        # FAISS consumes NumPy arrays, so move the tensor to host memory first.
        vectors = trusted_embeddings.cpu().numpy()
        index.add(vectors)
        print(f" FAISS index built with {len(trusted_chunks)} chunks.")
    else:
        index = None
        print(" No embeddings to index.")

# Extract text from PDF
def extract_text_from_pdf(pdf_path):
    """Return the text of every page in the PDF at ``pdf_path``.

    Each page that yields text contributes that text followed by a newline;
    pages for which no text is extracted are skipped.
    """
    page_texts = []
    with pdfplumber.open(pdf_path) as document:
        for page in document.pages:
            extracted = page.extract_text()
            if not extracted:
                # Nothing extractable on this page (None or empty string).
                continue
            page_texts.append(extracted + "\n")
    return "".join(page_texts)

# Chunk text into ~100-word blocks
def chunk_text(text, max_words=100):
    """Split ``text`` into chunks of roughly ``max_words`` words.

    The text is split into sentences on ``". "`` and newlines, and consecutive
    sentences are packed into a chunk until adding the next one would exceed
    ``max_words``. A single sentence longer than ``max_words`` is kept whole
    as its own chunk rather than being cut.

    Args:
        text: The raw text to split.
        max_words: Soft upper bound on the number of words per chunk.

    Returns:
        A list of non-empty chunk strings; an empty list when ``text``
        contains no words.
    """
    chunks = []
    current, current_words = [], 0
    for sentence in re.split(r'\. |\n', text):
        sentence = sentence.strip()
        if not sentence:
            # Blank lines and trailing separators produce empty pieces.
            continue
        n_words = len(sentence.split())
        # Only flush when there is something to flush; otherwise an oversized
        # first sentence would emit an empty chunk.
        if current and current_words + n_words > max_words:
            chunks.append(" ".join(current))
            current, current_words = [], 0
        current.append(sentence)
        # Keep a running count instead of re-joining the chunk every iteration.
        current_words += n_words
    if current:
        chunks.append(" ".join(current))
    return chunks

# Add a new trusted PDF to the system
def add_pdf_to_index(pdf_path):
    """Extract, chunk, embed and index the text of a trusted PDF.

    Chunks that are empty, already stored, or repeated within this PDF
    (compared case-insensitively after stripping whitespace) are skipped.
    When new chunks remain, their embeddings are appended to
    ``trusted_embeddings``, both stores are saved to disk and the FAISS index
    is rebuilt.

    Args:
        pdf_path: Path of the PDF to add.
    """
    global trusted_chunks, trusted_embeddings
    text = extract_text_from_pdf(pdf_path)

    seen = {c.strip().lower() for c in trusted_chunks}
    new_chunks = []
    for candidate in chunk_text(text):
        key = candidate.strip().lower()
        if not key or key in seen:
            continue
        # Record the key so a repeat later in the same PDF is skipped too.
        seen.add(key)
        new_chunks.append(candidate)

    if not new_chunks:
        print(" No new chunks to add.")
        # Embeddings restored from disk have no index yet; build it here so
        # answers can still be scored when this PDF contributed nothing new.
        if trusted_embeddings.nelement() != 0 and globals().get("index") is None:
            build_faiss_index()
        return

    new_embeddings = embed_model.encode(new_chunks, convert_to_tensor=True)
    if trusted_embeddings.nelement() == 0:
        trusted_embeddings = new_embeddings
    else:
        # Stored and freshly encoded embeddings can sit on different devices
        # (e.g. loaded onto CPU vs. encoded on GPU); torch.cat needs one device.
        new_embeddings = new_embeddings.to(trusted_embeddings.device)
        trusted_embeddings = torch.cat((trusted_embeddings, new_embeddings), dim=0)
    trusted_chunks.extend(new_chunks)

    # Save updated data
    torch.save(trusted_embeddings, embeddings_file)
    torch.save(trusted_chunks, texts_file)

    build_faiss_index()
    print(f" Added {len(new_chunks)} new chunks from {pdf_path}")

#  Check similarity and flag hallucination
def similarity_score(answer, k=1):
    """Score how closely ``answer`` matches the nearest trusted chunk.

    Args:
        answer: The AI-generated text to check.
        k: Number of nearest neighbours to request from the index; only the
            closest one contributes to the score.

    Returns:
        A float similarity score (higher means closer to trusted text), or
        ``0.0`` when no index has been built.
    """
    # `index` is only created by build_faiss_index(); look it up defensively so
    # a missing global is reported as "not built" instead of raising NameError.
    active_index = globals().get("index")
    if active_index is None:
        print(" Index not built.")
        return 0.0

    # FAISS expects a 2-D float32 array with one row per query vector.
    query = np.asarray(embed_model.encode(answer), dtype="float32").reshape(1, -1)
    distances, _ = active_index.search(query, k)

    # IndexFlatL2 reports squared L2 distances. For unit-length vectors
    # d^2 = 2 - 2*cos, so 1 - d^2/2 is the cosine similarity.
    # NOTE(review): assumes the model emits unit-normalised embeddings - confirm.
    score = 1.0 - float(distances[0][0]) / 2.0
    print(f" Similarity Score: {score:.3f}")
    return score

def detect_by_similarity(answer, threshold=0.6):
    """Report whether ``answer`` scores below ``threshold``.

    Returns a truthy value when the similarity score of ``answer`` is lower
    than ``threshold``, i.e. when the answer is likely a hallucination.
    """
    # A score under the threshold means no trusted chunk is close enough.
    return similarity_score(answer) < threshold

#  Test an AI answer
def test_answer(answer):
    """Print ``answer`` followed by a hallucination verdict for it."""
    print(f"\n AI Answer: {answer}")
    flagged = detect_by_similarity(answer)
    verdict = (
        " Flag: This might be a hallucination."
        if flagged
        else " Passed: Looks trustworthy."
    )
    print(verdict)
# Add trusted files.
# NOTE(review): "file_path" looks like a placeholder - presumably it should be
# replaced with the path of a real trusted PDF before running.
add_pdf_to_index("file_path")
# Give the AI answer as input.
# NOTE(review): "ai_answer" looks like a placeholder for the text to check.
test_answer("ai_answer")
