In [None]:
!pip install langchain-google-genai langchain langchain-community faiss-cpu pypdf
!pip install langchain-classic
!pip install -U langchain-huggingface
!pip install google-generativeai 
!pip install langchain-community sentence-transformers
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_classic.chains.retrieval_qa.base import RetrievalQA
import getpass

import os
# Read the key without echoing it; whitespace picked up while pasting is removed.
_api_key = getpass.getpass("Enter your API key:").strip()
if not _api_key:
    # Stop here instead of reporting success for an empty key.
    raise ValueError("No API key entered; GOOGLE_API_KEY was not set.")
os.environ["GOOGLE_API_KEY"] = _api_key
# Do not keep the secret around in a module-level/notebook variable.
del _api_key
print("API key set succesfully")

from langchain_community.document_loaders import PyPDFLoader
# Path of the PDF to index. Set the RAG_PDF_PATH environment variable to point
# at a different file; when it is unset the original hard-coded location is used.
pdf_path = os.environ.get(
    "RAG_PDF_PATH",
    r"C:\Users\LENOVO\Downloads\HealthCareSectorinindia-AnOverview.pdf",
)
# Load the PDF through PyPDFLoader; the result is what gets split into chunks below.
loader = PyPDFLoader(pdf_path)
documents = loader.load()

from langchain_text_splitters import RecursiveCharacterTextSplitter

# Split the loaded documents into chunks of size 1000 with an overlap of 150
# (units are whatever RecursiveCharacterTextSplitter's default length function counts).
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)
chunks = text_splitter.split_documents(documents)

!pip install hf_xet
# Embedding model used to vectorise the chunks for the FAISS index.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

from langchain_community.vectorstores import FAISS
# Index every chunk in a FAISS store using the embedding model above.
vectorstore = FAISS.from_documents(chunks, embeddings)

# The retriever is configured to return 3 documents per query.
retriever = vectorstore.as_retriever(
    search_kwargs={"k": 3},
)

from langchain_classic.memory import ConversationBufferMemory
from langchain_classic.chains import ConversationalRetrievalChain
from langchain_classic.chains.conversational_retrieval.base import ConversationalRetrievalChain

# Chat model that produces the answers.
llm = ChatGoogleGenerativeAI(model="models/gemini-2.5-flash", temperature=0.2)

# Conversation memory stored under "chat_history" and returned as message objects.
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

# Build the conversational RAG chain from the model, the retriever and the memory.
qa = ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever, memory=memory)

#context relevance score
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
def context_relevance_score(query_embeddings, docs_embeddings):
    """Return the mean cosine similarity between query and document embeddings.

    The similarities come from sklearn's ``cosine_similarity``; their mean is
    converted to a plain float and rounded to 3 decimals.

    NOTE: this file later defines another ``context_relevance_score`` (a
    word-overlap version) that replaces this one once the whole file has run.
    NOTE(review): presumably both arguments are 2-D arrays of shape
    (n_samples, n_features) -- confirm against the caller.
    """
    pairwise = cosine_similarity(query_embeddings, docs_embeddings)
    mean_similarity = float(pairwise.mean())
    return round(mean_similarity, 3)

#faithfulness score
from langchain_core.prompts import PromptTemplate

# Prompt used by the LLM-based faithfulness scorer: it shows the model the
# retrieved context and the generated answer, and asks for a single number
# between 0 and 1 describing how well the context supports the answer.
PROMPT = PromptTemplate(
    input_variables=["context","answer"],
    template="""
Context:
{context}
Answer:
{answer}

Rate how much the answer is supported by the context.
Score between 0 and 1.
Only return the number.
""")
def faithfulness_score(llm, context, answer):
    """Ask the LLM how well ``answer`` is supported by ``context``.

    The module-level ``PROMPT`` is filled in with both texts and sent through
    ``llm.invoke``. The reply is expected to be a bare number; if it is not
    (for example "Score: 0.8"), the first number found in the reply is used.

    Args:
        llm: Object exposing ``invoke(prompt)`` whose result has a ``content``
            string attribute.
        context: Retrieved text the answer should be grounded in.
        answer: Generated answer to evaluate.

    Returns:
        The parsed score rounded to 3 decimals.

    Raises:
        ValueError: If the reply contains no number at all.

    NOTE: this file later defines another ``faithfulness_score`` (a
    word-overlap version) that replaces this one once the whole file has run.
    """
    import re

    response = llm.invoke(PROMPT.format(context=context, answer=answer))
    reply = response.content.strip()

    try:
        # Fast path: the model followed the "only return the number" instruction.
        score = float(reply)
    except ValueError:
        # The model added extra words; fall back to the first number in the reply.
        found = re.search(r"[-+]?(?:\d+\.?\d*|\.\d+)", reply)
        if found is None:
            raise ValueError(
                f"LLM did not return a numeric faithfulness score: {reply!r}"
            ) from None
        score = float(found.group())

    return round(score, 3)


def context_relevance_score(query, context):
    """Return the share of distinct query words that also appear in the context.

    Both texts are lower-cased and split on whitespace. The result is rounded
    to 3 decimals; a query without any words scores 0.0.
    """
    query_terms = {word for word in query.lower().split()}
    context_terms = {word for word in context.lower().split()}

    if query_terms:
        matched = sum(1 for term in query_terms if term in context_terms)
        return round(matched / len(query_terms), 3)
    return 0.0

def faithfulness_score(answer, context):
    """Return the share of distinct answer words that also appear in the context.

    Both texts are lower-cased and split on whitespace. The result is rounded
    to 3 decimals; an answer without any words scores 0.0.
    """
    answer_terms = {word for word in answer.lower().split()}
    context_terms = {word for word in context.lower().split()}

    if answer_terms:
        supported = sum(1 for term in answer_terms if term in context_terms)
        return round(supported / len(answer_terms), 3)
    return 0.0

def completeness_score(answer, target_words=50):
    """Score answer length against a target word count.

    The answer is split on whitespace and its word count is divided by
    ``target_words``; the ratio is capped at 1.0 and rounded to 3 decimals.

    Args:
        answer: Generated answer text.
        target_words: Word count at which the score reaches 1.0. Defaults to
            50, the value that was previously hard-coded.

    Returns:
        A float between 0.0 and 1.0.

    Raises:
        ValueError: If ``target_words`` is not positive.
    """
    if target_words <= 0:
        raise ValueError("target_words must be a positive number")
    return round(min(len(answer.split()) / target_words, 1.0), 3)


#creating a trace logger
import csv
import os
from datetime import datetime, timezone

LOG_FILE = "trace_logs.csv"


def log_trace(data: dict):
    """Append one trace record to ``LOG_FILE`` as a CSV row.

    A ``timestamp`` field holding the current UTC time in ISO-8601 format is
    added to the written row. The caller's dict is left untouched. A header
    row is written when the log file does not exist yet or is still empty.

    Args:
        data: Mapping of column name to value for this record.
    """
    # Build a new dict so the caller's mapping is not modified as a side effect.
    row = {**data, "timestamp": datetime.now(timezone.utc).isoformat()}

    # An existing but empty file still needs a header, so check the size too.
    needs_header = not os.path.isfile(LOG_FILE) or os.path.getsize(LOG_FILE) == 0

    with open(LOG_FILE, "a", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=list(row.keys()))

        if needs_header:
            writer.writeheader()

        writer.writerow(row)


# Interactive question/answer loop. Each answered query is scored and appended
# to the trace log; typing "exit" (any letter case) ends the session.
while True:
    query = input("Ask: ")
    if query.lower() == "exit":
        break

    result = qa.invoke({"question": query})
    answer = result["answer"]
    print("\nAnswer:\n", answer)

    # Retrieve with the raw query to get the text the scores are computed on;
    # only the first 300 characters of each document are kept.
    # NOTE(review): the chain runs its own retrieval internally, so these docs
    # may not be exactly the ones the answer was generated from -- confirm.
    docs = retriever.invoke(query)
    retrieved_text = " ".join([d.page_content[:300] for d in docs])

    cr_score = context_relevance_score(query, retrieved_text)
    f_score = faithfulness_score(answer, retrieved_text)
    comp_score = completeness_score(answer)

    # Log inside the loop so every turn gets its own row. Logging after the
    # loop recorded only the final turn and failed with NameError when the
    # very first input was "exit".
    log_trace({
        "query": query,
        "retrieved_context": retrieved_text,
        "answer": answer,
        "context_relevance_score": cr_score,
        "faithfulness_score": f_score,
        "completeness_score": comp_score,
        "num_docs": len(docs),
        "model": "models/gemini-2.5-flash"
    })