In [25]:
import os
from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

# 1. CONFIGURATION (Ensure you use the correct embedding model)
# SECURITY: never hard-code the API key in source -- it ends up in version
# control and in shared notebooks. The Gemini clients below pick the key up
# from the GOOGLE_API_KEY environment variable, so supply it there (shell
# export, .env loader, notebook secret, ...). Any key that was previously
# committed in this file must be treated as leaked and revoked.
if not os.environ.get("GOOGLE_API_KEY"):
    print(
        "WARNING: GOOGLE_API_KEY is not set. Export it before using the "
        "Gemini embedding/chat models."
    )

def build_knowledge_base(path="./pdfs"):
    """Build a FAISS index from the PDFs in ``path`` and save it to disk.

    Files matching ``./*.pdf`` under ``path`` are loaded with
    ``PyPDFLoader``, split into overlapping chunks, embedded with the Gemini
    embedding model and stored in a FAISS index that is persisted to the
    ``faiss_study_index`` directory.

    Args:
        path: Directory containing the PDFs to index.

    Returns:
        The FAISS vector store that was just built and saved.

    Raises:
        ValueError: If no PDF pages were loaded from ``path`` or the loaded
            pages produced no text chunks (so there is nothing to embed).
    """
    print("--- 📚 Processing Semester PDFs ---")
    loader = DirectoryLoader(path, glob="./*.pdf", loader_cls=PyPDFLoader)
    docs = loader.load()
    if not docs:
        # Fail early with a clear message instead of an opaque error from
        # the embedding / FAISS layer when it is handed an empty corpus.
        raise ValueError(f"No PDF pages could be loaded from {path!r}.")

    # Precise splitting for Viva questions
    splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=150)
    chunks = splitter.split_documents(docs)
    if not chunks:
        # Pages with no extractable text (e.g. image-only PDFs) yield no chunks.
        raise ValueError(f"The PDFs in {path!r} contain no extractable text.")

    # IMPORTANT: Use 'gemini-embedding-001', NOT the chat model
    embeddings = GoogleGenerativeAIEmbeddings(model="models/gemini-embedding-001")

    vector_db = FAISS.from_documents(chunks, embeddings)
    vector_db.save_local("faiss_study_index")
    return vector_db

def format_docs(docs):
    """Format retrieved chunks with citations for the AI to see.

    Each chunk is rendered as a ``[Source: ..., Page: ...]`` header line
    followed by the chunk text, and all rendered chunks are concatenated in
    the order given.

    Args:
        docs: Iterable of document-like objects exposing a ``metadata``
            mapping and a ``page_content`` string.

    Returns:
        The combined context string; an empty string when ``docs`` is empty.
    """
    sections = []
    for doc in docs:
        source = doc.metadata.get("source", "Unknown File")
        page = doc.metadata.get("page")
        # Stored page indices are shifted by one for display. A missing or
        # non-integer page is reported as "Unknown" rather than being cited
        # as page 1, which would fabricate a citation.
        page_label = page + 1 if isinstance(page, int) else "Unknown"
        sections.append(
            f"\n[Source: {source}, Page: {page_label}]\n{doc.page_content}\n"
        )
    # Join once instead of growing a string with += inside the loop.
    return "".join(sections)

def start_viva_session():
    """Run one question/answer round of the viva assistant.

    Loads the persisted FAISS index from ``faiss_study_index`` (building it
    with ``build_knowledge_base`` when that path does not exist), wires the
    retriever, prompt and Gemini chat model into an LCEL chain, reads a
    single question from standard input and prints the model's answer.

    A blank question is rejected with a message instead of being sent to
    the retriever and the model.
    """
    index_dir = "faiss_study_index"

    # Load or build the DB
    if os.path.exists(index_dir):
        # Must be the same embedding model that build_knowledge_base() used,
        # otherwise query vectors and stored vectors are not comparable.
        embeddings = GoogleGenerativeAIEmbeddings(model="models/gemini-embedding-001")
        # SECURITY NOTE: this flag lets load_local deserialize pickled data,
        # which can execute arbitrary code. It is acceptable only because the
        # index is produced locally by build_knowledge_base(); never load an
        # index obtained from an untrusted source this way.
        vector_db = FAISS.load_local(index_dir, embeddings, allow_dangerous_deserialization=True)
    else:
        vector_db = build_knowledge_base()

    # The 2026 Model Choice
    llm = ChatGoogleGenerativeAI(model="gemini-2.5-flash", temperature=0.1)

    # The 2026 Prompt Design
    template = """You are a College Viva Examiner. Answer the student's question based ONLY on the context below. 
    If the answer isn't there, say "I couldn't find that in your semester notes."
    
    Context:
    {context}
    
    Student Question: {question}
    
    Detailed Answer (Include Citations):"""

    prompt = ChatPromptTemplate.from_template(template)

    # THE 2026 LCEL PIPE (|) CHAIN
    # The raw question string feeds both branches: the retriever (whose hits
    # are rendered by format_docs) and, unchanged, the "question" slot.
    rag_chain = (
        {"context": vector_db.as_retriever() | format_docs, "question": RunnablePassthrough()}
        | prompt
        | llm
        | StrOutputParser()
    )

    # Run the viva
    query = input("\nAsk your viva question: ").strip()
    if not query:
        # Nothing to retrieve for; avoid a pointless embedding + LLM call.
        print("No question entered.")
        return
    print("\n--- ASSISTANT RESPONSE ---")
    print(rag_chain.invoke(query))

# Script entry point: run a single viva session only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    start_viva_session()


--- ASSISTANT RESPONSE ---
Artificial Intelligence (23CP307T) is a course coordinated by Dr. Sidheswar Routray, with faculty including Dr. Shilpa Pandey, Dr. Davinder Singh, Dr. Trishna Paul, and Dr. Azriel Henry from the Department of Computer Science & Engineering, School of Technology [Source: pdfs/4_AO Lecture Notes.pdf, Page: 1; pdfs/2_AI Agent Lecture Notes.pdf, Page: 1].

An AI system is composed of agents and their environment [Source: pdfs/2_AI Agent Lecture Notes.pdf, Page: 2]. An agent is defined as anything capable of perceiving its environment through sensors and acting on it using actuators and effectors [Source: pdfs/2_AI Agent Lecture Notes.pdf, Page: 2]. Agents can be categorized into five classes based on their perceived intelligence and capability:
*   Simple Reflex Agent
*   Model-based reflex agent
*   Goal-based agents
*   Utility-based agent
*   Learning agent

All these agents are capable of improving their performance and generating better actions over time [S