API Keys

In [35]:
import os
from getpass import getpass

# Prompt for the Hugging Face token (getpass keeps it out of the notebook
# source) and log this session in to the Hub with it.
hf_token = getpass("Enter your Hugging Face token: ")
from huggingface_hub import login
login(hf_token)


# Prompt for the Tavily key and export it through the environment; the web
# search cell reads it back from os.environ["TAVILY_API_KEY"].
tavily_key = getpass("Enter your Tavily API key: ")
os.environ["TAVILY_API_KEY"] = tavily_key

Enter your Hugging Face token: ··········
Enter your Tavily API key: ··········


Initializing the LLaMA 3.2 1B Instruct Model



In [36]:
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
import torch

# Checkpoint identifier used for both the tokenizer and the model weights.
model_id = "meta-llama/Llama-3.2-1B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_id)
# Load the weights as float16 and let device_map="auto" decide device placement.
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16, device_map="auto")

# Text-generation pipeline shared by the answer-generation and editing steps.
llm = pipeline("text-generation", model=model, tokenizer=tokenizer)

Device set to use cuda:0


PDF Document Ingestion and Vector Indexing

In [39]:
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader

# Load the source PDF.
# NOTE(review): this absolute path only resolves where the file has been
# uploaded to /content — confirm before running in another environment.
loader = PyPDFLoader("/content/lebo106.pdf")
docs = loader.load()

# Split the loaded documents into chunks of at most 500 characters, with
# neighbouring chunks overlapping by 100 characters.
splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
split_docs = splitter.split_documents(docs)


# Embed every chunk with the BAAI/bge-small-en-v1.5 model and index the
# resulting vectors in FAISS; retrieve_rag queries this store.
embedding_model = HuggingFaceEmbeddings(model_name="BAAI/bge-small-en-v1.5")
vectorstore = FAISS.from_documents(split_docs, embedding_model)


Web Search

In [40]:
from tavily import TavilyClient
# Tavily client built from the API key stored in the TAVILY_API_KEY environment variable.
tavily = TavilyClient(api_key=os.environ["TAVILY_API_KEY"])

def web_search(query, max_results=3):
    """Search the web with Tavily and return the result snippets as one string.

    Args:
        query: Search query text forwarded to the Tavily client.
        max_results: Maximum number of results to request (defaults to 3).

    Returns:
        The ``content`` field of each returned result, joined by newlines.
        Results without usable content are skipped, so the string is empty
        when the search yields nothing.
    """
    response = tavily.search(query=query, max_results=max_results)
    # Tolerate a missing "results" key or entries lacking "content" so that
    # str.join never receives None.
    snippets = [hit.get("content") for hit in response.get("results") or []]
    return "\n".join(text for text in snippets if text)


RAG Agent Pipeline

In [41]:

from typing import TypedDict, Optional
from langgraph.graph import StateGraph, END
from langchain_core.runnables import RunnableLambda

class AgentState(TypedDict):
    """Shared state passed between the nodes of the LangGraph workflow.

    Only ``query`` is supplied when the graph is invoked; each node returns a
    dict that fills in one of the remaining keys.
    """

    # The user's question.
    query: str
    # Text of the PDF chunks retrieved from the vector store (set by retrieve_rag).
    rag_context: Optional[str]
    # Text of the web search snippets (set by retrieve_web).
    web_context: Optional[str]
    # Both contexts merged under labelled headings (set by combine_contexts).
    final_context: Optional[str]
    # Text produced by the answer-generation step (set by generate_response).
    output: Optional[str]
    # Text produced by the editing step (set by editor_agent).
    edited_output: Optional[str]

def retrieve_rag(state: AgentState) -> dict:
    """Look up the three indexed PDF chunks most similar to the query.

    Returns a partial state update whose ``rag_context`` holds the chunk
    texts joined by newlines.
    """
    matches = vectorstore.similarity_search(state["query"], k=3)
    passages = [match.page_content for match in matches]
    return {"rag_context": "\n".join(passages)}


def retrieve_web(state: AgentState) -> dict:
    """Run a web search for the query and return it as the ``web_context`` update."""
    return {"web_context": web_search(state["query"])}


def combine_contexts(state: AgentState) -> dict:
    """Merge the PDF and web contexts into one labelled context block.

    Args:
        state: Workflow state; ``rag_context`` and ``web_context`` are read
            when present.

    Returns:
        A partial state update whose ``final_context`` contains both sources
        under "RAG Context:" and "Web Context:" headings.
    """
    # A key may be absent, None (both fields are Optional) or an empty string
    # (a search with no hits). Substitute the placeholder in all three cases
    # instead of interpolating "None" or a blank section into the prompt.
    rag_context = state.get('rag_context') or 'Not available'
    web_context = state.get('web_context') or 'Not available'
    combined = f"""You are answering a question using information from two sources.

RAG Context:
{rag_context}

Web Context:
{web_context}
"""
    return {"final_context": combined}


def generate_response(state: AgentState) -> dict:
    """Generate the draft answer for the query from the combined context.

    Args:
        state: Workflow state; reads ``final_context`` and ``query``.

    Returns:
        A partial state update whose ``output`` is the text generated by the
        ``llm`` pipeline for the tutor prompt, without the prompt itself.
    """
    prompt = f"""
You are a subject matter expert tutor specializing in 12th-grade NCERT Biology.

Using the context provided below, write a clear, accurate, and formal answer to the question.
Your response should be aligned with NCERT academic tone and avoid speculative or unrelated content.

Context:
{state['final_context']}

Question:
{state['query']}

Answer:
"""
    # return_full_text=False makes the pipeline return only the newly generated
    # continuation. By default the prompt (instructions + context) is echoed in
    # front of it and would be handed to the editing step as part of the answer.
    generations = llm(prompt, return_full_text=False)
    output = generations[0]['generated_text'].strip()
    return {"output": output}


def editor_agent(state: AgentState) -> dict:
    """Ask the model to polish the draft answer.

    Args:
        state: Workflow state; reads ``output``.

    Returns:
        A partial state update whose ``edited_output`` is the text generated
        by the ``llm`` pipeline for the editing prompt, without the prompt
        itself.
    """
    prompt = f"""
You are an academic editor for NCERT-style science answers.

Edit the answer below to make it concise, grammatically correct, and aligned with formal academic standards.

Original Answer:
{state['output']}

Edited Answer:"""
    # return_full_text=False keeps the editing instructions and the original
    # answer out of the result; by default the pipeline echoes the whole prompt
    # in front of the generated text.
    generations = llm(prompt, return_full_text=False)
    edited = generations[0]["generated_text"].strip()
    return {"edited_output": edited}



LangGraph Workflow Definition

In [42]:
from langgraph.graph import StateGraph, END
from langchain_core.runnables import RunnableLambda


# Assemble the workflow: every step reads and updates the shared AgentState.
graph = StateGraph(AgentState)

# Register each step under its node name, in the same order as before.
for node_name, node_fn in (
    ("rag", retrieve_rag),
    ("web", retrieve_web),
    ("combine", combine_contexts),
    ("llm", generate_response),
    ("editor", editor_agent),
):
    graph.add_node(node_name, RunnableLambda(node_fn))

# The steps form a single linear chain that starts at "rag" and finishes at END.
graph.set_entry_point("rag")
stage_order = ("rag", "web", "combine", "llm", "editor", END)
for upstream, downstream in zip(stage_order, stage_order[1:]):
    graph.add_edge(upstream, downstream)

app = graph.compile()

Interactive Memory Chatbot

In [45]:
from langchain.memory import ConversationBufferMemory
from langchain.schema import messages_from_dict, messages_to_dict
# Conversation buffer shared by the Gradio handlers: gr_chatbot appends the
# user and AI messages to it and clear_chat resets it.
memory = ConversationBufferMemory(return_messages=True)
def chatbot_get_edited_answer():
    """Run a console chat loop that prints the edited answer for each question.

    Reads questions from standard input until the user types "exit" or
    "quit" (or the input stream ends), runs each one through the compiled
    workflow ``app`` and prints the text found in ``edited_output``. Errors
    raised while answering are printed and the loop continues.
    """
    print("🤖 NCERT Biology Chatbot")
    print("Type 'exit' to quit.\n")

    while True:
        try:
            query = input("🧑‍🎓 You: ")
        except EOFError:
            # Input stream closed: leave the loop the same way "exit" does.
            print("👋 Goodbye!")
            break

        if query.strip().lower() in ["exit", "quit"]:
            print("👋 Goodbye!")
            break

        if not query.strip():
            # Nothing was typed; skip the retrieval and generation work.
            continue

        try:
            result = app.invoke({"query": query})
            # `or ""` also covers an edited_output that is present but None.
            edited = (result.get("edited_output") or "").strip()

            # Extract only the final answer (after "Edited Answer:")
            if "Edited Answer:" in edited:
                edited_answer = edited.split("Edited Answer:")[-1].strip()
            else:
                edited_answer = edited

            print("\n📘 Final Edited Answer:\n", edited_answer)

        except Exception as e:
            print("⚠️ Error:", str(e))

        print("\n" + "-" * 50 + "\n")



Gradio UI Integration

In [49]:

import gradio as gr

# Function to run chatbot and extract edited answer only
def gr_chatbot(user_input, history=None):
    """Answer one chat message and return the updated Gradio chat history.

    Args:
        user_input: Text submitted from the textbox.
        history: Existing chat history as a list of ``[user, bot]`` pairs, or
            ``None`` when the chat is empty.

    Returns:
        A ``(history, "")`` tuple: the new history list for the chatbot
        component and an empty string that clears the textbox.
    """
    # Work on a copy: a shared mutable default (or the caller's list) must not
    # accumulate messages across calls, and history may arrive as None.
    history = list(history or [])

    if not user_input.strip():
        # Blank submission: nothing to answer, just clear the textbox.
        return history, ""

    if user_input.strip().lower() in ["exit", "quit"]:
        return history + [[user_input, "👋 Goodbye!"]], ""

    memory.chat_memory.add_user_message(user_input)

    try:
        result = app.invoke({"query": user_input})
        # `or` also covers an edited_output that is present but None or empty.
        full_response = result.get("edited_output") or "❌ No response generated."

        # Extract only after "Edited Answer:"
        if "Edited Answer:" in full_response:
            response = full_response.split("Edited Answer:")[-1].strip()
        else:
            response = full_response

        memory.chat_memory.add_ai_message(response)
    except Exception as e:
        # Surface the failure in the chat window instead of breaking the UI.
        response = f"⚠️ Error: {str(e)}"

    history.append([user_input, response])
    return history, ""

# Function to clear the chat
def clear_chat():
    """Reset the conversation memory and blank the chat window and textbox."""
    memory.clear()
    empty_history, empty_textbox = [], ""
    return empty_history, empty_textbox

# UI Layout
# Build the Blocks app; the css argument styles the components through the
# elem_id values assigned to them below.
with gr.Blocks(css="""
#title {font-size: 32px; font-weight: bold; text-align: center; margin-top: 20px; color: #2c3e50;}
#subtitle {text-align: center; font-size: 16px; color: #555;}
#chatbox {border: 1px solid #ccc; border-radius: 8px; padding: 10px;}
#textbox textarea {font-size: 16px;}
#send_btn {background-color: #3B82F6; color: white; border-radius: 6px;}
#clear_btn {background-color: #EF4444; color: white; border-radius: 6px;}
""") as demo:

    # Page title and subtitle.
    gr.Markdown("# 🧬 NCERT Biology Tutor", elem_id="title")
    gr.Markdown("### Ask Class 12 Biology Questions — Powered by PDF + Web + NCERT Editing", elem_id="subtitle")

    # Conversation display.
    with gr.Row():
        chatbot = gr.Chatbot(elem_id="chatbox", show_label=False, height=500)

    # Input row: question textbox plus Send and Clear Chat buttons.
    with gr.Row(equal_height=True):
        txt = gr.Textbox(label="", placeholder="Type your biology question here...", lines=1, elem_id="textbox")
        send_btn = gr.Button("Send", elem_id="send_btn")
        clear_btn = gr.Button("Clear Chat", elem_id="clear_btn")

    # Clicking Send and submitting the textbox both call gr_chatbot with
    # (textbox, chat history) and write its result back to (chat history, textbox).
    send_btn.click(gr_chatbot, [txt, chatbot], [chatbot, txt])
    txt.submit(gr_chatbot, [txt, chatbot], [chatbot, txt])
    # Clear Chat resets the conversation memory and empties both components.
    clear_btn.click(clear_chat, outputs=[chatbot, txt])

# share=True also serves the app through a public gradio.live URL (see the
# cell output), so anyone with the link can reach it.
demo.launch(share=True)



  chatbot = gr.Chatbot(elem_id="chatbox", show_label=False, height=500)


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://da0a909c75c508380c.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


