In [1]:
!pip install requests sentence-transformers pymupdf



In [2]:
import os
import zipfile
import fitz  
import requests
import re
from sentence_transformers import SentenceTransformer, util

In [10]:
books_folder = "class3_books"  
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
OLLAMA_API_URL = "http://localhost:11434/api/generate"
OLLAMA_MODEL = "gemma:2b-instruct"

print("Configuration loaded successfully!")

Configuration loaded successfully!


In [11]:
def extract_text_from_pdf_bytes(pdf_bytes):
    doc = fitz.open(stream=pdf_bytes, filetype="pdf")
    text = []
    for page in doc:
        text.append(page.get_text())
    return "\n".join(text)

def clean_text_generic(text):
    text = re.sub(r"[^a-zA-Z0-9\s\.\,\?\!\:\;\-]", " ", text)
    text = re.sub(r"\s+", " ", text)
    return text.strip()

def chunk_text(text, max_words=200):
    paragraphs = text.split("\n\n")
    chunks = []
    for para in paragraphs:
        words = para.split()
        if len(words) == 0:
            continue
        for i in range(0, len(words), max_words):
            chunks.append(" ".join(words[i:i+max_words]))
    return chunks

In [12]:
all_chunks = []
for zip_file in os.listdir(books_folder):
    if zip_file.endswith(".zip"):
        zip_path = os.path.join(books_folder, zip_file)
        with zipfile.ZipFile(zip_path, "r") as zf:
            for pdf_name in zf.namelist():
                if pdf_name.endswith(".pdf"):
                    with zf.open(pdf_name) as pdf_file:
                        pdf_bytes = pdf_file.read()
                        text = extract_text_from_pdf_bytes(pdf_bytes)
                        text = clean_text_generic(text)
                        chunks = chunk_text(text)
                        tagged = [f"[{pdf_name}] {ch}" for ch in chunks]
                        all_chunks.extend(tagged)
        print(f"Processed {zip_file}")

print(f"Total chunks collected: {len(all_chunks)}")

# Generate embeddings
if all_chunks:
    chunk_embeddings = embedding_model.encode(all_chunks, convert_to_tensor=True)
    print("Embeddings generated successfully!")
else:
    print("No syllabus content found. The system will still work but won't have context.")
    chunk_embeddings = None

Processed AI_base.zip
Processed ENGLISH.zip
Processed EVS.zip
Processed MATHS.zip
Total chunks collected: 505
Embeddings generated successfully!


In [13]:
def retrieve_context(query, k=3):
    if not all_chunks:
        return "No syllabus content available."
    
    query_embedding = embedding_model.encode([query], convert_to_tensor=True)
    hits = util.semantic_search(query_embedding, chunk_embeddings, top_k=k)[0]
    top_chunks = [all_chunks[h["corpus_id"]] for h in hits]
    return "\n".join(top_chunks)

def query_ollama(prompt, max_tokens=150):
    payload = {
        "model": OLLAMA_MODEL,
        "prompt": prompt,
        "stream": False,
        "options": {
            "temperature": 0.1,
            "num_predict": max_tokens,
            "top_p": 0.9,
            "repeat_penalty": 1.1
        }
    }
    
    try:
        response = requests.post(OLLAMA_API_URL, json=payload, timeout=60)
        response.raise_for_status()
        return response.json()["response"]
    except requests.exceptions.RequestException:
        return "I'm having trouble connecting to the knowledge base right now."

def answer_question(query):
    context = retrieve_context(query, k=3)
    
    prompt = f"""
    You are a teacher for Class 3 students.
    Answer the question using the following syllabus context only.
    If the context does not have the answer, say "I couldn't find this in the syllabus."

    Question: {query}

    Context from syllabus:
    {context}

    Answer in simple words for a Class 3 student:
    """
    
    return query_ollama(prompt)

In [14]:
def run_chat():
    print("\n🤖 CBSE Class 3 Q/A Tool Ready! Ask syllabus questions (type 'quit' to exit)\n")
    
    while True:
        query = input("You: ")
        if query.lower() in ["quit", "exit"]:
            break
        print("Bot:", answer_question(query))

In [15]:
run_chat()


🤖 CBSE Class 3 Q/A Tool Ready! Ask syllabus questions (type 'quit' to exit)



You:  what are word numerals


Bot: **Word numerals are words that represent numbers.**

For example, the number 56 has the word "fifty-six" written next to it.

We can also make new words with numbers, like 15, 27, and 94.


You:  why was meena angry


Bot: **1. What was Meena playing with?**
Meena made paper boats.


**2. Meena was angry. Why?**
Meena was angry because someone was pushing her paper boats into the water.


**3. How did Meena help the boy? B.**
Meena helped the boy by teaching him how to make paper boats.


**4. What else can you make with paper? Discuss in small groups.**
You can make other things with paper, such as cards, hats, and boxes.


**5. What games do you play with your friends? Which one do you like the most? Why?**
We can play games with our friends like tag, hide and seek, and


You:  quit
