In [1]:
import os
from dotenv import load_dotenv
import json
import re
import pickle
import hashlib
from pathlib import Path
from typing import Dict, List, Optional, Any, Tuple
from langchain_core.documents import Document
from langchain_core.prompts import ChatPromptTemplate
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_groq import ChatGroq

  from .autonotebook import tqdm as notebook_tqdm


Initialization 

In [31]:
def initialize_environment():
    """Load environment configuration and build the chat model and embeddings.

    load_dotenv() is called first, so values from a .env file are available
    through os.getenv() without copying them back into os.environ by hand.

    Environment variables consulted:
        GROQ_API_KEY: treated as required; a clear error is raised when unset.
        LANGCHAIN_API_KEY: optional; LANGCHAIN_TRACING_V2 is switched on only
            when this key is present.
        LANGCHAIN_PROJECT, HF_TOKEN: optional; a notice is printed when unset.

    Returns:
        tuple: (llm, embeddings) - a ChatGroq instance and a
        HuggingFaceEmbeddings instance.

    Raises:
        EnvironmentError: if GROQ_API_KEY is not set.
    """
    load_dotenv()

    # Assigning os.getenv(...) straight into os.environ raises TypeError when
    # the variable is missing (environ values must be str), so validate instead.
    if not os.getenv("GROQ_API_KEY"):
        raise EnvironmentError(
            "GROQ_API_KEY is not set. Add it to your environment or .env file."
        )

    if os.getenv('LANGCHAIN_API_KEY'):
        os.environ["LANGCHAIN_TRACING_V2"] = 'true'
    else:
        print("LANGCHAIN_API_KEY is not set; LangChain tracing is left disabled.")

    for optional_var in ('LANGCHAIN_PROJECT', 'HF_TOKEN'):
        if not os.getenv(optional_var):
            print(f"{optional_var} is not set; continuing without it.")

    llm = ChatGroq(
        model="openai/gpt-oss-20b",
        streaming=True,
        temperature=0.7,
    )

    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )

    return llm, embeddings

In [32]:
def load_category_data(category_file='category.json', topics_file='topics.json'):
    """Parse the category and topics JSON files.

    Args:
        category_file: Path of the category JSON file.
        topics_file: Path of the topics JSON file.

    Returns:
        tuple: (category_data, topics_data) as decoded by json.load.
    """
    parsed = []
    # Category file is read first, then topics, each as UTF-8 text.
    for json_path in (category_file, topics_file):
        with open(json_path, 'r', encoding='utf-8') as handle:
            parsed.append(json.load(handle))

    category_data, topics_data = parsed
    return category_data, topics_data

Content Fetching

In [33]:
def get_boards(category_data):
    """Return the board names found under the top-level 'Boards' key.

    An absent 'Boards' key yields an empty list.
    """
    boards = category_data.get('Boards', {})
    return [board_name for board_name in boards.keys()]


def get_classes(category_data, board):
    """Return the class identifiers available for a board.

    Args:
        category_data: Parsed category mapping with a top-level 'Boards' key.
        board: Board name to look up.

    Returns:
        list: Keys of the board's 'Classes' mapping; empty when the board,
        its 'Classes' entry, or the 'Boards' key itself is missing.
    """
    # Use .get for 'Boards' as well, so a missing key returns [] (as
    # get_boards does) instead of raising KeyError.
    boards = category_data.get('Boards', {})
    return list(boards.get(board, {}).get('Classes', {}).keys())


def get_subjects(category_data, board, class_num):
    """Return the subject names available for a board and class.

    Args:
        category_data: Parsed category mapping with a top-level 'Boards' key.
        board: Board name to look up.
        class_num: Class identifier; converted with str() before lookup.

    Returns:
        list: Keys of the 'Subjects' mapping; empty when any level of the
        path (including 'Boards') is missing.
    """
    # .get at every level, including 'Boards', so missing data yields []
    # rather than a KeyError.
    class_entry = (
        category_data.get('Boards', {})
        .get(board, {})
        .get('Classes', {})
        .get(str(class_num), {})
    )
    return list(class_entry.get('Subjects', {}).keys())


def get_topics(topics_data, board, class_num, subject):
    """Return a {topic number: topic name} dict for a board, class and subject.

    Every level is looked up with .get, so a missing entry anywhere along the
    path produces an empty dict. class_num is converted with str() first.
    """
    node = topics_data.get('Boards', {})
    # Walk the nested mapping one level at a time.
    for key in (board, 'Classes', str(class_num), 'Subjects', subject):
        node = node.get(key, {})
    return dict(node.items())


def get_books(category_data, board, class_num, subject):
    """Return the book entries registered for a board, class and subject.

    Args:
        category_data: Parsed category mapping with a top-level 'Boards' key.
        board: Board name to look up.
        class_num: Class identifier; converted with str() before lookup.
        subject: Subject name to look up.

    Returns:
        list: Values of the subject's 'Books' mapping; empty when any level
        of the path (including 'Boards') is missing.
    """
    # .get for 'Boards' too, so a missing key returns [] instead of KeyError.
    subject_entry = (
        category_data.get('Boards', {})
        .get(board, {})
        .get('Classes', {})
        .get(str(class_num), {})
        .get('Subjects', {})
        .get(subject, {})
    )
    return list(subject_entry.get('Books', {}).values())


def extract_topic_from_book(book_content, topic_num):
    """
    Extract the text of one topic from a book.

    A topic starts at either of these markers (case-insensitive):
        1. ##x##
        2. UNIT-x / Unit-x / unit-x (dash required)
    and runs until the next UNIT-<n> or ##<n>## marker, or the end of the text.
    The ##x## form is tried first over the whole text, then the UNIT-x form.

    Args:
        book_content: Full text of the book.
        topic_num: Topic number (int or str); surrounding whitespace is ignored.

    Returns:
        The stripped topic text, or None when no marker matches or the topic
        number is blank.
    """
    topic_num = str(topic_num).strip()
    if not topic_num:
        # A blank number would turn the patterns into "match any marker".
        return None

    topic = re.escape(topic_num)

    # Stop when next UNIT-x or ##x##
    next_marker = r'(?=UNIT\s*-\s*\d+|##\s*\d+\s*##|\Z)'

    patterns = [
        rf'##\s*{topic}\s*##\s*(.*?){next_marker}',
        # (?!\d) keeps topic 1 from matching the start of UNIT-10, UNIT-12, ...
        rf'UNIT\s*-\s*{topic}(?!\d)\s*(.*?){next_marker}',
    ]

    flags = re.IGNORECASE | re.DOTALL

    for pat in patterns:
        match = re.search(pat, book_content, flags)
        if match:
            return match.group(1).strip()

    return None

In [34]:
def fetch_single_topic(category_data, topics_data, board, class_num, subject, topic_num, data_folder='data'):
    """
    Locate one topic's text in the books registered for a subject.

    Each book is expected at <data_folder>/<book_id>.txt; books whose file is
    missing are skipped, and the first book that yields non-empty topic text wins.

    Returns:
        dict: Topic metadata plus 'content', 'status' ('success' or 'error')
        and a human-readable 'message'.
    """
    topic_names = get_topics(topics_data, board, class_num, subject)

    outcome = {
        'board': board,
        'class': class_num,
        'subject': subject,
        'topic_num': topic_num,
        'book_name': None,
        'book_id': None,
        # Fall back to a generic label when the topics file has no name.
        'topic_name': topic_names.get(str(topic_num), f"Topic {topic_num}"),
        'content': None,
        'status': 'error',
        'message': ''
    }

    candidate_books = get_books(category_data, board, class_num, subject)
    if not candidate_books:
        outcome['message'] = "No books found for this subject"
        return outcome

    print(f"  Searching in {len(candidate_books)} book(s) for topic {topic_num}...")

    for entry in candidate_books:
        identifier = entry.get('book_id')
        title = entry.get('Name')
        text_file = os.path.join(data_folder, f"{identifier}.txt")

        if os.path.exists(text_file):
            try:
                with open(text_file, 'r', encoding='utf-8') as handle:
                    full_text = handle.read()

                extracted = extract_topic_from_book(full_text, topic_num)

                if extracted:
                    print(f"    ‚úì Topic {topic_num} found in {title}")
                    outcome.update(
                        book_name=title,
                        book_id=identifier,
                        content=extracted,
                        status='success',
                        message='Topic found successfully',
                    )
                    return outcome

            except Exception as e:
                # Best effort: report the failure and try the next book.
                print(f"    ‚ùå Error reading {title}: {e}")

    outcome['message'] = f"Topic {topic_num} not found in any available books"
    return outcome

In [35]:
def fetch_multiple_topics(category_data, topics_data, board, class_num, subject, topic_nums, data_folder='data'):
    """
    Fetch several topics and concatenate their text under banner headings.

    Args:
        topic_nums: List of topic numbers to fetch

    Returns:
        dict: 'topics' (one summary per fetched topic), 'combined_content',
        'failed_topics', and an overall 'status'/'message'. Status is 'error'
        only when no topic at all could be fetched.
    """
    fetched = []
    failures = []
    sections = []
    banner = '=' * 80

    print(f"\nüìö Fetching {len(topic_nums)} topics...")

    for number in topic_nums:
        single = fetch_single_topic(category_data, topics_data, board, class_num, subject, number, data_folder)

        if single['status'] != 'success':
            failures.append({
                'topic_num': number,
                'error': single['message']
            })
            continue

        fetched.append({
            'topic_num': number,
            'topic_name': single['topic_name'],
            'book_name': single['book_name'],
            'content_length': len(single['content'])
        })

        # Banner heading first, then the topic text itself.
        sections.append(f"\n\n{banner}\n")
        sections.append(f"TOPIC {number}: {single['topic_name']}\n")
        sections.append(f"{banner}\n\n")
        sections.append(single['content'])

    if fetched:
        status = 'success'
        message = f"Successfully fetched {len(fetched)} topics"
        if failures:
            message += f", {len(failures)} failed"
    else:
        status = 'error'
        message = 'No topics found'

    return {
        'board': board,
        'class': class_num,
        'subject': subject,
        'topics': fetched,
        'combined_content': ''.join(sections),
        'status': status,
        'message': message,
        'failed_topics': failures
    }

Vector Store and Caching

In [36]:
def generate_cache_key(content):
    """Derive a cache key from the MD5 hex digest of the UTF-8 encoded content.

    Returns:
        str: "vectorstore_" followed by the 32-character hex digest.
    """
    digest = hashlib.md5()
    digest.update(content.encode('utf-8'))
    return "vectorstore_" + digest.hexdigest()

In [37]:
def load_vector_store_from_cache(content, embeddings, cache_dir='cache'):
    """
    Load vector store from cache if available.

    The cache entry is identified by generate_cache_key(content) and consists
    of <key>.faiss and <key>.pkl inside cache_dir. The directory is created
    (including missing parents) when it does not exist yet.

    Args:
        content: Text whose hash identifies the cache entry.
        embeddings: Embedding model handed to FAISS.load_local.
        cache_dir: Directory holding the cached index files.

    Returns:
        FAISS vector store, or None if nothing is cached or loading failed
    """
    cache_path = Path(cache_dir)
    # parents=True so a nested cache_dir such as "out/cache" does not raise.
    cache_path.mkdir(parents=True, exist_ok=True)

    cache_key = generate_cache_key(content)
    faiss_path = cache_path / f"{cache_key}.faiss"
    pkl_path = cache_path / f"{cache_key}.pkl"

    if not (faiss_path.exists() and pkl_path.exists()):
        return None

    try:
        print(f"  ‚úì Loading vector store from cache...")
        # SECURITY: allow_dangerous_deserialization opts in to loading the
        # pickled .pkl file. Only point cache_dir at files this code wrote
        # itself; never load caches from an untrusted source.
        vector_store = FAISS.load_local(
            str(cache_path),
            embeddings,
            cache_key,
            allow_dangerous_deserialization=True
        )
        print(f"  ‚úì Cache loaded successfully!")
        return vector_store
    except Exception as e:
        # Best effort: a broken cache entry must not stop the caller from rebuilding.
        print(f"  ‚ö† Cache load failed: {e}")
        return None

In [38]:
def save_vector_store_to_cache(vector_store, content, cache_dir='cache'):
    """Save vector store to cache.

    The store is written under cache_dir using generate_cache_key(content) as
    the index name. Saving is best effort: any failure, including failure to
    create the directory, is reported on stdout and not raised.

    Args:
        vector_store: Object exposing save_local(folder, index_name).
        content: Text whose hash identifies the cache entry.
        cache_dir: Directory to write into; created with parents if missing.
    """
    cache_path = Path(cache_dir)
    cache_key = generate_cache_key(content)

    try:
        # parents=True so nested cache directories work; kept inside the try
        # so a directory-creation error is reported like any other save failure.
        cache_path.mkdir(parents=True, exist_ok=True)
        print(f"  üíæ Saving vector store to cache...")
        vector_store.save_local(str(cache_path), cache_key)
        print(f"  ‚úì Cache saved successfully!")
    except Exception as e:
        print(f"  ‚ö† Cache save failed: {e}")


def clear_cache(cache_dir='cache'):
    """Clear all cached vector stores.

    Deletes every file (and symlink) directly inside cache_dir. Subdirectories
    are left in place: calling unlink() on a directory raises and would abort
    the clear halfway through. Does nothing when cache_dir does not exist.
    """
    cache_path = Path(cache_dir)
    if not cache_path.exists():
        return

    for entry in cache_path.glob("*"):
        if entry.is_file() or entry.is_symlink():
            entry.unlink()
    print(f"  ‚úì Cache cleared!")

Vector Store and Setup

In [39]:
def create_vector_store(content, embeddings, use_cache=True, cache_dir='cache'):
    """
    Return a FAISS vector store for the content, reusing a cached one if possible.

    When use_cache is true the cache is consulted first, and a freshly built
    store is written back to it. New stores are built from 800-character
    chunks with a 200-character overlap.

    Returns:
        tuple: (vector_store, retriever) where the retriever returns 5 results.
    """
    def _retriever_for(store):
        # Same retriever settings for cached and freshly built stores.
        return store.as_retriever(search_kwargs={"k": 5})

    cached_store = None
    if use_cache:
        cached_store = load_vector_store_from_cache(content, embeddings, cache_dir)
    if cached_store:
        return cached_store, _retriever_for(cached_store)

    # Nothing usable in the cache: build the index from scratch.
    print(f"  üîß Creating new vector store...")
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=800,
        chunk_overlap=200,
    )
    chunks = splitter.split_documents(documents=[Document(page_content=content)])
    print(f"  ‚úì Created {len(chunks)} chunks")

    fresh_store = FAISS.from_documents(
        documents=chunks,
        embedding=embeddings,
    )

    if use_cache:
        save_vector_store_to_cache(fresh_store, content, cache_dir)

    return fresh_store, _retriever_for(fresh_store)

RAG QnA Functions

In [40]:
def reformulate_query(question, llm):
    """Ask the LLM to rewrite a question as a retrieval-friendly search query.

    Args:
        question: The user's original question.
        llm: Chat model that can be piped after a ChatPromptTemplate.

    Returns:
        str: The model's reply with surrounding whitespace removed. The
        rewritten query is also printed.
    """
    system_instructions = (
        "You are an expert at reformulating search queries for educational content retrieval.\n\n"
        "Your task:\n"
        "1. Analyze the user's question carefully\n"
        "2. Extract key concepts, entities, and educational terms\n"
        "3. Expand abbreviations and clarify ambiguous terms\n"
        "4. Reformulate into a clear, specific search query\n"
        "5. Keep the reformulated query concise (1-2 sentences max)\n"
        "6. If the question is already clear, return it as-is\n\n"
        "Do NOT answer the question. Only return the reformulated search query."
    )

    rewrite_chain = ChatPromptTemplate.from_messages([
        ("system", system_instructions),
        ("human", "{question}"),
    ]) | llm

    reformulated = rewrite_chain.invoke({"question": question}).content.strip()

    print(f"üìù Reformulated query: {reformulated}")
    return reformulated

In [41]:
def ask_question(question, retriever, llm, verbose=True):
    """
    Answer a question from retrieved chapter context (retrieval-augmented generation).

    The question is first rewritten by reformulate_query, the rewritten query
    is used for retrieval, and the original question is what the model answers.

    Args:
        question: User's question
        retriever: Vector store retriever
        llm: Language model
        verbose: Print the question before answering

    Returns:
        str: Answer
    """
    if verbose:
        print(f"\n‚ùì Question: {question}")

    search_query = reformulate_query(question, llm)

    retrieved = retriever.invoke(search_query)
    chapter_context = "\n\n".join(doc.page_content for doc in retrieved)

    # The context is embedded in the template text itself, so literal braces
    # must be doubled or the prompt template would read them as variables.
    safe_context = chapter_context.replace("{", "{{").replace("}", "}}")

    prompt_parts = [
        "You are an expert educational AI assistant specialized in explaining academic content.\n\n",
        "## Your Core Principles:\n",
        "1. **Accuracy First**: Base answers EXCLUSIVELY on the provided context\n",
        "2. **No Fabrication**: If information isn't available, state: ",
        "'This information is not available in the current chapter.'\n",
        "3. **Educational Clarity**: Explain concepts clearly and precisely\n",
        "4. **Structured Responses**: Use examples when helpful\n",
        "5. **Comprehensive Understanding**: Read the entire context before answering\n\n",
        "## Response Guidelines:\n",
        "- For definitions: Provide clear, concise explanations\n",
        "- For processes: Break down into logical steps\n",
        "- For concepts: Explain with relevant examples from context\n",
        "- For comparisons: Highlight key similarities and differences\n\n",
        "## What to Avoid:\n",
        "- Do NOT add external knowledge not in the context\n",
        "- Do NOT make assumptions beyond what's stated\n",
        "- Do NOT provide opinions or subjective interpretations\n\n",
        "## Context from Chapter:\n",
        f"{safe_context}\n\n",
        "Now answer the student's question using ONLY the information above.",
    ]
    system_prompt = "".join(prompt_parts)

    answer_chain = ChatPromptTemplate.from_messages([
        ("system", system_prompt),
        ("human", "{question}"),
    ]) | llm

    return answer_chain.invoke({"question": question}).content

MCQ Generation

In [42]:
def generate_mcqs_for_context(context, num_questions, difficulty_level, topic_name, llm):
    """Generate MCQs for a specific context and topic.

    NOTE: a later cell in this notebook defines generate_mcqs_for_context
    again; when the notebook is run top to bottom, that later definition
    replaces this one.

    Args:
        context: Source text the questions must be based on.
        num_questions: Number of questions requested from the model.
        difficulty_level: Difficulty label inserted into the prompt.
        topic_name: Topic label inserted into the prompt.
        llm: Chat model that can be piped after a ChatPromptTemplate.

    Returns:
        list of MCQ dicts as parsed from the model's JSON reply, or None when
        the call fails or the reply cannot be parsed into the expected shape.
    """

    # Escape all curly braces in context to prevent LangChain template errors
    escaped_context = context.replace("{", "{{").replace("}", "}}")

    # The JSON example below uses doubled braces for the same reason: single
    # braces would be parsed by ChatPromptTemplate as template variables.
    system_message = (
        "You are an expert educational assessment creator.\n\n"
        f"Generate EXACTLY {num_questions} multiple choice questions about '{topic_name}' "
        f"at {difficulty_level} difficulty level based on the content provided.\n\n"
        "CRITICAL RULES:\n"
        "1. Return ONLY valid JSON, no markdown, no code blocks, no extra text\n"
        "2. Each question must have exactly 4 options: A, B, C, D\n"
        "3. Only ONE option should be correct\n"
        "4. Escape all special characters properly in JSON strings\n"
        "5. Use double quotes for all JSON strings, never single quotes\n\n"
        "JSON FORMAT (copy this structure exactly):\n"
        '{{\n'
        '  "mcqs": [\n'
        '    {{\n'
        '      "question": "Your question here?",\n'
        '      "options": {{\n'
        '        "A": "First option",\n'
        '        "B": "Second option",\n'
        '        "C": "Third option",\n'
        '        "D": "Fourth option"\n'
        '      }},\n'
        '      "correct_answer": "A",\n'
        '      "explanation": "Brief explanation"\n'
        '    }}\n'
        '  ]\n'
        '}}\n\n'
        "IMPORTANT:\n"
        "- If text contains quotes, escape them with backslash\n"
        "- Keep questions and options concise\n"
        "- Base all questions on the content below\n\n"
        f"CONTENT:\n{escaped_context}\n\n"
        f"Generate {num_questions} questions now in valid JSON format:"
    )

    # Create MCQ generation prompt
    mcq_prompt = ChatPromptTemplate.from_messages([
        ("system", system_message),
        ("human", "Generate the MCQs in valid JSON format only.")
    ])

    chain = mcq_prompt | llm

    try:
        response = chain.invoke({})
        response_text = response.content.strip()

        # Debug: Print first 500 chars of response
        print(f"    üìÑ Response preview: {response_text[:500]}...")

        # Clean response - remove markdown code blocks
        if response_text.startswith("```"):
            # Find the actual JSON content
            parts = response_text.split("```")
            for part in parts:
                part = part.strip()
                if part.startswith("json"):
                    part = part[4:].strip()
                if part.startswith("{") or part.startswith("["):
                    response_text = part
                    break

        # Remove any trailing markdown
        if "```" in response_text:
            response_text = response_text.split("```")[0].strip()

        # Try to parse JSON
        mcqs_data = json.loads(response_text)

        # Handle both {"mcqs": [...]} and direct array formats
        if isinstance(mcqs_data, dict) and 'mcqs' in mcqs_data:
            return mcqs_data['mcqs']
        elif isinstance(mcqs_data, list):
            return mcqs_data
        else:
            print(f"    ‚ö† Unexpected JSON structure")
            return None

    except json.JSONDecodeError as e:
        print(f"    ‚ö† JSON parsing error: {e}")
        print(f"    üìÑ Full response:\n{response_text}")

        # Try to fix common JSON issues
        try:
            # Fix common issues: trailing commas, single quotes, etc.
            fixed_text = response_text.replace("'", '"')  # Replace single quotes
            fixed_text = re.sub(r',(\s*[}\]])', r'\1', fixed_text)  # Remove trailing commas
            mcqs_data = json.loads(fixed_text)

            if isinstance(mcqs_data, dict) and 'mcqs' in mcqs_data:
                print(f"    ‚úì Fixed JSON issues, parsed successfully")
                return mcqs_data['mcqs']
            elif isinstance(mcqs_data, list):
                print(f"    ‚úì Fixed JSON issues, parsed successfully")
                return mcqs_data
        except json.JSONDecodeError:
            # The repair attempt did not produce valid JSON either; give up below.
            pass

        return None
    except Exception as e:
        print(f"    ‚ö† Unexpected error: {e}")
        return None

In [49]:
def _extract_json_text(response_text):
    """Return the JSON-looking part of an LLM reply, dropping Markdown code fences."""
    text = response_text.strip()
    if "```" not in text:
        return text

    # Take the first fenced (or unfenced) segment that starts like JSON. This
    # also covers replies with prose before the opening fence.
    for segment in text.split("```"):
        candidate = segment.strip()
        if candidate.startswith("json"):
            candidate = candidate[4:].strip()
        if candidate.startswith(("{", "[")):
            return candidate
    return text


def _coerce_mcq_list(parsed):
    """Return the MCQ list from {"mcqs": [...]} or a bare list; None for any other shape."""
    if isinstance(parsed, dict) and 'mcqs' in parsed:
        return parsed['mcqs']
    if isinstance(parsed, list):
        return parsed
    return None


def _repair_and_parse_json(text):
    """Retry JSON parsing after common repairs; return the parsed value or None."""
    without_trailing_commas = re.sub(r',(\s*[}\]])', r'\1', text)
    # Least invasive repair first: swapping single quotes for double quotes
    # corrupts apostrophes, so only try it when removing commas is not enough.
    attempts = (
        without_trailing_commas,
        without_trailing_commas.replace("'", '"'),
    )
    for attempt in attempts:
        try:
            return json.loads(attempt)
        except json.JSONDecodeError:
            continue
    return None


def generate_mcqs_for_context(context, num_questions, difficulty_level, topic_name, llm):
    """Generate MCQs for a specific context and topic.

    Args:
        context: Source text the questions must be based on.
        num_questions: Number of questions requested from the model.
        difficulty_level: Difficulty label inserted into the prompt.
        topic_name: Topic label inserted into the prompt.
        llm: Chat model that can be piped after a ChatPromptTemplate.

    Returns:
        list of MCQ dicts as parsed from the model's JSON reply, or None when
        the call fails or the reply cannot be parsed into the expected shape.
    """

    # Escape all curly braces in context to prevent LangChain template errors
    escaped_context = context.replace("{", "{{").replace("}", "}}")

    # The JSON example uses doubled braces so the prompt template treats them
    # as literal braces rather than template variables.
    system_message = (
        "You are an expert educational assessment creator.\n\n"
        f"Generate EXACTLY {num_questions} multiple choice questions about '{topic_name}' "
        f"at {difficulty_level} difficulty level based on the content provided.\n\n"
        "CRITICAL RULES:\n"
        "1. Return ONLY valid JSON, no markdown, no code blocks, no extra text\n"
        "2. Each question must have exactly 4 options: A, B, C, D\n"
        "3. Only ONE option should be correct\n"
        "4. Escape all special characters properly in JSON strings\n"
        "5. Use double quotes for all JSON strings, never single quotes\n\n"
        "JSON FORMAT (copy this structure exactly):\n"
        '{{\n'
        '  "mcqs": [\n'
        '    {{\n'
        '      "question": "Your question here?",\n'
        '      "options": {{\n'
        '        "A": "First option",\n'
        '        "B": "Second option",\n'
        '        "C": "Third option",\n'
        '        "D": "Fourth option"\n'
        '      }},\n'
        '      "correct_answer": "A",\n'
        '      "explanation": "Brief explanation"\n'
        '    }}\n'
        '  ]\n'
        '}}\n\n'
        "IMPORTANT:\n"
        "- If text contains quotes, escape them with backslash\n"
        "- Keep questions and options concise\n"
        "- Base all questions on the content below\n\n"
        f"CONTENT:\n{escaped_context}\n\n"
        f"Generate {num_questions} questions now in valid JSON format:"
    )

    # Create MCQ generation prompt
    mcq_prompt = ChatPromptTemplate.from_messages([
        ("system", system_message),
        ("human", "Generate the MCQs in valid JSON format only.")
    ])

    chain = mcq_prompt | llm

    try:
        response = chain.invoke({})
        response_text = response.content.strip()

        # Debug: Print first 500 chars of response
        print(f"    üìÑ Response preview: {response_text[:500]}...")

        json_text = _extract_json_text(response_text)

        try:
            mcqs_data = json.loads(json_text)
        except json.JSONDecodeError as e:
            print(f"    ‚ö† JSON parsing error: {e}")
            print(f"    üìÑ Full response:\n{json_text}")

            # Try to fix common JSON issues (trailing commas, single quotes)
            repaired = _coerce_mcq_list(_repair_and_parse_json(json_text))
            if repaired is not None:
                print(f"    ‚úì Fixed JSON issues, parsed successfully")
            return repaired

        # Handle both {"mcqs": [...]} and direct array formats
        mcqs = _coerce_mcq_list(mcqs_data)
        if mcqs is None:
            print(f"    ‚ö† Unexpected JSON structure")
        return mcqs

    except Exception as e:
        # Best effort: report any other failure (e.g. the LLM call) and return None.
        print(f"    ‚ö† Unexpected error: {e}")
        return None

Display and Export 

In [44]:
def display_mcqs(mcqs, show_topic=True):
    """Display MCQs in a formatted way.

    The MCQ dicts come from parsed LLM output, so fields may be missing or
    shaped differently than requested. Missing fields are shown with a
    placeholder instead of raising KeyError, and options given as a list are
    labelled A, B, C, ... in order.

    Args:
        mcqs: List of MCQ dicts with 'question', 'options', 'correct_answer',
            'explanation' and optionally 'topic'.
        show_topic: When true, print the 'topic' field of entries that have one.
    """
    if not mcqs:
        print("No MCQs to display")
        return

    print("\n" + "=" * 80)
    print("GENERATED MCQs".center(80))
    print("=" * 80)

    for i, mcq in enumerate(mcqs, 1):
        if show_topic and 'topic' in mcq:
            print(f"\n[Topic: {mcq['topic']}]")

        print(f"\nQ{i}. {mcq.get('question', '[missing question]')}")

        options = mcq.get('options') or {}
        if isinstance(options, dict):
            labelled = list(options.items())
        else:
            # Options returned as a plain list: label them A, B, C, ...
            labelled = [(chr(ord('A') + idx), text) for idx, text in enumerate(options)]

        for option, text in labelled:
            print(f"   {option}. {text}")
        print(f"\n   ‚úì Correct Answer: {mcq.get('correct_answer', 'N/A')}")
        print(f"   üí° Explanation: {mcq.get('explanation', 'N/A')}")
        print("-" * 80)

In [45]:
def export_mcqs_to_json(mcqs, filename="mcqs_export.json"):
    """Write the MCQs to a UTF-8 JSON file as {"mcqs": [...], "total": N}.

    Failures are reported on stdout rather than raised.
    """
    try:
        with open(filename, 'w', encoding='utf-8') as out_file:
            payload = {"mcqs": mcqs, "total": len(mcqs)}
            # ensure_ascii=False keeps non-ASCII text readable in the file.
            json.dump(payload, out_file, indent=2, ensure_ascii=False)
        print(f"\n‚úì MCQs exported to {filename}")
    except Exception as e:
        print(f"\n‚ùå Export failed: {e}")

In [46]:
def display_topic_summary(result):
    """Print the outcome of a multi-topic fetch.

    Shows the overall message, one entry per fetched topic (name, book,
    content length) and, when present, the topics that could not be fetched.
    """
    print(f"\n‚úì {result['message']}")
    print(f"\nüìñ Topics fetched:")

    for fetched in result['topics']:
        print(f"  ‚Ä¢ Topic {fetched['topic_num']}: {fetched['topic_name']}")
        print(f"    - Book: {fetched['book_name']}")
        print(f"    - Content length: {fetched['content_length']} characters")

    failures = result['failed_topics']
    if not failures:
        return

    print(f"\n‚ö† Failed to fetch:")
    for failure in failures:
        print(f"  ‚Ä¢ Topic {failure['topic_num']}: {failure['error']}")

In [47]:
def example_usage():
    """Run the whole pipeline end to end as a demonstration.

    Steps: initialise the models, fetch two CBSE class 11 Chemistry topics,
    build (or load) the vector store, answer two sample questions, then
    generate, display and export MCQs.
    """
    rule = "=" * 80

    print(rule)
    print("Education RAG System - Function-based Version".center(80))
    print(rule)

    # Step 1: Initialize
    print("\nüîß Step 1: Initializing environment...")
    llm, embeddings = initialize_environment()
    category_data, topics_data = load_category_data()
    print("‚úì Environment initialized")

    # Step 2: Fetch content for multiple topics
    print("\nüìö Step 2: Fetching content...")
    board, class_num, subject = "CBSE", "11", "Chemistry"
    topic_nums = ["2", "4"]

    result = fetch_multiple_topics(category_data, topics_data, board, class_num, subject, topic_nums)

    if result['status'] != 'success':
        print(f"‚ùå Error: {result['message']}")
        return

    display_topic_summary(result)
    content = result['combined_content']

    # Step 3: Create vector store (with caching)
    print("\nüóÇÔ∏è Step 3: Creating vector store...")
    vector_store, retriever = create_vector_store(content, embeddings, use_cache=True)
    print("‚úì Vector store ready")

    # Step 4: Ask questions
    print("\n" + rule)
    print("Q&A DEMO".center(80))
    print(rule)

    sample_questions = (
        "What are the main concepts in these topics?",
        "Summarize the key points from all topics.",
    )

    for sample in sample_questions:
        answer = ask_question(sample, retriever, llm, verbose=True)
        print(f"üí° Answer: {answer}\n")
        print("-" * 80)

    # Step 5: Generate MCQs across topics
    print("\n" + rule)
    print("MCQ GENERATION".center(80))
    print(rule)

    # Check if we have any topics
    if not result['topics']:
        print("\n‚ùå No topics available for MCQ generation")
        return

    # Question quota for the first, second and third fetched topic; zip()
    # stops at the shorter sequence, so further topics get no questions.
    question_quota = (3, 2, 2)
    topic_distributions = [
        {'topic_name': fetched['topic_name'], 'num_questions': quota}
        for fetched, quota in zip(result['topics'], question_quota)
    ]

    print(f"\nüìã MCQ Distribution Plan:")
    total = sum(plan['num_questions'] for plan in topic_distributions)
    for plan in topic_distributions:
        print(f"  ‚Ä¢ {plan['topic_name']}: {plan['num_questions']} questions")
    print(f"  Total: {total} questions")

    # NOTE(review): generate_mcqs_from_topics is not defined in any cell shown
    # here - confirm its defining cell still exists, otherwise this raises
    # NameError on a fresh kernel.
    mcqs = generate_mcqs_from_topics(topic_distributions, retriever, llm, difficulty_level="medium")

    if not mcqs:
        print("\n‚ùå Failed to generate MCQs")
        return

    display_mcqs(mcqs, show_topic=True)
    export_mcqs_to_json(mcqs, f"mcqs_{board}_{class_num}_{subject}.json")

In [48]:
# Entry point: runs the end-to-end demo. The captured output that follows this
# cell shows the guard is also true when the cell is executed in the notebook.
if __name__ == "__main__":
    example_usage()

                 Education RAG System - Function-based Version                  

üîß Step 1: Initializing environment...
‚úì Environment initialized

üìö Step 2: Fetching content...

üìö Fetching 2 topics...
  Searching in 2 book(s) for topic 2...
    ‚úì Topic 2 found in Chemistry Part-I (NCERT)
  Searching in 2 book(s) for topic 4...
    ‚úì Topic 4 found in Chemistry Part-I (NCERT)

‚úì Successfully fetched 2 topics

üìñ Topics fetched:
  ‚Ä¢ Topic 2: Topic 2
    - Book: Chemistry Part-I (NCERT)
    - Content length: 138108 characters
  ‚Ä¢ Topic 4: Topic 4
    - Book: Chemistry Part-I (NCERT)
    - Content length: 89599 characters

üóÇÔ∏è Step 3: Creating vector store...
  üîß Creating new vector store...
  ‚úì Created 408 chunks
  üíæ Saving vector store to cache...
  ‚úì Cache saved successfully!
‚úì Vector store ready

                                    Q&A DEMO                                    

‚ùì Question: What are the main concepts in these topics?
üìù Reformula