In [2]:
import re

def extract_key_sections(text):
    """
    Extracts key sections from the NCERT text:
      - Summary
      - Definitions
      - Solved Examples
      - Exercises
    Returns a dictionary where missing sections are None.
    """
    sections = {}
    
    # Extract Summary (try variations)
    summary_match = re.search(r"(SUMMARY|Conclusion|Key Points)[:\s](.+?)(?=\n\n|\n[A-Z])", text, re.DOTALL | re.IGNORECASE)
    sections["Summary"] = summary_match.group(2).strip() if summary_match else None  

    # Extract Definitions
    definitions = re.findall(r"(Definition|Key Concept)[:\s](.+?)(?=\n\n|\n[A-Z])", text, re.DOTALL | re.IGNORECASE)
    sections["Definitions"] = "\n".join([d[1].strip() for d in definitions]) if definitions else None

    # Extract Solved Examples
    solved_examples = re.findall(r"(Example|Solved Example)[\s\d.:]+(.+?)(?=\n\n|\n[A-Z])", text, re.DOTALL | re.IGNORECASE)
    sections["Solved Examples"] = "\n".join([ex[1].strip() for ex in solved_examples]) if solved_examples else None

    # Extract Exercises
    exercises = re.findall(r"(Exercise|Practice Questions)[:\s](.+?)(?=\n\n|\n[A-Z])", text, re.DOTALL | re.IGNORECASE)
    sections["Exercises"] = "\n".join([ex[1].strip() for ex in exercises]) if exercises else "No exercises found."

    return sections


In [4]:
import time
import google.generativeai as genai

# Configure your Gemini API Key (make sure it's set)
GOOGLE_API_KEY = "AIzaSyDxtvmaGC9iB53VkvyYbtcBZKgOVg9Z2S8"  # Replace with your key
genai.configure(api_key=GOOGLE_API_KEY)

def generate_text_with_gemini(section_type, full_text, retries=3, delay=10):
    """
    Uses Google Gemini AI (gemini-2.0-flash) to generate missing sections.
    Retries on failure, with a delay between attempts.
    """
    prompt = f"""
    You are an AI teacher assisting in generating educational content for NCERT books.
    Instead of copying directly, explain in your own words.

    **Reference NCERT Chapter Content (for context):**
    {full_text}  # Limit to first 3000 characters

    **Task:** Generate an original {section_type} for this chapter.
    - For a summary, provide a concise and structured overview.
    - For a definition, extract and explain the key concepts in simple terms.
    - For a solved example, create a new step-by-step example problem.
    """
    
    for attempt in range(retries):
        try:
            time.sleep(delay)
            model = genai.GenerativeModel(model_name="gemini-2.0-flash")
            response = model.generate_content(prompt)
            # Check for valid candidate response
            if response.candidates and response.candidates[0].content.parts:
                return response.candidates[0].content.parts[0].text.strip()
            else:
                print(f"⚠️ No valid response for {section_type}. Retrying attempt {attempt+1}...")
        except Exception as e:
            print(f"⚠️ Attempt {attempt+1} failed for {section_type}: {e}")
            if "exhausted" in str(e).lower():
                print(f"⚠️ API limit reached. Retrying in {delay} seconds...")
                time.sleep(delay)
            else:
                break
    return f"{section_type} generation failed."


In [6]:
TEXT_BASE_FOLDER = "ncert_texts"
MODEL_ANSWER_FOLDER = "ncert_model_answers"
# Import required module at the beginning of the script
import os

def process_ncert_texts():
    """
    Process NCERT texts with AI.
      - Saves the complete structured content in MODEL_ANSWER_FOLDER.
    """
    for class_name in os.listdir(TEXT_BASE_FOLDER):
        class_path = os.path.join(TEXT_BASE_FOLDER, class_name)
        if os.path.isdir(class_path):
            for subject in os.listdir(class_path):
                subject_path = os.path.join(class_path, subject)
                if os.path.isdir(subject_path):
                    model_answer_subject_folder = os.path.join(MODEL_ANSWER_FOLDER, class_name, subject)
                    os.makedirs(model_answer_subject_folder, exist_ok=True)

                    for chapter_file in os.listdir(subject_path):
                        if chapter_file.endswith(".txt"):
                            chapter_path = os.path.join(subject_path, chapter_file)
                            with open(chapter_path, "r", encoding="utf-8") as f:
                                text = f.read()

                            key_sections = extract_key_sections(text)

                            # For each section that is missing, generate it using Gemini
                            for section in ["Summary", "Definitions", "Solved Examples"]:
                                if key_sections[section] is None:
                                    print(f"⚠️ No {section} found for {chapter_file}. Generating with AI...")
                                    key_sections[section] = generate_text_with_gemini(section, text)

                            # Save the complete model answer in the new folder
                            output_file = os.path.join(model_answer_subject_folder, chapter_file)
                            with open(output_file, "w", encoding="utf-8") as f:
                                for section, content in key_sections.items():
                                    f.write(f"### {section} ###\n{content}\n\n")
                            print(f"✅ Processed {chapter_file}")

# Run the processing to create model answers
process_ncert_texts()

✅ Processed Chapter1.pdf.txt
⚠️ No Definitions found for Chapter10.pdf.txt. Generating with AI...
⚠️ No Solved Examples found for Chapter10.pdf.txt. Generating with AI...
✅ Processed Chapter10.pdf.txt
⚠️ No Definitions found for Chapter11.pdf.txt. Generating with AI...
⚠️ No Solved Examples found for Chapter11.pdf.txt. Generating with AI...
✅ Processed Chapter11.pdf.txt
⚠️ No Definitions found for Chapter12.pdf.txt. Generating with AI...
⚠️ No Solved Examples found for Chapter12.pdf.txt. Generating with AI...
✅ Processed Chapter12.pdf.txt
⚠️ No Definitions found for Chapter13.pdf.txt. Generating with AI...
✅ Processed Chapter13.pdf.txt
⚠️ No Definitions found for Chapter14.pdf.txt. Generating with AI...
⚠️ No Solved Examples found for Chapter14.pdf.txt. Generating with AI...
✅ Processed Chapter14.pdf.txt
⚠️ No Definitions found for Chapter15.pdf.txt. Generating with AI...
⚠️ No Solved Examples found for Chapter15.pdf.txt. Generating with AI...
✅ Processed Chapter15.pdf.txt
⚠️ No Defini

In [9]:
def display_extracted_sections():
    for class_name in os.listdir(MODEL_ANSWER_FOLDER):
        class_path = os.path.join(MODEL_ANSWER_FOLDER, class_name)
        if os.path.isdir(class_path):
            for subject in os.listdir(class_path):
                subject_path = os.path.join(class_path, subject)
                if os.path.isdir(subject_path):
                    print(f"\n📂 Model answers for {class_name}/{subject}:")
                    for chapter_file in sorted(os.listdir(subject_path)):
                        chapter_path = os.path.join(subject_path, chapter_file)
                        print(f"\n🔹 {chapter_file}:\n")
                        with open(chapter_path, "r", encoding="utf-8") as f:
                            print(f.read())
                        print("=" * 80)

display_extracted_sections()



📂 Model answers for class_11/biology:

🔹 Chapter1.pdf.txt:

### Summary ###
The living world is rich in variety. Millions of plants and animals have been

### Definitions ###
of a biological species. Mayr was

### Solved Examples ###
of mango to understand the
of Mangifera indica.
Insects represent a group of organisms sharing common features like

### Exercises ###
No exercises found.



🔹 Chapter10.pdf.txt:

### Summary ###
According to the cell theory, cells arise from preexisting cells. The process by

### Definitions ###
Okay, here's an explanation of cell cycle and cell division, avoiding direct copying and using simpler language:

**Summary:**

Imagine your body is like a LEGO castle. To build it bigger or repair damaged parts, you need more LEGO bricks (cells). The cell cycle is the process where one LEGO brick (cell) duplicates itself to make two identical bricks. This involves careful steps to copy the instructions (DNA), grow bigger, and then split in two. There are two mai

PermissionError: [Errno 13] Permission denied: 'ncert_model_answers\\class_11\\math\\.ipynb_checkpoints'