
# AI Agent: Textbook to Notebook Notes Generator
 
This AI agent processes textbook PDFs and converts them into concise, structured notes.
 The workflow involves:
 1. Text extraction from PDF
 2. Chunking for processing
 3. Note generation using LLaMA3:8b
 4. Quality evaluation using ChatGPT
 5. PDF output creation


In [19]:
# Download the llama3:8b model that generate_notes() requests from the local Ollama server.
!ollama pull llama3:8b
# Start the Ollama server that the notebook talks to on localhost:11434.
# NOTE(review): `ollama serve` runs in the foreground, so this cell presumably blocks
# until interrupted - confirm, or start the server in a separate terminal instead.
!ollama serve

[?2026h[?25l[1Gpulling manifest ⠋ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠙ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠹ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠸ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠼ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠴ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠦ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠧ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠇ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠏ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest [K
pulling 6a0746a1ec1a: 100% ▕██████████████████▏ 4.7 GB                         [K
pulling 4fa551d4f938: 100% ▕██████████████████▏  12 KB                         [K
pulling 8ab4849b038c: 100% ▕██████████████████▏  254 B                         [K
pulling 577073ffcc6c: 100% ▕██████████████████▏  110 B                         [K
pulling 3f8eb4da87fa: 100% ▕██████████████████▏  485 B                         [K
verify

In [20]:

import os
import json
import gradio as gr
import textwrap
import asyncio
try:
    import fitz  # PyMuPDF for PDF text extraction
except Exception:
    fitz = None
from dotenv import load_dotenv
from openai import AsyncOpenAI
from pydantic import BaseModel
from fpdf import FPDF
from datetime import datetime
import re

In [21]:
# Load environment variables from the project's .env file, which is where the
# API keys read below (e.g. OPENAI_API_KEY) are kept out of the source code.
# override=True is passed so that values from .env replace variables that are
# already set in the process environment (python-dotenv's documented behaviour).
load_dotenv(override=True)

True

In [22]:
# Read the API keys from the process environment (populated from .env above).
# A missing variable yields None rather than raising.
openai_api_key = os.environ.get("OPENAI_API_KEY")  # used by the ChatGPT evaluator
google_api_key = os.environ.get("GOOGLE_API_KEY")  # read but not used in this version

# Report whether the evaluator key is present; only its first 8 characters
# are echoed so the full secret never appears in the notebook output.
if not openai_api_key:
    print("OpenAI API Key not found. Please set it in your .env file.")
else:
    print(f"OpenAI API Key found, starting with: {openai_api_key[:8]}...")

OpenAI API Key found, starting with: sk-proj-...


In [23]:
# One async client per model backend; both are driven through the same
# OpenAI-style chat-completions interface.

# Generator backend: the Ollama server running on this machine.
# The api_key value is only a placeholder for the local server.
ollama_client = AsyncOpenAI(
    api_key="ollama",
    base_url="http://localhost:11434/v1",
)

# Evaluator backend: the official OpenAI endpoint, authenticated with the key
# loaded from the environment. Calls count against that account's quota.
chatgpt_client = AsyncOpenAI(
    base_url="https://api.openai.com/v1",
    api_key=openai_api_key,
)

In [24]:
class Evaluation(BaseModel):
    """Structured verdict on a set of generated notes.

    evaluate_notes() builds an instance from the evaluator's JSON reply, and
    generate_notes() reads both fields to decide whether to regenerate.
    """
    # True when the notes are judged good enough to keep; False makes
    # generate_notes() try again while it still has retries left.
    is_acceptable: bool
    # Evaluator's comments; passed into the next generation prompt on a retry.
    feedback: str


In [25]:
async def generate_notes(text_chunk: str, retries: int = 2, feedback: str = "") -> str:
    """
    Generate structured, PDF-friendly notes for one text chunk using LLaMA3:8b.

    Each draft is passed to evaluate_notes(). When the evaluator rejects it and
    retries remain, the model is asked again and is shown both the rejected
    draft and the evaluator's feedback. The draft produced on the last allowed
    attempt is returned without being evaluated.

    Args:
        text_chunk: The text to process and summarize
        retries: Maximum number of regeneration attempts after a failed
            evaluation; values below 1 skip evaluation entirely
        feedback: Optional evaluator feedback to apply on the first attempt

    Returns:
        str: Generated notes in markdown-like format. If the first generation
        call raises, a string starting with "Error generating notes:" is
        returned; if a later retry raises, the most recent successfully
        generated draft is returned instead of discarding it.
    """

    # System prompt to guide the AI in note generation
    system_prompt = (
        "You are an expert academic assistant. "
        "Produce clear, well-structured notes formatted for PDF output. "
        "Use clear headings, bullet points, and concise summaries. "
        "Focus on key ideas, definitions, and concepts from the provided text. "
        "Format the content in a way that will look good when converted to PDF."
    )

    previous_notes = ""              # last draft the evaluator rejected
    retries_left = max(retries, 0)   # a negative budget behaves like zero

    while True:
        # Build the user prompt. On a retry the rejected draft is included,
        # otherwise the model is told to "improve" notes it cannot see.
        if feedback and previous_notes:
            user_prompt = (
                f"Improve the previous notes using this feedback:\n{feedback}\n\n"
                f"Previous notes:\n{previous_notes}\n\n"
                f"Original text:\n{text_chunk}"
            )
        elif feedback:
            user_prompt = (
                f"Improve the previous notes using this feedback:\n{feedback}\n\n"
                f"Original text:\n{text_chunk}"
            )
        else:
            user_prompt = f"Generate concise academic notes in PDF-friendly format for the following text:\n{text_chunk}"

        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ]

        try:
            # Call LLaMA model to generate notes
            response = await ollama_client.chat.completions.create(
                model="llama3:8b",  # Local LLaMA model
                messages=messages,
            )
            # content can be None; normalise so callers always receive a str
            notes = response.choices[0].message.content or ""
            print(f"✅ Generated notes for chunk ({len(text_chunk)} chars)")
        except Exception as e:
            print(f"❌ Ollama generation error: {e}")
            if previous_notes:
                # A retry failed: keep the earlier draft rather than losing it.
                return previous_notes
            return f"Error generating notes: {e}"

        # No retries left: return this draft as-is, without evaluating it.
        if retries_left <= 0:
            return notes

        evaluation = await evaluate_notes(text_chunk, notes)
        if evaluation.is_acceptable:
            print("✅ Evaluation passed.")
            return notes

        print(f"🔁 Retrying with feedback: {evaluation.feedback}")
        feedback = evaluation.feedback
        previous_notes = notes
        retries_left -= 1

In [26]:
async def evaluate_notes(text_chunk: str, notes: str) -> Evaluation:
    """
    Ask ChatGPT to judge whether generated notes faithfully cover the source text.

    Args:
        text_chunk: Source text the notes were derived from
        notes: Candidate notes to be judged

    Returns:
        Evaluation: The evaluator's verdict. If the request or the parsing of
        its reply raises, a passing verdict is returned whose feedback carries
        the error text, so an evaluator failure never blocks note generation.
    """

    # Fixed grading instructions, followed by the two texts to compare.
    rubric = (
        "You are a quality evaluator for academic notes. Check if:\n"
        "1. The notes accurately summarize the key points from the original text\n"
        "2. The notes are well-structured and organized\n"
        "3. The content is clear and easy to understand\n"
        "4. Important concepts, definitions, and ideas are captured\n\n"
        "Respond in JSON format with these exact keys: 'is_acceptable' (boolean) and 'feedback' (string).\n\n"
    )
    prompt = (
        f"{rubric}"
        f"--- Original Text ---\n{text_chunk}\n\n"
        f"--- Generated Notes ---\n{notes}"
    )

    request = {
        "model": "gpt-3.5-turbo",
        "messages": [{"role": "user", "content": prompt}],
        # Ask the API for a JSON object so the reply can be parsed directly.
        "response_format": {"type": "json_object"},
    }

    try:
        reply = await chatgpt_client.chat.completions.create(**request)

        # The reply body is expected to hold exactly the Evaluation fields.
        verdict = Evaluation(**json.loads(reply.choices[0].message.content))
        outcome = 'PASS' if verdict.is_acceptable else 'FAIL'
        print(f"✅ Evaluation completed: {outcome}")
        return verdict

    except Exception as e:
        print(f"⚠️ ChatGPT evaluation error: {e}")
        # Fail open: treat the notes as acceptable when evaluation is unavailable.
        return Evaluation(is_acceptable=True, feedback=f"Evaluation failed: {e}")

In [27]:
def chunk_text(text: str, max_chars: int = 2500) -> list:
    """
    Break a long text into pieces small enough to send to the AI models.

    Splitting happens at whitespace only: words are never cut, so a single
    word longer than max_chars produces one oversized chunk. Newlines inside
    the text are kept rather than being turned into spaces.

    Args:
        text: The full text to split into chunks
        max_chars: Target upper bound on the length of each chunk

    Returns:
        list: The chunks in order; empty when text has no content
    """
    splitter = textwrap.TextWrapper(
        width=max_chars,
        replace_whitespace=False,
        break_long_words=False,
    )
    return splitter.wrap(text)


In [28]:
# Typographic characters mapped to readable ASCII stand-ins. Built once at
# import time so every call is a single translate() pass.
_PDF_SAFE_REPLACEMENTS = str.maketrans({
    '•': '-',      # bullet to hyphen
    '–': '-',      # en dash to hyphen
    '—': '-',      # em dash to hyphen
    '“': '"',      # left double quote
    '”': '"',      # right double quote
    '‘': "'",      # left single quote
    '’': "'",      # right single quote
    '…': '...',    # ellipsis
    '→': '->',     # right arrow
    '←': '<-',     # left arrow
    '≥': '>=',     # greater than or equal
    '≤': '<=',     # less than or equal
    '×': 'x',      # multiplication sign
    '÷': '/',      # division sign
    '±': '+/-',    # plus-minus
})


def sanitize_text(text: str) -> str:
    """
    Make text safe to render with FPDF's built-in (Latin-1) fonts.

    Common typographic characters are replaced with ASCII equivalents. Any
    character that still cannot be encoded as Latin-1 afterwards (emoji,
    Greek letters, CJK, ...) is replaced with "?" so that a single stray
    character cannot make PDF generation fail.

    Args:
        text: Input text that may contain special characters

    Returns:
        str: Text containing only Latin-1 encodable characters
    """
    replaced = text.translate(_PDF_SAFE_REPLACEMENTS)
    # Round-trip through Latin-1: encodable characters (including accented
    # letters such as "é") survive unchanged, everything else becomes "?".
    return replaced.encode("latin-1", errors="replace").decode("latin-1")

In [29]:
def create_pdf_file(notes_markdown: str, source_filename: str) -> str:
    """
    Write the notes to a plain, unstyled PDF next to the source file.

    The PDF holds a two-line header, the notes rendered line by line in a
    monospace font, and a centered footer naming the source file. If anything
    goes wrong while building or saving the PDF, the notes are written to a
    UTF-8 text file instead so the generated content is not lost.

    Args:
        notes_markdown: Generated notes in markdown-like format
        source_filename: Original PDF filename; its path without extension
            is reused for the output name

    Returns:
        str: Path to the generated "<source>_notes.pdf", or to the
        "<source>_notes.txt" fallback when PDF creation failed
    """

    # Derive the output name and a human-readable title from the source path
    base_path = os.path.splitext(source_filename)[0]
    source_name = os.path.basename(source_filename)
    title = os.path.splitext(source_name)[0].replace('_', ' ').title()
    output_filename = f"{base_path}_notes.pdf"

    try:
        # Rendering is inside the try as well: depending on the FPDF version,
        # un-encodable text raises while cells are drawn rather than on output.
        pdf = FPDF()
        pdf.set_auto_page_break(auto=True, margin=15)
        pdf.add_page()

        # Use only basic font (no styling)
        pdf.set_font("Courier", size=10)  # Monospace font for raw text

        # Header. The title comes from the file name, so sanitize it too.
        pdf.cell(0, 10, sanitize_text(f"Academic Notes: {title}"), 0, 1)
        pdf.cell(0, 8, f"Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M')}", 0, 1)
        pdf.ln(10)

        # Body: every input line becomes its own wrapped paragraph
        for line in notes_markdown.split('\n'):
            safe_line = sanitize_text(line)
            if safe_line.strip():  # Only add non-empty lines
                pdf.multi_cell(0, 5, safe_line)
            pdf.ln(2)  # Small spacing between lines (also resets x to the left margin)

        # Footer on the last page. Auto page break must be off here: a cell
        # placed at y=-15 lies inside the 15-unit bottom margin and would
        # otherwise be pushed onto a new, otherwise empty page.
        pdf.set_auto_page_break(auto=False)
        pdf.set_y(-15)
        pdf.set_font("Courier", size=8)
        pdf.cell(0, 10, sanitize_text(f"Source: {source_name}"), 0, 0, 'C')

        # Save the PDF file
        pdf.output(output_filename)
        print(f"✅ PDF created: {output_filename}")
        return output_filename
    except Exception as e:
        print(f"⚠️ PDF creation error: {e}")
        # Create a simple text fallback if PDF fails
        fallback = f"{base_path}_notes.txt"
        with open(fallback, "w", encoding="utf-8") as f:
            f.write(f"Notes for {title}\n")
            f.write(f"Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M')}\n\n")
            f.write(notes_markdown)
        print(f"Saved fallback text file: {fallback}")
        return fallback

In [30]:
async def process_textbook(file, progress=gr.Progress()):
    """
    Main processing pipeline: extract text, generate notes chunk by chunk, and create the PDF.

    Args:
        file: Uploaded PDF, given either as a path (str / os.PathLike) or as
            an object whose .name attribute holds the path
        progress: Gradio progress tracker

    Returns:
        tuple: (status message, path to the generated file). The path is None
        when nothing was uploaded, no text could be extracted, every chunk
        failed to generate, or an error occurred.
    """
    if file is None:
        return "Please upload a textbook to begin.", None

    try:
        from PyPDF2 import PdfReader

        # Accept both upload styles: a plain path, or a file-like wrapper
        # that carries the path in .name.
        if isinstance(file, (str, os.PathLike)):
            pdf_file_path = os.fspath(file)
        else:
            pdf_file_path = file.name
        reader = PdfReader(pdf_file_path)
        num_pages = len(reader.pages)

        # Step 1 — Extract text from PDF
        progress(0, desc="Step 1/4: Extracting text...")
        page_texts = []
        for i, page in enumerate(reader.pages):
            progress((i + 1) / num_pages, desc=f"Extracting Page {i + 1}/{num_pages}")
            page_text = page.extract_text()
            if page_text:  # pages without extractable text are skipped
                page_texts.append(page_text + "\n")
        full_text = "".join(page_texts)

        if not full_text.strip():
            return "⚠️ No text extracted from the PDF.", None

        print(f"✅ Extracted {len(full_text)} characters from {num_pages} pages.")

        # Step 2 — Chunk text and generate notes
        chunks = chunk_text(full_text)
        num_chunks = len(chunks)
        all_notes = []

        progress(0, desc="Step 2/4: Generating notes...")
        for i, chunk in enumerate(chunks):
            progress((i + 1) / num_chunks, desc=f"Generating Chunk {i + 1}/{num_chunks}")
            notes_chunk = await generate_notes(chunk)
            all_notes.append(notes_chunk)

        # generate_notes() reports a failed model call by returning a string
        # with this prefix; count those so failures are not reported as success.
        error_prefix = "Error generating notes:"
        failed_chunks = sum(
            1 for notes in all_notes
            if isinstance(notes, str) and notes.startswith(error_prefix)
        )
        if num_chunks and failed_chunks == num_chunks:
            return (
                "❌ Note generation failed for every chunk. "
                "Check that the Ollama server is running and llama3:8b is available.",
                None,
            )

        # Combine all notes with separators
        combined_notes = "\n\n---\n\n".join(all_notes)

        # Step 3 — Create PDF
        progress(1, desc="Step 3/4: Creating PDF...")
        pdf_output_path = create_pdf_file(combined_notes, pdf_file_path)

        # Step 4 — Return results
        if os.path.exists(pdf_output_path):
            message = f"✅ Notes generated successfully!\n\n**Saved as:** {os.path.basename(pdf_output_path)}"
            if failed_chunks:
                message += f"\n\n⚠️ {failed_chunks} of {num_chunks} chunks could not be summarized."
            return message, pdf_output_path
        else:
            return "❌ PDF generation failed.", None

    except ImportError:
        return "❌ PyPDF2 not installed. Please install it using: pip install PyPDF2", None
    except Exception as e:
        return f"❌ Error processing PDF: {str(e)}", None

In [31]:

async def create_notes_interface(file, progress=gr.Progress(track_tqdm=True)):
    """
    Gradio callback: run the note-generation pipeline for an uploaded file.

    Args:
        file: Uploaded file from Gradio, or None when nothing was uploaded
        progress: Progress tracking object, passed through to the pipeline

    Returns:
        tuple: Status message and output file path (None when no file was given)
    """
    if file is None:
        return "Please upload a textbook.", None
    return await process_textbook(file, progress)


In [32]:
# Placeholder for a reportlab-based PDF writer. It is defined but not called by
# the pipeline, which builds its PDF with create_pdf_file() instead.
def create_pdf_alternative(notes_markdown: str, source_filename: str) -> str:
    """
    Stub for an alternative, reportlab-based PDF creation method.

    No PDF is produced yet: the function only checks that reportlab can be
    imported and then returns None on every path, despite the declared
    return type.

    Args:
        notes_markdown: Generated notes (currently unused)
        source_filename: Original PDF filename (currently unused)

    Returns:
        None in all cases; a message is printed when reportlab is missing
        or another error occurs.
    """
    try:
        # Imported only to detect whether reportlab is installed.
        from reportlab.lib.pagesizes import letter
        from reportlab.pdfgen import canvas
        # Implementation would go here...
        return None
    except ImportError:
        print("ReportLab not available")
        return None
    except Exception as e:
        print(f"⚠️ Alternative PDF creation failed: {e}")
        return None

In [33]:
# Gradio web UI: one PDF upload in, a status message and a downloadable file out.
_APP_DESCRIPTION = (
    "Upload any textbook (PDF). The local LLaMA 3 (8B) model summarizes it into concise academic notes. "
    "ChatGPT evaluates quality, and the output is saved as a raw text PDF."
)

iface = gr.Interface(
    title="AI Textbook → Notebook Notes Generator",
    description=_APP_DESCRIPTION,
    fn=create_notes_interface,  # async callback that runs the whole pipeline
    inputs=gr.File(label="📘 Upload Textbook (PDF)"),
    outputs=[
        gr.Markdown(label="🧾 Status / Summary"),  # first element of the callback's tuple
        gr.File(label="📥 Download Generated Notes (.pdf)"),  # second element
    ],
    allow_flagging="never",  # no flagging button in the UI
)

# Start the server only when run as the main module (true for a notebook cell).
# It binds to the loopback address and does not request a public share link.
if __name__ == "__main__":
    print("🚀 Starting Textbook to Notes Generator...")
    print("📚 Upload a PDF textbook to generate summarized notes")
    iface.launch(share=False, server_name="127.0.0.1")



🚀 Starting Textbook to Notes Generator...
📚 Upload a PDF textbook to generate summarized notes
* Running on local URL:  http://127.0.0.1:7889
* To create a public link, set `share=True` in `launch()`.


ERROR:    Exception in ASGI application
Traceback (most recent call last):
  File "c:\Upendra\Git Hub\Git Hub -- K-Upendra-7\abcd-agentic-training-vnr-upendra\AI-Agent-Textbook-Notebook\.venv\Lib\site-packages\uvicorn\protocols\http\h11_impl.py", line 403, in run_asgi
    result = await app(  # type: ignore[func-returns-value]
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
        self.scope, self.receive, self.send
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    )
    ^
  File "c:\Upendra\Git Hub\Git Hub -- K-Upendra-7\abcd-agentic-training-vnr-upendra\AI-Agent-Textbook-Notebook\.venv\Lib\site-packages\uvicorn\middleware\proxy_headers.py", line 60, in __call__
    return await self.app(scope, receive, send)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Upendra\Git Hub\Git Hub -- K-Upendra-7\abcd-agentic-training-vnr-upendra\AI-Agent-Textbook-Notebook\.venv\Lib\site-packages\fastapi\applications.py", line 1133, in __call__
    await super().__call__(scope, rec

✅ Extracted 19920 characters from 11 pages.
✅ Generated notes for chunk (2493 chars)
⚠️ ChatGPT evaluation error: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}
✅ Evaluation passed.
✅ Generated notes for chunk (2492 chars)
⚠️ ChatGPT evaluation error: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}
✅ Evaluation passed.
✅ Generated notes for chunk (2495 chars)
⚠️ ChatGPT evaluation error: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please c

  pdf.cell(0, 10, f"Academic Notes: {title}", 0, 1)
  pdf.cell(0, 8, f"Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M')}", 0, 1)
  pdf.cell(0, 10, f"Source: {os.path.basename(source_filename)}", 0, 0, 'C')
