AI Agent: Textbook to Notebook Notes Generator 

In [None]:
import os
import json
import gradio as gr
import textwrap
import asyncio
import fitz  # PyMuPDF
import sys
from dotenv import load_dotenv
from openai import AsyncOpenAI
from pydantic import BaseModel
from fpdf import FPDF
from datetime import datetime
from pathlib import Path

In [None]:

# Load environment variables from a local .env file before any keys are read.
# NOTE(review): override=True is expected to let .env values take precedence
# over variables already set in the process environment — confirm against
# the python-dotenv documentation.
load_dotenv(override=True)

In [None]:
# Look up both provider keys once; later cells read these module-level names.
openai_api_key = os.getenv("OPENAI_API_KEY")
google_api_key = os.getenv("GOOGLE_API_KEY")

# Report each key's status, echoing only its first eight characters.
if not openai_api_key:
    print("OpenAI API Key not found. Please set it in your .env file.")
else:
    print(f"OpenAI API Key found, starting with: {openai_api_key[:8]}...")

if not google_api_key:
    print("Google API Key not found. Please set it in your .env file.")
else:
    print(f"Google API Key found, starting with: {google_api_key[:8]}...")


In [None]:
# Client for a local Ollama server, reached through an OpenAI-style /v1
# endpoint on localhost. The api_key is a fixed placeholder string.
ollama_client = AsyncOpenAI(api_key="ollama", base_url="http://localhost:11434/v1")

# Client for Gemini, reached through Google's OpenAI-style endpoint and
# authenticated with the GOOGLE_API_KEY value read earlier.
# NOTE(review): google_api_key may be None when the variable is unset —
# confirm how AsyncOpenAI behaves in that case.
gemini_client = AsyncOpenAI(
    base_url="https://generativelanguage.googleapis.com/v1beta/openai/",
    api_key=google_api_key,
)

In [None]:
class Evaluation(BaseModel):
    """Structured verdict on the quality of one set of generated notes.

    ``evaluate_notes`` builds instances from the evaluator model's JSON reply,
    and ``generate_notes`` reads them to decide whether to retry. The scores
    are plain ints: the 1-5 range is a convention requested in the evaluator
    prompt and is not validated here.
    """
    # Overall pass/fail verdict from the evaluator.
    is_acceptable: bool
    # Free-text feedback; fed back into the generator prompt on a retry.
    feedback: str
    clarity_score: int  # Score from 1-5
    accuracy_score: int # Score from 1-5

In [None]:
async def generate_notes(text_chunk: str, retries: int = 2, feedback: str = "") -> str:
    """
    Generate structured Markdown notes for one text chunk with llama3:8b via Ollama.

    Each draft is scored by ``evaluate_notes``. When the draft is rejected (or
    either score is below 3) and retries remain, the generator is asked again
    with the evaluator's feedback *and the rejected draft*, so it has
    something concrete to improve.

    Args:
        text_chunk: Source text to summarise.
        retries: Maximum number of evaluate-and-regenerate rounds. With 0 the
            first draft is returned without evaluation.
        feedback: Optional feedback to apply to the very first attempt.

    Returns:
        The notes as a string. If generation fails before any draft exists,
        an ``"Error generating notes: ..."`` message is returned instead; if a
        retry fails, the most recent draft is returned.
    """
    system_prompt = (
        "You are an expert academic assistant. "
        "Read the provided text and produce well-organized, clear Markdown notes. "
        "Focus on key concepts, definitions, and main ideas. "
        "Keep the language simple but precise. "
        "Structure the notes with clear headings and bullet points."
    )

    previous_notes = ""
    remaining = retries

    while True:
        if feedback:
            # Show the rejected draft (when there is one) so "improve them"
            # refers to something the model can actually see.
            draft_section = (
                f"Previous Notes:\n{previous_notes}\n\n" if previous_notes else ""
            )
            user_prompt = (
                f"The previous notes were not acceptable. Improve them using this feedback:\n"
                f"{feedback}\n\n{draft_section}Original Text:\n{text_chunk}"
            )
        else:
            user_prompt = f"Generate concise academic notes for the following text:\n{text_chunk}"

        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ]

        try:
            response = await ollama_client.chat.completions.create(
                model="llama3:8b",
                messages=messages,
            )
            # content can be None; normalise so callers always get a string.
            notes = response.choices[0].message.content or ""
        except Exception as e:
            print(f"Error during Ollama generation: {e}")
            if previous_notes:
                # A failed retry should not discard a usable earlier draft.
                return previous_notes
            return f"Error generating notes: {e}"

        if remaining <= 0:
            return notes

        evaluation = await evaluate_notes(text_chunk, notes)  # pyright: ignore[reportUndefinedVariable]

        passed = (
            evaluation.is_acceptable
            and evaluation.clarity_score >= 3
            and evaluation.accuracy_score >= 3
        )
        if passed:
            print("Evaluation passed.")
            return notes

        print(f"Evaluation failed or low quality. Retrying with feedback: {evaluation.feedback}")
        feedback = evaluation.feedback
        previous_notes = notes
        remaining -= 1

In [None]:
async def generate_final_summary(all_notes: str):
    """
    Ask the Gemini client (model ``gemini-2.5-flash``) for an executive summary.

    The full set of notes is embedded in a single user message. Returns the
    model's reply, or the fixed string ``"Error generating final summary."``
    when the request or response handling raises.
    """
    print("Generating final summary...")

    instructions = (
        "You are an expert academic summarizer. "
        "Read all the provided notes (which were generated from a textbook) "
        "and generate a concise, high-level executive summary. "
        "This summary should capture the main themes, key takeaways, and "
        "overall structure of the content. Output in clean Markdown."
        f"\n\n--- All Notes ---\n{all_notes}"
    )
    conversation = [{"role": "user", "content": instructions}]

    try:
        completion = await gemini_client.chat.completions.create(
            model="gemini-2.5-flash",
            messages=conversation,
        )
        summary = completion.choices[0].message.content
    except Exception as e:
        print(f"⚠️ Gemini summary error: {e}")
        return "Error generating final summary."

    return summary


In [None]:
async def evaluate_notes(text_chunk: str, notes: str) -> Evaluation:
    """
    Score generated notes against their source text using ``gemini-2.5-flash``.

    The evaluator is asked for a JSON object with the four ``Evaluation``
    fields. Any failure (request error, invalid JSON, validation error) is
    reported and converted into a passing ``Evaluation`` so the caller's
    retry logic does not keep looping on a broken evaluator.
    """
    rubric = (
        "You are a strict quality assurance evaluator. Assess the provided notes "
        "based on their accuracy (do they match the original text?), "
        "clarity (are they easy to understand?), and "
        "completeness (did they miss key concepts from the text?). "
        "Return a JSON object with four keys only:\n"
        "1. is_acceptable (boolean): True if the notes are high quality, False otherwise.\n"
        "2. feedback (string): Specific, actionable feedback for improvement.\n"
        "3. clarity_score (int): A score from 1 (unclear) to 5 (very clear).\n"
        "4. accuracy_score (int): A score from 1 (inaccurate) to 5 (very accurate).\n\n"
        f"--- Original Text ---\n{text_chunk}\n\n"
        f"--- Notes ---\n{notes}"
    )
    request = {
        "model": "gemini-2.5-flash",
        "messages": [{"role": "user", "content": rubric}],
        # Ask for a bare JSON object so the reply can be parsed directly.
        "response_format": {"type": "json_object"},
    }

    try:
        completion = await gemini_client.chat.completions.create(**request)
        verdict = json.loads(completion.choices[0].message.content)
        return Evaluation(**verdict)
    except Exception as e:
        print(f" Gemini evaluation error: {e}")
        # Fall back to a "pass" so an unavailable evaluator cannot force retries.
        return Evaluation(
            is_acceptable=True,
            feedback=f"Evaluation agent failed: {e}",
            clarity_score=5,
            accuracy_score=5,
        )

In [None]:
def chunk_text(text: str, max_chars: int = 2500):
    """Break *text* into pieces of at most *max_chars* characters on whitespace.

    Words are never split, so a single word longer than *max_chars* becomes
    its own oversized piece. Existing whitespace characters (including
    newlines) inside a piece are kept as-is.
    """
    splitter = textwrap.TextWrapper(
        width=max_chars,
        replace_whitespace=False,
        break_long_words=False,
    )
    return splitter.wrap(text)

In [None]:
class StyledPDF(FPDF):
    """
    FPDF subclass that renders a small Markdown subset as a styled notes PDF.

    Supported Markdown: ``#``/``##``/``###`` (and deeper) headings, ``-``/``•``/``*``
    bullets with indented continuation lines, whole-line ``**bold**``,
    horizontal rules (``---``) and plain paragraphs.

    All text is drawn with the built-in Arial font, which can only encode
    Latin-1. Text is therefore passed through ``_to_latin1`` before drawing so
    that typographic characters commonly found in generated notes (bullets,
    curly quotes, dashes, emoji) cannot abort PDF generation.
    """

    # Width (mm) available to body text before it is wrapped.
    _TEXT_WIDTH = 180
    # Width (mm) of the column that holds a bullet marker.
    _BULLET_COLUMN = 10
    # Byte 0x95 is the bullet in the Windows-1252 layout used by the core fonts.
    _BULLET_GLYPH = '\x95'
    # Common typographic characters mapped to Latin-1-safe equivalents.
    _LATIN1_FALLBACKS = str.maketrans({
        '\u2022': '\x95',   # bullet
        '\u2018': "'",      # left single quote
        '\u2019': "'",      # right single quote
        '\u201c': '"',      # left double quote
        '\u201d': '"',      # right double quote
        '\u2013': '-',      # en dash
        '\u2014': '-',      # em dash
        '\u2026': '...',    # ellipsis
    })

    def __init__(self):
        super().__init__()
        self.current_color = (0, 0, 0)  # Default black
        self.page_height = 297  # A4 height in mm
        self.page_width = 210   # A4 width in mm

    @classmethod
    def _to_latin1(cls, text) -> str:
        """Return *text* with every character made encodable as Latin-1.

        Known typographic characters are mapped to close equivalents; anything
        else that cannot be encoded becomes ``?``. The operation is idempotent.
        """
        mapped = str(text).translate(cls._LATIN1_FALLBACKS)
        return mapped.encode('latin-1', 'replace').decode('latin-1')

    @staticmethod
    def _is_rule(line: str) -> bool:
        """Return True for a Markdown horizontal rule such as ``---`` or ``***``."""
        return len(line) >= 3 and line[0] in '-*_' and set(line) == {line[0]}

    def header(self):
        """Draw the running header: a centred caption above a thin grey line."""
        self.set_font('Arial', 'I', 8)
        self.set_text_color(128, 128, 128)
        self.cell(0, 10, 'AI Generated Textbook Notes', 0, 0, 'C')
        self.ln(5)
        # Decorative line
        self.set_line_width(0.5)
        self.set_draw_color(200, 200, 200)
        self.line(15, self.get_y(), 195, self.get_y())
        self.ln(5)

    def footer(self):
        """Draw the running footer: a thin grey line above the page number."""
        self.set_y(-15)
        self.set_font('Arial', 'I', 8)
        self.set_text_color(128, 128, 128)
        # Decorative line
        self.set_line_width(0.5)
        self.set_draw_color(200, 200, 200)
        self.line(15, self.get_y() - 5, 195, self.get_y() - 5)
        self.cell(0, 10, f'Page {self.page_no()}', 0, 0, 'C')

    def set_title_page(self, title: str, subtitle: str = ""):
        """Add a new page holding the title, optional subtitle and today's date.

        This method calls ``add_page()`` itself, so callers must not add a page
        beforehand.
        """
        self.add_page()

        # Add decorative top border
        self.set_line_width(2)
        self.set_draw_color(0, 51, 102)
        self.line(15, 30, 195, 30)

        # Main title (font must be set before measuring for wrapping)
        self.set_font('Arial', 'B', 24)
        self.set_text_color(0, 51, 102)
        self.ln(50)

        for line in self.split_text_to_lines(self._to_latin1(title), self._TEXT_WIDTH):
            self.cell(0, 15, line, 0, 1, 'C')

        self.ln(10)

        # Subtitle
        if subtitle:
            self.set_font('Arial', 'I', 14)
            self.set_text_color(102, 102, 102)
            for line in self.split_text_to_lines(self._to_latin1(subtitle), self._TEXT_WIDTH):
                self.cell(0, 10, line, 0, 1, 'C')

        self.ln(20)

        # Add generation info
        self.set_font('Arial', '', 12)
        self.set_text_color(128, 128, 128)
        date_str = datetime.now().strftime("%B %d, %Y")
        info_text = f"Generated on {date_str}"
        self.cell(0, 8, info_text, 0, 1, 'C')

        self.ln(30)

        # Decorative bottom border
        self.set_line_width(2)
        self.set_draw_color(0, 51, 102)
        self.line(15, self.get_y(), 195, self.get_y())

        self.ln(30)

    def add_table_of_contents(self, sections):
        """Add a page listing *sections* as a numbered table of contents.

        Args:
            sections: Iterable of dicts; only each dict's ``'title'`` is used.
        """
        self.add_page()

        # TOC Title
        self.set_font('Arial', 'B', 18)
        self.set_text_color(0, 51, 102)
        self.cell(0, 15, 'Table of Contents', 0, 1, 'C')
        self.ln(10)

        # Decorative line
        self.set_line_width(1)
        self.set_draw_color(0, 51, 102)
        self.line(30, self.get_y(), 180, self.get_y())
        self.ln(10)

        # TOC entries
        self.set_font('Arial', '', 12)
        self.set_text_color(0, 0, 0)

        for i, section in enumerate(sections, 1):
            # Section number and title
            self.cell(20, 8, f"{i}.", 0, 0, 'L')
            self.cell(120, 8, self._to_latin1(section['title']), 0, 1, 'L')

        self.ln(20)

    def split_text_to_lines(self, text, max_width):
        """Greedily wrap *text* into lines no wider than *max_width* (mm).

        Widths are measured with the currently selected font, so the caller
        must set the font first. Runs of whitespace collapse to single spaces.
        A single word wider than *max_width* is kept whole on its own line.
        Returns an empty list for empty or whitespace-only text.
        """
        words = text.split()
        lines = []
        current_line = ""

        for word in words:
            test_line = current_line + (" " if current_line else "") + word
            if self.get_string_width(test_line) <= max_width:
                current_line = test_line
            else:
                if current_line:
                    lines.append(current_line)
                current_line = word

        if current_line:
            lines.append(current_line)

        return lines

    def add_section_header(self, title: str):
        """Add a top-level section heading with a coloured underline.

        Long titles are wrapped instead of running past the right margin.
        """
        self.ln(10)
        self.set_font('Arial', 'B', 16)
        self.set_text_color(0, 51, 102)
        # "or ['']" keeps the original vertical advance for an empty title.
        for line in self.split_text_to_lines(self._to_latin1(title), self._TEXT_WIDTH) or ['']:
            self.cell(0, 12, line, 0, 1, 'L')

        # Decorative underline
        self.set_line_width(0.8)
        self.set_draw_color(0, 51, 102)
        self.line(15, self.get_y(), 100, self.get_y())
        self.ln(8)

    def _add_subheading(self, title: str) -> None:
        """Render a second-level (``##``) heading with a light underline."""
        self.set_font('Arial', 'B', 14)
        self.set_text_color(51, 102, 153)
        self.ln(5)
        self.cell(0, 10, self._to_latin1(title), 0, 1, 'L')
        self.set_line_width(0.5)
        self.set_draw_color(200, 200, 200)
        self.line(15, self.get_y(), 100, self.get_y())
        self.ln(8)

    def _add_minor_heading(self, title: str) -> None:
        """Render a third-level-or-deeper (``###`` and beyond) heading."""
        self.set_font('Arial', 'B', 12)
        self.set_text_color(102, 102, 153)
        self.ln(3)
        self.cell(0, 8, self._to_latin1(title), 0, 1, 'L')
        self.ln(3)

    def _add_rule(self) -> None:
        """Render a horizontal rule as a thin grey line."""
        self.ln(3)
        self.set_line_width(0.3)
        self.set_draw_color(200, 200, 200)
        self.line(15, self.get_y(), 195, self.get_y())
        self.ln(5)

    def add_content_line(self, text: str, indent: int = 0, style: str = ''):
        """Add a wrapped line of body text or a bullet item.

        Args:
            text: Text to draw. If it starts with ``•`` or ``-`` (after
                stripping) it is drawn as a bullet with a hanging indent.
            indent: Left indentation in mm, applied to every wrapped line.
            style: FPDF font style for the text (``''`` regular, ``'B'`` bold).
        """
        self.set_font('Arial', style, 11)
        self.set_text_color(0, 0, 0)

        stripped = text.strip()
        if stripped.startswith(('•', '-')):
            marker = self._BULLET_GLYPH if stripped[0] == '•' else '-'
            body = self._to_latin1(stripped[1:].strip())
            # Wrap within what is left after the indent and the marker column.
            available = self._TEXT_WIDTH - indent - self._BULLET_COLUMN
            wrapped = self.split_text_to_lines(body, available) or ['']
            for n, chunk in enumerate(wrapped):
                if indent > 0:
                    self.cell(indent, 6, '', 0, 0, 'L')
                # Only the first line carries the marker; the rest hang under the text.
                self.cell(self._BULLET_COLUMN, 6, marker if n == 0 else '', 0, 0, 'L')
                self.cell(0, 6, chunk, 0, 1, 'L')
            return

        # Regular text with word wrapping
        for chunk in self.split_text_to_lines(self._to_latin1(text), self._TEXT_WIDTH - indent):
            if indent > 0:
                self.cell(indent, 6, '', 0, 0, 'L')
            self.cell(0, 6, chunk, 0, 1, 'L')

    def process_markdown_content(self, markdown_content: str):
        """Render *markdown_content* line by line using the class's styles.

        Blank lines add vertical space. Headings, rules, bullets (with
        indented continuation lines folded into the bullet), whole-line bold
        and plain paragraphs are each dispatched to the matching renderer.
        """
        lines = markdown_content.split('\n')
        i = 0

        while i < len(lines):
            line = lines[i].strip()

            if not line:
                # Empty line
                self.ln(4)
                i += 1
                continue

            # Number of leading '#' characters; a heading needs a space after them.
            hashes = len(line) - len(line.lstrip('#'))

            if hashes and line[hashes:hashes + 1] == ' ':
                title = line[hashes:].strip()
                if hashes == 1:
                    self.add_section_header(title)
                elif hashes == 2:
                    self._add_subheading(title)
                else:
                    self._add_minor_heading(title)

            elif self._is_rule(line):
                self._add_rule()

            elif line.startswith(('- ', '• ', '* ')):
                # Bullet point
                content = line[2:].strip()
                # Fold indented continuation lines into this bullet. Indentation
                # must be tested on the raw line: stripping removes it.
                j = i + 1
                while j < len(lines):
                    raw = lines[j]
                    nxt = raw.strip()
                    if (
                        not nxt
                        or not raw.startswith(('  ', '\t'))
                        or nxt.startswith(('#', '-', '•', '* '))
                    ):
                        break
                    content += ' ' + nxt
                    j += 1
                i = j - 1
                self.add_content_line('• ' + content, indent=15)

            elif len(line) > 4 and line.startswith('**') and line.endswith('**'):
                # Whole-line bold; the style is passed down because
                # add_content_line selects the font itself.
                self.add_content_line(line[2:-2], style='B')

            else:
                # Regular paragraph
                self.add_content_line(line)

            i += 1



In [None]:
def _collect_top_level_sections(notes_markdown: str) -> list:
    """Return one ``{'title': ...}`` dict per top-level (``# ``) heading, in order."""
    stripped_lines = (raw.strip() for raw in notes_markdown.split('\n'))
    return [
        {'title': line[2:].strip()}
        for line in stripped_lines
        if line.startswith('# ')
    ]


def create_styled_pdf_file(notes_markdown: str, source_filename: str) -> "str | None":
    """
    Render *notes_markdown* into a styled PDF saved next to *source_filename*.

    The document gets a title page (title derived from the source file name),
    a table of contents with one entry per top-level heading, and the
    rendered notes.

    Args:
        notes_markdown: Markdown text to render.
        source_filename: Path of the source file; the output is written to
            ``<source without extension>_styled_notes.pdf``.

    Returns:
        The output path. If rendering fails, an error-report PDF is written to
        the same path and that path is returned; ``None`` is returned only
        when even the error report cannot be written.
    """
    title = os.path.splitext(os.path.basename(source_filename))[0].replace('_', ' ').title()
    output_filename = f"{os.path.splitext(source_filename)[0]}_styled_notes.pdf"

    try:
        pdf = StyledPDF()

        # Set up document properties
        pdf.set_title("AI Generated Textbook Notes")

        pdf.set_title_page(
            title=title,
            subtitle="Generated by AI Textbook Notes Generator"
        )

        # Only top-level headings become TOC entries; sub-headings must not
        # overwrite the title of the section that contains them.
        pdf.add_table_of_contents(_collect_top_level_sections(notes_markdown))

        pdf.process_markdown_content(notes_markdown)
        pdf.output(output_filename)

    except Exception as e:
        print(f"Error creating styled PDF: {e}")

        # Fallback: Create error report with same styling
        try:
            error_pdf = StyledPDF()

            # set_title_page() adds its own page, so no add_page() here;
            # calling both left a blank first page in the report.
            error_pdf.set_title_page(
                title="Error Report",
                subtitle="PDF Generation Failed"
            )

            error_section = "Error Details\n\n" + str(e)
            error_pdf.process_markdown_content(error_section)

            error_pdf.output(output_filename)

        except Exception as e2:
            print(f"CRITICAL: Failed to create error report: {e2}")
            return None

        print(f"Error report saved: {output_filename}")
        return output_filename

    print(f"✨ Beautiful PDF created successfully: {output_filename}")
    return output_filename



In [None]:
async def process_textbook(file, progress=gr.Progress()):
    """
    Run the full pipeline: extract text, chunk it, generate notes, summarise, build the PDF.

    Args:
        file: The uploaded file: either an object exposing the path as
            ``.name`` or the path string itself. ``None`` short-circuits.
        progress: Gradio progress tracker, updated at every stage.

    Returns:
        Path of the generated PDF, or ``None`` when no file was given, text
        extraction failed, or the PDF contained no extractable text.
    """
    if file is None:
        return None

    # Accept both an upload object carrying .name and a plain path string.
    pdf_file_path = getattr(file, "name", file)

    # STEP 1: Extract Text with PyMuPDF (fitz)
    print("Step 1/5: Extracting Text...")
    progress(0, desc="Step 1/5: Extracting Text...")
    page_texts = []
    try:
        # The context manager closes the document even if a page fails.
        with fitz.open(pdf_file_path) as doc:
            num_pages = len(doc)
            for i, page in enumerate(doc):
                progress((i + 1) / num_pages, desc=f"Extracting from Page {i + 1}/{num_pages}")
                page_texts.append(page.get_text())
    except Exception as e:
        print(f"Error extracting text with PyMuPDF: {e}")
        return None

    # Each page's text is followed by a newline, as a single join.
    full_text = "".join(f"{page_text}\n" for page_text in page_texts)

    if not full_text.strip():
        print("No text could be extracted from the PDF.")
        return None

    print(f"✅ Text extraction complete. Total characters: {len(full_text)}")

    # STEP 2: Chunk text and generate notes for each chunk in order
    chunks = chunk_text(full_text)
    num_chunks = len(chunks)
    all_notes = []

    print(f"Step 2/5: Generating notes from {num_chunks} text chunks...")
    progress(0, desc="Step 2/5: Generating Notes...")
    for i, chunk in enumerate(chunks):
        progress((i + 1) / num_chunks, desc=f"Processing Chunk {i + 1}/{num_chunks}")
        notes_chunk = await generate_notes(chunk)
        all_notes.append(notes_chunk)

    combined_notes = "\n\n---\n\n".join(all_notes)
    print("✅ Notes generation complete")

    # STEP 3: Generate Final Summary
    print("Step 3/5: Generating Final Summary...")
    progress(0.8, desc="Step 3/5: Generating Final Summary...")
    final_summary = await generate_final_summary(combined_notes)
    print("✅ Summary generation complete")

    final_markdown = f"# Executive Summary\n\n{final_summary}\n\n---\n\n# Detailed Notes\n\n{combined_notes}"

    # STEP 4: Create Styled PDF
    print("Step 4/5: Creating Beautiful PDF...")
    progress(0.9, desc="Step 4/5: Creating Beautiful PDF...")
    pdf_path = create_styled_pdf_file(final_markdown, pdf_file_path)

    # STEP 5: Done
    progress(1.0, desc="Step 5/5: Complete!")
    print("🎉 All processing complete!")

    return pdf_path

In [None]:
async def create_notes_interface(file, progress=gr.Progress(track_tqdm=True)):
    """
    Gradio click handler: turn the uploaded textbook into a notes PDF.

    The handler's return value feeds a ``gr.File`` output, so it must be a
    real file path. Problems are therefore reported by raising ``gr.Error``
    (shown to the user by Gradio) rather than by returning a message string
    that the file component would treat as a path.

    Args:
        file: The uploaded file from the ``gr.File`` input, or ``None``.
        progress: Gradio progress tracker forwarded to ``process_textbook``.

    Returns:
        Path of the generated PDF.

    Raises:
        gr.Error: If no file was uploaded, processing raised, or processing
            produced no output file.
    """
    if file is None:
        raise gr.Error("📚 Please upload a textbook (PDF format) to begin generating notes.")

    try:
        result = await process_textbook(file, progress)
    except Exception as e:
        raise gr.Error(f"❌ An error occurred: {str(e)}") from e

    if not result:
        raise gr.Error("❌ Failed to generate notes. Please check the input file.")

    return result

In [None]:
# Gradio UI: a centred single-page layout with a PDF upload, a download slot
# for the generated notes, a submit button and a status textbox.
# Components are created in display order inside the Blocks context.
with gr.Blocks(css="""
    .gradio-container { 
        max-width: 800px !important; 
        margin: auto !important; 
        font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
    }
    .title { 
        text-align: center; 
        color: #003366; 
        font-size: 28px; 
        margin-bottom: 10px;
    }
    .description { 
        text-align: center; 
        color: #666; 
        margin-bottom: 30px;
    }
""") as iface:
    
    # Static page header, styled by the .title / .description CSS rules above.
    gr.HTML("""
    <div class="title">📚 AI Textbook → PDF Notes Generator (v3.0)</div>
    <div class="description">
        Transform any textbook PDF into beautifully formatted study notes. 
        Upload your textbook and receive a professionally styled PDF with executive summary and detailed notes.
    </div>
    """)
    
    # Upload (left column) and download (right column) side by side.
    with gr.Row():
        with gr.Column(scale=1):
            file_input = gr.File(
                label="📖 Upload Your Textbook (PDF)", 
                file_types=[".pdf"],
                elem_id="file-upload"
            )
            
        with gr.Column(scale=1):
            output_file = gr.File(
                label="📥 Download Generated Notes (.pdf)", 
                elem_id="file-download"
            )
    
    submit_btn = gr.Button("🚀 Generate Notes", variant="primary", size="lg")
    
    # Enhanced status display
    # NOTE(review): this textbox is not listed in the inputs or outputs of the
    # click event below, so nothing in this file ever updates its value.
    status_display = gr.Textbox(
        label="📊 Processing Status", 
        value="Ready to process your textbook",
        interactive=False,
        elem_id="status-box"
    )
    
    # Connect interface elements: the handler receives the uploaded file and
    # its return value is sent to the output_file component only.
    submit_btn.click(
        fn=create_notes_interface,
        inputs=[file_input],
        outputs=[output_file],
        show_progress="full"
    )

# Launch the app on localhost only when this file is run as a script.
# NOTE(review): the printed URL hard-codes port 7860 while launch() is not
# given a server_port — confirm the actual port matches what is printed.
if __name__ == "__main__":
    print("🚀 Starting AI Textbook Notes Generator...")
    print("🌐 Interface will be available at: http://127.0.0.1:7860")
    iface.launch(server_name="127.0.0.1", share=False, show_error=True)