# AI Agent: Textbook to Notebook Notes Generator 
Using LLaMA 3:8B for generation and GPT-4o-mini for evaluation


In [1]:
import os
import json
import gradio as gr
import textwrap
import asyncio
from PyPDF2 import PdfReader
from dotenv import load_dotenv
from openai import AsyncOpenAI
from pydantic import BaseModel
from fpdf import FPDF
from datetime import datetime
import re

In [2]:
load_dotenv(override=True)

True

In [3]:
openai_api_key = os.getenv("OPENAI_API_KEY")

if openai_api_key:
    print(f"OpenAI API Key found, starting with: {openai_api_key[:8]}...")
else:
    print("OpenAI API Key not found. Please set it in your .env file.")
    print("You can get a free API key from: https://platform.openai.com/api-keys")

OpenAI API Key not found. Please set it in your .env file.
You can get a free API key from: https://platform.openai.com/api-keys


In [4]:
# Initialize clients for both LLaMA (local) and OpenAI (evaluation)
llama_client = AsyncOpenAI(
    base_url="http://localhost:11434/v1",
    api_key="ollama"  # dummy key for Ollama
)

openai_client = AsyncOpenAI(api_key=openai_api_key)

In [5]:
class Evaluation(BaseModel):
    """Defines evaluator output."""
    is_acceptable: bool
    feedback: str

In [6]:
async def generate_notes_with_llama(text_chunk: str, chapter_info: str = "", retries=2, feedback=""):
    """
    Generate structured notes in PDF-friendly format using LLaMA 3:8B.
    Auto-retries with feedback from GPT-4o-mini evaluation if needed.
    """
    system_prompt = (
        "You are an expert academic assistant specialized in creating comprehensive textbook notes. "
        "Your task is to produce clear, well-structured notes that are perfect for PDF output.\n\n"
        "GUIDELINES:\n"
        "1. Use clear hierarchical headings (Chapter Title, Main Sections, Subsections)\n"
        "2. Include bullet points for key concepts and definitions\n"
        "3. Provide concise summaries of main ideas\n"
        "4. Highlight important formulas, theorems, or principles\n"
        "5. Include practical examples or applications when available\n"
        "6. Structure content for easy reading and studying\n"
        "7. Use clean formatting without special symbols like *, #, +, - except for bullet points\n"
        "8. Focus on extracting the most valuable academic content that students would need for exams and understanding.\n\n"
        "IMPORTANT: Avoid using special symbols *, #, +, - in the content except for proper bullet points."
    )

    if chapter_info:
        user_prompt = (
            f"Generate comprehensive academic notes for CHAPTER: {chapter_info}\n\n"
            f"TEXTBOOK CONTENT:\n{text_chunk}\n\n"
            "Please create well-structured notes with the following sections:\n"
            "1. Chapter Overview & Key Objectives\n"
            "2. Main Concepts & Definitions\n"
            "3. Important Formulas/Theorems/Principles\n"
            "4. Key Examples & Applications\n"
            "5. Chapter Summary & Key Takeaways\n"
            "6. Study Questions (if applicable)\n\n"
            "Format using clear headings and organized sections. Avoid special symbols."
        )
    else:
        user_prompt = f"Generate comprehensive academic notes for the following textbook content:\n{text_chunk}"

    if feedback:
        user_prompt = f"IMPROVEMENT FEEDBACK FROM PREVIOUS ATTEMPT:\n{feedback}\n\nPlease revise your notes accordingly:\n{user_prompt}"

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]

    try:
        print(f"📝 Generating notes with LLaMA for: {chapter_info[:50]}...")
        response = await llama_client.chat.completions.create(
            model="llama3:8b",
            messages=messages,
            temperature=0.3,
            max_tokens=2000,
        )
        notes = response.choices[0].message.content
        print(f"✅ LLaMA generation completed for: {chapter_info[:50]}...")
        
        # Apply guard rails to clean the notes
        notes = apply_guard_rails(notes)
        
    except Exception as e:
        print(f"❌ LLaMA generation error: {e}")
        return f"Error generating notes with LLaMA: {e}"

    # Evaluate quality and retry if needed
    if retries > 0:
        evaluation = await evaluate_notes_with_gpt4omini(text_chunk, notes, chapter_info)
        if not evaluation.is_acceptable:
            print(f"🔁 Retrying generation with feedback: {evaluation.feedback[:100]}...")
            return await generate_notes_with_llama(text_chunk, chapter_info, retries - 1, evaluation.feedback)
        else:
            print(f"✅ Evaluation passed for: {chapter_info[:50]}...")
    return notes

In [7]:
def apply_guard_rails(text: str) -> str:
    """
    Apply guard rails to clean unwanted symbols and ensure proper formatting.
    """
    # Remove unwanted symbols but preserve structure
    cleaned_text = text
    
    # Replace multiple special symbols with clean alternatives
    replacements = {
        # Remove or replace problematic symbols
        '***': '---',
        '**': '',  # Remove asterisks used for emphasis
        '*': '•',  # Convert single asterisks to proper bullets
        '##': '==',
        '#': '',   # Remove hash symbols
        '++': '',  # Remove plus symbols
        '--': '—', # Convert to proper em dash
    }
    
    for symbol, replacement in replacements.items():
        cleaned_text = cleaned_text.replace(symbol, replacement)
    
    # Remove any remaining isolated special symbols
    cleaned_text = re.sub(r'(?<!\w)[*#+](?!\w)', '', cleaned_text)
    
    # Ensure proper line breaks and spacing
    cleaned_text = re.sub(r'\n\s*\n', '\n\n', cleaned_text)
    cleaned_text = re.sub(r'[ \t]+', ' ', cleaned_text)
    
    # Clean up bullet points
    cleaned_text = re.sub(r'^[\s]*[-*•][\s]+', '• ', cleaned_text, flags=re.MULTILINE)
    
    return cleaned_text.strip()

In [8]:
async def evaluate_notes_with_gpt4omini(original_text: str, generated_notes: str, chapter_info: str = "") -> Evaluation:
    """
    Evaluates generated notes using GPT-4o-mini for accuracy, completeness, and academic quality.
    """
    evaluation_prompt = (
        "You are a strict academic quality evaluator for textbook notes. "
        "Evaluate the generated notes based on these CRITICAL criteria:\n\n"
        "ESSENTIAL CRITERIA:\n"
        "1. ACCURACY: Do the notes correctly represent the original content without factual errors?\n"
        "2. COMPLETENESS: Are all key concepts, definitions, and important information included?\n"
        "3. STRUCTURE: Is the content well-organized with clear headings and logical flow?\n"
        "4. CLARITY: Is the information presented clearly and concisely for student understanding?\n"
        "5. ACADEMIC VALUE: Would these notes be genuinely useful for studying and exam preparation?\n"
        "6. CLEAN FORMATTING: Are the notes free of unwanted symbols like *, #, +, - except for proper formatting?\n\n"
        "EVALUATION RULES:\n"
        "- REJECT if any key concepts are missing or misrepresented\n"
        "- REJECT if the structure is confusing or poorly organized\n"
        "- REJECT if the notes lack academic depth or practical study value\n"
        "- REJECT if there are excessive special symbols that affect readability\n"
        "- ACCEPT only if all criteria are satisfactorily met\n\n"
        f"CHAPTER CONTEXT: {chapter_info}\n\n"
        f"ORIGINAL TEXT (excerpt):\n{original_text[:1500]}...\n\n"
        f"GENERATED NOTES:\n{generated_notes}\n\n"
        "Provide your evaluation in JSON format with these exact keys:\n"
        "- 'is_acceptable' (boolean): true only if all criteria are met\n"
        "- 'feedback' (string): Specific, actionable feedback for improvement if rejected, or confirmation of quality if accepted"
    )

    try:
        print(f"🧪 Evaluating notes with GPT-4o-mini for: {chapter_info[:50]}...")
        response = await openai_client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[{"role": "user", "content": evaluation_prompt}],
            response_format={"type": "json_object"},
            max_tokens=800,
            temperature=0.1,
        )
        data = json.loads(response.choices[0].message.content)
        evaluation = Evaluation(**data)
        print(f"✅ Evaluation completed: {'ACCEPTED' if evaluation.is_acceptable else 'REJECTED'} - {chapter_info[:50]}...")
        return evaluation
    except Exception as e:
        print(f"⚠️ GPT-4o-mini evaluation error: {e}")
        return Evaluation(is_acceptable=True, feedback=f"Evaluation failed, defaulting to accept: {e}")

In [9]:
def detect_chapters(text: str):
    """
    Detect chapters in the text using comprehensive patterns.
    Returns list of chapter dictionaries with metadata.
    """
    # Comprehensive chapter patterns
    chapter_patterns = [
        r'^CHAPTER\s+(\d+[A-Z]*(?:\.\d+)*)[\s:\-]+\s*([^\n]{5,})',
        r'^Chapter\s+(\d+[A-Z]*(?:\.\d+)*)[\s:\-]+\s*([^\n]{5,})',
        r'^(\d+[A-Z]*(?:\.\d+)*)\s+([^\n]{10,})',
        r'^(\d+\.\d*)\s+([^\n]{10,})',
        r'^([IVXLCDM]+)\s+([^\n]{10,})',
        r'^Part\s+(\d+[A-Z]*)[\s:\-]+\s*([^\n]{5,})',
        r'^PART\s+(\d+[A-Z]*)[\s:\-]+\s*([^\n]{5,})',
        r'^Section\s+(\d+[A-Z]*(?:\.\d+)*)[\s:\-]+\s*([^\n]{5,})',
        r'^SECTION\s+(\d+[A-Z]*(?:\.\d+)*)[\s:\-]+\s*([^\n]{5,})',
    ]
    
    chapters = []
    lines = text.split('\n')
    
    for i, line in enumerate(lines):
        line = line.strip()
        if len(line) < 8:  # Too short to be a meaningful chapter title
            continue
            
        # Check if line is in uppercase or has chapter-like formatting
        is_likely_chapter = (line.isupper() or 
                           re.search(r'\b(chapter|part|section)\b', line, re.IGNORECASE) or
                           re.match(r'^(?:\d+|[IVXLCDM]+)(?:\.\s|\s+[A-Z])', line))
        
        for pattern in chapter_patterns:
            matches = re.findall(pattern, line, re.IGNORECASE | re.MULTILINE)
            if matches:
                for match in matches:
                    if len(match) == 2:
                        chapter_num = match[0].strip()
                        chapter_title = match[1].strip()
                        # Additional validation for meaningful titles
                        if (len(chapter_title) > 5 and 
                            not chapter_title.lower().startswith(('page', 'copyright', 'published')) and
                            not re.search(r'^\d+$', chapter_title)):
                            
                            start_idx = text.find(line)
                            chapters.append({
                                'number': chapter_num,
                                'title': chapter_title,
                                'start_idx': start_idx,
                                'line_index': i,
                                'full_line': line
                            })
                            break
    
    # Remove duplicates based on position
    chapters = sorted(chapters, key=lambda x: x['start_idx'])
    unique_chapters = []
    seen_positions = set()
    
    for chapter in chapters:
        if chapter['start_idx'] not in seen_positions:
            unique_chapters.append(chapter)
            seen_positions.add(chapter['start_idx'])
    
    # If no chapters detected, create content-based chunks
    if not unique_chapters:
        print("No chapter patterns detected, creating content-based chunks...")
        return create_content_chunks(text)
    
    # Calculate end indices
    for i in range(len(unique_chapters) - 1):
        unique_chapters[i]['end_idx'] = unique_chapters[i + 1]['start_idx']
    if unique_chapters:
        unique_chapters[-1]['end_idx'] = len(text)
    
    print(f"📚 Detected {len(unique_chapters)} chapters:")
    for chap in unique_chapters[:5]:
        print(f"  - {chap['number']}: {chap['title']}")
    if len(unique_chapters) > 5:
        print(f"  ... and {len(unique_chapters) - 5} more chapters")
    
    return unique_chapters

In [10]:
def create_content_chunks(text: str, max_chunk_size: int = 4000):
    """
    Create content-based chunks when chapter detection fails.
    """
    chunks = []
    sentences = re.split(r'[.!?]+', text)
    current_chunk = []
    current_size = 0
    
    for sentence in sentences:
        sentence = sentence.strip()
        if not sentence:
            continue
            
        sentence_size = len(sentence)
        if current_size + sentence_size > max_chunk_size and current_chunk:
            # Save current chunk and start new one
            chunk_text = ' '.join(current_chunk)
            chunks.append({
                'number': f"Section {len(chunks) + 1}",
                'title': f"Content Part {len(chunks) + 1}",
                'start_idx': text.find(chunk_text),
                'end_idx': text.find(chunk_text) + len(chunk_text),
                'line_index': len(chunks)
            })
            current_chunk = [sentence]
            current_size = sentence_size
        else:
            current_chunk.append(sentence)
            current_size += sentence_size
    
    # Add the last chunk
    if current_chunk:
        chunk_text = ' '.join(current_chunk)
        chunks.append({
            'number': f"Section {len(chunks) + 1}",
            'title': f"Content Part {len(chunks) + 1}",
            'start_idx': text.find(chunk_text),
            'end_idx': text.find(chunk_text) + len(chunk_text),
            'line_index': len(chunks)
        })
    
    return chunks

In [11]:
def extract_chapter_content(text: str, chapter_info: dict):
    """
    Extract content for a specific chapter with cleaning.
    """
    start = chapter_info['start_idx']
    end = chapter_info['end_idx']
    
    chapter_text = text[start:end].strip()
    
    # Clean up the text
    chapter_text = re.sub(r'\n\s*\n', '\n\n', chapter_text)  # Remove excessive newlines
    chapter_text = re.sub(r'[ \t]+', ' ', chapter_text)  # Normalize spaces
    
    return chapter_text

In [12]:
def sanitize_text_for_pdf(text: str) -> str:
    """
    Remove or replace problematic characters that cause font issues.
    Uses simple ASCII replacements for maximum compatibility.
    """
    # First apply guard rails
    text = apply_guard_rails(text)
    
    # Then additional PDF-specific sanitization
    replacements = {
        '•': '-',      '–': '-',      '—': '-',      '“': '"',      '”': '"',
        '‘': "'",      '’': "'",      '…': '...',    '→': '->',     '←': '<-',
        '≥': '>=',     '≤': '<=',     '×': 'x',      '÷': '/',      '±': '+/-',
        'α': 'alpha',  'β': 'beta',   'γ': 'gamma',  'δ': 'delta',  'ε': 'epsilon',
        'μ': 'mu',     'σ': 'sigma',  'π': 'pi',     'θ': 'theta',  'λ': 'lambda',
        '°': 'deg',    '∞': 'inf',    '≠': '!=',     '≡': '===',    '√': 'sqrt',
        '∑': 'sum',    '∏': 'prod',   '∫': 'integral', '∂': 'partial',
    }
    
    for unicode_char, ascii_char in replacements.items():
        text = text.replace(unicode_char, ascii_char)
    
    # Remove any remaining non-ASCII characters
    text = text.encode('ascii', 'ignore').decode('ascii')
    
    # Final cleanup of unwanted symbols
    text = re.sub(r'(?<!\w)[*#+](?!\w)', '', text)  # Remove isolated special symbols
    
    return text

In [13]:
def create_pdf_file(notes_by_chapter: dict, source_filename: str) -> str:
    """
    Convert chapter-wise notes into a well-formatted academic PDF with robust font handling.
    """
    title = os.path.splitext(os.path.basename(source_filename))[0].replace('_', ' ').title()
    output_filename = f"{os.path.splitext(source_filename)[0]}_chapter_notes.pdf"

    # Create PDF with core fonts only for maximum compatibility
    pdf = FPDF()
    pdf.set_auto_page_break(auto=True, margin=15)
    
    # Use only core fonts that are guaranteed to work
    font_family = "Helvetica"  # Core PDF font that always works
    
    # Add a page
    pdf.add_page()
    
    # Title
    pdf.set_font(font_family, "B", 16)
    pdf.cell(0, 10, sanitize_text_for_pdf(f"Chapter-wise Academic Notes: {title}"), 0, 1, "C")
    pdf.ln(5)
    
    # Metadata
    pdf.set_font(font_family, "I", 10)
    pdf.cell(0, 8, sanitize_text_for_pdf(f"Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M')}"), 0, 1, "C")
    pdf.cell(0, 8, sanitize_text_for_pdf(f"Total Chapters: {len(notes_by_chapter)}"), 0, 1, "C")
    pdf.cell(0, 8, sanitize_text_for_pdf(f"AI Models: LLaMA 3:8B (Generation) + GPT-4o-mini (Evaluation)"), 0, 1, "C")
    pdf.ln(10)
    
    # Table of Contents
    pdf.set_font(font_family, "B", 14)
    pdf.cell(0, 10, sanitize_text_for_pdf("Table of Contents"), 0, 1, "C")
    pdf.ln(8)
    
    pdf.set_font(font_family, "", 11)
    for chapter_num, notes in notes_by_chapter.items():
        # Extract chapter title from notes
        lines = notes.split('\n')
        chapter_title = "Chapter Content"
        for line in lines:
            if line.strip() and not line.startswith(('#', '-', '*')):
                chapter_title = line.strip()[:50] + "..." if len(line.strip()) > 50 else line.strip()
                break
        
        toc_line = sanitize_text_for_pdf(f"Chapter {chapter_num}: {chapter_title}")
        pdf.cell(0, 7, toc_line, 0, 1)
    pdf.ln(15)
    
    # Chapter contents
    for chapter_num, notes in notes_by_chapter.items():
        # Add new page for each chapter
        pdf.add_page()
        
        # Chapter header
        pdf.set_font(font_family, "B", 16)
        pdf.cell(0, 10, sanitize_text_for_pdf(f"Chapter {chapter_num}"), 0, 1, "C")
        pdf.ln(8)
        
        # Process notes content
        pdf.set_font(font_family, "", 11)
        lines = notes.split('\n')
        
        for line in lines:
            line = line.strip()
            if not line:
                pdf.ln(3)
                continue
                
            # Use sanitized text for maximum compatibility
            safe_line = sanitize_text_for_pdf(line)
            
            # Skip lines that are just special symbols
            if re.match(r'^[*#+-]+$', safe_line.strip()):
                continue
                
            # Handle markdown elements with improved formatting
            if safe_line.startswith('# '):
                pdf.set_font(font_family, "B", 14)
                pdf.cell(0, 8, safe_line[2:], 0, 1)
                pdf.set_font(font_family, "", 11)
                pdf.ln(2)
            elif safe_line.startswith('## '):
                pdf.set_font(font_family, "B", 12)
                pdf.cell(0, 7, safe_line[3:], 0, 1)
                pdf.set_font(font_family, "", 11)
                pdf.ln(2)
            elif safe_line.startswith('### '):
                pdf.set_font(font_family, "B", 11)
                pdf.cell(0, 6, safe_line[4:], 0, 1)
                pdf.set_font(font_family, "", 11)
                pdf.ln(1)
            elif safe_line.startswith('- ') or safe_line.startswith('• '):
                pdf.set_font(font_family, "", 11)
                pdf.cell(8)  # Indent
                bullet_text = safe_line[2:]
                # Use simple hyphen for bullets to avoid Unicode issues
                if len(bullet_text) > 80:
                    pdf.multi_cell(0, 5, f"- {bullet_text}")
                else:
                    pdf.cell(0, 5, f"- {bullet_text}", 0, 1)
            elif re.match(r'^\d+\.\s', safe_line):
                pdf.set_font(font_family, "", 11)
                pdf.cell(8)  # Indent
                numbered_text = safe_line
                if len(numbered_text) > 80:
                    pdf.multi_cell(0, 5, numbered_text)
                else:
                    pdf.cell(0, 5, numbered_text, 0, 1)
            elif safe_line == '---' or safe_line == '***':
                pdf.ln(3)
                pdf.line(10, pdf.get_y(), 200, pdf.get_y())
                pdf.ln(3)
            else:
                # Regular paragraph with multi_cell for wrapping
                pdf.multi_cell(0, 5, safe_line)
            
            pdf.ln(2)
        
        pdf.ln(10)
        
        # Add chapter separator
        pdf.set_draw_color(200, 200, 200)
        pdf.line(10, pdf.get_y(), 200, pdf.get_y())
        pdf.ln(5)
    
    # Footer
    pdf.set_y(-15)
    pdf.set_font(font_family, "I", 8)
    pdf.cell(0, 10, sanitize_text_for_pdf(f"Generated from: {os.path.basename(source_filename)} • Page {pdf.page_no()}"), 0, 0, 'C')
    
    try:
        pdf.output(output_filename)
        print(f"✅ PDF created successfully: {output_filename}")
        return output_filename
    except Exception as e:
        print(f"⚠️ PDF creation error: {e}")
        # Create comprehensive fallback text file
        fallback = f"{os.path.splitext(source_filename)[0]}_detailed_notes.txt"
        with open(fallback, "w", encoding="utf-8") as f:
            f.write(f"CHAPTER-WISE ACADEMIC NOTES\n")
            f.write(f"Textbook: {title}\n")
            f.write(f"Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M')}\n")
            f.write(f"AI Models: LLaMA 3:8B (Generation) + GPT-4o-mini (Evaluation)\n")
            f.write("=" * 80 + "\n\n")
            
            for chapter_num, notes in notes_by_chapter.items():
                f.write(f"CHAPTER {chapter_num}\n")
                f.write("=" * 50 + "\n")
                f.write(notes)
                f.write("\n\n" + "=" * 80 + "\n\n")
        
        print(f"📝 Saved detailed fallback text file: {fallback}")
        return fallback

In [14]:

async def process_textbook(file, progress=gr.Progress()):
    """
    Main processing pipeline: Extract text, detect chapters, generate notes with LLaMA, 
    evaluate with GPT-4o-mini, and output PDF.
    """
    if file is None:
        return "Please upload a textbook PDF to begin.", None

    try:
        pdf_file_path = file.name
        reader = PdfReader(pdf_file_path)
        num_pages = len(reader.pages)

        # Step 1 — Extract text from PDF
        progress(0, desc="Step 1/5: Extracting text from PDF...")
        full_text = ""
        for i, page in enumerate(reader.pages):
            progress((i + 1) / num_pages / 3, desc=f"Extracting Page {i + 1}/{num_pages}")
            page_text = page.extract_text()
            if page_text:
                full_text += page_text + "\n"

        if not full_text.strip():
            return "⚠️ No text could be extracted from the PDF. The file might be scanned or image-based.", None

        print(f"✅ Extracted {len(full_text)} characters from {num_pages} pages.")

        # Step 2 — Detect chapters
        progress(0.33, desc="Step 2/5: Analyzing textbook structure...")
        chapters = detect_chapters(full_text)
        
        if not chapters:
            return "⚠️ No clear chapter structure detected. Try a different textbook or check if the PDF has selectable text.", None

        # Step 3 — Generate notes for each chapter using LLaMA
        notes_by_chapter = {}
        total_chapters = len(chapters)
        
        progress(0.4, desc="Step 3/5: Generating notes with LLaMA 3:8B...")
        for i, chapter in enumerate(chapters):
            progress_percent = 0.4 + (i / total_chapters) * 0.4
            progress(progress_percent, desc=f"Processing Chapter {i + 1}/{total_chapters} with LLaMA")
            
            chapter_content = extract_chapter_content(full_text, chapter)
            chapter_info = f"{chapter['number']}: {chapter['title']}"
            
            if len(chapter_content) > 200:  # Only process if there's substantial content
                try:
                    notes = await generate_notes_with_llama(chapter_content, chapter_info)
                    notes_by_chapter[chapter['number']] = notes
                    print(f"✅ Completed chapter {chapter['number']}")
                except Exception as e:
                    notes_by_chapter[chapter['number']] = f"Chapter {chapter_info}\n\nError generating notes: {str(e)}"
                    print(f"❌ Error in chapter {chapter['number']}: {e}")
            else:
                notes_by_chapter[chapter['number']] = f"Chapter {chapter_info}\n\nInsufficient content for detailed notes (only {len(chapter_content)} characters)."
                print(f"⚠️ Skipped chapter {chapter['number']} (insufficient content)")

        # Step 4 — Create PDF
        progress(0.85, desc="Step 4/5: Creating formatted PDF...")
        pdf_output_path = create_pdf_file(notes_by_chapter, pdf_file_path)

        # Step 5 — Return results
        progress(1.0, desc="Step 5/5: Finalizing...")
        if os.path.exists(pdf_output_path):
            message = (
                f"✅ **Chapter-wise notes generated successfully!**\n\n"
                f"**Textbook Analysis Summary:**\n"
                f"• Pages processed: {num_pages}\n"
                f"• Chapters detected: {len(chapters)}\n"
                f"• Notes generated: {len(notes_by_chapter)}\n"
                f"• Output file: `{os.path.basename(pdf_output_path)}`\n\n"
                f"**AI Pipeline Used:**\n"
                f"• LLaMA 3:8B for content generation\n"
                f"• GPT-4o-mini for quality evaluation\n\n"
                f"**Processed Chapters:**\n" +
                "\n".join([f"  • Chapter {num}" for num in notes_by_chapter.keys()][:8])
            )
            if len(notes_by_chapter) > 8:
                message += f"\n  • ... and {len(notes_by_chapter) - 8} more chapters"
            
            message += f"\n\n**Download your chapter-wise notes using the file button below!**"
            return message, pdf_output_path
        else:
            return "❌ PDF generation failed. Check the console for errors.", None
            
    except Exception as e:
        return f"❌ Error processing PDF: {str(e)}", None

In [15]:
def create_notes_interface(file, progress=gr.Progress(track_tqdm=True)):
    """Handles Gradio interface flow synchronously to avoid event loop issues."""
    if file is not None:
        # Run the async function in the current event loop
        try:
            import asyncio
            # Try to get the running event loop, create new if none
            try:
                loop = asyncio.get_event_loop()
            except RuntimeError:
                loop = asyncio.new_event_loop()
                asyncio.set_event_loop(loop)
            
            # Run the async function
            return loop.run_until_complete(process_textbook(file, progress))
        except Exception as e:
            return f"❌ Error: {str(e)}", None
    return "Please upload a textbook PDF to generate chapter-wise notes.", None

In [16]:
# Check if Ollama is running
async def check_ollama_status():
    """Check if LLaMA 3:8B is available in Ollama."""
    try:
        client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="ollama")
        models = await client.models.list()
        llama_available = any('llama3:8b' in model.id for model in models.data)
        if llama_available:
            print("✅ LLaMA 3:8B is available in Ollama")
        else:
            print("❌ LLaMA 3:8B not found in Ollama. Please run: ollama pull llama3:8b")
        return llama_available
    except Exception as e:
        print(f"❌ Cannot connect to Ollama: {e}")
        print("Please make sure Ollama is running: ollama serve")
        return False

In [17]:
# Create the Gradio interface
iface = gr.Interface(
    fn=create_notes_interface,
    inputs=gr.File(
        label="Upload Textbook PDF", 
        file_types=[".pdf"]
    ),
    outputs=[
        gr.Markdown(label="Processing Report"),
        gr.File(label="Download Chapter-wise Notes")
    ],
    title="AI Textbook to Chapter-wise Notes Generator",
    description=(
        "Transform textbooks into comprehensive chapter-wise notes using advanced AI!\n\n"
        "LLaMA 3:8B generates detailed academic notes | "
        "GPT-4o-mini ensures quality evaluation\n\n"
        "What this AI pipeline does:\n"
        "1. Extracts text from your PDF textbook\n"
        "2. Automatically detects chapter structure\n"
        "3. Generates comprehensive notes using LLaMA 3:8B\n"
        "4. Evaluates quality with GPT-4o-mini\n"
        "5. Outputs a beautifully formatted PDF\n\n"
        "Note: Upload textbooks with selectable text (not scanned images)\n\n"
        "Perfect for: Students, Researchers, and Lifelong Learners!"
    ),
    flagging_mode="never"
)


print("🎉 All systems ready! Starting web interface...")
iface.launch(
    server_name="127.0.0.1", 
    share=False,
    show_error=True
)


🎉 All systems ready! Starting web interface...
* Running on local URL:  http://127.0.0.1:7860
* To create a public link, set `share=True` in `launch()`.




✅ Extracted 19920 characters from 11 pages.
📚 Detected 16 chapters:
  - I: S I SI S I SI S ell my Dreamsell my Dreamsell my Dreamsell my Dreamsell my Dreams
  - I: could not have imagined a more suitable spot for my
  - I: stayed in Vienna for more than a month, sharing the
  - Civil: W ar, on a stopover during a long sea voyage to
  - I: have never known anyone closer to the idea one has
  ... and 11 more chapters
📝 Generating notes with LLaMA for: I: S I SI S I SI S ell my Dreamsell my Dreamsell m...
✅ LLaMA generation completed for: I: S I SI S I SI S ell my Dreamsell my Dreamsell m...
🧪 Evaluating notes with GPT-4o-mini for: I: S I SI S I SI S ell my Dreamsell my Dreamsell m...
⚠️ GPT-4o-mini evaluation error: Error code: 401 - {'error': {'message': "You didn't provide an API key. You need to provide your API key in an Authorization header using Bearer auth (i.e. Authorization: Bearer YOUR_KEY), or as the password field (with blank username) if you're accessing the API from your br

  pdf.cell(0, 10, sanitize_text_for_pdf(f"Chapter-wise Academic Notes: {title}"), 0, 1, "C")
  pdf.cell(0, 8, sanitize_text_for_pdf(f"Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M')}"), 0, 1, "C")
  pdf.cell(0, 8, sanitize_text_for_pdf(f"Total Chapters: {len(notes_by_chapter)}"), 0, 1, "C")
  pdf.cell(0, 8, sanitize_text_for_pdf(f"AI Models: LLaMA 3:8B (Generation) + GPT-4o-mini (Evaluation)"), 0, 1, "C")
  pdf.cell(0, 10, sanitize_text_for_pdf("Table of Contents"), 0, 1, "C")
  pdf.cell(0, 7, toc_line, 0, 1)
  pdf.cell(0, 10, sanitize_text_for_pdf(f"Chapter {chapter_num}"), 0, 1, "C")
  pdf.cell(0, 5, numbered_text, 0, 1)
  pdf.cell(0, 5, f"- {bullet_text}", 0, 1)
  pdf.cell(0, 10, sanitize_text_for_pdf(f"Generated from: {os.path.basename(source_filename)} • Page {pdf.page_no()}"), 0, 0, 'C')
