# AI Textbook Notes Generator — Corrected Notebook

This notebook contains a corrected, usable version of your **AI Textbook Notes Generator** application converted into a runnable Python script in notebook form.
- I fixed common errors and made the file-handling, chunking, and evaluation parsing more robust.
- This notebook still **expects** Ollama (or a compatible local LLM endpoint) and a Google Gemini-compatible API (if you want to run the evaluation / summary steps). If you don't have them, the code will gracefully fallback with informative messages.
- Before running cells that call external APIs, set environment variables `OPENAI_API_KEY` and `GOOGLE_API_KEY` (or adapt the code for your particular LLM endpoints).
- The notebook saves an executable single-file script and a Gradio UI; you can run it locally in an environment that has the required packages.

**Generated on:** 2025-10-29 14:29:17Z (UTC)


In [8]:

# Corrected single-file program (adapted from user's script)
# Save and run this in an environment with required packages installed.
# Notes on packages:
#   pip install gradio PyMuPDF openai python-dotenv fpdf nbformat

import os
import json
import gradio as gr
import textwrap
import asyncio
import fitz  # PyMuPDF
import sys
from dotenv import load_dotenv
from openai import AsyncOpenAI
from pydantic import BaseModel
from fpdf import FPDF
from datetime import datetime
from pathlib import Path
import traceback
import re

# Version info
VERSION = "4.0-fixed"
APP_NAME = "AI Textbook Notes Generator (Complete) - Notebook"

# -----------------
# ENVIRONMENT SETUP
# -----------------
def load_environment():
    """Load and validate environment configuration"""
    load_dotenv(override=True)
    openai_api_key = os.getenv("OPENAI_API_KEY")
    google_api_key = os.getenv("GOOGLE_API_KEY")
    # Return booleans and keys (keys may be None)
    if not openai_api_key:
        print("❌ OPENAI_API_KEY not found in environment")
    else:
        print(f"✅ OPENAI_API_KEY found: {openai_api_key[:8]}...")
    if not google_api_key:
        print("❌ GOOGLE_API_KEY not found in environment")
    else:
        print(f"✅ GOOGLE_API_KEY found: {google_api_key[:8]}...")
    ok = bool(openai_api_key and google_api_key)
    return ok, openai_api_key, google_api_key

# -----------------
# API CLIENT SETUP
# -----------------
def setup_api_clients(google_api_key=None):
    """
    Attempt to setup two clients:
      - a local LLM (Ollama-like) using AsyncOpenAI pointing at localhost (if available)
      - a Gemini/OpenAI-like client using provided google_api_key (if provided)
    The code is defensive: it returns None for unavailable clients but does not crash.
    """
    ollama_client = None
    gemini_client = None
    try:
        # Local Ollama-like endpoint if running Ollama with an OpenAI-compatible path
        # If you don't have this, these clients remain None and the code will fallback.
        ollama_client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="ollama")
        print("✅ Created Ollama-like AsyncOpenAI client (local) - connection not tested yet.")
    except Exception as e:
        print(f"⚠️  Could not create Ollama client: {e}")

    try:
        if google_api_key:
            gemini_client = AsyncOpenAI(api_key=google_api_key, base_url="https://generativelanguage.googleapis.com/v1beta/openai/")
            print("✅ Created Gemini/OpenAI-like client (configured).")
        else:
            print("⚠️  No google_api_key provided — Gemini client not configured.")
    except Exception as e:
        print(f"⚠️  Could not create Gemini client: {e}")

    return ollama_client, gemini_client

# -----------------
# DATA MODELS
# -----------------
class Evaluation(BaseModel):
    is_acceptable: bool
    feedback: str
    clarity_score: int
    accuracy_score: int

# -----------------
# UTILITIES
# -----------------
def chunk_text(text: str, max_chars: int = 2500):
    """Split text into chunks of roughly max_chars length without breaking words."""
    if not text:
        return []
    chunks = []
    start = 0
    n = len(text)
    while start < n:
        end = min(start + max_chars, n)
        if end < n:
            # try to roll back to nearest newline or space
            roll = text.rfind("\n", start, end)
            if roll == -1:
                roll = text.rfind(" ", start, end)
            if roll != -1 and roll > start:
                end = roll
        chunks.append(text[start:end].strip())
        start = end
    return [c for c in chunks if c]

def validate_file_upload(file):
    """
    Accepts different forms returned by Gradio File:
      - None
      - a dict-like object with 'name' or 'tmp_path' keys (gradio uploads)
      - a direct path string or Path object
      - a file-like object with .name attribute
    Returns (filepath, error_message)
    """
    if file is None:
        return None, "❌ No file uploaded. Please select a PDF file."

    # Handle dict from gradio (it may have 'name' and 'tmp_path')
    if isinstance(file, dict):
        # For gradio v3+, file can be {'name': '...', 'size':..., 'tmp_path': '/tmp/xxx'}
        file_path = file.get("tmp_path") or file.get("name")
    elif hasattr(file, "name") and os.path.exists(file.name):
        file_path = file.name
    else:
        # Could be a string path
        file_path = str(file)

    if not file_path:
        return None, "❌ Unable to determine file path from upload."

    if not os.path.exists(file_path):
        return None, f"❌ File not found: {file_path}"

    if not file_path.lower().endswith(".pdf"):
        return None, f"❌ File must be a PDF. Received: {file_path}"

    try:
        file_size = os.path.getsize(file_path)
        if file_size == 0:
            return None, "❌ File is empty."
        if file_size > 50 * 1024 * 1024:
            return None, "❌ File too large. Maximum size is 50MB."
    except OSError:
        # Could not get size; ignore
        pass

    return file_path, None

# -----------------
# PDF TEXT EXTRACTION
# -----------------
async def extract_text_from_pdf(file_path, progress_callback=None):
    if progress_callback:
        progress_callback(0.1, "Initializing PDF extraction...")
    try:
        doc = fitz.open(file_path)
        num_pages = len(doc)
        if num_pages == 0:
            return None, "❌ PDF appears to be empty or corrupted."
        full_text = []
        for i, page in enumerate(doc):
            if progress_callback:
                progress = 0.1 + (i / max(1, num_pages)) * 0.3
                progress_callback(progress, f"Extracting from Page {i+1}/{num_pages}")
            try:
                page_text = page.get_text()
            except Exception:
                page_text = ""
            if page_text and page_text.strip():
                full_text.append(page_text.strip())
        doc.close()
        combined = "\n\n".join(full_text).strip()
        if not combined:
            return None, "❌ No readable text found in PDF. Ensure the PDF contains extractable text."
        if progress_callback:
            progress_callback(0.4, f"Text extraction complete. {len(combined)} characters found.")
        return combined, None
    except Exception as e:
        return None, f"❌ Error extracting text from PDF: {e}"

# -----------------
# AI NOTE GENERATION (defensive)
# -----------------
async def generate_notes_with_retry(ollama_client, text_chunk: str, retries: int = 2, feedback: str = ""):
    system_prompt = (
        "You are an expert academic assistant. Read the provided text and produce well-organized, clear Markdown notes. "
        "Focus on key concepts, definitions, and main ideas. Keep the language simple but precise. Structure the notes with clear headings and bullet points."
    )
    if feedback:
        user_prompt = f"The previous notes were not acceptable. Improve them using this feedback:\n{feedback}\n\nOriginal Text:\n{text_chunk}"
    else:
        user_prompt = f"Generate concise academic notes for the following text:\n{text_chunk}"

    messages = [{"role":"system","content":system_prompt},{"role":"user","content":user_prompt}]

    # If no ollama_client, return fallback short notes
    if not ollama_client:
        # fallback simple rule-based extraction: first 3 sentences as a quick 'note'
        sent = re.split(r'(?<=[.!?])\s+', text_chunk.strip())
        fallback = "\n\n".join(sent[:5]) if sent else text_chunk[:500]
        return "# Quick Notes (fallback)\n\n" + fallback

    try:
        # Attempt to use AsyncOpenAI chat completion interface
        response = await ollama_client.chat.completions.create(
            model="llama3:8b",
            messages=messages,
            timeout=60
        )
        # Some clients return different shapes; guard for content path
        content = None
        try:
            content = response.choices[0].message.content
        except Exception:
            # try alternative
            content = getattr(response, "content", None) or str(response)
        return content or "[No content returned from model]"
    except Exception as e:
        # On error, return a diagnostic message but keep pipeline moving
        return f"[Model generation error: {e}]"

async def evaluate_notes_quality(gemini_client, text_chunk: str, notes: str) -> Evaluation:
    prompt = (
        "You are a strict quality assurance evaluator. Assess the provided notes "
        "based on their accuracy (do they match the original text?), clarity (are they easy to understand?), and completeness (did they miss key concepts?). "
        "Return a JSON object with keys: is_acceptable (boolean), feedback (string), clarity_score (int 1-5), accuracy_score (int 1-5).\n\n"
        f"--- Original Text ---\n{text_chunk}\n\n--- Notes ---\n{notes}"
    )

    # If no gemini_client, produce a permissive default evaluation
    if not gemini_client:
        return Evaluation(is_acceptable=True, feedback="No evaluator available (fallback accepted).", clarity_score=4, accuracy_score=4)

    try:
        response = await gemini_client.chat.completions.create(
            model="gemini-2.5-flash",
            messages=[{"role":"user","content":prompt}],
            timeout=30
        )
        # Extract content and try to parse JSON inside
        raw = None
        try:
            raw = response.choices[0].message.content
        except Exception:
            raw = getattr(response, "content", None) or str(response)
        # Try to find a JSON blob in the string
        m = re.search(r'\{.*\}', raw, flags=re.S)
        if m:
            json_text = m.group(0)
            data = json.loads(json_text)
        else:
            # fallback simple eval: mark acceptable if notes length > small threshold
            data = {
                "is_acceptable": True if len(notes) > 100 else False,
                "feedback": "Parsed evaluation not found — applied heuristic fallback.",
                "clarity_score": 4,
                "accuracy_score": 4
            }
        return Evaluation(**data)
    except asyncio.TimeoutError:
        print("⚠️  Note evaluation timeout - accepting current quality")
        return Evaluation(is_acceptable=True, feedback="Evaluation timeout - accepted current quality", clarity_score=4, accuracy_score=4)
    except Exception as e:
        print(f"⚠️  Note evaluation error: {e}")
        return Evaluation(is_acceptable=True, feedback=f"Evaluation failed: {e}", clarity_score=4, accuracy_score=4)

async def generate_final_summary(gemini_client, all_notes: str):
    prompt = (
        "You are an expert academic summarizer. Read all the provided notes and generate a concise, high-level executive summary in clean Markdown.\n\n"
        f"--- All Notes ---\n{all_notes}"
    )
    if not gemini_client:
        # Simple heuristic summary fallback
        first = all_notes[:2000]
        return "# Executive Summary (fallback)\n\n" + first + "\n\n[Full summary unavailable because no Gemini client configured.]"
    try:
        response = await gemini_client.chat.completions.create(
            model="gemini-2.5-flash",
            messages=[{"role":"user","content":prompt}],
            timeout=45
        )
        try:
            return response.choices[0].message.content
        except Exception:
            return getattr(response, "content", str(response))
    except asyncio.TimeoutError:
        return "Executive Summary (Generation timeout - providing basic summary). This textbook covers important concepts."
    except Exception as e:
        print(f"⚠️  Executive summary error: {e}")
        return f"Executive Summary (Generation failed: {e})."

# -----------------
# PDF CREATION (FPDF)
# -----------------
class StyledPDF(FPDF):
    def __init__(self):
        super().__init__()
        self.set_auto_page_break(auto=True, margin=15)

    def header(self):
        self.set_font('Arial', 'I', 8)
        self.set_text_color(128,128,128)
        self.cell(0,10, APP_NAME, 0, 0, 'C')
        self.ln(5)

    def footer(self):
        self.set_y(-15)
        self.set_font('Arial', 'I', 8)
        self.set_text_color(128,128,128)
        self.cell(0,10, f'Page {self.page_no()}', 0, 0, 'C')

    def create_title_page(self, title: str):
        self.add_page()
        self.set_font('Arial', 'B', 24)
        self.set_text_color(0, 51, 102)
        self.ln(60)
        self.multi_cell(0, 10, title, align='C')
        self.ln(10)
        self.set_font('Arial', 'I', 14)
        self.set_text_color(102,102,102)
        self.multi_cell(0, 8, "Generated by AI Textbook Notes Generator", align='C')
        self.ln(10)
        self.set_font('Arial', '', 12)
        self.set_text_color(128,128,128)
        date_str = datetime.now().strftime("%B %d, %Y")
        self.multi_cell(0, 8, f"Generated on {date_str}", align='C')
        self.ln(20)
        self.set_line_width(2)
        self.set_draw_color(0,51,102)
        y = self.get_y()
        self.line(15, y, 195, y)

    def split_text(self, text, max_width):
        words = text.split()
        lines = []
        current_line = ""
        for word in words:
            test_line = current_line + (" " if current_line else "") + word
            if self.get_string_width(test_line) <= max_width:
                current_line = test_line
            else:
                if current_line:
                    lines.append(current_line)
                current_line = word
        if current_line:
            lines.append(current_line)
        return lines

    def add_section_header(self, title: str):
        self.ln(8)
        self.set_font('Arial', 'B', 16)
        self.set_text_color(0,51,102)
        self.cell(0, 12, title, 0, 1, 'L')
        self.set_line_width(0.8)
        self.set_draw_color(0,51,102)
        self.line(15, self.get_y(), 100, self.get_y())
        self.ln(6)

    def add_content_line(self, text: str, indent: int = 0):
        self.set_font('Arial', '', 11)
        self.set_text_color(0,0,0)
        if text.strip().startswith('•') or text.strip().startswith('-'):
            bullet = text.strip()[0]
            content = text.strip()[1:].strip()
            if indent > 0:
                self.cell(indent,6,'',0,0,'L')
            self.cell(6,6,bullet,0,0,'L')
            lines = self.split_text(content, 180 - indent - 10)
            if lines:
                self.cell(0,6,lines[0],0,1,'L')
                for line in lines[1:]:
                    if indent > 0:
                        self.cell(indent,6,'',0,0,'L')
                    self.cell(6,6,'',0,0,'L')
                    self.cell(0,6,line,0,1,'L')
        else:
            lines = self.split_text(text, 180 - indent)
            for i, line in enumerate(lines):
                if indent > 0 and i == 0:
                    self.cell(indent,6,'',0,0,'L')
                self.cell(0,6,line,0,1,'L')

    def process_markdown(self, markdown_content: str):
        lines = markdown_content.split('\n')
        for line in lines:
            line = line.rstrip()
            if not line:
                self.ln(4)
                continue
            if line.startswith('# '):
                title = line[2:].strip()
                self.add_section_header(title)
            elif line.startswith('## '):
                title = line[3:].strip()
                self.set_font('Arial','B',14)
                self.set_text_color(51,102,153)
                self.ln(4)
                self.multi_cell(0,8,title)
                self.set_line_width(0.5)
                self.set_draw_color(200,200,200)
                self.line(15, self.get_y(), 100, self.get_y())
                self.ln(6)
            elif line.startswith('- ') or line.startswith('• '):
                content = line[2:].strip()
                self.add_content_line('• ' + content, indent=15)
            else:
                self.add_content_line(line)

def create_styled_pdf(notes_markdown: str, source_filename: str) -> tuple:
    try:
        base_name = os.path.splitext(os.path.basename(source_filename))[0]
        output_filename = f"{base_name}_ai_notes.pdf"
        pdf = StyledPDF()
        title = base_name.replace('_',' ').title()
        pdf.create_title_page(title)
        pdf.process_markdown(notes_markdown)
        pdf.output(output_filename)
        print(f"✅ PDF created successfully: {output_filename}")
        return output_filename, None
    except Exception as e:
        tb = traceback.format_exc()
        return None, f"❌ Error creating PDF: {e}\n{tb}"

# -----------------
# MAIN PIPELINE
# -----------------
async def process_textbook_complete(file, progress_callback=None):
    if progress_callback:
        progress_callback(0.05, "Validating file...")
    file_path, error = validate_file_upload(file)
    if error:
        return None, error
    if progress_callback:
        progress_callback(0.1, "File validated successfully")
    env_ok, openai_key, google_key = load_environment()
    if not env_ok:
        # don't fail hard: continue but warn user
        print("⚠️  Environment not fully configured - continuing with fallbacks")
    if progress_callback:
        progress_callback(0.15, "Setting up API clients...")
    ollama_client, gemini_client = setup_api_clients(google_key)
    if progress_callback:
        progress_callback(0.2, "API clients ready (or fallbacks configured)")
    if progress_callback:
        progress_callback(0.4, "Extracting text from PDF...")
    full_text, error = await extract_text_from_pdf(file_path, progress_callback)
    if error:
        return None, error
    if progress_callback:
        progress_callback(0.45, "Starting note generation...")
    chunks = chunk_text(full_text)
    all_notes = []
    num_chunks = len(chunks) or 1
    if progress_callback:
        progress_callback(0.5, f"Processing {len(chunks)} text chunks...")
    for i, chunk in enumerate(chunks):
        if progress_callback:
            progress = 0.5 + (i / num_chunks) * 0.3
            progress_callback(progress, f"Generating notes - Chunk {i+1}/{num_chunks}")
        try:
            notes_chunk = await generate_notes_with_retry(ollama_client, chunk)
            evaluation = await evaluate_notes_quality(gemini_client, chunk, notes_chunk)
            if not evaluation.is_acceptable or evaluation.clarity_score < 3 or evaluation.accuracy_score < 3:
                print(f"🔄 Retrying chunk {i+1} with feedback: {evaluation.feedback}")
                notes_chunk = await generate_notes_with_retry(ollama_client, chunk, retries=1, feedback=evaluation.feedback)
            all_notes.append(notes_chunk)
        except Exception as e:
            tb = traceback.format_exc()
            print(f"⚠️  Error processing chunk {i+1}: {e}\n{tb}")
            all_notes.append(f"[Note: Error processing this section - {str(e)}]")
    if progress_callback:
        progress_callback(0.8, "Note generation complete")
    combined_notes = "\n\n---\n\n".join(all_notes)
    if progress_callback:
        progress_callback(0.82, "Generating executive summary...")
    final_summary = await generate_final_summary(gemini_client, combined_notes)
    if progress_callback:
        progress_callback(0.88, "Executive summary complete")
    final_markdown = f"# Executive Summary\n\n{final_summary}\n\n---\n\n# Detailed Notes\n\n{combined_notes}"
    if progress_callback:
        progress_callback(0.9, "Creating beautiful PDF...")
    pdf_path, error = create_styled_pdf(final_markdown, file_path)
    if error:
        return None, error
    if progress_callback:
        progress_callback(1.0, "Processing complete!")
    return pdf_path, None

# ---------------
# GRADIO UI
# ---------------
def create_interface():
    with gr.Blocks(css=css) as interface:
    gr.HTML(f"<div style='text-align:center'><h2>{APP_NAME} v{VERSION}</h2></div>")
    with gr.Row():
        with gr.Column(scale=1):
            file_input = gr.File(label='Upload PDF', file_types=['.pdf'])
            gr.Markdown("Requirements: PDF must contain extractable text. Max 50MB.")
        with gr.Column(scale=1):
            output_file = gr.File(label='Download Generated Notes (.pdf)')
    submit_btn = gr.Button('Generate Beautiful Notes')
    clear_btn = gr.Button('Clear')
    status_output = gr.Textbox(
        label='Current Status',
        value='Ready to process your textbook',
        interactive=False,
        lines=2
    )

    submit_btn.click(
        fn=process_with_progress,
        inputs=[file_input],
        outputs=[output_file]
    ).then(
        fn=lambda: "✅ Processing complete! Download your notes.",
        outputs=[status_output]
    )

    clear_btn.click(
        fn=lambda: (None, "Ready to process your textbook"),
        outputs=[output_file, status_output]
    )
return interface


# ---------------
# DIAGNOSTICS
# ---------------
async def run_system_diagnostics():
    print("🩺 Running System Diagnostics...")
    issues = []
    print(f"✅ Python version: {sys.version}")
    required_packages = ['gradio', 'fitz', 'openai', 'pydantic', 'fpdf', 'python_dotenv']
    for pkg in required_packages:
        try:
            __import__(pkg if pkg != 'python_dotenv' else 'dotenv')
            print(f"✅ {pkg} - available")
        except ImportError:
            print(f"❌ {pkg} - MISSING")
            issues.append(f"Install {pkg}: pip install {pkg}")
    env_ok, openai_key, google_key = load_environment()
    if not env_ok:
        issues.append("Configure OPENAI_API_KEY and GOOGLE_API_KEY in .env file")
    # Test Ollama lightly (non-blocking)
    try:
        ollama, gemini = setup_api_clients(google_key)
        if ollama:
            try:
                # attempt small ping (may fail depending on server)
                resp = await ollama.chat.completions.create(model='llama3:8b', messages=[{'role':'user','content':'hi'}], max_tokens=2, timeout=5)
                print("✅ Ollama-like service - reachable")
            except Exception as e:
                print(f"⚠️  Ollama reachable but ping failed: {e}")
    except Exception as e:
        print(f"⚠️  Ollama check failed: {e}")
    if not issues:
        print(\"🎉 All checks passed! System ready to use.\")
        return True
    else:
        print(\"⚠️  Issues found:\")
        for i, issue in enumerate(issues,1):
            print(f\"  {i}. {issue}\")
        return False

# ---------------
# MAIN
# ---------------
def main():
    print(f\"🚀 {APP_NAME} v{VERSION}\")
    try:
        ready = asyncio.run(run_system_diagnostics())
        if not ready:
            print(\"⚠️  Diagnostics found issues - you can still try running the interface.\")
    except Exception as e:
        print(f\"Diagnostics run failed: {e}\")
    try:
        iface = create_interface()
        iface.launch(server_name='127.0.0.1', share=False)
    except KeyboardInterrupt:
        print(\"Shutting down...\")
    except Exception as e:
        print(f\"Failed to start interface: {e}\")


if __name__ == '__main__':
    main()


IndentationError: expected an indented block after 'with' statement on line 496 (3677315812.py, line 497)