Chriss Jordan Oboa

Professor Joe Garman

# **Georgetown University**

**Enhanced Mathematical Expressions Reader**

**Robust PDF Text Extraction:**
Uses a dual approach: PyPDF2 reads the PDF's embedded text first, and Tesseract OCR is used as a fallback when no text is found.


**Claude API Integration:**
Handles advanced processing of text with context-sensitive instructions.

**Text-to-Speech:**
Converts processed text into audio for accessibility or multitasking.


**Logging:**
Ensures traceability of operations and error handling.

In [1]:
!pip install pytesseract pdf2image PyPDF2 gtts requests
!apt-get install -y poppler-utils tesseract-ocr

Collecting pytesseract
  Downloading pytesseract-0.3.13-py3-none-any.whl.metadata (11 kB)
Collecting pdf2image
  Downloading pdf2image-1.17.0-py3-none-any.whl.metadata (6.2 kB)
Collecting PyPDF2
  Downloading pypdf2-3.0.1-py3-none-any.whl.metadata (6.8 kB)
Collecting gtts
  Downloading gTTS-2.5.4-py3-none-any.whl.metadata (4.1 kB)
Downloading pytesseract-0.3.13-py3-none-any.whl (14 kB)
Downloading pdf2image-1.17.0-py3-none-any.whl (11 kB)
Downloading pypdf2-3.0.1-py3-none-any.whl (232 kB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 232.6/232.6 kB 7.3 MB/s eta 0:00:00
Downloading gTTS-2.5.4-py3-none-any.whl (29 kB)
Installing collected packages: pytesseract, PyPDF2, pdf2image, gtts
Successfully installed PyPDF2-3.0.1 gtts-2.5.4 pdf2image-1.17.0 pytesseract-0.3.13
Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
The following additional packages will be installed:
  tesseract-ocr-eng tesseract-o

In [2]:
import os
# Placeholder value: replace the string below with a real Anthropic API key
# before running the cells that follow (they read ANTHROPIC_API_KEY from the
# environment).
# NOTE(review): avoid saving or sharing the notebook with a real key pasted here.
os.environ["ANTHROPIC_API_KEY"] = "API KEY Goes here"



---

PDF version
---



In [4]:
import os
import requests
import pytesseract
from pdf2image import convert_from_path
import PyPDF2
from gtts import gTTS
from IPython.display import Audio, display
import logging

# Emit INFO-and-above records with a timestamp, the level name and the message,
# then create the module-level logger used throughout this cell.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

class PDFTextToSpeechConverter:
    """
    Turn a PDF into spoken audio, with Claude rewriting the text in between.

    The workflow is: extract text with PyPDF2 (falling back to Tesseract OCR
    when the PDF yields no text), send the text to the Anthropic Messages
    API together with an instruction, then synthesize the reply with gTTS.
    """

    # Endpoint and version header used for every Messages API request.
    API_URL = "https://api.anthropic.com/v1/messages"
    API_VERSION = "2023-06-01"

    # Instruction prepended to the extracted text when the caller supplies none.
    DEFAULT_INSTRUCTION = (
        "Please process this academic/technical document with the following guidelines:\n\n"
        "1. Preserve the original document's structure, tone, and academic rigor\n"
        "2. Maintain the exact meaning and context of the original text\n"
        "3. ONLY convert mathematical and technical expressions into plain, conversational language\n"
        "4. Do NOT modify or simplify non-mathematical text\n"
        "5. When converting mathematical expressions:\n"
        "   - Use clear, verbal descriptions\n"
        "   - Explain symbols and notations\n"
        "   - Maintain the original mathematical intent\n"
        "   - Correct any expressions that are clearly incorrect\n"
        "6. Keep academic terminology where appropriate\n\n"
        "Original document content begins below:\n\n"
    )

    def __init__(self, api_key=None):
        """
        Initialize the PDF converter with optional API key.

        Args:
            api_key (str, optional): Anthropic API key.
                                     If not provided, tries to fetch from environment.

        Raises:
            ValueError: If no key is passed and ANTHROPIC_API_KEY is unset or empty.
        """
        # An explicit (non-empty) argument wins; otherwise fall back to the environment.
        self.api_key = api_key or os.getenv("ANTHROPIC_API_KEY")

        if not self.api_key:
            logger.error("No API key found. Please set ANTHROPIC_API_KEY environment variable.")
            raise ValueError("API key is required")

    def extract_text_from_pdf(self, pdf_path):
        """
        Extracts text from a PDF file using multiple methods.

        PyPDF2 is tried first. OCR (pdf2image + pytesseract) is used only
        when PyPDF2 produces nothing but whitespace.

        Args:
            pdf_path (str): Path to the PDF file

        Returns:
            str: Extracted text from the PDF, stripped of surrounding
                 whitespace. Empty if neither method found any text.

        Raises:
            Exception: Any error raised while reading or OCR-ing the file is
                       logged and re-raised unchanged.
        """
        try:
            # Pages must be read while the file handle is still open.
            with open(pdf_path, 'rb') as f:
                reader = PyPDF2.PdfReader(f)
                page_texts = [page.extract_text() for page in reader.pages]

            # Skip pages that produced no text; terminate each kept page with a newline.
            text = "".join(f"{page_text}\n" for page_text in page_texts if page_text)

            # If no text extracted, use OCR
            if not text.strip():
                logger.info("No text found. Attempting OCR...")
                text += "".join(
                    pytesseract.image_to_string(image) + "\n"
                    for image in convert_from_path(pdf_path)
                )

            if not text.strip():
                logger.warning("Could not extract any text from the PDF")

            return text.strip()

        except Exception as e:
            logger.error("Error extracting text from PDF: %s", e)
            raise

    @staticmethod
    def _extract_reply_text(payload):
        """Return the text of the first content block of a Messages API reply."""
        return payload["content"][0]["text"]

    def process_with_claude(self, prompt, model="claude-3-haiku-20240307",
                            max_tokens=1000, timeout=120):
        """
        Send prompt to Claude API for processing.

        Args:
            prompt (str): Text to process
            model (str, optional): Claude model to use
            max_tokens (int, optional): Upper bound on the length of the reply.
                Long documents may need a larger value; a warning is logged
                when the reply is cut off at this limit.
            timeout (float, optional): Seconds to wait for the HTTP request
                before giving up, so a stalled connection cannot hang forever.

        Returns:
            str: Processed text from Claude

        Raises:
            requests.RequestException: On network failure, timeout, or HTTP error status.
            KeyError, IndexError, TypeError: If the reply does not have the
                expected ``content[0].text`` shape.
        """
        headers = {
            "Content-Type": "application/json",
            "x-api-key": self.api_key,
            "anthropic-version": self.API_VERSION
        }

        data = {
            "model": model,
            "max_tokens": max_tokens,
            "messages": [
                {
                    "role": "user",
                    "content": prompt
                }
            ]
        }

        try:
            response = requests.post(self.API_URL, headers=headers, json=data,
                                     timeout=timeout)
            response.raise_for_status()
            payload = response.json()
            reply = self._extract_reply_text(payload)

        except requests.RequestException as e:
            logger.error("API Request Error: %s", e)
            raise
        except (KeyError, IndexError, TypeError) as e:
            # Covers a missing key, an empty content list, and a non-dict payload.
            logger.error("Response parsing error: %s", e)
            raise

        # The API reports "max_tokens" as the stop reason when the reply was cut short.
        if payload.get("stop_reason") == "max_tokens":
            logger.warning(
                "Claude reply was truncated at max_tokens=%s; "
                "the processed text is incomplete", max_tokens
            )

        return reply

    def text_to_speech(self, text, filename="output.mp3", language='en'):
        """
        Convert text to speech and save as an audio file.

        Args:
            text (str): Text to convert to speech
            filename (str, optional): Output audio filename
            language (str, optional): Language for text-to-speech

        Returns:
            str: Path to the generated audio file

        Raises:
            Exception: Any error from gTTS (synthesis or saving) is logged
                       and re-raised unchanged.
        """
        try:
            tts = gTTS(text, lang=language)
            tts.save(filename)
            logger.info("Audio saved as %s", filename)
            return filename

        except Exception as e:
            logger.error("Text-to-speech conversion error: %s", e)
            raise

    def play_audio(self, filename):
        """
        Play the audio file in Colab.

        Playback is best-effort: failures are logged, not raised.

        Args:
            filename (str): Path to the audio file
        """
        try:
            display(Audio(filename, autoplay=True))
        except Exception as e:
            logger.error("Error playing audio: %s", e)

    def process_pdf(self, pdf_path, claude_instruction=None):
        """
        Complete PDF processing workflow.

        Args:
            pdf_path (str): Path to the PDF file
            claude_instruction (str, optional): Custom instruction for Claude.
                When omitted or empty, DEFAULT_INSTRUCTION is used.

        Returns:
            tuple: ``(extracted_text, processed_text, audio_file)`` - the raw
                   PDF text, Claude's rewritten text, and the path of the
                   saved audio file.

        Raises:
            ValueError: If no text could be extracted from the PDF.
        """
        # Extract text from PDF
        extracted_text = self.extract_text_from_pdf(pdf_path)

        # Without document text the request would contain only the instruction,
        # so stop before spending an API call and narrating an unrelated reply.
        if not extracted_text:
            logger.error("No text extracted from %s; nothing to process", pdf_path)
            raise ValueError(f"No text could be extracted from {pdf_path!r}")

        # Prepare Claude prompt
        instruction = claude_instruction or self.DEFAULT_INSTRUCTION
        claude_prompt = instruction + extracted_text

        # Process text with Claude
        processed_text = self.process_with_claude(claude_prompt)

        # Convert to speech
        audio_file = self.text_to_speech(processed_text)

        return extracted_text, processed_text, audio_file

def main():
    """
    Run the interactive Colab demo end to end.

    Prompts for a PDF upload, runs the first uploaded file through
    PDFTextToSpeechConverter, prints the original and processed text, and
    plays the generated audio. Any exception is logged rather than raised.
    """
    try:
        # Stop early when no API key is available in the environment
        configured_key = os.getenv("ANTHROPIC_API_KEY")
        if not configured_key:
            logger.error("Please set the ANTHROPIC_API_KEY environment variable")
            return

        # Imported here, inside the try, so an import failure is logged like any other error
        from google.colab import files

        # Ask the user for a PDF
        logger.info("Please upload a PDF file")
        upload_result = files.upload()
        if not upload_result:
            logger.error("No file uploaded")
            return

        # Only the first uploaded file is processed
        pdf_path = next(iter(upload_result.keys()))
        logger.info(f"Processing PDF: {pdf_path}")

        # Build the converter and run the whole pipeline
        converter = PDFTextToSpeechConverter()
        outcome = converter.process_pdf(pdf_path)
        original_text, processed_text, audio_file = outcome

        # Show both versions of the text under their headings
        sections = (
            ("\n--- Original Text ---", original_text),
            ("\n--- Processed Text ---", processed_text),
        )
        for heading, body in sections:
            print(heading)
            print(body)

        # Finally, play the synthesized audio
        converter.play_audio(audio_file)

    except Exception as e:
        logger.error(f"An error occurred: {e}")

# Entry point: run the demo only when this code executes as the top-level
# module (e.g. as a script or notebook cell), not when it is imported.
if __name__ == "__main__":
    main()

ERROR:__main__:An error occurred: RangeError: Maximum call stack size exceeded.
