Skip to content
This repository has been archived by the owner on Jun 9, 2024. It is now read-only.

Commit

Permalink
PDF reader integration
Browse files Browse the repository at this point in the history
  • Loading branch information
ajmtrz committed May 3, 2023
1 parent 379bd8d commit 5b8c3c9
Show file tree
Hide file tree
Showing 2 changed files with 255 additions and 0 deletions.
206 changes: 206 additions & 0 deletions src/autogpt_plugins/pdf_reader/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,206 @@
"""PDF reader integration."""
from typing import Any, Dict, List, Optional, Tuple, TypedDict, TypeVar
from auto_gpt_plugin_template import AutoGPTPluginTemplate
from .pdf_reader import read_pdf

# Placeholder type variable standing in for the prompt generator object; it is
# used in this module only to annotate hook parameters and return values.
PromptGenerator = TypeVar("PromptGenerator")

class Message(TypedDict):
    """Typed dictionary with two string fields, ``role`` and ``content``."""

    # Both keys are required (TypedDict default totality) and hold strings.
    role: str
    content: str

class AutoGPTPDFReader(AutoGPTPluginTemplate):
    """PDF Reader integration for Auto-GPT.

    The plugin enables a single hook, ``post_prompt``, which registers the
    ``read_pdf`` command on the prompt generator. Every other hook reports
    ``False`` from its ``can_handle_*`` check and its handler does nothing.
    """

    def __init__(self):
        super().__init__()
        self._name = "autogpt-pdf-reader"
        self._version = "0.1.0"
        self._description = "PDF Reader integration for Auto-GPT."

    # ------------------------------------------------------------------
    # Enabled hook: command registration
    # ------------------------------------------------------------------

    def can_handle_post_prompt(self) -> bool:
        """Report whether the plugin implements ``post_prompt``.

        Returns:
            bool: Always True; this is the only hook the plugin uses.
        """
        return True

    def post_prompt(self, prompt: PromptGenerator) -> PromptGenerator:
        """Register the ``read_pdf`` command on the prompt generator.

        Args:
            prompt (PromptGenerator): The prompt generator.
        Returns:
            PromptGenerator: The same prompt generator, with the command added.
        """
        prompt.add_command(
            "read_pdf",
            "URL or full local file path of the PDF file",
            {"pdf_path": "<pdf_path>"},
            read_pdf,
        )
        return prompt

    # ------------------------------------------------------------------
    # Disabled hooks: each check returns False and each handler is a no-op
    # ------------------------------------------------------------------

    def can_handle_on_response(self) -> bool:
        """Report whether the plugin implements ``on_response``.

        Returns:
            bool: Always False.
        """
        return False

    def on_response(self, response: str, *args, **kwargs) -> str:
        """Unused hook for model responses; does nothing and returns None."""
        return None

    def can_handle_on_planning(self) -> bool:
        """Report whether the plugin implements ``on_planning``.

        Returns:
            bool: Always False.
        """
        return False

    def on_planning(
        self, prompt: PromptGenerator, messages: List[str]
    ) -> Optional[str]:
        """Unused hook for the planning chat completion; returns None.

        Args:
            prompt (PromptGenerator): The prompt generator.
            messages (List[str]): The list of messages.
        """
        return None

    def can_handle_post_planning(self) -> bool:
        """Report whether the plugin implements ``post_planning``.

        Returns:
            bool: Always False.
        """
        return False

    def post_planning(self, response: str) -> str:
        """Unused hook for the planning chat completion result; returns None.

        Args:
            response (str): The response.
        """
        return None

    def can_handle_pre_instruction(self) -> bool:
        """Report whether the plugin implements ``pre_instruction``.

        Returns:
            bool: Always False.
        """
        return False

    def pre_instruction(self, messages: List[str]) -> List[str]:
        """Unused hook run before the instruction chat; returns None.

        Args:
            messages (List[str]): The list of context messages.
        """
        return None

    def can_handle_on_instruction(self) -> bool:
        """Report whether the plugin implements ``on_instruction``.

        Returns:
            bool: Always False.
        """
        return False

    def on_instruction(self, messages: List[str]) -> Optional[str]:
        """Unused hook for the instruction chat; returns None.

        Args:
            messages (List[str]): The list of context messages.
        """
        return None

    def can_handle_post_instruction(self) -> bool:
        """Report whether the plugin implements ``post_instruction``.

        Returns:
            bool: Always False.
        """
        return False

    def post_instruction(self, response: str) -> str:
        """Unused hook run after the instruction chat; returns None.

        Args:
            response (str): The response.
        """
        return None

    def can_handle_pre_command(self) -> bool:
        """Report whether the plugin implements ``pre_command``.

        Returns:
            bool: Always False.
        """
        return False

    def pre_command(
        self, command_name: str, arguments: Dict[str, Any]
    ) -> Tuple[str, Dict[str, Any]]:
        """Unused hook run before a command executes; returns None.

        Args:
            command_name (str): The command name.
            arguments (Dict[str, Any]): The arguments.
        """
        return None

    def can_handle_post_command(self) -> bool:
        """Report whether the plugin implements ``post_command``.

        Returns:
            bool: Always False.
        """
        return False

    def post_command(self, command_name: str, response: str) -> str:
        """Unused hook run after a command executes; returns None.

        Args:
            command_name (str): The command name.
            response (str): The response.
        """
        return None

    def can_handle_chat_completion(
        self,
        messages: list[Dict[Any, Any]],
        model: str,
        temperature: float,
        max_tokens: int,
    ) -> bool:
        """Report whether the plugin implements ``handle_chat_completion``.

        Args:
            messages (Dict[Any, Any]): The messages.
            model (str): The model name.
            temperature (float): The temperature.
            max_tokens (int): The max tokens.
        Returns:
            bool: Always False.
        """
        return False

    def handle_chat_completion(
        self,
        messages: list[Dict[Any, Any]],
        model: str,
        temperature: float,
        max_tokens: int,
    ) -> str:
        """Unused chat-completion hook; returns None.

        Args:
            messages (Dict[Any, Any]): The messages.
            model (str): The model name.
            temperature (float): The temperature.
            max_tokens (int): The max tokens.
        """
        return None
49 changes: 49 additions & 0 deletions src/autogpt_plugins/pdf_reader/pdf_reader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
import os
import requests
import magic
from io import BytesIO
import PyPDF2

def read_pdf(pdf_path: str) -> str:
    """Return the text of a PDF given a local file path or an HTTP(S) URL.

    This is the handler for the ``read_pdf`` plugin command, so it does not
    raise: every failure is reported back as a human-readable message string.

    Args:
        pdf_path (str): URL or full local file path of the PDF file.
    Returns:
        str: The concatenated text of all pages, or an error message when the
        path is invalid, the download fails, the content is not a PDF, or the
        PDF cannot be read.
    """
    try:
        is_local_file = os.path.isfile(pdf_path)

        # Anything that is neither an existing file nor an HTTP(S) URL cannot
        # be read. The check must look at ``pdf_path`` itself; testing an
        # undefined name here would make every URL fail with a NameError.
        if not is_local_file and not pdf_path.lower().startswith(
            ("http://", "https://")
        ):
            return (
                "Invalid URL or full local file path of the PDF file. "
                "Please ensure to provide the complete file path or valid URL."
            )

        if is_local_file:
            with open(pdf_path, "rb") as pdf_file:
                content = pdf_file.read()
        else:
            # Without a timeout, requests can block indefinitely on a stalled
            # server; a timeout error is reported by the handler below.
            response = requests.get(pdf_path, timeout=30)

            if response.status_code != 200:
                return "Error downloading the PDF file. Please check the URL and try again."

            content = response.content

        # Use python-magic to determine the file type based on content,
        # not on the file extension or the URL.
        file_type = magic.from_buffer(content, mime=True)

        if file_type != "application/pdf":
            return "Invalid file format. Only PDF files are supported."

        with BytesIO(content) as pdf_buffer:
            pdf_reader = PyPDF2.PdfReader(pdf_buffer)
            # Treat a page with no extractable text as an empty string so
            # the join never sees a non-string value.
            return "".join(page.extract_text() or "" for page in pdf_reader.pages)

    except Exception as e:
        # Deliberate catch-all: the command result is a string shown to the
        # agent, so errors are returned as text instead of propagating.
        return f"Error reading the PDF file: {str(e)}"

0 comments on commit 5b8c3c9

Please sign in to comment.