In [1]:
import yaml

class ConfigLoader:
    """Loads a YAML configuration file from a fixed path."""

    def __init__(self, config_path):
        """Remember the path of the YAML file to load.

        Args:
            config_path: Filesystem path handed to ``open`` by ``load_config``.
        """
        self.config_path = config_path

    def load_config(self):
        """Parse the YAML file at ``self.config_path`` and return the result.

        Returns:
            Whatever ``yaml.safe_load`` produces for the file contents.

        Raises:
            OSError: If the file cannot be opened.
        """
        # Decode explicitly as UTF-8 instead of relying on the platform's
        # locale encoding, so non-ASCII values (e.g. a Chinese book path)
        # are read the same way on every machine.
        with open(self.config_path, "r", encoding="utf-8") as f:
            return yaml.safe_load(f)

import argparse

class ArgumentParser:
    """Command-line interface definition for the PDF translation script.

    Wraps an ``argparse.ArgumentParser`` configured with the ``--config``,
    ``--book``, ``--model_url`` and ``--timeout`` options.
    """

    def __init__(self):
        """Build the underlying parser and register every supported option."""
        self.parser = argparse.ArgumentParser(description='Translate English PDF book to Chinese.')
        self.parser.add_argument('--config', type=str, default='config.yaml', help='Configuration file with model and API settings.')
        self.parser.add_argument('--book', type=str, help='PDF file to translate.')
        self.parser.add_argument('--model_url', type=str, help='The URL of the translation model API.')
        self.parser.add_argument('--timeout', type=int, help='Timeout for the API request in seconds.')

    def parse_arguments(self, argv=None):
        """Parse command-line options and return the resulting namespace.

        Args:
            argv: Optional list of argument strings. When ``None`` (the
                default, matching the previous behaviour) argparse reads
                ``sys.argv[1:]``. Passing an explicit list lets callers
                parse arguments without touching the process arguments.

        Returns:
            argparse.Namespace with ``config``, ``book``, ``model_url`` and
            ``timeout`` attributes; options that were not supplied and have
            no default are ``None``.
        """
        return self.parser.parse_args(argv)

class Environment:
    """Helpers for inspecting the runtime the code is executing in."""

    @staticmethod
    def is_jupyter():
        """Report whether a callable named ``get_ipython`` is available.

        Returns:
            True when calling ``get_ipython()`` does not raise ``NameError``,
            False otherwise.
        """
        try:
            get_ipython()
        except NameError:
            # The name is not defined in this runtime.
            return False
        return True



In [2]:
from enum import Enum, auto

class ModelType(Enum):
    """Enumeration of model kinds: ChatGLM, GPT_3 and GPT_3_5."""

    # Values are the consecutive integers starting at 1.
    ChatGLM = 1
    GPT_3 = 2
    GPT_3_5 = 3

class Model:
    """Plain holder for a model's URL, timeout and type."""

    def __init__(self, model_url, timeout, model_type=ModelType.ChatGLM):
        """Store the given settings as attributes of the same names.

        Args:
            model_url: Stored as ``self.model_url``.
            timeout: Stored as ``self.timeout``.
            model_type: Stored as ``self.model_type``; defaults to
                ``ModelType.ChatGLM``.
        """
        self.model_url, self.timeout, self.model_type = model_url, timeout, model_type


In [3]:
class TranslatorException:
    """Wraps an exception and renders a user-facing message for it.

    Despite its name this class is not an ``Exception`` subclass; it is a
    helper that classifies the wrapped exception in ``handle_exception``.
    """

    def __init__(self, e):
        """Store the exception to classify.

        Args:
            e: The exception instance that was caught by the caller.
        """
        self.exception = e

    def handle_exception(self):
        """Return a message describing the wrapped exception.

        Checks run from most specific to least specific so that every branch
        is reachable: JSON decoding errors first, then request timeouts, then
        any other ``requests`` error, and finally a generic fallback.

        Returns:
            str: The message for the first matching category.
        """
        # Imported here so the method does not depend on which other
        # notebook cells have already been executed.
        import json
        import requests

        # Collect every JSON decode error type that may be raised when a
        # response body is parsed.
        json_errors = [json.JSONDecodeError]
        requests_json_error = getattr(requests.exceptions, "JSONDecodeError", None)
        if requests_json_error is not None:
            # Where requests defines its own JSONDecodeError it must be tested
            # before RequestException, in case it derives from it.
            json_errors.append(requests_json_error)
        try:
            import simplejson
        except ImportError:
            # simplejson is optional; without it there is nothing more to match.
            pass
        else:
            json_errors.append(simplejson.errors.JSONDecodeError)

        exc = self.exception
        if isinstance(exc, tuple(json_errors)):
            return "Error: response is not valid JSON format."
        # Timeout is a subclass of RequestException, so it has to be tested
        # before the generic request branch or it would never match.
        if isinstance(exc, requests.exceptions.Timeout):
            return f"请求超时：{exc}"
        if isinstance(exc, requests.exceptions.RequestException):
            return f"请求异常：{exc}"
        return f"发生了未知错误：{exc}"
        
        
class PageOutOfRangeException(Exception):
    """Signals that more pages were requested than the book contains.

    Attributes are set from the constructor arguments: ``book_pages`` and
    ``requested_pages``.
    """

    def __init__(self, book_pages, requested_pages):
        """Build the error message and keep both page counts on the instance.

        Args:
            book_pages: Number of pages reported for the book.
            requested_pages: Number of pages the caller asked for.
        """
        message = f"Page out of range: Book has {book_pages} pages, but {requested_pages} pages were requested."
        super().__init__(message)
        self.book_pages, self.requested_pages = book_pages, requested_pages


In [11]:
import pdfplumber
from PyPDF2 import PdfReader
import requests
import os
import sys

class PDFTranslator:
    """Extracts text from a PDF, posts each page to a translation API and records the outcome.

    Typical use is ``translate_pdf(book, pages)``, which runs parsing,
    translation and success-rate calculation in sequence.
    """

    # Text stored and written to the output file when a page cannot be translated.
    _FAILURE_PLACEHOLDER = "[翻译失败]"

    def __init__(self, model):
        """Create a translator bound to ``model``.

        Args:
            model: Object exposing ``model_url`` and ``timeout`` attributes,
                both read when posting translation requests.
        """
        self.model = model
        # Extracted text, one entry per parsed page. parse_pdf() appends to
        # this list, so repeated calls on one instance accumulate pages.
        self.book_contents = []
        # One entry per request: the translation or the failure placeholder.
        self.translated_contents = []
        # One entry per request: 1 for success, 0 for failure.
        self.translate_status = []
        # Percentage of successful requests, set by calculate_success_rate().
        self.success_rate = 0
        # NOTE(review): these headers are stored but are not passed to
        # requests.post in translate_contents — confirm whether they should be.
        self.headers = {
            'Content-Type': 'application/json;charset=utf-8'
        }

    def parse_pdf(self, book, pages=None):
        """Extract page text from ``book`` into ``self.book_contents``.

        Args:
            book: Path of the PDF, passed to ``pdfplumber.open``.
            pages: Number of leading pages to read, or ``None`` for all pages.

        Raises:
            PageOutOfRangeException: If ``pages`` exceeds the page count.
        """
        with pdfplumber.open(book) as pdf:
            total_pages = len(pdf.pages)
            if pages is not None and pages > total_pages:
                raise PageOutOfRangeException(total_pages, pages)

            selected_pages = pdf.pages if pages is None else pdf.pages[:pages]
            for page in selected_pages:
                # Store an empty string for a page that yields no text so the
                # prompt never contains the literal "None".
                # (Presumably some pdfplumber versions return None here — TODO confirm.)
                self.book_contents.append(page.extract_text() or "")

    def _record_failure(self, error):
        """Log ``error`` to stderr and record a failed translation entry."""
        error_message = TranslatorException(error).handle_exception()
        print(error_message, file=sys.stderr)
        self.translated_contents.append(self._FAILURE_PLACEHOLDER)
        self.translate_status.append(0)

    def handle_translation_response(self, response):
        """Record the translation carried by ``response`` or a failure entry.

        Args:
            response: Object providing ``raise_for_status()`` and ``json()``;
                the decoded JSON must contain a ``"response"`` key.

        Exactly one entry is appended to both ``translated_contents`` and
        ``translate_status`` whether or not the response is usable.
        """
        try:
            response.raise_for_status()
            translation = response.json()["response"]
        except Exception as e:
            # Deliberately broad: any problem with this page is recorded as a
            # failure so the remaining pages can still be processed.
            self._record_failure(e)
        else:
            self.translated_contents.append(translation)
            self.translate_status.append(1)

    def translate_contents(self, book):
        """Translate every entry of ``self.book_contents`` and write the results to a text file.

        The output file is named after ``book`` with its extension replaced
        by ``_analyzed.txt`` and is written as UTF-8.

        Args:
            book: Path of the source PDF; used only to derive the output path.
        """
        # splitext copes with any extension length (or none), unlike slicing
        # off a fixed four characters.
        output_path = f"{os.path.splitext(book)[0]}_analyzed.txt"

        with open(output_path, "w", encoding="utf-8") as f:
            for text in self.book_contents:
                data = {
                    "prompt": f"翻译为中文：{text}",
                    "history": []
                }
                try:
                    response = requests.post(self.model.model_url, json=data, timeout=self.model.timeout)
                except requests.exceptions.RequestException as e:
                    # Timeouts and connection errors are raised by post()
                    # itself; record them as a failed page instead of
                    # aborting the whole run.
                    self._record_failure(e)
                else:
                    self.handle_translation_response(response)
                f.write(self.translated_contents[-1])

    def calculate_success_rate(self):
        """Set ``self.success_rate`` to the percentage of successful requests.

        With no recorded requests the rate is 0 rather than a division error.
        """
        total_requests = len(self.translate_status)
        if total_requests == 0:
            self.success_rate = 0
            return
        self.success_rate = sum(self.translate_status) / total_requests * 100

    def translate_pdf(self, book, pages=None):
        """Parse ``book``, translate its pages and compute the success rate.

        Args:
            book: Path of the PDF to translate.
            pages: Number of leading pages to translate, or ``None`` for all.

        Raises:
            PageOutOfRangeException: If ``pages`` exceeds the page count.
        """
        self.parse_pdf(book, pages)
        self.translate_contents(book)
        self.calculate_success_rate()

In [12]:
# Usage example
environment = Environment()

# Command-line arguments are parsed only when is_jupyter() reports False;
# otherwise args stays None and the default config path is used.
if environment.is_jupyter():
    args = None
    config_loader = ConfigLoader('config.yaml')
else:
    argument_parser = ArgumentParser()
    args = argument_parser.parse_arguments()
    config_loader = ConfigLoader(args.config)

config = config_loader.load_config()

# A truthy command-line value wins; otherwise fall back to the config entry.
# getattr(..., None) also covers the case where args itself is None.
book = getattr(args, 'book', None) or config['book']
model_url = getattr(args, 'model_url', None) or config['model_url']
timeout = getattr(args, 'timeout', None) or config['timeout']

model = Model(model_url=model_url, timeout=timeout)

# Instantiate PDFTranslator and call its translate_pdf() method
translator = PDFTranslator(model)
translator.translate_pdf(book, pages=1)

print("翻译完成！")
print("翻译成功率：", round(translator.success_rate, 4))
print("翻译状态：", translator.translate_status)

翻译完成！
翻译成功率： 100.0
翻译状态： [1]
