In [1]:
import base64
import contextlib
import io
import os
import tempfile
import urllib
from urllib.parse import urlencode

import gradio as gr
import requests
# Baidu OCR application credentials (the AK/SK pair consumed by get_access_token).
# They are read from the environment when available; the literals are only a
# fallback so that existing runs of this notebook keep working.
# NOTE(review): secrets should not be committed with the notebook — rotate these
# keys and drop the fallbacks once the environment variables are configured.
API_KEY = os.environ.get("BAIDU_OCR_API_KEY", "pS3lZ4yHm")
SECRET_KEY = os.environ.get("BAIDU_OCR_SECRET_KEY", "XcZX11De")

def get_access_token():
    """
    Exchange the API key / secret key pair (AK, SK) for an OAuth access token.

    :return: the access token as a string
    :raises RuntimeError: if the token endpoint's JSON answer contains no
        ``access_token`` (for example because the credentials were rejected)
    :raises requests.RequestException: on network failures, timeouts, or a
        non-JSON HTTP error response
    """
    url = "https://aip.baidubce.com/oauth/2.0/token"
    params = {"grant_type": "client_credentials", "client_id": API_KEY, "client_secret": SECRET_KEY}
    # Without a timeout a stalled connection would block the caller forever.
    response = requests.post(url, params=params, timeout=30)
    try:
        token_info = response.json()
    except ValueError:
        # The body is not JSON: report the HTTP error if there is one,
        # otherwise let the parse failure propagate.
        response.raise_for_status()
        raise

    access_token = token_info.get("access_token")
    if not access_token:
        # Returning str(None) here would hand the literal text "None" to callers,
        # who append it to a URL and only fail later with an unrelated error.
        reason = token_info.get("error_description") or token_info.get("error") or token_info
        raise RuntimeError(f"Failed to obtain access token: {reason}")
    return str(access_token)

## Gradio网页版

In [None]:
def main(image):
    """
    Run document OCR on an uploaded image and return the recognised text.

    :param image: PIL image (the Gradio input is declared with type='pil')
    :return: the ``word`` strings of all results, concatenated without separator
    :raises requests.HTTPError: if the OCR endpoint answers with an HTTP error status
    :raises RuntimeError: if the OCR response body reports an ``error_code``
    """
    url = "https://aip.baidubce.com/rest/2.0/ocr/v1/doc_analysis_office?access_token=" + get_access_token()

    # Base64 of the JPEG-encoded image, URL-quoted because the form body below
    # is assembled by hand rather than through urlencode().
    image_base64 = get_file_content_as_base64(image, True)

    payload = 'image=' + image_base64 + ('&detect_direction=true&line_probability=false&disp_line_poly=false'
                                         '&layout_analysis=false&recg_tables=true&recog_seal=true'
                                         '&recg_formula=true&erase_seal=true&disp_underline_analysis=true')

    headers = {
        'Content-Type': 'application/x-www-form-urlencoded',
        'Accept': 'application/json'
    }

    # Bound the wait so a stalled request cannot hang the web UI indefinitely.
    response = requests.request("POST", url, headers=headers, data=payload, timeout=60)
    response.raise_for_status()
    word_text = response.json()

    # A failed call carries error_code/error_msg instead of results; raise a
    # readable error rather than failing on the missing 'results' key.
    if 'error_code' in word_text:
        raise RuntimeError(f"OCR request failed ({word_text['error_code']}): {word_text.get('error_msg')}")

    return ''.join(item['words']['word'] for item in word_text.get('results', []))


def get_file_content_as_base64(image, urlencoded=False):
    """
    Encode an image as JPEG and return the bytes as base64 text.

    :param image: PIL image (anything exposing ``mode``, ``convert`` and ``save``)
    :param urlencoded: when true, additionally URL-quote the base64 text so it
        can be placed directly in a form-encoded request body
    :return: base64 (optionally URL-quoted) string of the JPEG data
    """
    # JPEG cannot store alpha or palette modes (RGBA, LA, P, ...); saving such an
    # image raises OSError, so convert it to RGB first. Modes the JPEG writer
    # accepts are passed through unchanged.
    if image.mode not in ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr"):
        image = image.convert("RGB")

    buffered = io.BytesIO()
    image.save(buffered, format='JPEG')
    content = base64.b64encode(buffered.getvalue()).decode("utf8")
    if urlencoded:
        content = urllib.parse.quote_plus(content)
    return content




# Build the Gradio UI: one uploaded image (delivered as a PIL object) goes in,
# the recognised text comes out.
iface = gr.Interface(
    title='文档扫描',
    fn=main,
    inputs=gr.Image(label='上传图片', type='pil'),
    outputs='text',
)

# Start the web app; share=True additionally requests a public share link.
iface.launch(share=True)


## 启用网站，提取文字

In [21]:
import requests
from PIL import Image, ImageDraw, ImageFont
import json
import gradio as gr

def get_path_base64(path, urlencoded=False):
    """
    Read a file and return its content as base64 text.

    :param path: path of the file to read
    :param urlencoded: whether to URL-quote the base64 text before returning it
    :return: the base64 (optionally URL-quoted) representation of the file
    """
    with open(path, "rb") as handle:
        raw_bytes = handle.read()

    encoded = base64.b64encode(raw_bytes).decode("utf8")
    return urllib.parse.quote_plus(encoded) if urlencoded else encoded


def word_main(path):
    """
    Run document OCR on an image file and return every recognised line with its polygon.

    :param path: path of the image file to recognise
    :return: list of ``(word, poly_location)`` tuples, one per entry of the
        response's ``results`` (empty list when nothing was recognised)
    :raises requests.HTTPError: if the OCR endpoint answers with an HTTP error status
    :raises RuntimeError: if the OCR response body reports an ``error_code``
    """
    url = "https://aip.baidubce.com/rest/2.0/ocr/v1/doc_analysis_office?access_token=" + get_access_token()

    # Base64 of the image file. The argument is a file path, so the path-based
    # helper is the right one here; urlencode() below does the URL quoting.
    base64_data = get_path_base64(path, False)

    # Request parameters
    payload = {
        'image': base64_data,
        # whether to detect the image orientation
        'detect_direction': 'true',
        # whether to return the confidence of each recognised line
        'line_probability': 'false',
        # whether to return the corner coordinates of each line
        'disp_line_poly': 'true',
        # text type: mixed handwriting / print recognition
        'words_type': 'handprint_mix',
        # whether to analyse the document layout
        'layout_analysis': 'false',
        # whether to recognise and return table information, including cell content
        'recg_tables': 'true',
        # whether to recognise and return seal information
        'recog_seal': 'false',
        # whether to recognise formulas (returned as LaTeX)
        'recg_formula': 'false',
        # whether to erase seals (erase_seal)
        'erase_seal': 'false',
        # whether to recognise and return underlines
        'disp_underline_analysis': 'false'}
    encode_payload = urlencode(payload)

    headers = {
        'Content-Type': 'application/x-www-form-urlencoded',
        'Accept': 'application/json'
    }

    # Bound the wait so a stalled request cannot hang the web UI indefinitely.
    response = requests.request("POST", url, headers=headers, data=encode_payload, timeout=60)
    # Fail on HTTP-level errors
    response.raise_for_status()
    # Parse the JSON response
    word_data = response.json()

    # A failed call carries error_code/error_msg instead of results.
    if 'error_code' in word_data:
        raise RuntimeError(f"OCR request failed ({word_data['error_code']}): {word_data.get('error_msg')}")

    # Return ALL boxes: draw_words_on_image iterates over (word, box) pairs, so
    # handing back only the first tuple would break that loop (and indexing [0]
    # would raise on an image without any text).
    return [(item['words']['word'], item['words']['poly_location'])
            for item in word_data.get('results', [])]

# 
def draw_words_on_image(image_path, word_boxes, font_path="simsun.ttc", font_size=12):
    """
    Draw each recognised word onto the image at the first point of its polygon.

    :param image_path: path of the image file to open and draw on
    :param word_boxes: iterable of ``(word, box)`` pairs; ``box`` is a sequence
        of points, each a mapping with ``'x'`` and ``'y'`` keys
    :param font_path: TrueType font used for the text; it must be resolvable by
        PIL on the current machine (the default is the SimSun font file name)
    :param font_size: font size passed to ``ImageFont.truetype``
    :return: the PIL image with the words drawn on it
    """
    # Open the image
    image = Image.open(image_path)
    draw = ImageDraw.Draw(image)

    # Load the font; raises OSError if the font file cannot be found
    font = ImageFont.truetype(font_path, font_size)

    # Draw the words on the image
    for word, box in word_boxes:
        if not box:
            # No polygon for this entry, so there is no position to draw at.
            continue
        # First point of the bounding polygon, used as the text anchor
        top_left = box[0]
        # Draw the text in black
        draw.text((top_left['x'], top_left['y']), word, font=font, fill=(0, 0, 0))

    # Return the annotated image
    return image

def process_image(image):
    """
    OCR an uploaded image and return a copy with the recognised text drawn on it.

    :param image: PIL image (the Gradio input is declared with type="pil")
    :return: PIL image produced by draw_words_on_image
    """
    # Use a unique temporary file per call: a fixed name in the working
    # directory would be overwritten by concurrent requests and never removed.
    fd, image_path = tempfile.mkstemp(suffix=".png")
    os.close(fd)
    try:
        # Save the PIL image so the path-based helpers can read it
        image.save(image_path)

        # Recognise the text in the image
        word_boxes = word_main(image_path)

        # Draw the recognised text onto the image
        processed_image = draw_words_on_image(image_path, word_boxes)
        # Make sure the pixel data is in memory before the file disappears
        processed_image.load()

        return processed_image
    finally:
        # Best-effort cleanup; a failure to delete must not mask the real result/error
        with contextlib.suppress(OSError):
            os.remove(image_path)


# Build the Gradio UI: a PIL image goes in, the annotated PIL image comes out.
iface = gr.Interface(
    title="OCR 文字识别",
    description="上传一张图片，识别图片中的文字并在相同位置上显示识别出的文字。",
    fn=process_image,
    inputs=gr.Image(type="pil"),
    outputs=gr.Image(type="pil"),
)

# Start the Gradio web app; share=True additionally requests a public share link.
iface.launch(share=True)

Running on local URL:  http://127.0.0.1:7862
Running on public URL: https://64e1606e4a5ec1375d.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




## 调用文心一言 概括文章内容，实现流式输出

In [73]:
import time
import requests
from PIL import Image, ImageDraw, ImageFont
import json
import gradio as gr

# Credentials (AK/SK pair) consumed by get_access_token_wenxin.
# They are read from the environment when available; the literals are only a
# fallback so that existing runs of this notebook keep working.
# NOTE(review): secrets should not be committed with the notebook — rotate these
# keys and drop the fallbacks once the environment variables are configured.
API_KEY_wenxin = os.environ.get("WENXIN_API_KEY", "vxQOoOXPpMIM66yaaGhDd69Y")
SECRET_KEY_wenxin = os.environ.get("WENXIN_SECRET_KEY", "Nbt4lY5KCMaDp0F3oe8wsEt2Gketx8rP")



def wenxin_get_path_base64(image, urlencoded=False):
    """
    Encode an image as JPEG and return the bytes as base64 text.

    Despite the name, the argument is an image object, not a file path.

    :param image: PIL image (anything exposing ``mode``, ``convert`` and ``save``)
    :param urlencoded: when true, additionally URL-quote the base64 text
    :return: base64 (optionally URL-quoted) string of the JPEG data
    """
    # JPEG cannot store alpha or palette modes (RGBA, LA, P, ...); saving such an
    # image raises OSError, so convert it to RGB first. Modes the JPEG writer
    # accepts are passed through unchanged.
    if image.mode not in ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr"):
        image = image.convert("RGB")

    buffered = io.BytesIO()
    image.save(buffered, format="JPEG")
    content = base64.b64encode(buffered.getvalue()).decode("utf8")
    if urlencoded:
        content = urllib.parse.quote_plus(content)
    return content


def wenxin_word_main(image):
    """
    Run document OCR on a PIL image and return the recognised text.

    :param image: PIL image
    :return: the ``word`` strings of all results, concatenated without separator
    :raises requests.HTTPError: if the OCR endpoint answers with an HTTP error status
    :raises RuntimeError: if the OCR response body reports an ``error_code``
    """
    url = "https://aip.baidubce.com/rest/2.0/ocr/v1/doc_analysis_office?access_token=" + get_access_token()

    # Base64 of the JPEG-encoded image; urlencode() below does the URL quoting.
    base64_data = wenxin_get_path_base64(image, False)

    # Request parameters
    payload = {
        'image': base64_data,
        # whether to detect the image orientation
        'detect_direction': 'true',
        # whether to return the confidence of each recognised line
        'line_probability': 'false',
        # whether to return the corner coordinates of each line
        'disp_line_poly': 'true',
        # text type: mixed handwriting / print recognition
        'words_type': 'handprint_mix',
        # whether to analyse the document layout
        'layout_analysis': 'false',
        # whether to recognise and return table information, including cell content
        'recg_tables': 'true',
        # whether to recognise and return seal information
        'recog_seal': 'false',
        # whether to recognise formulas (returned as LaTeX)
        'recg_formula': 'false',
        # whether to erase seals (erase_seal)
        'erase_seal': 'false',
        # whether to recognise and return underlines
        'disp_underline_analysis': 'false'}
    encode_payload = urlencode(payload)

    headers = {
        'Content-Type': 'application/x-www-form-urlencoded',
        'Accept': 'application/json'
    }

    # Bound the wait so a stalled request cannot hang the web UI indefinitely.
    response = requests.request("POST", url, headers=headers, data=encode_payload, timeout=60)
    # Fail on HTTP-level errors
    response.raise_for_status()
    # Parse the JSON response
    word_data = response.json()

    # A failed call carries error_code/error_msg instead of results; raise a
    # readable error rather than failing on the missing "results" key.
    if 'error_code' in word_data:
        raise RuntimeError(f"OCR request failed ({word_data['error_code']}): {word_data.get('error_msg')}")

    return ''.join(item['words']['word'] for item in word_data.get("results", []))


def get_access_token_wenxin():
    """
    Exchange the Wenxin API key / secret key pair (AK, SK) for an OAuth access token.

    :return: the access token as a string
    :raises RuntimeError: if the token endpoint's JSON answer contains no
        ``access_token`` (for example because the credentials were rejected)
    :raises requests.RequestException: on network failures, timeouts, or a
        non-JSON HTTP error response
    """
    url = "https://aip.baidubce.com/oauth/2.0/token"
    params = {"grant_type": "client_credentials", "client_id": API_KEY_wenxin, "client_secret": SECRET_KEY_wenxin}
    # Without a timeout a stalled connection would block the caller forever.
    response = requests.post(url, params=params, timeout=30)
    try:
        token_info = response.json()
    except ValueError:
        # The body is not JSON: report the HTTP error if there is one,
        # otherwise let the parse failure propagate.
        response.raise_for_status()
        raise

    access_token = token_info.get("access_token")
    if not access_token:
        # Returning str(None) here would hand the literal text "None" to callers,
        # who append it to a URL and only fail later with an unrelated error.
        reason = token_info.get("error_description") or token_info.get("error") or token_info
        raise RuntimeError(f"Failed to obtain access token: {reason}")
    return str(access_token)



def main_wenxin(image):
    """
    OCR an image, ask the chat model to summarise the text, and stream the answer.

    This is a generator: after every streamed chunk it yields the whole text
    received so far, so a consumer that re-renders on each yield shows the
    answer growing.

    :param image: PIL image whose text is extracted with wenxin_word_main
    :yields: the accumulated answer; each chunk's ``result`` is followed by a newline
    :raises requests.HTTPError: if the chat endpoint answers with an HTTP error status
    :raises RuntimeError: if a response message reports an ``error_code``
    """
    url = "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/completions_pro?access_token=" + get_access_token_wenxin()

    payload = json.dumps({
        "messages": [
            {
                "role": "user",
                "content": wenxin_word_main(image)
            }
        ],
        "system":'根据我给你的内容，概括总结',
        "stream": True
    })
    headers = {
        'Content-Type': 'application/json'
    }

    # The timeout bounds the connect time and the wait between received bytes.
    response = requests.request("POST", url, headers=headers, data=payload, stream=True, timeout=60)
    try:
        response.raise_for_status()

        accumulated_text = ""  # everything received so far
        for line in response.iter_lines():
            if not line:
                continue
            text = line.decode('utf-8')
            if text.startswith('data:'):
                # Streamed chunk: JSON document after the "data:" prefix
                json_str = text[len('data:'):].strip()
            elif text.startswith('{'):
                # Bare JSON document without prefix (e.g. an error body)
                json_str = text
            else:
                # Any other line carries no JSON payload to parse
                continue

            json_data = json.loads(json_str)
            # An error message has no 'result'; raise instead of yielding blank output.
            if 'error_code' in json_data:
                raise RuntimeError(f"Chat request failed ({json_data['error_code']}): {json_data.get('error_msg')}")

            result = json_data.get('result', '')
            # Append the new chunk to the accumulated text
            accumulated_text += result + "\n"
            yield accumulated_text
    finally:
        # Release the connection even if the consumer stops iterating early.
        response.close()
# # 流式输出成功输出
#     all_text = ""  # 用于累积所有文本
#     for line in response.iter_lines():
#         if line:
#             json_str = line.decode('utf-8').split('data: ', 1)[-1]
#             # 加载json格式
#             json_data = json.loads(json_str)
#             result = json_data.get('result', '')
#             if result:
#                 all_text += result + "\n"  # 将每一行的结果添加到累积文本中
#         yield all_text
# Build the Gradio UI: an uploaded PIL image goes in, the streamed summary is
# shown in a multi-line text box.
iface = gr.Interface(
    title="概括文章内容",
    fn=main_wenxin,
    inputs=gr.Image(label="上传图片", type="pil"),
    outputs=gr.Textbox(lines=10),
)

# Start the web app; share=True additionally requests a public share link.
iface.launch(share=True)

Running on local URL:  http://127.0.0.1:7877
Running on public URL: https://b34ec4a05bb18e2690.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




1.场景：问路prompt:你是A/公司制作的A/助手小艺。你是一名英语老师正在教我在特定的场景下进行对话，现在请你扮演向导与我对话，请你同时使用英文和中文对我的问题进行解答。2.场景：入住酒店prompt:你是A/公司制作的A/助手小艺。你是一名英语老师正在教我在特定的场景下进行对话，现在请你扮演一名酒店的前台与我对话，请你同时使用英文和中文与我交流以帮助我提高英语水平。在交流的过程中如果询问了你的身份，请你说你是一名酒店前台。你的每次输出都必须要包含英文。示例：0
