feat: batch_upload

RongRongJi · Apr 2, 2023 · 5e8c7d1 · 5e8c7d1
1 parent 3bf14ce
commit 5e8c7d1
Show file tree

Hide file tree

Showing 5 changed files with 169 additions and 3 deletions.
diff --git a/README.md b/README.md
@@ -32,7 +32,7 @@ _✨ QQ群聊 语录库 ✨_
 - [x] 根据关键词投放聊天语录 
 - [x] 支持白名单内用户删除语录
 - [x] 支持为指定语录增删标签
-- [ ] 回复协议更新使回复语录+指令的功能暂时无法正常工作
+- [x] [批量上传已有聊天截图(测试功能)](https://github.com/RongRongJi/nonebot_plugin_quote/blob/main/batch_upload.md)
 
 你的star是对我最好的支持！
 
@@ -46,6 +46,8 @@ _✨ QQ群聊 语录库 ✨_
 
 <img src="https://github.com/RongRongJi/nonebot_plugin_quote/raw/main/screenshot/upload.jpg" width="40%" />
 
+直接回复**结束**，即可终止上传通道。
+
 ### 随机发送语录
 
 @机器人，发送**语录**指令，机器人将从语录库中随机挑选一条语录发送。
@@ -223,6 +225,10 @@ nonebot.load_plugins("src/plugins", "nonebot_plugin_quote")
 - 增加了是否需要at机器人的选项
 - 增加了指令前缀
 
+### v0.3.3 (2023/4/2)
+
+- 增加批量上传语录功能（试验版）
+
 ## 🎉 鸣谢
 
 - [NoneBot2](https://github.com/nonebot/nonebot2)：本插件使用的开发框架。

diff --git a/batch_upload.md b/batch_upload.md
@@ -0,0 +1,40 @@
+## 🎉 如何使用批量导入功能
+
+### 适配版本
+
+v0.3.3+
+
+### 批量导入功能简介
+
+如果你的个人电脑上保存有许多群聊天记录截图，想要将它们直接导入本机器人插件、作为群语录库使用，**批量导入**可以帮助你实现这一功能。
+
+### 使用方法
+
+1. 批量导入功能只有超级管理员用户才能使用
+
+超级管理员用户需要在 nonebot2 项目的 `.env` 文件中添加配置
+
+```
+GLOBAL_SUPERUSER=["6666666"]
+```
+
+2. 受私聊和群聊API方法不同的限制，该功能只能走**群聊**。建议超级管理员创建一个新的群聊，只拉入机器人，再进行以下操作。
+
+
+3. 在群聊窗口中一次性输入下面的全部内容，即可开启批量上传通道。
+
+```
+batch_upload
+qqgroup=123456
+your_path=/home/name/project/data
+gocq_path=/home/name/gocq/data/cache
+tags=aaa bbb ccc
+```
+
+上述内容解释如下:
+
+向群号为123456的qq群批量上传语录。将保存在/home/name/project/data/目录下的所有聊天截图上传，你所使用的go-cqhttp下的data/cache目录为/home/name/gocq/data/cache/。这一批截图除了进行OCR自动识别标签外，还将全部额外附上aaa、bbb、ccc三个标签（每个标签用空格分开）。
+
+### 注意
+
+*该功能目前处于测试阶段，欢迎反馈。*
diff --git a/nonebot_plugin_quote/__init__.py b/nonebot_plugin_quote/__init__.py
@@ -14,8 +14,10 @@
 import sys
 import os
 from .task import offer, query, delete, handle_ocr_text, inverted2forward, findAlltag, addTag, delTag
+from .task import copy_images_files
 from .config import Config
 from nonebot.log import logger
+import time
 
 
 plugin_config = Config.parse_obj(get_driver().config)
@@ -465,3 +467,95 @@ async def deltag_handle(bot: Bot, event: Event, state: T_State):
 
     await deltag.finish()
 
+
+# Matcher for the batch-upload command: it fires on messages that begin with
+# the configured command prefix immediately followed by "batch_upload".
+script_batch = on_regex(
+    pattern="^{}batch_upload".format(plugin_config.quote_startcmd),
+    **need_at
+)
+
+def _dump_json(path, payload):
+    """Write ``payload`` to ``path`` as pretty-printed UTF-8 JSON."""
+    with open(path, 'w', encoding='UTF-8') as f:
+        json.dump(payload, f, indent=2, separators=(',', ': '), ensure_ascii=False)
+
+
+@script_batch.handle()
+async def script_batch_handle(bot: Bot, event: Event, state: T_State):
+    """Bulk-import local screenshots into a group's quote library.
+
+    The triggering message must carry the fields shown in ``instruction``
+    below (``qqgroup=``, ``your_path=``, ``gocq_path=`` and optionally
+    ``tags=``), one per line. Every image found under ``your_path`` is copied
+    into ``gocq_path``, posted to the current group, OCR-ed, indexed for the
+    target group and tagged with the extra tags, if any.
+
+    Args:
+        bot: Bot used to send messages and request OCR.
+        event: Incoming event; supplies the session id, user id and message.
+        state: Matcher state (unused here).
+    """
+    # Imported locally so this handler does not depend on a module-level
+    # import that may not exist in the rest of the file.
+    import asyncio
+
+    global inverted_index
+    global record_dict
+    global forward_index
+
+    session_id = event.get_session_id()
+    user_id = str(event.get_user_id())
+
+    # Only a configured superuser may run this, and only from a group chat.
+    # NOTE(review): the code relies on finish() ending the handler (no
+    # explicit return follows) -- confirm against the NoneBot matcher docs.
+    if user_id not in plugin_config.global_superuser:
+        await script_batch.finish()
+    if 'group' not in session_id:
+        await script_batch.finish('该功能暂不支持私聊')
+
+    groupNum = session_id.split('_')[1]
+
+    rqqid = r"qqgroup=(.*)\s"
+    ryour_path = r"your_path=(.*)\s"
+    rgocq_path = r"gocq_path=(.*)\s"
+    rtags = r"tags=(.*)"
+
+    raw_msg = str(event.get_message()).replace('\r', '')
+    group_id = re.findall(rqqid, raw_msg)
+    your_path = re.findall(ryour_path, raw_msg)
+    gocq_path = re.findall(rgocq_path, raw_msg)
+    tags = re.findall(rtags, raw_msg)
+    instruction = '''指令如下:
+batch_upload
+qqgroup=123456
+your_path=/home/xxx/images
+gocq_path=/home/xxx/gocq/data/cache
+tags=aaa bbb ccc'''
+    if not group_id or not your_path or not gocq_path:
+        await script_batch.finish(instruction)
+
+    # The greedy ``(.*)`` capture keeps trailing blanks typed before the line
+    # break; strip them so the group key and the paths are clean.
+    target_group = group_id[0].strip()
+    source_dir = your_path[0].strip()
+    cache_dir = gocq_path[0].strip()
+
+    # Parse the extra tags once. Re-splitting ``tags[0]`` inside the loop
+    # would overwrite the list and leave only the first tag for every image
+    # after the first one. ``split()`` also drops empty tags produced by
+    # repeated spaces or an empty ``tags=`` line.
+    tag_list = tags[0].split() if tags else []
+
+    # Collect the images (copied into the go-cqhttp cache directory).
+    image_files = copy_images_files(source_dir, cache_dir)
+
+    total_len = len(image_files)
+    notify_group = int(groupNum)
+
+    for idx, (imgid, img) in enumerate(image_files, start=1):
+        save_file = '../cache/' + img
+        await bot.send_msg(group_id=notify_group, message='[CQ:image,file={}]'.format(save_file))
+        # Non-blocking pause: time.sleep() here would stall the whole event loop.
+        await asyncio.sleep(2)
+        # A group with no quotes yet has no entry in forward_index.
+        if save_file in forward_index.get(target_group, ()):
+            await bot.send_msg(group_id=notify_group, message='上述图片已存在')
+            continue
+        try:
+            ocr = await bot.ocr_image(image=imgid)
+            ocr_content = handle_ocr_text(ocr['texts'])
+        except exception.ActionFailed:
+            await bot.send_msg(group_id=notify_group, message='该图片ocr失败')
+            continue
+
+        await asyncio.sleep(1)
+        inverted_index, forward_index = offer(target_group, save_file, ocr_content, inverted_index, forward_index)
+        group_records = record_dict.setdefault(target_group, [])
+        if save_file not in group_records:
+            group_records.append(save_file)
+
+        if tag_list:
+            _, forward_index, inverted_index = addTag(tag_list, imgid, target_group, forward_index, inverted_index)
+
+        # Persist every 5 images so an interrupted run loses little work.
+        if idx % 5 == 0:
+            _dump_json(plugin_config.record_path, record_dict)
+            _dump_json(plugin_config.inverted_index_path, inverted_index)
+
+            await bot.send_msg(group_id=notify_group, message='当前进度{}/{}'.format(idx, total_len))
+
+    # Final flush covers the images processed since the last checkpoint.
+    _dump_json(plugin_config.record_path, record_dict)
+    _dump_json(plugin_config.inverted_index_path, inverted_index)
+
+    await bot.send_msg(group_id=notify_group, message='批量导入完成')
+    await script_batch.finish()
diff --git a/nonebot_plugin_quote/task.py b/nonebot_plugin_quote/task.py
@@ -2,6 +2,8 @@
 import jieba
 import os
 import random
+import hashlib
+import shutil
 
 
 # 向语录库添加新的图片
@@ -186,4 +188,28 @@ def delTag(tags, img_name, group_id, forward_index, inverted_index):
             if len(inverted_index[group_id][tag]) == 0:
                 del inverted_index[group_id][tag]
     return path, forward_index, inverted_index
-
+
+
+IMAGE_EXTENSIONS = ['.jpg', '.jpeg', '.png', '.gif']
+def copy_images_files(source, destinate):
+    """Copy every image under ``source`` into ``destinate`` under an MD5-based name.
+
+    ``source`` is walked recursively; a file counts as an image when its
+    lower-cased extension is listed in ``IMAGE_EXTENSIONS``. Each copy is
+    named ``<md5>.image<extension>``.
+
+    Returns:
+        A list of ``(image_id, file_name)`` tuples, where ``image_id`` is
+        ``<md5>.image`` and ``file_name`` is the name of the copy inside
+        ``destinate``.
+    """
+    copied = []
+    for current_dir, _subdirs, names in os.walk(source):
+        for name in names:
+            suffix = os.path.splitext(name)[1].lower()
+            if suffix not in IMAGE_EXTENSIONS:
+                continue
+            src_path = os.path.join(current_dir, name)
+            # The content hash plus '.image' forms the image id.
+            image_id = get_img_md5(src_path) + '.image'
+            cached_name = image_id + suffix
+            # Copy into the destination directory under the new name.
+            shutil.copy(src_path, os.path.join(destinate, cached_name))
+            copied.append((image_id, cached_name))
+    return copied
+
+
+def get_img_md5(img_path):
+    """Return the hexadecimal MD5 digest of the file at ``img_path``.
+
+    The file is hashed in fixed-size chunks so memory use stays bounded
+    regardless of file size; the digest is identical to hashing the whole
+    content in one call.
+
+    Raises:
+        OSError: If the file cannot be opened or read.
+    """
+    digest = hashlib.md5()
+    with open(img_path, 'rb') as f:
+        # iter() with a sentinel stops at the empty bytes returned on EOF.
+        for chunk in iter(lambda: f.read(65536), b''):
+            digest.update(chunk)
+    return digest.hexdigest()
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "nonebot-plugin-quote"
-version = "0.3.2"
+version = "0.3.3"
 description = "一款适用于QQ群聊天的语录库插件"
 authors = ["RongRongJi <316315867@qq.com>"]
 license = "MIT"