diff --git a/README.md b/README.md index 3a27a3d..64fba27 100644 --- a/README.md +++ b/README.md @@ -32,7 +32,7 @@ _✨ QQ群聊 语录库 ✨_ - [x] 根据关键词投放聊天语录 - [x] 支持白名单内用户删除语录 - [x] 支持为指定语录增删标签 -- [ ] 回复协议更新使回复语录+指令的功能暂时无法正常工作 +- [x] [批量上传已有聊天截图(测试功能)](https://github.com/RongRongJi/nonebot_plugin_quote/blob/main/batch_upload.md) 你的star是对我最好的支持! @@ -46,6 +46,8 @@ _✨ QQ群聊 语录库 ✨_ +直接回复**结束**,即可终止上传通道。 + ### 随机发送语录 @机器人,发送**语录**指令,机器人将从语录库中随机挑选一条语录发送。 @@ -223,6 +225,10 @@ nonebot.load_plugins("src/plugins", "nonebot_plugin_quote") - 增加了是否需要at机器人的选项 - 增加了指令前缀 +### v0.3.3 (2023/4/2) + +- 增加批量上传语录功能(试验版) + ## 🎉 鸣谢 - [NoneBot2](https://github.com/nonebot/nonebot2):本插件使用的开发框架。 diff --git a/batch_upload.md b/batch_upload.md new file mode 100644 index 0000000..bede90d --- /dev/null +++ b/batch_upload.md @@ -0,0 +1,40 @@ +## 🎉 如何使用批量导入功能 + +### 适配版本 + +v0.3.3+ + +### 批量导入功能简介 + +如果您的个人电脑上保存有许多群聊天记录截图,想要直接接入本机器人插件,成为群语录库,**批量导入**可以帮助你实现这一功能。 + +### 使用方法 + +1. 批量导入功能只有超级管理员用户才能使用 + +超级管理员用户需要在 nonebot2 项目的 `.env` 文件中添加配置 + +``` +GLOBAL_SUPERUSER=["6666666"] +``` + +2. 受私聊和群聊API方法不同的限制,该功能只能走**群聊**。建议超级管理员创建一个新的群聊,只拉入机器人,再进行以下操作。 + + +3. 
# ---------------------------------------------------------------------------
# nonebot_plugin_quote/__init__.py -- additions made by this patch
# ---------------------------------------------------------------------------
import asyncio
import time

from .task import copy_images_files

# Usage text sent back when a mandatory field is missing from the command.
_BATCH_USAGE = '''指令如下:
batch_upload
qqgroup=123456
your_path=/home/xxx/images
gocq_path=/home/xxx/gocq/data/cache
tags=aaa bbb ccc'''

script_batch = on_regex(pattern="^{}batch_upload".format(plugin_config.quote_startcmd), **need_at)


def _batch_field(raw_msg, key):
    """Return the stripped value of the ``key=value`` field in *raw_msg*.

    The value runs to the end of the line the field appears on, so it may
    contain spaces and the field may be the very last line of the message.
    Returns ``None`` when the field is absent or its value is blank.
    """
    found = re.search(r"{}=([^\n]*)".format(re.escape(key)), raw_msg)
    if found is None:
        return None
    return found.group(1).strip() or None


def _save_quote_state(records, inverted):
    """Write the record list and the inverted index to their JSON files."""
    with open(plugin_config.record_path, 'w', encoding='UTF-8') as f:
        json.dump(records, f, indent=2, separators=(',', ': '), ensure_ascii=False)

    with open(plugin_config.inverted_index_path, 'w', encoding='UTF-8') as fc:
        json.dump(inverted, fc, indent=2, separators=(',', ': '), ensure_ascii=False)


@script_batch.handle()
async def script_batch_handle(bot: Bot, event: Event, state: T_State):
    """Bulk-import existing chat screenshots into a group's quote library.

    Only users listed in ``plugin_config.global_superuser`` may use the
    command, and only from a group chat. The whole command is sent as one
    multi-line message::

        batch_upload
        qqgroup=123456
        your_path=/home/name/project/data
        gocq_path=/home/name/gocq/data/cache
        tags=aaa bbb ccc

    ``qqgroup`` is the group whose quote library receives the images,
    ``your_path`` is the directory holding the screenshots, ``gocq_path`` is
    the go-cqhttp ``data/cache`` directory the images are copied into, and
    the optional ``tags`` line lists space-separated tags attached to every
    imported image in addition to the OCR-derived keywords.

    Each image is posted to the chat the command was issued in, OCR'd, and
    then registered under ``qqgroup``. State is persisted every five images
    and once more at the end.

    Raises:
        OSError: propagated from ``copy_images_files`` when a path cannot be
            read from or written to.
    """
    global inverted_index
    global record_dict
    global forward_index

    session_id = event.get_session_id()
    user_id = str(event.get_user_id())

    # Must be a superuser, and must be issued from a group chat.
    if user_id not in plugin_config.global_superuser:
        await script_batch.finish()
    if 'group' not in session_id:
        await script_batch.finish('该功能暂不支持私聊')

    # Group the command was sent from; images and progress are posted here.
    current_group = int(session_id.split('_')[1])

    raw_msg = str(event.get_message()).replace('\r', '')
    target_group = _batch_field(raw_msg, 'qqgroup')
    your_path = _batch_field(raw_msg, 'your_path')
    gocq_path = _batch_field(raw_msg, 'gocq_path')
    tags_value = _batch_field(raw_msg, 'tags')
    if target_group is None or your_path is None or gocq_path is None:
        await script_batch.finish(_BATCH_USAGE)

    # Parse the tag list once. Re-parsing it inside the loop would rebind the
    # variable and silently drop every tag but the first after image one.
    extra_tags = tags_value.split() if tags_value else []

    # Copy the screenshots into the go-cqhttp cache.
    image_files = copy_images_files(your_path, gocq_path)
    total_len = len(image_files)

    for idx, (imgid, img) in enumerate(image_files, start=1):
        save_file = '../cache/' + img
        await bot.send_msg(group_id=current_group, message='[CQ:image,file={}]'.format(save_file))
        # Non-blocking pause so the bot keeps serving other events meanwhile.
        await asyncio.sleep(2)

        # ``.get`` keeps a target group with no quotes yet from raising.
        if save_file in forward_index.get(target_group, ()):
            await bot.send_msg(group_id=current_group, message='上述图片已存在')
            continue

        try:
            ocr = await bot.ocr_image(image=imgid)
            ocr_content = handle_ocr_text(ocr['texts'])
        except exception.ActionFailed:
            await bot.send_msg(group_id=current_group, message='该图片ocr失败')
            continue

        await asyncio.sleep(1)
        inverted_index, forward_index = offer(target_group, save_file, ocr_content, inverted_index, forward_index)

        group_records = record_dict.setdefault(target_group, [])
        if save_file not in group_records:
            group_records.append(save_file)

        if extra_tags:
            _, forward_index, inverted_index = addTag(extra_tags, imgid, target_group, forward_index, inverted_index)

        # Persist every five images so a crash loses little progress.
        if idx % 5 == 0:
            _save_quote_state(record_dict, inverted_index)
            await bot.send_msg(group_id=current_group, message='当前进度{}/{}'.format(idx, total_len))

    _save_quote_state(record_dict, inverted_index)

    await bot.send_msg(group_id=current_group, message='批量导入完成')
    await script_batch.finish()


# ---------------------------------------------------------------------------
# nonebot_plugin_quote/task.py -- additions made by this patch
# ---------------------------------------------------------------------------
import hashlib
import shutil

IMAGE_EXTENSIONS = ['.jpg', '.jpeg', '.png', '.gif']

# Read size used when hashing, so large images are never loaded whole.
_MD5_CHUNK_SIZE = 65536


def copy_images_files(source, destinate):
    """Copy every image under *source* into *destinate* under a hashed name.

    *source* is walked recursively. Each file whose extension (compared
    case-insensitively) is in ``IMAGE_EXTENSIONS`` is copied to
    ``<destinate>/<md5>.image<ext>``, where ``<md5>`` is the hex MD5 of the
    file content.

    Returns:
        A list of ``(image_id, file_name)`` tuples in sorted traversal order,
        where ``image_id`` is ``"<md5>.image"`` and ``file_name`` is
        ``"<md5>.image<ext>"``.

    Raises:
        OSError: if a file cannot be read or *destinate* cannot be written.
    """
    image_files = []
    for root, dirs, files in os.walk(source):
        # Sort in place so traversal order is the same on every run.
        dirs.sort()
        for filename in sorted(files):
            extension = os.path.splitext(filename)[1].lower()
            if extension not in IMAGE_EXTENSIONS:
                continue
            image_path = os.path.join(root, filename)
            # Content hash of the image.
            md5 = get_img_md5(image_path) + '.image'
            tname = md5 + extension
            # Copy into the destination directory.
            destination_path = os.path.join(destinate, tname)
            try:
                shutil.copy(image_path, destination_path)
            except shutil.SameFileError:
                # The source tree overlaps the destination and this file is
                # already in place under its hashed name; nothing to copy.
                pass
            image_files.append((md5, tname))
    return image_files


def get_img_md5(img_path):
    """Return the hex MD5 digest of the file at *img_path*.

    MD5 is used only to derive a content-based file name, not for security.
    """
    digest = hashlib.md5()
    with open(img_path, 'rb') as f:
        for chunk in iter(lambda: f.read(_MD5_CHUNK_SIZE), b''):
            digest.update(chunk)
    return digest.hexdigest()


# NOTE: the same patch also bumps the package version in pyproject.toml from
# 0.3.2 to 0.3.3; that change is unaffected by this review.