Skip to content

Commit

Permalink
feat: batch_upload
Browse files Browse the repository at this point in the history
  • Loading branch information
RongRongJi committed Apr 2, 2023
1 parent 3bf14ce commit 5e8c7d1
Show file tree
Hide file tree
Showing 5 changed files with 169 additions and 3 deletions.
8 changes: 7 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ _✨ QQ群聊 语录库 ✨_
- [x] 根据关键词投放聊天语录
- [x] 支持白名单内用户删除语录
- [x] 支持为指定语录增删标签
- [ ] 回复协议更新使回复语录+指令的功能暂时无法正常工作
- [x] [批量上传已有聊天截图(测试功能)](https://github.com/RongRongJi/nonebot_plugin_quote/blob/main/batch_upload.md)

你的star是对我最好的支持!

Expand All @@ -46,6 +46,8 @@ _✨ QQ群聊 语录库 ✨_

<img src="https://github.com/RongRongJi/nonebot_plugin_quote/raw/main/screenshot/upload.jpg" width="40%" />

直接回复**结束**,即可终止上传通道。

### 随机发送语录

@机器人,发送**语录**指令,机器人将从语录库中随机挑选一条语录发送。
Expand Down Expand Up @@ -223,6 +225,10 @@ nonebot.load_plugins("src/plugins", "nonebot_plugin_quote")
- 增加了是否需要at机器人的选项
- 增加了指令前缀

### v0.3.3 (2023/4/2)

- 增加批量上传语录功能(试验版)

## 🎉 鸣谢

- [NoneBot2](https://github.com/nonebot/nonebot2):本插件使用的开发框架。
Expand Down
40 changes: 40 additions & 0 deletions batch_upload.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
## 🎉 如何使用批量导入功能

### 适配版本

v0.3.3+

### 批量导入功能简介

如果你的个人电脑上保存有许多群聊天记录截图,想要将它们直接导入本机器人插件,作为群语录库,**批量导入**功能可以帮助你实现这一点。

### 使用方法

1. 批量导入功能只有超级管理员用户才能使用

超级管理员用户需要在 nonebot2 项目的 `.env` 文件中添加配置

```
GLOBAL_SUPERUSER=["6666666"]
```

2. 受私聊和群聊API方法不同的限制,该功能只能走**群聊**。建议超级管理员创建一个新的群聊,只拉入机器人,再进行以下操作。


3. 在群聊窗口中一次性输入下面的内容,即可开启批量上传通道。

```
batch_upload
qqgroup=123456
your_path=/home/name/project/data
gocq_path=/home/name/gocq/data/cache
tags=aaa bbb ccc
```

上述内容解释如下:

向群号为123456的qq群批量上传语录。将保存在/home/name/project/data/目录下的所有聊天截图上传,你所使用的go-cqhttp下的data/cache目录为/home/name/gocq/data/cache/。这一批截图除了进行OCR自动识别标签外,还将全部额外附上aaa、bbb、ccc三个标签(每个标签用空格分开)。

### 注意

*该功能目前处于测试阶段,欢迎反馈。*
94 changes: 94 additions & 0 deletions nonebot_plugin_quote/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,10 @@
import sys
import os
from .task import offer, query, delete, handle_ocr_text, inverted2forward, findAlltag, addTag, delTag
from .task import copy_images_files
from .config import Config
from nonebot.log import logger
import time


plugin_config = Config.parse_obj(get_driver().config)
Expand Down Expand Up @@ -465,3 +467,95 @@ async def deltag_handle(bot: Bot, event: Event, state: T_State):

await deltag.finish()


# script_batch = on_command('{}脚本123'.format(plugin_config.quote_startcmd), **need_at)
script_batch = on_regex(pattern="^{}batch_upload".format(plugin_config.quote_startcmd), **need_at)

@script_batch.handle()
async def script_batch_handle(bot: Bot, event: Event, state: T_State):

global inverted_index
global record_dict
global forward_index

session_id = event.get_session_id()
user_id = str(event.get_user_id())

# 必须是超级管理员群聊
if user_id not in plugin_config.global_superuser:
await script_batch.finish()
if 'group' not in session_id:
await script_batch.finish('该功能暂不支持私聊')

groupNum = session_id.split('_')[1]

rqqid = r"qqgroup=(.*)\s"
ryour_path = r"your_path=(.*)\s"
rgocq_path = r"gocq_path=(.*)\s"
rtags = r"tags=(.*)"

raw_msg = str(event.get_message())
raw_msg = raw_msg.replace('\r','')
group_id = re.findall(rqqid, raw_msg)
your_path = re.findall(ryour_path, raw_msg)
gocq_path = re.findall(rgocq_path, raw_msg)
tags = re.findall(rtags, raw_msg)
instruction = '''指令如下:
batch_upload
qqgroup=123456
your_path=/home/xxx/images
gocq_path=/home/xxx/gocq/data/cache
tags=aaa bbb ccc'''
if len(group_id) == 0 or len(your_path) == 0 or len(gocq_path) == 0:
await script_batch.finish(instruction)
# 获取图片
image_files = copy_images_files(your_path[0], gocq_path[0])

total_len = len(image_files)
idx = 0

for (imgid, img) in image_files:
save_file = '../cache/' + img
idx += 1
msg_id = await bot.send_msg(group_id=int(groupNum), message='[CQ:image,file={}]'.format(save_file))
time.sleep(2)
if save_file in forward_index[group_id[0]]:
await bot.send_msg(group_id=int(groupNum), message='上述图片已存在')
continue
try:
ocr = await bot.ocr_image(image=imgid)
ocr_content = handle_ocr_text(ocr['texts'])
except exception.ActionFailed:
await bot.send_msg(group_id=int(groupNum), message='该图片ocr失败')
continue

time.sleep(1)
inverted_index, forward_index = offer(group_id[0], save_file, ocr_content, inverted_index, forward_index)
if group_id[0] not in record_dict:
record_dict[group_id[0]] = [save_file]
else:
if save_file not in record_dict[group_id[0]]:
record_dict[group_id[0]].append(save_file)

if len(tags) != 0:
tags = tags[0].strip().split(' ')
flag, forward_index, inverted_index = addTag(tags, imgid, group_id[0], forward_index, inverted_index)

# 每5张语录持久化一次
if idx % 5 == 0:
with open(plugin_config.record_path, 'w', encoding='UTF-8') as f:
json.dump(record_dict, f, indent=2, separators=(',', ': '), ensure_ascii=False)

with open(plugin_config.inverted_index_path, 'w', encoding='UTF-8') as fc:
json.dump(inverted_index, fc, indent=2, separators=(',',': '), ensure_ascii=False)

await bot.send_msg(group_id=int(groupNum), message='当前进度{}/{}'.format(idx, total_len))

with open(plugin_config.record_path, 'w', encoding='UTF-8') as f:
json.dump(record_dict, f, indent=2, separators=(',', ': '), ensure_ascii=False)

with open(plugin_config.inverted_index_path, 'w', encoding='UTF-8') as fc:
json.dump(inverted_index, fc, indent=2, separators=(',',': '), ensure_ascii=False)

await bot.send_msg(group_id=int(groupNum), message='批量导入完成')
await script_batch.finish()
28 changes: 27 additions & 1 deletion nonebot_plugin_quote/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
import jieba
import os
import random
import hashlib
import shutil


# 向语录库添加新的图片
Expand Down Expand Up @@ -186,4 +188,28 @@ def delTag(tags, img_name, group_id, forward_index, inverted_index):
if len(inverted_index[group_id][tag]) == 0:
del inverted_index[group_id][tag]
return path, forward_index, inverted_index



IMAGE_EXTENSIONS = ['.jpg', '.jpeg', '.png', '.gif']
def copy_images_files(source, destinate):
image_files = []
for root,_,files in os.walk(source):
for filename in files:
extension = os.path.splitext(filename)[1].lower()
if extension in IMAGE_EXTENSIONS:
image_path = os.path.join(root, filename)
# 获得md5
md5 = get_img_md5(image_path) + '.image'
tname = md5 + extension
# 复制到目录
destination_path = os.path.join(destinate, tname)
shutil.copy(image_path, destination_path)
image_files.append((md5, tname))
return image_files


def get_img_md5(img_path):
with open(img_path, 'rb') as f:
img_data = f.read()
md5 = hashlib.md5(img_data).hexdigest()
return md5
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "nonebot-plugin-quote"
version = "0.3.2"
version = "0.3.3"
description = "一款适用于QQ群聊天的语录库插件"
authors = ["RongRongJi <316315867@qq.com>"]
license = "MIT"
Expand Down

0 comments on commit 5e8c7d1

Please sign in to comment.