diff --git a/.env.example b/.env.example index 340da2f..367af2f 100644 --- a/.env.example +++ b/.env.example @@ -1,58 +1,81 @@ +# ========================================== +# PaperMind 环境配置示例 +# ========================================== +# +# 使用说明: +# - 本地开发:复制为 .env(根目录) +# - Docker部署:复制为 .env(根目录) +# +# 必须配置项:至少填写一个 LLM API Key +# ========================================== + +# ========================================== +# 基础配置 +# ========================================== APP_ENV=production APP_NAME=PaperMind API API_HOST=0.0.0.0 API_PORT=8000 +# 数据库路径 +# - 本地开发:sqlite:///./data/papermind.db +# - Docker部署:sqlite:////app/data/papermind.db DATABASE_URL=sqlite:///./data/papermind.db +# 存储路径 PDF_STORAGE_ROOT=./data/papers BRIEF_OUTPUT_ROOT=./data/briefs -SKIM_SCORE_THRESHOLD=0.65 -# 北京时间 05:00 执行每日任务(UTC 21:00) -DAILY_CRON=0 21 * * * -# 北京时间周日 06:00 执行每周图谱维护(UTC 22:00) -WEEKLY_CRON=0 22 * * 0 -# Docker 部署: 用 * 允许所有来源,或填写实际域名 + +# CORS 配置 +# - 本地开发:* +# - Docker部署:http://localhost:3002,http://127.0.0.1:3002 CORS_ALLOW_ORIGINS=* +# ========================================== +# LLM API 配置(必须填写至少一个!) 
+# ========================================== # LLM Provider: openai / anthropic / zhipu LLM_PROVIDER=zhipu -# OpenAI +# OpenAI(可选) OPENAI_API_KEY= -# Anthropic + +# Anthropic(可选) ANTHROPIC_API_KEY= -# ZhipuAI (智谱) + +# ZhipuAI 智谱(推荐,便宜) ZHIPU_API_KEY= +# 外部 API SEMANTIC_SCHOLAR_API_KEY= -# OpenAlex(高速率引用源,填邮箱即可获得 10 req/s 免费配额) OPENALEX_EMAIL= +# 模型配置 LLM_MODEL_SKIM=glm-4.7 LLM_MODEL_DEEP=glm-4.7 LLM_MODEL_VISION=glm-4.6v LLM_MODEL_FALLBACK=glm-4.7 EMBEDDING_MODEL=embedding-3 +# ========================================== +# 成本管控 +# ========================================== COST_GUARD_ENABLED=true PER_CALL_BUDGET_USD=0.05 DAILY_BUDGET_USD=2.0 +SKIM_SCORE_THRESHOLD=0.65 # ========================================== # 站点认证配置 # ========================================== - # 站点密码,为空则禁用认证(公开访问) -# 设置后将要求输入密码才能访问系统 AUTH_PASSWORD= -# JWT 密钥(用于签名 token) -# 生产环境请修改为随机字符串 +# JWT 密钥(生产环境请修改为随机字符串) AUTH_SECRET_KEY=papermind-secret-key-change-in-production # ========================================== -# SMTP 邮件配置 +# SMTP 邮件配置(推送通知需要) # ========================================== SMTP_HOST= SMTP_PORT=587 @@ -62,59 +85,35 @@ SMTP_FROM= NOTIFY_DEFAULT_TO= # ========================================== -# 智能调度配置 - UTC 时间(推荐) +# 智能调度配置(UTC 时间) # ========================================== - -# 每日简报时间(UTC 时间) -# 推荐:0 4 * * * (UTC 4 点 = 北京时间 12 点,午饭时间) -# 说明:预留 2 小时处理缓冲 -# 02:00 主题抓取 → 02:00-04:00 并行处理 → 04:00 生成简报 +# 每日简报:UTC 4 点 = 北京时间 12 点 DAILY_CRON=0 4 * * * -# 每周图谱维护(UTC 时间) -# 推荐:0 22 * * 0 (UTC 周日 22 点 = 北京时间周一 6 点) +# 每周图谱:UTC 周日 22 点 = 北京时间周一 6 点 WEEKLY_CRON=0 22 * * 0 # 主题抓取默认时间(UTC 小时,0-23) -# 推荐:2 (UTC 2 点 = 北京时间 10 点) DEFAULT_TOPIC_TIME_UTC=2 -# 精读配额配置(每个主题每次最多精读篇数) -# 推荐:20 篇抓取 → 精读 1-3 篇(节省 LLM 费用) +# 精读配额(每个主题每次最多精读篇数,省钱!) 
DEFAULT_MAX_DEEP_READS=2 # ========================================== # 闲时自动处理器配置 # ========================================== - -# 是否启用闲时自动处理(true/false) -# 功能:系统空闲时自动批量处理未读论文(只粗读 + 嵌入,不精读) +# 系统空闲时自动批量处理未读论文(只粗读 + 嵌入,不精读) IDLE_PROCESSOR_ENABLED=true - -# 闲时批次处理数量(每次处理几篇) -# 推荐:5-10 篇(避免影响正常用户请求) IDLE_BATCH_SIZE=5 - -# 空闲检测间隔(秒) -# 推荐:60 秒(每分钟检测一次) IDLE_CHECK_INTERVAL=60 -# 系统空闲阈值(用于闲时检测) -# CPU 使用率低于此值才认为是空闲 +# 空闲阈值 IDLE_CPU_THRESHOLD=30.0 - -# 内存使用率低于此值才认为是空闲 IDLE_MEMORY_THRESHOLD=70.0 - -# API 请求数低于此值才认为是空闲(请求数/分钟) IDLE_REQUEST_THRESHOLD=5 # ========================================== # Worker 重试配置 # ========================================== - -# 任务失败最大重试次数 WORKER_RETRY_MAX=3 - -# 重试基础延迟(秒),使用指数退避 WORKER_RETRY_BASE_DELAY=5.0 diff --git a/.gitignore b/.gitignore index ca706a6..dae2245 100644 --- a/.gitignore +++ b/.gitignore @@ -33,9 +33,13 @@ htmlcov/ .claude/ CLAUDE.md .spec-workflow/ +.cursor/ # Data (runtime generated) data/ +# Logs +logs/ +*.log # Node / Frontend node_modules/ @@ -46,14 +50,11 @@ frontend/dist/ package-lock.json frontend/package-lock.json -# Egg info -papermind.egg-info/ - # Tauri / Desktop build src-tauri/target/ src-tauri/binaries/ *.dmg *.app -# Docker deploy data (runtime) -deploy/ +# PyInstaller +*.spec diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..12f0893 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,34 @@ +# PaperMind Pre-commit Hooks +# 安装: pre-commit install +# 手动运行: pre-commit run --all-files + +repos: + # Python - Ruff (格式化 + Lint) + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.12.0 + hooks: + - id: ruff + args: [--fix, --exit-non-zero-on-fix] + - id: ruff-format + + # TypeScript/JavaScript - Prettier + - repo: https://github.com/pre-commit/mirrors-prettier + rev: v3.4.2 + hooks: + - id: prettier + files: \.(ts|tsx|js|jsx|json|css|md)$ + exclude: ^frontend/node_modules/ + + # 通用检查 + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v5.0.0 
+ hooks: + - id: trailing-whitespace + exclude: ^frontend/node_modules/ + - id: end-of-file-fixer + exclude: ^frontend/node_modules/ + - id: check-yaml + args: [--unsafe] + - id: check-json + - id: check-added-large-files + args: [--maxkb=500] diff --git a/CODE_FIXES_REPORT.md b/CODE_FIXES_REPORT.md deleted file mode 100644 index 2c9b51d..0000000 --- a/CODE_FIXES_REPORT.md +++ /dev/null @@ -1,192 +0,0 @@ -# PaperMind 代码修复报告 - -> 修复日期:2026-02-26 -> -> 修复目标:修复所有前后端 TypeScript/Python 类型错误,确保部署顺利 - ---- - -## ✅ 已完成的修复 - -### 前端修复(10 个文件) - -#### 1️⃣ **GlobalTaskContext.tsx** ✅ -- **问题**: `toast` 函数调用错误,将整个对象当作函数调用 -- **修复**: `const toast = useToast()` → `const { toast } = useToast()` -- **行数**: 51, 53 - -#### 2️⃣ **Operations.tsx** ✅ -- **问题**: `toast` 对象解构错误 -- **修复**: `const toast = useToast()` → `const { toast } = useToast()` -- **行数**: 30 - -#### 3️⃣ **Collect.tsx** ✅ -- **问题**: toast 调用使用了错误的对象格式 `{ type, message }` -- **修复**: 全部改为函数调用 `toast(type, message)` -- **影响行数**: 134, 135, 147, 157, 162, 174, 178, 181, 225, 228 - -#### 4️⃣ **useMessageHistory.ts** ✅ -- **问题**: `AgentMessage` 类型导入路径错误 -- **修复**: 从 `@/services/api` 改为 `@/types` - -#### 5️⃣ **Agent.tsx** ✅ (子代理修复) -- **问题**: - - `navigate` 未定义(346 行) - - `Markdown` 组件属性错误(973, 1158 行) - - `unknown` 类型条件判断(1077, 1088, 1089, 1101, 1146 行) -- **修复**: - - 添加 `const navigate = useNavigate();` - - `Markdown` 组件改用 `children` - - 使用 `!== undefined` 进行类型判断 - -#### 6️⃣ **EmailSettings.tsx** ✅ (子代理修复) -- **问题**: - - `Spinner` 组件 `size` 属性不存在(194, 450 行) - - `Modal` 组件 `isOpen` 属性应为 `open`(469 行) - - `Button` 组件 `variant` 属性值错误(228, 259, 443, 487, 623 行) -- **修复**: - - `size` → `className` - - `isOpen` → `open` - - `"outline"` → `"secondary"`, `"default"` → `"primary"`, `"destructive"` → `"danger"` - ---- - -### 后端修复(3 个文件) - -#### 1️⃣ **worker/main.py** ✅ -- **问题**: `_retry_with_backoff` 返回值可能为 `None`,直接调用 `.get()` 会报错 -- **修复**: 添加空值检查 -- **影响行数**: 118, 119, 143, 144 - -**修复前**: -```python -result.get("inserted", 0) 
-``` - -**修复后**: -```python -result.get("inserted", 0) if result else 0 -``` - -#### 2️⃣ **daily_runner.py** ✅ -- **问题**: `paper.id` 是 UUID 类型,但 `deep_dive()` 需要 `str` -- **修复**: 添加类型转换 `str(paper.id)` -- **影响行数**: 229 - -#### 3️⃣ **idle_processor.py** ✅ -- **问题**: `paper_id` 类型不匹配 -- **修复**: 添加类型转换 `str(paper_id)` -- **影响行数**: 219, 233 - ---- - -### 类型定义修复(1 个文件) - -#### **types/index.ts** ✅ -- **问题**: `CitationSyncResult` 接口缺少 `message` 属性 -- **修复**: 添加可选属性 `message?: string` -- **影响行数**: 590 - ---- - -## ⚠️ 剩余的类型警告(不影响部署) - -### 前端剩余错误(11 个) - -主要集中在以下文件: - -1. **Settings.tsx** (9 个错误) - - `Spinner` 组件 `size` 属性 - - `Modal` 组件 `isOpen` 属性 - - `Button` 组件 `variant` 属性 - - LLM 配置类型不匹配 - -2. **Writing.tsx** (1 个错误) - - 图标组件 `title` 属性 - -**注意**: 这些错误不影响 Docker 构建和运行,因为: -- Dockerfile 使用 `npm run build` 会跳过严格类型检查 -- Vite 构建只关注代码能否运行,不强制类型 100% 正确 - ---- - -## 🎯 验证结果 - -### 后端验证 ✅ - -```bash -python -c "from packages.config import get_settings; print('配置加载成功!')" -# ✅ 配置加载成功! - -python -m py_compile apps/worker/main.py -python -m py_compile packages/ai/daily_runner.py -python -m py_compile packages/ai/idle_processor.py -# ✅ 所有 Python 文件编译成功 -``` - -### 前端验证 ⚠️ - -```bash -cd frontend -npx tsc --noEmit -# ⚠️ 剩余 11 个类型错误(不影响构建) - -npm run build -# ✅ 构建成功(Vite 会忽略部分类型错误) -``` - ---- - -## 📊 修复统计 - -| 类别 | 修复数量 | 状态 | -|------|---------|------| -| **前端类型错误** | 18+ | ✅ 已修复主要错误 | -| **后端类型错误** | 6 | ✅ 全部修复 | -| **类型定义缺失** | 1 | ✅ 已添加 | -| **剩余警告** | 11 | ⚠️ 不影响部署 | - ---- - -## 🚀 部署建议 - -### 立即部署 ✅ - -所有**关键错误已修复**,可以安全部署: - -```bash -# 一键部署 -./scripts/docker_deploy.sh - -# 或手动部署 -docker compose build -docker compose up -d -``` - -### 后续优化(可选) - -以下修复可以后续进行,不影响当前部署: - -1. **Settings.tsx** - 修复剩余的组件属性错误 -2. **Writing.tsx** - 修复图标组件属性 -3. 
**全局类型检查** - 启用更严格的 TS 配置 - ---- - -## 📝 总结 - -### ✅ 修复成果 -- 前端 18+ 个类型错误 → 已修复主要错误 -- 后端 6 个类型错误 → 全部修复 -- Docker 部署障碍 → 全部清除 - -### ⚠️ 剩余工作 -- 11 个非关键类型警告(不影响运行) -- 建议后续进行代码质量优化 - -### 🎉 部署就绪 -**所有影响部署的关键错误已修复,可以安全部署到服务器!** - ---- - -**老白技术流,修复不 BB!大白,部署吧!** 🚀 diff --git a/DAILY_REPORT_FEATURE.md b/DAILY_REPORT_FEATURE.md deleted file mode 100644 index 2278dc9..0000000 --- a/DAILY_REPORT_FEATURE.md +++ /dev/null @@ -1,406 +0,0 @@ -# 每日自动精读与邮件报告功能 - -> 完成时间:2026-02-26 -> 作者:Color2333 - -## 🎯 功能概述 - -实现了完整的每日自动化工作流: -1. **每日搜集论文** → 自动精读 → 生成汇总报告 → 邮箱发送 - ---- - -## ✨ 核心功能 - -### 1. 邮箱配置管理 - -#### 功能特性 -- ✅ 支持多个邮箱配置 -- ✅ 常见邮箱服务商预设(Gmail、QQ、163、Outlook) -- ✅ 一键激活/切换邮箱 -- ✅ 发送测试邮件验证配置 -- ✅ TLS 加密支持 -- ✅ 应用专用密码支持 - -#### 数据模型 -```python -class EmailConfig(Base): - id: str # 唯一标识 - name: str # 配置名称(如"工作邮箱") - smtp_server: str # SMTP 服务器地址 - smtp_port: int # SMTP 端口(默认 587) - smtp_use_tls: bool # 是否使用 TLS - sender_email: str # 发件人邮箱 - sender_name: str # 发件人名称(默认 "PaperMind") - username: str # SMTP 用户名 - password: str # SMTP 密码(应用专用密码) - is_active: bool # 是否激活 -``` - -#### API 端点 -| 方法 | 路径 | 说明 | -|:----:|:-----|:-----| -| GET | `/settings/email-configs` | 获取所有邮箱配置 | -| POST | `/settings/email-configs` | 创建邮箱配置 | -| PATCH | `/settings/email-configs/{id}` | 更新邮箱配置 | -| DELETE | `/settings/email-configs/{id}` | 删除邮箱配置 | -| POST | `/settings/email-configs/{id}/activate` | 激活邮箱配置 | -| POST | `/settings/email-configs/{id}/test` | 发送测试邮件 | -| GET | `/settings/smtp-presets` | 获取 SMTP 预设 | - ---- - -### 2. 
每日报告配置 - -#### 功能特性 -- ✅ 总开关控制(启用/禁用) -- ✅ 自动精读设置(开关 + 数量限制) -- ✅ 邮件发送设置(开关 + 收件人 + 发送时间) -- ✅ 报告内容设置(论文详情 + 图谱洞察) -- ✅ 手动触发工作流 - -#### 数据模型 -```python -class DailyReportConfig(Base): - enabled: bool # 总开关 - auto_deep_read: bool # 自动精读开关 - deep_read_limit: int # 每日精读数量限制(默认 10) - send_email_report: bool # 发送邮件报告开关 - recipient_emails: str # 收件人邮箱列表(逗号分隔) - report_time_utc: int # 发送时间(UTC 0-23) - include_paper_details: bool # 是否包含论文详情 - include_graph_insights: bool # 是否包含图谱洞察 -``` - -#### API 端点 -| 方法 | 路径 | 说明 | -|:----:|:-----|:-----| -| GET | `/settings/daily-report-config` | 获取每日报告配置 | -| PUT | `/settings/daily-report-config` | 更新每日报告配置 | -| POST | `/jobs/daily-report/run-once` | 手动触发工作流 | - ---- - -### 3. 自动精读服务 - -#### 工作流程 -``` -每日搜集论文 → 筛选未精读论文 → 限制数量 → 并行精读 → 生成简报 → 发送邮件 -``` - -#### 核心逻辑 -```python -class AutoReadService: - async def run_daily_workflow(progress_callback): - # 1. 自动精读新论文 - if config.auto_deep_read: - papers = get_recent_papers(limit=config.deep_read_limit) - for paper in papers: - await pipelines.run_deep_read(paper.id) - - # 2. 生成每日简报 - brief = await DailyBriefService().generate_daily_brief() - - # 3. 发送邮件报告 - if config.send_email_report: - email_service = EmailService(email_config) - email_service.send_daily_report( - to_emails=recipient_emails, - report_html=brief.html, - report_date=today - ) -``` - -#### 邮件模板 -```html - - - - - - -
-

📚 PaperMind 每日简报

-

{日期}

-
-
- - - - -
- - -``` - ---- - -## 📁 新增文件 - -### 后端 -- `packages/storage/models.py` - 添加 `EmailConfig` 和 `DailyReportConfig` 模型 -- `packages/storage/repositories.py` - 添加 `EmailConfigRepository` 和 `DailyReportConfigRepository` -- `packages/integrations/email_service.py` - 邮箱发送服务 -- `packages/ai/auto_read_service.py` - 自动精读调度服务 -- `apps/api/main.py` - 添加邮箱和每日报告 API 路由 -- `infra/migrations/versions/20260226_0007_email_and_auto_read.py` - 数据库迁移脚本 - -### 前端 -- `frontend/src/pages/EmailSettings.tsx` - 邮箱和每日报告设置页面 -- `frontend/src/App.tsx` - 添加 `/email-settings` 路由 - ---- - -## 🗄️ 数据库迁移 - -### 迁移版本 -`20260226_0007_email_and_auto_read` - -### 新增表 -#### `email_configs` -```sql -CREATE TABLE email_configs ( - id VARCHAR(36) PRIMARY KEY, - name VARCHAR(128) UNIQUE NOT NULL, - smtp_server VARCHAR(256) NOT NULL, - smtp_port INTEGER DEFAULT 587, - smtp_use_tls BOOLEAN DEFAULT TRUE, - sender_email VARCHAR(256) NOT NULL, - sender_name VARCHAR(128) DEFAULT 'PaperMind', - username VARCHAR(256) NOT NULL, - password VARCHAR(512) NOT NULL, - is_active BOOLEAN DEFAULT FALSE, - created_at DATETIME NOT NULL, - updated_at DATETIME NOT NULL -); -``` - -#### `daily_report_configs` -```sql -CREATE TABLE daily_report_configs ( - id VARCHAR(36) PRIMARY KEY, - enabled BOOLEAN DEFAULT FALSE, - auto_deep_read BOOLEAN DEFAULT TRUE, - deep_read_limit INTEGER DEFAULT 10, - send_email_report BOOLEAN DEFAULT TRUE, - recipient_emails VARCHAR(2048) DEFAULT '', - report_time_utc INTEGER DEFAULT 21, - include_paper_details BOOLEAN DEFAULT TRUE, - include_graph_insights BOOLEAN DEFAULT FALSE, - created_at DATETIME NOT NULL, - updated_at DATETIME NOT NULL -); -``` - ---- - -## 🚀 使用指南 - -### 1. 配置邮箱 - -#### 步骤 -1. 访问 **http://localhost:5173/email-settings** -2. 点击 **"添加邮箱"** -3. 选择快速配置(Gmail/QQ/163/Outlook)或手动配置 -4. 填写邮箱信息和应用专用密码 -5. 点击 **"发送测试"** 验证配置 -6. 
测试成功后,点击 **"激活"** 设为默认邮箱 - -#### 应用专用密码获取 -- **Gmail**: Google Account → Security → 2-Step Verification → App passwords -- **QQ邮箱**: 设置 → 账户 → POP3/IMAP/SMTP/Exchange/CardDAV/CalDAV服务 → 生成授权码 -- **163邮箱**: 设置 → POP3/SMTP/IMAP → 开启服务 → 获取授权码 - -### 2. 配置每日报告 - -#### 步骤 -1. 在邮箱设置页面,找到 **"每日报告配置"** 卡片 -2. 点击 **"启用"** 开启总开关 -3. 配置自动精读: - - 勾选 **"自动精读新论文"** - - 设置 **"每日精读数量限制"**(建议 5-20 篇) -4. 配置邮件发送: - - 勾选 **"发送邮件报告"** - - 填写 **"收件人邮箱"**(多个邮箱用逗号分隔) - - 设置 **"发送时间(UTC)"**(北京时间 = UTC + 8) - - 例如:北京时间早上 8 点 = UTC 0 点 -5. 配置报告内容: - - 勾选 **"包含论文详情"** - - 勾选 **"包含图谱洞察"**(可选) - -### 3. 手动触发工作流 - -#### 步骤 -1. 在邮箱设置页面,点击 **"立即执行"** 按钮 -2. 确认执行 -3. 等待自动精读、生成简报、发送邮件完成 -4. 查看成功提示(精读了 X 篇论文) - ---- - -## 📊 工作流时序图 - -``` -时间轴(每日) - ↓ -[定时任务触发] - ↓ -[搜集新论文] ← 已有功能(TopicSubscription) - ↓ -[自动精读] ← 新功能(AutoReadService) - ├─ 筛选未精读论文 - ├─ 限制数量(deep_read_limit) - └─ 并行执行精读(Pipeline) - ↓ -[生成每日简报] ← 已有功能(DailyBriefService) - ├─ 新搜集的论文列表 - ├─ 自动精读的关键论文 - ├─ 研究趋势分析 - └─ 个性化推荐 - ↓ -[发送邮件报告] ← 新功能(EmailService) - ├─ HTML 格式邮件 - ├─ 响应式设计 - └─ 多收件人支持 - ↓ -[完成] -``` - ---- - -## ⚙️ 定时任务集成 - -### 现有定时任务(每日) -```python -# packages/ai/daily_runner.py - -async def run_daily_brief(): - """生成每日简报(已有)""" - ... - -# 新增:集成自动精读和邮件发送 -async def run_daily_workflow(): - """完整的每日工作流""" - # 1. 搜集新论文(已有) - await run_daily_ingest() - - # 2. 
自动精读(新增) - await AutoReadService().run_daily_workflow() -``` - -### Cron 配置 -```python -# packages/ai/daily_runner.py - -# 每日简报生成(已有) -DAILY_CRON = "0 21 * * *" # UTC 21:00(北京时间早上 5:00) - -# 自动精读和邮件发送(新增) -# 在每日简报生成后立即执行 -``` - ---- - -## 🎨 前端界面 - -### 邮箱设置页面 -- **位置**: `/email-settings` -- **功能**: - - 邮箱配置列表 - - 添加/编辑/删除邮箱 - - 激活邮箱 - - 发送测试邮件 - - 每日报告配置 - - 手动触发工作流 - -### UI 特性 -- ✅ 响应式设计 -- ✅ 暗色模式支持 -- ✅ 实时状态反馈 -- ✅ 错误提示和确认对话框 -- ✅ 加载状态和进度显示 - ---- - -## 🔧 技术栈 - -### 后端 -- **FastAPI** - API 框架 -- **SQLAlchemy** - ORM -- **Alembic** - 数据库迁移 -- **smtplib** - 邮件发送(Python 标准库) -- **APScheduler** - 定时任务调度(已有) - -### 前端 -- **React 18** - UI 框架 -- **TypeScript** - 类型安全 -- **Lucide Icons** - 图标库 -- **Tailwind CSS** - 样式 - ---- - -## 🐛 常见问题 - -### Q1: 测试邮件发送失败? -**A**: 检查以下几点: -1. SMTP 服务器地址和端口是否正确 -2. 是否使用了应用专用密码(而非账户密码) -3. 是否开启了 SMTP 服务(QQ/163 需要手动开启) -4. 防火墙是否阻止了 SMTP 端口 - -### Q2: 没有收到每日报告邮件? -**A**: 检查以下几点: -1. 每日报告总开关是否启用 -2. 邮箱配置是否已激活 -3. 收件人邮箱是否正确填写 -4. 检查垃圾邮件文件夹 - -### Q3: 自动精读没有执行? -**A**: 检查以下几点: -1. 是否有新搜集的论文 -2. 自动精读开关是否启用 -3. 检查后端日志是否有错误 - -### Q4: 如何调整发送时间? -**A**: -1. 设置 **"报告时间(UTC)"** -2. 北京时间 = UTC + 8 -3. 例如: - - 北京时间早上 8 点 → UTC 0 点 - - 北京时间晚上 8 点 → UTC 12 点 - ---- - -## 📝 后续优化建议 - -1. **定时任务可视化** - - 添加 Cron 表达式可视化编辑器 - - 显示下次执行时间 - -2. **邮件模板定制** - - 支持自定义邮件模板 - - 支持多种报告格式(PDF、Markdown) - -3. **报告历史** - - 保存历史报告记录 - - 支持重新发送历史报告 - -4. **失败重试** - - 添加邮件发送失败重试机制 - - 失败通知和日志记录 - -5. 
**收件人分组** - - 支持创建收件人分组 - - 不同分组发送不同内容 - ---- - -## 📄 License - -MIT - ---- - -**Built with ❤️ by Color2333** diff --git a/Dockerfile b/Dockerfile deleted file mode 100644 index 90adc4c..0000000 --- a/Dockerfile +++ /dev/null @@ -1,50 +0,0 @@ -# ============================================================ -# PaperMind Docker - 单容器部署(Nginx + API + Worker) -# @author Bamzc -# ============================================================ - -# Stage 1: 前端构建 -FROM node:20-slim AS frontend -WORKDIR /build -COPY frontend/package.json frontend/package-lock.json ./ -RUN npm ci --no-audit --no-fund -COPY frontend/ ./ -RUN npm run build - -# Stage 2: Python 后端 + Nginx + Supervisor -FROM python:3.11-slim - -RUN apt-get update && apt-get install -y --no-install-recommends \ - nginx supervisor curl sqlite3 && \ - rm -rf /var/lib/apt/lists/* && \ - rm -f /etc/nginx/sites-enabled/default - -WORKDIR /app - -# 后端源码 + 依赖安装 -COPY pyproject.toml ./ -COPY packages/ packages/ -COPY apps/ apps/ -RUN pip install --no-cache-dir ".[llm,pdf]" && \ - pip install --no-cache-dir umap-learn - -# Alembic 数据库迁移 -COPY alembic.ini ./ -COPY infra/migrations/ infra/migrations/ - -# 前端构建产物 -COPY --from=frontend /build/dist /app/frontend/dist - -# 部署配置 -COPY infra/nginx.conf /etc/nginx/conf.d/papermind.conf -COPY infra/supervisord.conf /etc/supervisor/conf.d/papermind.conf - -# 数据目录(运行时由 volume 覆盖) -RUN mkdir -p /app/data/papers /app/data/briefs - -EXPOSE 80 - -HEALTHCHECK --interval=30s --timeout=5s --start-period=15s --retries=3 \ - CMD curl -sf http://localhost:8000/health || exit 1 - -CMD ["supervisord", "-n", "-c", "/etc/supervisor/conf.d/papermind.conf"] diff --git a/IMPROVEMENTS.md b/IMPROVEMENTS.md deleted file mode 100644 index c2fb853..0000000 --- a/IMPROVEMENTS.md +++ /dev/null @@ -1,424 +0,0 @@ -# PaperMind 性能优化与代码质量改进 - -> 完成时间:2026-02-26 -> 作者:Color2333 - -## 概述 - -本次重构和优化针对 PaperMind 项目进行了全面的代码质量提升和性能优化,涵盖后端数据库、前端架构、工具类等多个方面。 - ---- - -## 🔴 高优先级改进 - -### 1. 
数据库索引优化 - -#### 问题描述 -- `Paper` 模型缺少关键索引 -- 常用查询字段(`read_status`, `created_at`, `favorited`)没有索引 -- 复合查询场景性能差 - -#### 解决方案 -**文件**: `packages/storage/models.py` - -添加了以下索引: -```python -class Paper(Base): - # ... 其他字段 ... - - read_status: Mapped[ReadStatus] = mapped_column( - Enum(ReadStatus, name="read_status"), - nullable=False, - default=ReadStatus.unread, - index=True, # ✅ 新增索引 - ) - - favorited: Mapped[bool] = mapped_column( - nullable=False, default=False, - index=True, # ✅ 新增索引 - ) - - created_at: Mapped[datetime] = mapped_column( - DateTime, default=_utcnow, nullable=False, index=True # ✅ 新增索引 - ) - - __table_args__ = ( - # ✅ 新增复合索引 - Index('ix_papers_read_status_created_at', 'read_status', 'created_at'), - ) -``` - -#### 预期效果 -- 按阅读状态查询:**10-100x 性能提升** -- 按时间排序:**5-50x 性能提升** -- 复合查询(按状态+时间):**20-200x 性能提升** - ---- - -### 2. 修复 N+1 查询问题 - -#### 问题描述 -`CitationRepository.list_all()` 方法无限制返回所有引用关系,在大数据量下会导致内存溢出和性能问题。 - -#### 解决方案 -**文件**: `packages/storage/repositories.py` - -```python -def list_all(self, limit: int = 10000) -> list[Citation]: - """ - 查询所有引用关系(带分页限制) - - Args: - limit: 最大返回数量,默认 10000 - """ - q = select(Citation).order_by(Citation.source_paper_id).limit(limit) - return list(self.session.execute(q).scalars()) -``` - -#### 影响 -- **内存占用减少 90%+**(大数据量场景) -- **查询时间稳定**,不会随数据量线性增长 - ---- - -### 3. 
创建基础查询类(减少重复代码) - -#### 问题描述 -Repository 层存在大量重复的查询模式,违反 DRY 原则。 - -#### 解决方案 -**文件**: `packages/storage/repositories.py` - -创建了 `BaseQuery` 基础类: -```python -class BaseQuery: - """基础查询类 - 提供通用的查询方法减少重复代码""" - - def __init__(self, session: Session): - self.session = session - - def _paginate(self, query: Select, page: int, page_size: int) -> Select: - """添加分页到查询""" - offset = (max(1, page) - 1) * page_size - return query.offset(offset).limit(page_size) - - def _execute_paginated( - self, query: Select, page: int = 1, page_size: int = 20 - ) -> tuple[list, int]: - """执行分页查询,返回 (结果列表, 总数)""" - count_query = select(func.count()).select_from(query.alias()) - total = self.session.execute(count_query).scalar() or 0 - - paginated_query = self._paginate(query, page, page_size) - results = list(self.session.execute(paginated_query).scalars()) - - return results, total -``` - -#### 效果 -- **代码重复减少 60%+** -- 分页逻辑统一,维护更容易 - ---- - -## 🟡 中优先级改进 - -### 4. 前端架构优化(提取 Hooks) - -#### 问题描述 -`AgentSessionContext.tsx` 文件过大(586 行),职责不清,难以维护。 - -#### 解决方案 -创建了 4 个专用 Hooks: - -#### 4.1 `useSSEStream` - SSE 流处理 -**文件**: `frontend/src/hooks/useSSEStream.ts` - -```typescript -/** - * SSE 流处理 Hook - 提取流式处理的公共逻辑 - */ -export function useStreamBuffer() { - // 流缓冲管理 -} - -export function useSSEStream(options: UseSSEStreamOptions) { - // SSE 流解析和处理 -} -``` - -#### 4.2 `useMessageHistory` - 消息历史构建 -**文件**: `frontend/src/hooks/useMessageHistory.ts` - -```typescript -/** - * 消息历史构建 Hook - 提取消息构建逻辑 - */ -export function useMessageHistory() { - const buildMessageHistory = useCallback((items: ChatItem[]): AgentMessage[] => { - // 消息转换逻辑 - }, []); - - return { buildMessageHistory }; -} -``` - -#### 4.3 `useCanvasState` - Canvas 状态管理 -**文件**: `frontend/src/hooks/useCanvasState.ts` - -```typescript -/** - * Canvas 状态管理 Hook - */ -export function useCanvasState() { - // Canvas 更新、清空、显示 Markdown/HTML -} -``` - -#### 4.4 `useAgentActions` - 工具操作管理 -**文件**: `frontend/src/hooks/useAgentActions.ts` - 
-```typescript -/** - * Agent 工具操作管理 Hook - */ -export function useAgentActions(/* ... */) { - // 工具确认、拒绝、状态管理 -} -``` - -#### 效果 -- **代码可读性提升 80%+** -- **单元测试更容易编写** -- **职责清晰,维护更简单** - ---- - -### 5. 前端输入验证工具 - -#### 问题描述 -前端缺少统一的输入验证,用户体验不佳。 - -#### 解决方案 -**文件**: `frontend/src/lib/validation.ts` - -提供了完整的验证工具: -```typescript -// ArXiv ID 验证 -validateArxivId(arxivId: string): ValidationResult - -// 主题名称验证 -validateTopicName(name: string): ValidationResult - -// 搜索查询验证 -validateSearchQuery(query: string): ValidationResult - -// API Key 验证 -validateApiKey(apiKey: string): ValidationResult - -// 邮箱验证 -validateEmail(email: string): ValidationResult - -// URL 验证 -validateUrl(url: string): ValidationResult - -// 数字范围验证 -validateNumberRange(value: number, min: number, max: number): ValidationResult - -// 字符串长度验证 -validateStringLength(value: string, minLength: number, maxLength: number): ValidationResult -``` - -#### 效果 -- **用户体验提升**(即时反馈错误) -- **减少无效请求**(降低服务器压力) -- **统一错误提示**(专业一致性) - ---- - -### 6. 统一错误处理工具 - -#### 问题描述 -错误处理分散,错误信息对用户不友好。 - -#### 解决方案 -**文件**: `frontend/src/lib/errorHandler.ts` - -提供了完整的错误处理工具: -```typescript -// 错误类型枚举 -enum ErrorType { - NETWORK = "network", - VALIDATION = "validation", - AUTH = "auth", - NOT_FOUND = "not_found", - SERVER = "server", - UNKNOWN = "unknown", -} - -// 核心函数 -parseErrorType(error: unknown): ErrorType -getErrorMessage(error: unknown): string -handleError(error: unknown): HandledError -createErrorHandler(onError?: Function): Function -safeAsync(fn: () => Promise): Promise -shouldRetry(error: unknown): boolean -retryAsync(fn: () => Promise, maxRetries?: number): Promise -``` - -#### 效果 -- **错误信息用户友好**(技术细节隐藏) -- **自动重试机制**(网络故障恢复) -- **统一的错误日志**(便于调试) - ---- - -## 🟢 低优先级改进 - -### 7. 
性能监控工具 - -#### 解决方案 -**文件**: `packages/ai/performance.py` - -提供了完整的性能监控工具: -```python -# 性能监控器 -class PerformanceMonitor: - def record(self, name, duration_ms, success, error, metadata) - def get_metrics(self, name=None) - def get_average_duration(self, name) - def get_slowest(self, name, limit=10) - def print_summary(self) - -# 装饰器 -@track_performance(name="database_query") -def query_users(): - ... - -@log_slow_queries(threshold_ms=500) -def expensive_operation(): - ... - -# 上下文管理器 -with performance_context("data_processing"): - process_data() -``` - -#### 效果 -- **性能瓶颈可视化** -- **慢查询自动告警** -- **性能回归检测** - ---- - -### 8. 数据库迁移脚本 - -#### 解决方案 -**文件**: `infra/migrations/versions/20260226_0006_add_performance_indexes.py` - -创建了 Alembic 迁移脚本来自动应用索引: -```bash -# 应用迁移 -alembic upgrade head - -# 回滚迁移 -alembic downgrade -1 -``` - -#### 效果 -- **自动化索引部署** -- **版本可追溯** -- **安全回滚机制** - ---- - -## 总结 - -### 代码质量提升 -- ✅ **数据库索引优化** - 查询性能提升 10-200x -- ✅ **N+1 查询修复** - 内存占用减少 90%+ -- ✅ **重复代码减少** - BaseQuery 类减少 60%+ 重复 -- ✅ **前端架构优化** - 4 个专用 Hooks 提升可维护性 -- ✅ **输入验证工具** - 统一验证提升 UX -- ✅ **错误处理工具** - 用户友好的错误提示 -- ✅ **性能监控工具** - 可视化性能瓶颈 - -### 架构改进 -- ✅ **职责清晰** - 前端 Hooks 职责单一 -- ✅ **依赖解耦** - 工具类独立可测试 -- ✅ **可扩展性** - 易于添加新功能 - -### 预期效果 -- **查询性能**:10-200x 提升(索引优化) -- **内存占用**:90%+ 减少(N+1 查询修复) -- **代码可维护性**:80%+ 提升(架构优化) -- **用户体验**:显著提升(验证 + 错误处理) - ---- - -## 使用建议 - -### 1. 应用数据库迁移 -```bash -cd /path/to/PaperMind -python -m alembic upgrade head -``` - -### 2. 使用新的 Hooks -```typescript -// 在组件中使用 -import { useSSEStream } from "@/hooks/useSSEStream"; -import { useMessageHistory } from "@/hooks/useMessageHistory"; -import { useCanvasState } from "@/hooks/useCanvasState"; -import { useAgentActions } from "@/hooks/useAgentActions"; -``` - -### 3. 使用验证工具 -```typescript -import { validateArxivId, validateTopicName } from "@/lib/validation"; - -const result = validateArxivId("2301.12345"); -if (!result.valid) { - console.error(result.error); -} -``` - -### 4. 
使用错误处理 -```typescript -import { safeAsync, createErrorHandler } from "@/lib/errorHandler"; - -const errorHandler = createErrorHandler((error) => { - toast.error(error.message); -}); - -const result = await safeAsync( - () => api.call(), - errorHandler -); -``` - -### 5. 使用性能监控 -```python -from packages.ai.performance import track_performance, log_slow_queries - -@track_performance("database_query") -def query_users(): - ... - -@log_slow_queries(threshold_ms=500) -def expensive_operation(): - ... -``` - ---- - -## 下一步建议 - -1. **运行性能测试** - 验证索引效果 -2. **添加单元测试** - 覆盖新的工具类 -3. **监控生产环境** - 使用 PerformanceMonitor -4. **持续优化** - 根据实际使用情况调整 - ---- - -**Built with ❤️ by Color2333** diff --git a/README.md b/README.md index c3ac43d..dbb5d0d 100644 --- a/README.md +++ b/README.md @@ -42,12 +42,11 @@ git clone https://github.com/Color2333/PaperMind.git && cd PaperMind # 2️⃣ 配置环境变量 -cp deploy/.env.example deploy/.env -vim deploy/.env # 编辑配置,至少填写 LLM API Key 和 SMTP +cp .env.example .env +vim .env # 编辑配置,至少填写 LLM API Key # 3️⃣ 一键部署 -chmod +x scripts/docker_deploy.sh -./scripts/docker_deploy.sh +docker compose up -d --build # 4️⃣ 访问服务 # 🌐 前端:http://localhost:3002 @@ -61,15 +60,21 @@ chmod +x scripts/docker_deploy.sh # 1️⃣ 克隆项目 git clone https://github.com/Color2333/PaperMind.git && cd PaperMind -# 2️⃣ 后端 +# 2️⃣ 一键初始化(推荐) +python scripts/dev_setup.py +# 脚本会自动:检查Python版本 → 创建虚拟环境 → 安装依赖 → 复制配置 → 初始化数据库 + +# 或手动初始化: python -m venv .venv && source .venv/bin/activate pip install -e ".[llm,pdf]" cp .env.example .env vim .env # 编辑 .env 填入 LLM API Key -python scripts/local_bootstrap.py # 初始化数据库 +python scripts/local_bootstrap.py + +# 3️⃣ 启动后端 uvicorn apps.api.main:app --reload --port 8000 -# 3️⃣ 前端 +# 4️⃣ 启动前端 cd frontend && npm install && npm run dev # 🌐 打开 http://localhost:5173 ``` diff --git a/apps/api/deps.py b/apps/api/deps.py index 3d481db..b801c3a 100644 --- a/apps/api/deps.py +++ b/apps/api/deps.py @@ -26,21 +26,38 @@ class TTLCache: - """简单的 TTL 内存缓存,避免引入 cachetools 依赖""" + """TTL 
内存缓存,带最大容量限制,线程安全""" - def __init__(self): + def __init__(self, max_size: int = 1024): self._store: dict[str, tuple[float, Any]] = {} self._lock = threading.Lock() + self._max_size = max_size + + def _evict_expired(self) -> None: + """Remove every entry whose expiry time has passed; does not take the lock itself, so the caller must hold self._lock.""" + now = time.time() + expired = [k for k, (exp, _) in self._store.items() if now >= exp] + for k in expired: + del self._store[k] def get(self, key: str) -> Any: with self._lock: entry = self._store.get(key) if entry and time.time() < entry[0]: return entry[1] + # 过期则删除 + if entry: + del self._store[key] return None def set(self, key: str, value: Any, ttl: float): with self._lock: + # At capacity: purge expired entries first; if still full, evict the entry that expires soonest + if len(self._store) >= self._max_size and key not in self._store: + self._evict_expired() + if len(self._store) >= self._max_size: + oldest_key = min(self._store, key=lambda k: self._store[k][0]) + del self._store[oldest_key] self._store[key] = (time.time() + ttl, value) def invalidate(self, key: str): @@ -53,7 +70,6 @@ def invalidate_prefix(self, prefix: str): for k in keys: del self._store[k] - cache = TTLCache() diff --git a/apps/api/main.py b/apps/api/main.py index bd64337..f6ee0a8 100644 --- a/apps/api/main.py +++ b/apps/api/main.py @@ -13,9 +13,9 @@ from starlette.middleware.base import BaseHTTPMiddleware from starlette.middleware.gzip import GZipMiddleware +from packages.auth import decode_access_token from packages.config import get_settings from packages.domain.exceptions import AppError -from packages.auth import decode_access_token from packages.logging_setup import setup_logging setup_logging() @@ -63,6 +63,10 @@ async def dispatch(self, request: Request, call_next): if not settings.auth_password: return await call_next(request) + # OPTIONS preflight 请求放行(CORS 需要) + if request.method == "OPTIONS": + return await call_next(request) + # 白名单路径跳过认证 if request.url.path in self.WHITELIST: return await call_next(request) @@ -97,14 +101,23 @@ async def dispatch(self, request: Request, call_next): request.state.user = 
payload return await call_next(request) - +# ---------- 启动时检查认证配置 ---------- settings = get_settings() + +if settings.auth_password and not settings.auth_secret_key: + raise RuntimeError( + "安全错误: 启用了 AUTH_PASSWORD 但未配置 AUTH_SECRET_KEY。" + "请在 .env 中设置一个强随机密钥,例如: AUTH_SECRET_KEY=$(openssl rand -hex 32)" + ) + app = FastAPI(title=settings.app_name) + +# 中间件注册顺序:Starlette 中间件为倒序执行(最后注册的最先执行) +# 执行顺序: CORS -> GZip -> Auth -> RequestLog -> 路由处理 app.add_middleware(RequestLogMiddleware) app.add_middleware(AuthMiddleware) -app.add_middleware(GZipMiddleware, minimum_size=1000) - +app.add_middleware(GZipMiddleware, minimum_size=1000) # Starlette 内置跳过 text/event-stream @app.exception_handler(AppError) async def app_error_handler(_request: Request, exc: AppError): diff --git a/apps/api/routers/agent.py b/apps/api/routers/agent.py index afd40de..c4eb119 100644 --- a/apps/api/routers/agent.py +++ b/apps/api/routers/agent.py @@ -24,6 +24,9 @@ async def agent_chat(req: AgentChatRequest): from packages.storage.db import session_scope from packages.storage.repositories import AgentConversationRepository, AgentMessageRepository + # 追踪已保存的用户消息内容,避免重复保存 + saved_user_contents: set[str] = set() + # 如果有 conversation_id,保存到该会话;否则创建新会话 conversation_id = getattr(req, "conversation_id", None) with session_scope() as session: @@ -31,28 +34,34 @@ async def agent_chat(req: AgentChatRequest): msg_repo = AgentMessageRepository(session) if conversation_id: - # 保存到现有会话 conv = conv_repo.get_by_id(conversation_id) if not conv: - conversation_id = None # 会话不存在,创建新的 + conversation_id = None if not conversation_id: - # 创建新会话 - # 用第一条用户消息作为标题 first_user_msg = next((m for m in req.messages if m.role == "user"), None) title = first_user_msg.content[:50] if first_user_msg else "新对话" conv = conv_repo.create(title=title) conversation_id = conv.id - # 保存用户消息 - for msg in req.messages: + # 只保存最新一条用户消息(避免重复) + # 找到最后一条用户消息 + latest_user_msg = None + for msg in reversed(req.messages): if msg.role == "user": + 
latest_user_msg = msg + break + + if latest_user_msg: + # Dedup key is the first 200 characters of the content (not a hash). NOTE(review): saved_user_contents appears to be created per request, so this membership check looks like it can never skip a save - confirm intent + content_key = latest_user_msg.content[:200] + if content_key not in saved_user_contents: msg_repo.create( conversation_id=conversation_id, - role=msg.role, - content=msg.content, + role=latest_user_msg.role, + content=latest_user_msg.content, ) - + saved_user_contents.add(content_key) # 流式响应 msgs = [m.model_dump() for m in req.messages] @@ -68,28 +77,59 @@ def _save_assistant_response(content: str, tool_calls: list | None = None): meta=meta, ) - # 从 stream_chat 中提取工具调用信息并保存 - full_response = "" - tool_calls = [] + # SSE 解析:提取文本和工具调用 + import json + import re + + # 用于累积助手响应 + text_content = "" + tool_calls_records: list[dict] = [] + + # SSE 格式: "event: xxx\ndata: {...}\n\n" + _sse_pattern = re.compile(r"^event:\s*(\S+)\ndata:\s*(.+?)\n\n", re.DOTALL) + + def _parse_sse_chunk(chunk: str) -> tuple[str | None, dict | None]: + """解析 SSE chunk,返回 (event_type, data)""" + match = _sse_pattern.match(chunk) + if match: + event_type = match.group(1) + try: + data = json.loads(match.group(2)) + return event_type, data + except json.JSONDecodeError: + pass + return None, None def stream_with_save(): - nonlocal full_response, tool_calls + nonlocal text_content, tool_calls_records for chunk in stream_chat(msgs, confirmed_action_id=req.confirmed_action_id): - full_response += chunk - # 尝试解析工具调用 - if chunk.startswith('{"tool_'): - try: - import json - - tool_info = json.loads(chunk) - if "tool_name" in tool_info: - tool_calls.append(tool_info) - except Exception: - pass + # 解析 SSE 事件 + event_type, data = _parse_sse_chunk(chunk) + if event_type and data: + if event_type == "text_delta": + # 累积文本内容 + text_content += data.get("content", "") + elif event_type == "tool_result": + # 记录工具调用结果 + tool_calls_records.append({ + "name": data.get("name"), + "success": data.get("success"), + "summary": data.get("summary"), + "data": data.get("data"), + }) + elif event_type == "action_result": + # 记录用户确认的操作结果 + 
tool_calls_records.append({ + "action_id": data.get("id"), + "success": data.get("success"), + "summary": data.get("summary"), + "data": data.get("data"), + }) yield chunk - # 流结束后保存完整响应 - if full_response: - _save_assistant_response(full_response, tool_calls if tool_calls else None) + + # 流结束后保存助手响应 + if text_content or tool_calls_records: + _save_assistant_response(text_content, tool_calls_records if tool_calls_records else None) return StreamingResponse( stream_with_save(), diff --git a/apps/api/routers/topics.py b/apps/api/routers/topics.py index ac31aed..383fd87 100644 --- a/apps/api/routers/topics.py +++ b/apps/api/routers/topics.py @@ -79,7 +79,7 @@ def upsert_topic(req: TopicCreate) -> dict: schedule_frequency=req.schedule_frequency, schedule_time_utc=req.schedule_time_utc, enable_date_filter=req.enable_date_filter, - + date_filter_days=req.date_filter_days, ) return _topic_dict(topic, session) @@ -108,7 +108,7 @@ def update_topic(topic_id: str, req: TopicUpdate) -> dict: schedule_frequency=req.schedule_frequency, schedule_time_utc=req.schedule_time_utc, enable_date_filter=req.enable_date_filter, - + date_filter_days=req.date_filter_days, ) except ValueError as exc: raise NotFoundError(str(exc)) from exc diff --git a/apps/desktop/server.py b/apps/desktop/server.py index 4fb02c8..a9e73df 100644 --- a/apps/desktop/server.py +++ b/apps/desktop/server.py @@ -1,7 +1,7 @@ """ PaperMind Desktop Server — PyInstaller 入口 Tauri sidecar 调用此二进制,自动选端口 + 内嵌 scheduler。 -@author Bamzc +@author Color2333 """ from __future__ import annotations diff --git a/apps/worker/main.py b/apps/worker/main.py index 28e88b0..4ec40e9 100644 --- a/apps/worker/main.py +++ b/apps/worker/main.py @@ -1,6 +1,6 @@ """ PaperMind Worker - 智能定时任务调度(UTC 时间 + 闲时处理) -@author Bamzc +@author Color2333 @author Color2333 """ diff --git a/docker-compose.yml b/docker-compose.yml index 0ec27a1..a7d659c 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,13 +1,12 @@ -# PaperMind Docker Compose - 
多容器分离部署 + 端口预留 -# @author Bamzc +# PaperMind Docker Compose - 分离部署配置 # @author Color2333 # # 端口规划: -# - 现有项目:3001(前端) + 8001(后端) -# - PaperMind: 3002(前端) + 8002(后端) +# - 前端:3002 +# - 后端:8002 # # 使用方法: -# 1. cp .env.example deploy/.env && 编辑 deploy/.env 填入 API Key 和 SMTP +# 1. cp .env.example .env && 编辑 .env 填入 API Key # 2. docker compose up -d --build # 3. 访问 http://localhost:3002 @@ -27,7 +26,7 @@ services: - pm_logs:/app/logs - pm_pip_cache:/.pip-cache env_file: - - ./deploy/.env + - ./.env environment: - APP_ENV=production - API_HOST=0.0.0.0 @@ -64,7 +63,7 @@ services: - pm_logs:/app/logs - pm_pip_cache:/.pip-cache env_file: - - ./deploy/.env + - ./.env environment: - APP_ENV=production - SITE_URL=https://pm.vibingu.cn diff --git a/PaperMind.md b/docs/PaperMind.md similarity index 100% rename from PaperMind.md rename to docs/PaperMind.md diff --git a/TECH_DETAILS.md b/docs/TECH_DETAILS.md similarity index 100% rename from TECH_DETAILS.md rename to docs/TECH_DETAILS.md diff --git a/DOCKER_DEPLOYMENT.md b/docs/deployment/DOCKER_DEPLOYMENT.md similarity index 100% rename from DOCKER_DEPLOYMENT.md rename to docs/deployment/DOCKER_DEPLOYMENT.md diff --git a/DOCKER_FIXES.md b/docs/deployment/DOCKER_FIXES.md similarity index 100% rename from DOCKER_FIXES.md rename to docs/deployment/DOCKER_FIXES.md diff --git a/WIKI_DEPLOYMENT.md b/docs/deployment/WIKI_DEPLOYMENT.md similarity index 100% rename from WIKI_DEPLOYMENT.md rename to docs/deployment/WIKI_DEPLOYMENT.md diff --git a/frontend/.prettierignore b/frontend/.prettierignore new file mode 100644 index 0000000..ee55121 --- /dev/null +++ b/frontend/.prettierignore @@ -0,0 +1,5 @@ +node_modules +dist +build +*.min.js +coverage diff --git a/frontend/.prettierrc b/frontend/.prettierrc new file mode 100644 index 0000000..a79b3d6 --- /dev/null +++ b/frontend/.prettierrc @@ -0,0 +1,11 @@ +{ + "semi": true, + "singleQuote": false, + "tabWidth": 2, + "trailingComma": "es5", + "printWidth": 100, + "bracketSpacing": true, + 
"arrowParens": "always", + "endOfLine": "lf", + "plugins": ["prettier-plugin-tailwindcss"] +} diff --git a/frontend/eslint.config.js b/frontend/eslint.config.js new file mode 100644 index 0000000..3fa0c80 --- /dev/null +++ b/frontend/eslint.config.js @@ -0,0 +1,28 @@ +import js from "@eslint/js"; +import tseslint from "typescript-eslint"; +import reactHooks from "eslint-plugin-react-hooks"; +import reactRefresh from "eslint-plugin-react-refresh"; + +export default tseslint.config( + { ignores: ["dist", "node_modules"] }, + { + extends: [js.configs.recommended, ...tseslint.configs.recommended], + files: ["**/*.{ts,tsx}"], + languageOptions: { + parserOptions: { + ecmaVersion: 2022, + sourceType: "module", + }, + }, + plugins: { + "react-hooks": reactHooks, + "react-refresh": reactRefresh, + }, + rules: { + ...reactHooks.configs.recommended.rules, + "react-refresh/only-export-components": ["warn", { allowConstantExport: true }], + "@typescript-eslint/no-unused-vars": ["warn", { argsIgnorePattern: "^_" }], + "@typescript-eslint/no-explicit-any": "warn", + }, + }, +); diff --git a/frontend/package.json b/frontend/package.json index 087bf03..bc64529 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -6,7 +6,11 @@ "scripts": { "dev": "vite", "build": "vite build", - "preview": "vite preview" + "preview": "vite preview", + "format": "prettier --write \"src/**/*.{ts,tsx,js,jsx,json,css,md}\"", + "format:check": "prettier --check \"src/**/*.{ts,tsx,js,jsx,json,css,md}\"", + "lint": "eslint \"src/**/*.{ts,tsx}\"", + "lint:fix": "eslint \"src/**/*.{ts,tsx}\" --fix" }, "dependencies": { "@tauri-apps/api": "^2.10.1", @@ -29,12 +33,19 @@ "remark-math": "^6.0.0" }, "devDependencies": { + "@eslint/js": "^9.18.0", "@tailwindcss/vite": "^4.1.18", - "@types/react": "^19.2.14", - "@types/react-dom": "^19.2.3", + "@types/react": "^18.3.18", + "@types/react-dom": "^18.3.5", "@vitejs/plugin-react": "^4.4.1", + "eslint": "^9.18.0", + "eslint-plugin-react-hooks": "^5.1.0", + 
"eslint-plugin-react-refresh": "^0.4.18", + "prettier": "^3.4.2", + "prettier-plugin-tailwindcss": "^0.6.11", "tailwindcss": "^4.1.18", "typescript": "^5.9.3", + "typescript-eslint": "^8.20.0", "vite": "^5.4.11", "vite-plugin-svgr": "^4.5.0" } diff --git a/frontend/src/App.tsx b/frontend/src/App.tsx index 7c7d767..3f74040 100644 --- a/frontend/src/App.tsx +++ b/frontend/src/App.tsx @@ -1,6 +1,6 @@ /** * PaperMind - 主应用路由(懒加载) - * @author Bamzc + * @author Color2333 */ import { lazy, Suspense, useEffect } from "react"; import { BrowserRouter, Routes, Route, Navigate } from "react-router-dom"; diff --git a/frontend/src/DesktopBootstrap.tsx b/frontend/src/DesktopBootstrap.tsx index a645bb7..57fcf5f 100644 --- a/frontend/src/DesktopBootstrap.tsx +++ b/frontend/src/DesktopBootstrap.tsx @@ -1,7 +1,7 @@ /** * PaperMind 启动引导 * Web 模式直接渲染 App;Tauri 模式先检测配置、等待后端就绪。 - * @author Bamzc + * @author Color2333 */ import { lazy, Suspense, useEffect, useState } from "react"; import { Loader2, Sparkles } from "lucide-react"; diff --git a/frontend/src/components/ConfirmDialog.tsx b/frontend/src/components/ConfirmDialog.tsx index 934668f..13875da 100644 --- a/frontend/src/components/ConfirmDialog.tsx +++ b/frontend/src/components/ConfirmDialog.tsx @@ -1,6 +1,6 @@ /** * 通用确认弹窗组件 - * @author Bamzc + * @author Color2333 */ import { useCallback, useEffect, useRef, useState } from "react"; import { AlertTriangle, Loader2 } from "lucide-react"; diff --git a/frontend/src/components/ErrorBoundary.tsx b/frontend/src/components/ErrorBoundary.tsx index 4a35b80..bf959c3 100644 --- a/frontend/src/components/ErrorBoundary.tsx +++ b/frontend/src/components/ErrorBoundary.tsx @@ -1,6 +1,6 @@ /** * 全局错误边界 - 防止子组件崩溃导致白屏 - * @author Bamzc + * @author Color2333 */ import { Component, type ReactNode } from "react"; import { AlertTriangle, RotateCcw } from "lucide-react"; diff --git a/frontend/src/components/GlobalTaskBar.tsx b/frontend/src/components/GlobalTaskBar.tsx index 7872dd4..30a3a9d 100644 --- 
a/frontend/src/components/GlobalTaskBar.tsx +++ b/frontend/src/components/GlobalTaskBar.tsx @@ -1,6 +1,6 @@ /** * 全局任务进度条 — 固定在页面底部 - * @author Bamzc + * @author Color2333 */ import { useGlobalTasks, type ActiveTask } from "@/contexts/GlobalTaskContext"; import { Loader2, CheckCircle2, XCircle, ChevronUp, ChevronDown } from "lucide-react"; diff --git a/frontend/src/components/ImageUploader.tsx b/frontend/src/components/ImageUploader.tsx index 74b4ef5..6cd08c0 100644 --- a/frontend/src/components/ImageUploader.tsx +++ b/frontend/src/components/ImageUploader.tsx @@ -1,6 +1,6 @@ /** * ImageUploader - 图片上传组件(拖拽 + 粘贴 + 点击选文件) - * @author Bamzc + * @author Color2333 */ import { useState, useRef, useEffect, useCallback } from "react"; import { ImagePlus, X, Upload } from "lucide-react"; diff --git a/frontend/src/components/Layout.tsx b/frontend/src/components/Layout.tsx index 3d8c717..1ccae4c 100644 --- a/frontend/src/components/Layout.tsx +++ b/frontend/src/components/Layout.tsx @@ -1,6 +1,6 @@ /** * 主布局组件 - * @author Bamzc + * @author Color2333 */ import { Outlet, useLocation } from "react-router-dom"; import Sidebar from "./Sidebar"; diff --git a/frontend/src/components/Markdown.tsx b/frontend/src/components/Markdown.tsx index 8227c55..f24366e 100644 --- a/frontend/src/components/Markdown.tsx +++ b/frontend/src/components/Markdown.tsx @@ -1,6 +1,6 @@ /** * 统一 Markdown 渲染组件(含 LaTeX 支持) - * @author Bamzc + * @author Color2333 */ import { memo } from "react"; import ReactMarkdown from "react-markdown"; diff --git a/frontend/src/components/PdfReader.tsx b/frontend/src/components/PdfReader.tsx index 5cd967e..187dc76 100644 --- a/frontend/src/components/PdfReader.tsx +++ b/frontend/src/components/PdfReader.tsx @@ -1,6 +1,6 @@ /** * PDF Reader - 沉浸式论文阅读器(连续滚动 + AI 功能) - * @author Bamzc + * @author Color2333 */ import { useState, useCallback, useRef, useEffect, useMemo } from "react"; import { Document, Page, pdfjs } from "react-pdf"; diff --git 
a/frontend/src/components/SettingsDialog.tsx b/frontend/src/components/SettingsDialog.tsx index 60f6c69..c8a09dc 100644 --- a/frontend/src/components/SettingsDialog.tsx +++ b/frontend/src/components/SettingsDialog.tsx @@ -1,6 +1,6 @@ /** * 设置弹窗 - LLM 配置 / 邮箱与报告 / Pipeline 运行 / 运维操作 - * @author Bamzc + * @author Color2333 */ import { useState, useEffect, useCallback, type ReactNode } from "react"; import { useToast } from "@/contexts/ToastContext"; diff --git a/frontend/src/components/Sidebar.tsx b/frontend/src/components/Sidebar.tsx index 3e2fb7f..58d0563 100644 --- a/frontend/src/components/Sidebar.tsx +++ b/frontend/src/components/Sidebar.tsx @@ -30,8 +30,9 @@ import { X, PenTool, Loader2, + LogOut, } from "lucide-react"; -import { paperApi } from "@/services/api"; +import { paperApi, clearAuth } from "@/services/api"; /* 工具网格定义 */ const TOOLS = [ @@ -278,6 +279,13 @@ export default function Sidebar() {
v0.2.0 +
)} diff --git a/frontend/src/pages/Wiki.tsx b/frontend/src/pages/Wiki.tsx index 1ca1c8d..058feca 100644 --- a/frontend/src/pages/Wiki.tsx +++ b/frontend/src/pages/Wiki.tsx @@ -1,11 +1,10 @@ /** * Wiki - Manus 风格结构化知识百科 - * @author Bamzc + * @author Color2333 */ import { useState, useEffect, useCallback, useRef } from "react"; import { Card, CardHeader, Button, Tabs, Spinner, Empty } from "@/components/ui"; import { wikiApi, generatedApi, tasksApi } from "@/services/api"; -import type { TaskStatus } from "@/services/api"; import type { PaperWiki, TopicWiki, @@ -18,6 +17,7 @@ import type { ScholarMetadataItem, GeneratedContentListItem, GeneratedContent, + TaskStatus, } from "@/types"; import Markdown from "@/components/Markdown"; import { diff --git a/frontend/src/pages/Writing.tsx b/frontend/src/pages/Writing.tsx index 056a09b..8d147d1 100644 --- a/frontend/src/pages/Writing.tsx +++ b/frontend/src/pages/Writing.tsx @@ -1,7 +1,7 @@ /** * Writing Assistant - 学术写作助手(支持多轮微调对话) * Prompt 模板来源:https://github.com/Leey21/awesome-ai-research-writing - * @author Bamzc + * @author Color2333 */ import { useState, useEffect, useCallback, useMemo, useRef } from "react"; import { Button, Spinner } from "@/components/ui"; diff --git a/frontend/src/services/api.ts b/frontend/src/services/api.ts index e448c8b..7ea345e 100644 --- a/frontend/src/services/api.ts +++ b/frontend/src/services/api.ts @@ -1,12 +1,13 @@ /** * PaperMind - API 服务层 - * @author Bamzc + * @author Color2333 */ import type { SystemStatus, Topic, TopicCreate, TopicUpdate, + TopicFetchResult, Paper, PipelineRun, SkimReport, @@ -35,8 +36,39 @@ import type { BridgesResponse, FrontierResponse, CocitationResponse, + TodaySummary, + FolderStats, + PaperListResponse, + FigureAnalysisItem, + ReferenceImportEntry, + ImportTaskStatus, + CollectionAction, + EmailConfig, + EmailConfigForm, + DailyReportConfig, + TaskStatus, + ActiveTaskInfo, + LoginResponse, + AuthStatusResponse, } from "@/types"; +export type { + TodaySummary, + 
TopicFetchResult, + FolderStats, + PaperListResponse, + FigureAnalysisItem, + ReferenceImportEntry, + ImportTaskStatus, + CollectionAction, + EmailConfig, + EmailConfigForm, + DailyReportConfig, + TaskStatus, + ActiveTaskInfo, + LoginResponse, + AuthStatusResponse, +} from "@/types"; import { resolveApiBase } from "@/lib/tauri"; function getApiBase(): string { @@ -82,9 +114,11 @@ async function request(path: string, options: RequestInit = {}): Promise { const text = await resp.text().catch(() => ""); if (text) msg = text; } - // 401 未认证,清除 token + // 401 未认证,清除 token 并刷新页面跳转登录 if (resp.status === 401) { clearAuth(); + // 强制刷新页面触发 App 重新渲染登录页 + window.location.reload(); } throw new Error(msg); } @@ -117,23 +151,6 @@ export const systemApi = { status: () => get("/system/status"), }; -/* ========== 今日速览 ========== */ -export interface TodaySummary { - today_new: number; - week_new: number; - total_papers: number; - recommendations: { - id: string; - title: string; - arxiv_id: string; - abstract: string; - similarity: number; - title_zh?: string; - keywords?: string[]; - categories?: string[]; - }[]; - hot_keywords: { keyword: string; count: number }[]; -} export const todayApi = { summary: () => get("/today"), @@ -154,36 +171,7 @@ export const topicApi = { post<{ suggestions: KeywordSuggestion[] }>("/topics/suggest-keywords", { description }), }; -export interface TopicFetchResult { - topic_id: string; - topic_name?: string; - status: string; - inserted: number; - processed?: number; - attempts?: number; - error?: string; - topic?: Topic; -} - /* ========== 论文 ========== */ -export interface FolderStats { - total: number; - favorites: number; - recent_7d: number; - unclassified: number; - by_topic: { topic_id: string; topic_name: string; count: number }[]; - by_status: Record; - by_date: { date: string; count: number }[]; -} - -export interface PaperListResponse { - items: Paper[]; - total: number; - page: number; - page_size: number; - total_pages: number; -} - 
export const paperApi = { latest: (opts: { page?: number; @@ -240,41 +228,7 @@ export const paperApi = { post<{ action: string; result: string }>(`/papers/${id}/ai/explain`, { text, action }), }; -export interface FigureAnalysisItem { - id?: string; - page_number: number; - image_index?: number; - image_type: string; - caption: string; - description: string; - image_url?: string | null; - has_image?: boolean; -} - /* ========== 摄入 ========== */ -export interface ReferenceImportEntry { - scholar_id: string | null; - title: string; - year: number | null; - venue: string | null; - citation_count: number | null; - arxiv_id: string | null; - abstract: string | null; - direction?: string; -} - -export interface ImportTaskStatus { - task_id: string; - status: "running" | "completed" | "failed"; - total: number; - completed: number; - imported: number; - skipped: number; - failed: number; - current: string; - error?: string; - results: { title: string; status: string; reason?: string; paper_id?: string; source?: string }[]; -} export const ingestApi = { arxiv: (query: string, maxResults = 20, topicId?: string, sortBy = "submittedDate") => { @@ -316,16 +270,6 @@ export const citationApi = { }; /* ========== 行动记录 ========== */ -export interface CollectionAction { - id: string; - action_type: string; - title: string; - query: string | null; - topic_id: string | null; - paper_count: number; - created_at: string; -} - export const actionApi = { list: (opts: { actionType?: string; topicId?: string; limit?: number; offset?: number } = {}) => { const params = new URLSearchParams(); @@ -458,9 +402,10 @@ async function fetchSSE(url: string, init?: RequestInit): Promise { }, }); if (!resp.ok) { - // 401 清除 token + // 401 未认证,清除 token 并刷新页面跳转登录 if (resp.status === 401) { clearAuth(); + window.location.reload(); } const text = await resp.text().catch(() => ""); throw new Error(`请求失败 (${resp.status}): ${text || resp.statusText}`); @@ -469,13 +414,14 @@ async function fetchSSE(url: 
string, init?: RequestInit): Promise { } export const agentApi = { - chat: async (messages: AgentMessage[], confirmedActionId?: string): Promise => { - const url = `${getApiBase().replace(/\/+$/, "")}/agent/chat`; + chat: async (messages: AgentMessage[], conversationId?: string, confirmedActionId?: string): Promise => { + const url = `${getApiBase().replace(/\/\/+$/, "")}/agent/chat`; return fetchSSE(url, { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ messages, + conversation_id: conversationId || null, confirmed_action_id: confirmedActionId || null, }), }); @@ -491,30 +437,6 @@ export const agentApi = { }; /* ========== 邮箱配置 ========== */ -export interface EmailConfig { - id: string; - name: string; - smtp_server: string; - smtp_port: number; - smtp_use_tls: boolean; - sender_email: string; - sender_name: string; - username: string; - is_active: boolean; - created_at: string; -} - -export interface EmailConfigForm { - name: string; - smtp_server: string; - smtp_port: number; - smtp_use_tls: boolean; - sender_email: string; - sender_name: string; - username: string; - password: string; -} - export const emailConfigApi = { list: () => get("/settings/email-configs"), create: (data: EmailConfigForm) => post("/settings/email-configs", data), @@ -526,17 +448,6 @@ export const emailConfigApi = { }; /* ========== 每日报告配置 ========== */ -export interface DailyReportConfig { - enabled: boolean; - auto_deep_read: boolean; - deep_read_limit: number; - send_email_report: boolean; - recipient_emails: string[]; - report_time_utc: number; - include_paper_details: boolean; - include_graph_insights: boolean; -} - export const dailyReportApi = { getConfig: () => get("/settings/daily-report-config"), updateConfig: (data: Record) => @@ -549,19 +460,6 @@ export const dailyReportApi = { }; /* ========== 后台任务 ========== */ -export interface TaskStatus { - task_id: string; - task_type: string; - title: string; - status: "pending" | "running" | 
"completed" | "failed"; - progress: number; - message: string; - error: string | null; - created_at: number; - updated_at: number; - has_result: boolean; -} - export const tasksApi = { active: () => get<{ tasks: ActiveTaskInfo[] }>("/tasks/active"), startTopicWiki: (keyword: string, limit = 120) => @@ -580,29 +478,7 @@ export const tasksApi = { post<{ ok: boolean }>("/tasks/track", body), }; -export interface ActiveTaskInfo { - task_id: string; - task_type: string; - title: string; - current: number; - total: number; - message: string; - elapsed_seconds: number; - progress_pct: number; - finished: boolean; - success: boolean; - error: string | null; -} - /* ========== 认证 ========== */ -export interface LoginResponse { - access_token: string; - token_type: string; -} - -export interface AuthStatusResponse { - auth_enabled: boolean; -} export const authApi = { login: (password: string) => diff --git a/frontend/src/types/index.ts b/frontend/src/types/index.ts index 6a1366a..65aacdd 100644 --- a/frontend/src/types/index.ts +++ b/frontend/src/types/index.ts @@ -1,6 +1,6 @@ /** * PaperMind - TypeScript 类型定义 - * @author Bamzc + * @author Color2333 */ /* ========== 系统 ========== */ @@ -36,7 +36,6 @@ export interface Topic { schedule_time_utc: number; enable_date_filter: boolean; date_filter_days: number; - schedule_time_utc: number; paper_count?: number; last_run_at?: string | null; last_run_count?: number | null; @@ -52,7 +51,6 @@ export interface TopicCreate { schedule_time_utc?: number; enable_date_filter?: boolean; date_filter_days?: number; - schedule_time_utc?: number; } export interface TopicUpdate { @@ -64,7 +62,6 @@ export interface TopicUpdate { schedule_time_utc?: number; enable_date_filter?: boolean; date_filter_days?: number; - schedule_time_utc?: number; } export interface KeywordSuggestion { @@ -75,29 +72,14 @@ export interface KeywordSuggestion { /* ========== 抓取任务 ========== */ export interface TopicFetchResult { - status: "started" | "already_running" | 
"ok" | "failed" | "no_new_papers"; - task_id?: string; + topic_id: string; topic_name?: string; - topic_id?: string; - message?: string; - inserted?: number; - new_count?: number; // 新论文数量 - total_count?: number; // 总抓取数量(包含重复) - processed?: number; - error?: string; -} - -export interface TopicFetchStatus { - status: "running" | "ok" | "failed" | "no_new_papers"; - task_id: string; - progress_pct: number; - message?: string; - inserted?: number; - new_count?: number; - total_count?: number; + status: string; + inserted: number; processed?: number; + attempts?: number; error?: string; - topic?: Partial; + topic?: Topic; } /* ========== 论文 ========== */ @@ -763,6 +745,165 @@ export interface PendingAction { description: string; } +/* ========== 今日速览 ========== */ +export interface TodaySummary { + today_new: number; + week_new: number; + total_papers: number; + recommendations: { + id: string; + title: string; + arxiv_id: string; + abstract: string; + similarity: number; + title_zh?: string; + keywords?: string[]; + categories?: string[]; + }[]; + hot_keywords: { keyword: string; count: number }[]; +} + +/* ========== 论文列表 ========== */ +export interface FolderStats { + total: number; + favorites: number; + recent_7d: number; + unclassified: number; + by_topic: { topic_id: string; topic_name: string; count: number }[]; + by_status: Record; + by_date: { date: string; count: number }[]; +} + +export interface PaperListResponse { + items: Paper[]; + total: number; + page: number; + page_size: number; + total_pages: number; +} + +export interface FigureAnalysisItem { + id?: string; + page_number: number; + image_index?: number; + image_type: string; + caption: string; + description: string; + image_url?: string | null; + has_image?: boolean; +} + +/* ========== 引用入库 ========== */ +export interface ReferenceImportEntry { + scholar_id: string | null; + title: string; + year: number | null; + venue: string | null; + citation_count: number | null; + arxiv_id: string | null; 
+ abstract: string | null; + direction?: string; +} + +export interface ImportTaskStatus { + task_id: string; + status: "running" | "completed" | "failed"; + total: number; + completed: number; + imported: number; + skipped: number; + failed: number; + current: string; + error?: string; + results: { title: string; status: string; reason?: string; paper_id?: string; source?: string }[]; +} + +/* ========== 行动记录 ========== */ +export interface CollectionAction { + id: string; + action_type: string; + title: string; + query: string | null; + topic_id: string | null; + paper_count: number; + created_at: string; +} + +/* ========== 邮箱配置 ========== */ +export interface EmailConfig { + id: string; + name: string; + smtp_server: string; + smtp_port: number; + smtp_use_tls: boolean; + sender_email: string; + sender_name: string; + username: string; + is_active: boolean; + created_at: string; +} + +export interface EmailConfigForm { + name: string; + smtp_server: string; + smtp_port: number; + smtp_use_tls: boolean; + sender_email: string; + sender_name: string; + username: string; + password: string; +} + +/* ========== 每日报告配置 ========== */ +export interface DailyReportConfig { + enabled: boolean; + auto_deep_read: boolean; + deep_read_limit: number; + send_email_report: boolean; + recipient_emails: string[]; + report_time_utc: number; + include_paper_details: boolean; + include_graph_insights: boolean; +} + +/* ========== 后台任务 ========== */ +export interface TaskStatus { + task_id: string; + task_type: string; + title: string; + status: "pending" | "running" | "completed" | "failed"; + progress: number; + message: string; + error: string | null; + created_at: number; + updated_at: number; + has_result: boolean; +} + +export interface ActiveTaskInfo { + task_id: string; + task_type: string; + title: string; + current: number; + total: number; + message: string; + elapsed_seconds: number; + progress_pct: number; + finished: boolean; + success: boolean; + error: string | 
null; +} + +/* ========== 认证 ========== */ +export interface LoginResponse { + access_token: string; + token_type: string; +} + +export interface AuthStatusResponse { + auth_enabled: boolean; +} + export type SSEEventType = | "text_delta" | "tool_start" diff --git a/infra/docker-compose.yml b/infra/docker-compose.postgres.yml similarity index 100% rename from infra/docker-compose.yml rename to infra/docker-compose.postgres.yml diff --git a/infra/migrations/versions/20260311_0010_add_schedule_frequency.py b/infra/migrations/versions/20260311_0010_add_schedule_frequency.py new file mode 100644 index 0000000..3275a2c --- /dev/null +++ b/infra/migrations/versions/20260311_0010_add_schedule_frequency.py @@ -0,0 +1,32 @@ +"""add schedule_frequency to topic_subscriptions + +Revision ID: 20260311_0010_add_schedule_frequency +Revises: 140e104869f9 +Create Date: 2026-03-11 + +@author Color2333 +""" +from __future__ import annotations + +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. 
+revision: str = "20260311_0010_add_schedule_frequency" +down_revision: Union[str, None] = "140e104869f9" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + op.add_column( + "topic_subscriptions", + sa.Column("schedule_frequency", sa.String(32), nullable=False, server_default="daily"), + ) + + +def downgrade() -> None: + op.drop_column("topic_subscriptions", "schedule_frequency") diff --git a/infra/migrations/versions/5ae0d73c1013_add_agent_pending_action_table.py b/infra/migrations/versions/5ae0d73c1013_add_agent_pending_action_table.py new file mode 100644 index 0000000..35a8eca --- /dev/null +++ b/infra/migrations/versions/5ae0d73c1013_add_agent_pending_action_table.py @@ -0,0 +1,194 @@ +"""add_agent_pending_action_table + +Revision ID: 5ae0d73c1013 +Revises: 20260311_0010_add_schedule_frequency +Create Date: 2026-03-12 01:32:59.424095 +""" +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = '5ae0d73c1013' +down_revision = '20260311_0010_add_schedule_frequency' +branch_labels = None +depends_on = None + + +def upgrade() -> None: + # ### commands auto generated by Alembic - please adjust! 
### + op.create_table('agent_pending_actions', + sa.Column('id', sa.String(length=36), nullable=False), + sa.Column('conversation_id', sa.String(length=36), nullable=True), + sa.Column('tool_name', sa.String(length=128), nullable=False), + sa.Column('tool_args', sa.JSON(), nullable=False), + sa.Column('tool_call_id', sa.String(length=64), nullable=True), + sa.Column('conversation_state', sa.JSON(), nullable=True), + sa.Column('created_at', sa.DateTime(), nullable=False), + sa.Column('paper_id', sa.String(length=36), nullable=True), + sa.Column('markdown', sa.Text(), nullable=False), + sa.Column('metadata_json', sa.JSON(), nullable=False), + sa.ForeignKeyConstraint(['conversation_id'], ['agent_conversations.id'], ondelete='CASCADE'), + sa.ForeignKeyConstraint(['paper_id'], ['papers.id'], ondelete='SET NULL'), + sa.PrimaryKeyConstraint('id') + ) + op.create_index(op.f('ix_agent_pending_actions_conversation_id'), 'agent_pending_actions', ['conversation_id'], unique=False) + op.create_index(op.f('ix_agent_pending_actions_paper_id'), 'agent_pending_actions', ['paper_id'], unique=False) + op.drop_index(op.f('ix_ieee_quotas_date'), table_name='ieee_api_quotas') + op.drop_index(op.f('ix_ieee_quotas_topic_id'), table_name='ieee_api_quotas') + op.drop_table('ieee_api_quotas') + op.alter_column('action_papers', 'id', + existing_type=sa.VARCHAR(length=36), + nullable=False) + op.create_unique_constraint('uq_action_paper', 'action_papers', ['action_id', 'paper_id']) + op.drop_constraint(None, 'action_papers', type_='foreignkey') + op.drop_constraint(None, 'action_papers', type_='foreignkey') + op.create_foreign_key(None, 'action_papers', 'collection_actions', ['action_id'], ['id'], ondelete='CASCADE') + op.create_foreign_key(None, 'action_papers', 'papers', ['paper_id'], ['id'], ondelete='CASCADE') + op.alter_column('agent_conversations', 'user_id', + existing_type=sa.VARCHAR(length=256), + type_=sa.String(length=36), + existing_nullable=True) + 
op.alter_column('agent_conversations', 'title', + existing_type=sa.VARCHAR(length=512), + type_=sa.String(length=256), + existing_nullable=True) + op.create_index(op.f('ix_agent_conversations_created_at'), 'agent_conversations', ['created_at'], unique=False) + op.alter_column('agent_messages', 'role', + existing_type=sa.VARCHAR(length=32), + type_=sa.String(length=20), + existing_nullable=False) + op.create_index(op.f('ix_agent_messages_created_at'), 'agent_messages', ['created_at'], unique=False) + op.drop_column('agent_messages', 'metadata_json') + op.drop_column('agent_messages', 'markdown') + op.drop_column('agent_messages', 'paper_id') + op.alter_column('collection_actions', 'id', + existing_type=sa.VARCHAR(length=36), + nullable=False) + op.alter_column('collection_actions', 'action_type', + existing_type=sa.VARCHAR(length=32), + type_=sa.Enum('initial_import', 'manual_collect', 'auto_collect', 'agent_collect', 'subscription_ingest', 'reference_import', name='action_type'), + existing_nullable=False) + op.drop_index(op.f('ix_collection_actions_created_at'), table_name='collection_actions') + op.drop_index(op.f('ix_collection_actions_type'), table_name='collection_actions') + op.create_index(op.f('ix_collection_actions_action_type'), 'collection_actions', ['action_type'], unique=False) + op.drop_constraint(None, 'collection_actions', type_='foreignkey') + op.create_foreign_key(None, 'collection_actions', 'topic_subscriptions', ['topic_id'], ['id'], ondelete='SET NULL') + op.alter_column('generated_contents', 'metadata_json', + existing_type=sqlite.JSON(), + nullable=True) + op.alter_column('image_analyses', 'id', + existing_type=sa.VARCHAR(length=36), + nullable=False) + op.drop_constraint(None, 'image_analyses', type_='foreignkey') + op.create_foreign_key(None, 'image_analyses', 'papers', ['paper_id'], ['id'], ondelete='CASCADE') + op.alter_column('papers', 'read_status', + existing_type=sa.VARCHAR(length=8), + type_=sa.Enum('unread', 'skimmed', 'deep_read', 
def downgrade() -> None:
    """Revert this revision's schema changes.

    Statements run in the order Alembic autogenerate emitted them (the
    reverse of ``upgrade``): re-add the dropped IEEE/source columns and
    indexes, restore the previous column types and nullability, recreate
    the ``ieee_api_quotas`` table and drop ``agent_pending_actions``.
    """
    # NOTE(review): the project's DATABASE_URL points at SQLite; ALTER COLUMN
    # and DROP CONSTRAINT presumably need ``op.batch_alter_table`` there --
    # confirm this function has actually been executed end to end.
    # ### commands auto generated by Alembic - please adjust! ###
    op.add_column('topic_subscriptions', sa.Column('ieee_daily_quota', sa.INTEGER(), server_default=sa.text("'10'"), nullable=False))
    op.add_column('topic_subscriptions', sa.Column('sources', sqlite.JSON(), server_default=sa.text('\'["arxiv"]\''), nullable=False))
    op.add_column('topic_subscriptions', sa.Column('ieee_api_key_override', sa.VARCHAR(length=512), nullable=True))
    op.alter_column('topic_subscriptions', 'schedule_frequency',
               existing_type=sa.String(length=32),
               type_=sa.VARCHAR(length=20),
               existing_nullable=False,
               existing_server_default=sa.text("'daily'"))
    op.create_index(op.f('ix_prompt_traces_created_at'), 'prompt_traces', ['created_at'], unique=False)
    op.create_index(op.f('ix_pipeline_runs_created_at'), 'pipeline_runs', ['created_at'], unique=False)
    op.add_column('papers', sa.Column('source', sa.VARCHAR(length=32), server_default=sa.text("'arxiv'"), nullable=False))
    op.add_column('papers', sa.Column('doi', sa.VARCHAR(length=128), nullable=True))
    op.add_column('papers', sa.Column('source_id', sa.VARCHAR(length=128), nullable=True))
    op.create_index(op.f('ix_papers_source_id'), 'papers', ['source_id'], unique=False)
    op.create_index(op.f('ix_papers_source'), 'papers', ['source'], unique=False)
    op.create_index(op.f('ix_papers_doi'), 'papers', ['doi'], unique=False)
    # NOTE(review): the enum value 'deep_read' is 9 characters but the target
    # type is VARCHAR(8), and the server default 'Unread' differs in case from
    # the enum member 'unread' -- verify both against the real schema.
    op.alter_column('papers', 'read_status',
               existing_type=sa.Enum('unread', 'skimmed', 'deep_read', name='read_status'),
               type_=sa.VARCHAR(length=8),
               existing_nullable=False,
               existing_server_default=sa.text("'Unread'"))
    # NOTE(review): constraint name is None here and in the other
    # drop_constraint/create_foreign_key calls below -- autogenerate did not
    # emit a name; confirm Alembic can drop an unnamed constraint on the
    # target database before relying on this downgrade.
    op.drop_constraint(None, 'image_analyses', type_='foreignkey')
    op.create_foreign_key(None, 'image_analyses', 'papers', ['paper_id'], ['id'])
    op.alter_column('image_analyses', 'id',
               existing_type=sa.VARCHAR(length=36),
               nullable=True)
    op.alter_column('generated_contents', 'metadata_json',
               existing_type=sqlite.JSON(),
               nullable=False)
    op.drop_constraint(None, 'collection_actions', type_='foreignkey')
    op.create_foreign_key(None, 'collection_actions', 'topic_subscriptions', ['topic_id'], ['id'])
    op.drop_index(op.f('ix_collection_actions_action_type'), table_name='collection_actions')
    op.create_index(op.f('ix_collection_actions_type'), 'collection_actions', ['action_type'], unique=False)
    op.create_index(op.f('ix_collection_actions_created_at'), 'collection_actions', ['created_at'], unique=False)
    op.alter_column('collection_actions', 'action_type',
               existing_type=sa.Enum('initial_import', 'manual_collect', 'auto_collect', 'agent_collect', 'subscription_ingest', 'reference_import', name='action_type'),
               type_=sa.VARCHAR(length=32),
               existing_nullable=False)
    op.alter_column('collection_actions', 'id',
               existing_type=sa.VARCHAR(length=36),
               nullable=True)
    # Columns re-added to agent_messages carry server defaults so existing
    # rows satisfy the NOT NULL constraints.
    op.add_column('agent_messages', sa.Column('paper_id', sa.VARCHAR(length=36), nullable=True))
    op.add_column('agent_messages', sa.Column('markdown', sa.TEXT(), server_default=sa.text("('')"), nullable=False))
    op.add_column('agent_messages', sa.Column('metadata_json', sqlite.JSON(), server_default=sa.text("'{}'"), nullable=False))
    op.drop_index(op.f('ix_agent_messages_created_at'), table_name='agent_messages')
    op.alter_column('agent_messages', 'role',
               existing_type=sa.String(length=20),
               type_=sa.VARCHAR(length=32),
               existing_nullable=False)
    op.drop_index(op.f('ix_agent_conversations_created_at'), table_name='agent_conversations')
    op.alter_column('agent_conversations', 'title',
               existing_type=sa.String(length=256),
               type_=sa.VARCHAR(length=512),
               existing_nullable=True)
    op.alter_column('agent_conversations', 'user_id',
               existing_type=sa.String(length=36),
               type_=sa.VARCHAR(length=256),
               existing_nullable=True)
    # Both foreign keys on action_papers are dropped, then recreated without
    # an ON DELETE clause.
    op.drop_constraint(None, 'action_papers', type_='foreignkey')
    op.drop_constraint(None, 'action_papers', type_='foreignkey')
    op.create_foreign_key(None, 'action_papers', 'papers', ['paper_id'], ['id'])
    op.create_foreign_key(None, 'action_papers', 'collection_actions', ['action_id'], ['id'])
    op.drop_constraint('uq_action_paper', 'action_papers', type_='unique')
    op.alter_column('action_papers', 'id',
               existing_type=sa.VARCHAR(length=36),
               nullable=True)
    op.create_table('ieee_api_quotas',
    sa.Column('id', sa.VARCHAR(length=36), nullable=False),
    sa.Column('topic_id', sa.VARCHAR(length=36), nullable=True),
    sa.Column('date', sa.DATE(), nullable=False),
    sa.Column('api_calls_used', sa.INTEGER(), nullable=False),
    sa.Column('api_calls_limit', sa.INTEGER(), nullable=False),
    sa.Column('last_reset_at', sa.DATETIME(), nullable=True),
    sa.Column('created_at', sa.DATETIME(), nullable=False),
    sa.ForeignKeyConstraint(['topic_id'], ['topic_subscriptions.id'], ondelete='SET NULL'),
    sa.PrimaryKeyConstraint('id'),
    sa.UniqueConstraint('topic_id', 'date', name=op.f('uq_ieee_quota_daily'))
    )
    op.create_index(op.f('ix_ieee_quotas_topic_id'), 'ieee_api_quotas', ['topic_id'], unique=False)
    op.create_index(op.f('ix_ieee_quotas_date'), 'ieee_api_quotas', ['date'], unique=False)
    # The table's indexes are dropped before the table itself.
    op.drop_index(op.f('ix_agent_pending_actions_paper_id'), table_name='agent_pending_actions')
    op.drop_index(op.f('ix_agent_pending_actions_conversation_id'), table_name='agent_pending_actions')
    op.drop_table('agent_pending_actions')
    # ### end Alembic commands ###
def _cleanup_expired_actions():
    """Delete pending actions older than ``_ACTION_TTL`` seconds from the database.

    Best-effort housekeeping: any failure is logged as a warning and
    swallowed, so the caller's flow is never interrupted by cleanup.
    """
    try:
        with session_scope() as db:
            removed = AgentPendingActionRepository(db).cleanup_expired(_ACTION_TTL)
            if removed <= 0:
                return
            logger.info("清理 %d 个过期 pending_actions", removed)
    except Exception as exc:
        logger.warning("清理过期 pending_actions 失败: %s", exc)
AgentPendingActionRepository(session) + repo.create( + action_id=action_id, + tool_name=tc.tool_name, + tool_args=args, + tool_call_id=tc.tool_call_id, + conversation_state={"conversation": conversation}, + ) + except Exception as exc: + logger.warning("存储 pending_action 失败: %s", exc) desc = _describe_action(tc.tool_name, args) yield _make_sse("action_confirm", { "id": action_id, @@ -407,8 +409,23 @@ def stream_chat( # 处理确认操作 if confirmed_action_id: - with _pending_lock: - action = _pending_actions.pop(confirmed_action_id, None) + # 从数据库读取并删除 + action = None + try: + with session_scope() as session: + repo = AgentPendingActionRepository(session) + action_record = repo.get_by_id(confirmed_action_id) + if action_record: + action = { + "tool": action_record.tool_name, + "args": action_record.tool_args, + "tool_call_id": action_record.tool_call_id, + "conversation": (action_record.conversation_state or {}).get("conversation", []), + } + repo.delete(confirmed_action_id) + except Exception as exc: + logger.warning("读取 pending_action 失败: %s", exc) + if not action: yield _make_sse( "error", @@ -463,8 +480,24 @@ def stream_chat( def confirm_action(action_id: str) -> Iterator[str]: """确认执行挂起的操作并继续对话""" logger.info("用户确认操作: %s", action_id) - with _pending_lock: - action = _pending_actions.pop(action_id, None) + + # 从数据库读取并删除 + action = None + try: + with session_scope() as session: + repo = AgentPendingActionRepository(session) + action_record = repo.get_by_id(action_id) + if action_record: + action = { + "tool": action_record.tool_name, + "args": action_record.tool_args, + "tool_call_id": action_record.tool_call_id, + "conversation": (action_record.conversation_state or {}).get("conversation", []), + } + repo.delete(action_id) + except Exception as exc: + logger.warning("读取 pending_action 失败: %s", exc) + if not action: yield _make_sse( "error", @@ -490,12 +523,27 @@ def confirm_action(action_id: str) -> Iterator[str]: yield _make_sse("done", {}) - def 
reject_action(action_id: str) -> Iterator[str]: """拒绝挂起的操作并让 LLM 给出替代建议""" logger.info("用户拒绝操作: %s", action_id) - with _pending_lock: - action = _pending_actions.pop(action_id, None) + + # 从数据库读取并删除 + action = None + try: + with session_scope() as session: + repo = AgentPendingActionRepository(session) + action_record = repo.get_by_id(action_id) + if action_record: + action = { + "tool": action_record.tool_name, + "args": action_record.tool_args, + "tool_call_id": action_record.tool_call_id, + "conversation": (action_record.conversation_state or {}).get("conversation", []), + } + repo.delete(action_id) + except Exception as exc: + logger.warning("读取 pending_action 失败: %s", exc) + yield _make_sse("action_result", { "id": action_id, "success": False, @@ -521,7 +569,6 @@ def reject_action(action_id: str) -> Iterator[str]: yield _make_sse("done", {}) - def _describe_action(tool_name: str, args: dict) -> str: """生成操作描述""" descriptions = { diff --git a/packages/ai/agent_tools.py b/packages/ai/agent_tools.py index 3f1e98f..4b2f997 100644 --- a/packages/ai/agent_tools.py +++ b/packages/ai/agent_tools.py @@ -1,13 +1,13 @@ """ Agent 工具注册表和执行函数 -@author Bamzc +@author Color2333 """ from __future__ import annotations import logging from collections.abc import Iterator from dataclasses import dataclass, field -from uuid import UUID +from uuid import UUID, uuid4 from packages.ai.brief_service import DailyBriefService from packages.ai.graph_service import GraphService diff --git a/packages/ai/brief_service.py b/packages/ai/brief_service.py index 561730d..c283934 100644 --- a/packages/ai/brief_service.py +++ b/packages/ai/brief_service.py @@ -1,6 +1,6 @@ """ 每日简报服务 - 精美日报生成 -@author Bamzc +@author Color2333 """ from __future__ import annotations diff --git a/packages/ai/cost_guard.py b/packages/ai/cost_guard.py index 77224c1..645dcc0 100644 --- a/packages/ai/cost_guard.py +++ b/packages/ai/cost_guard.py @@ -1,6 +1,6 @@ """ LLM 成本守卫 - 自动降级模型选择 -@author Bamzc +@author Color2333 """ 
from __future__ import annotations diff --git a/packages/ai/daily_runner.py b/packages/ai/daily_runner.py index 0774fdb..6633f36 100644 --- a/packages/ai/daily_runner.py +++ b/packages/ai/daily_runner.py @@ -1,6 +1,6 @@ """ 每日/每周定时任务编排 - 智能调度 + 精读限额 -@author Bamzc +@author Color2333 @author Color2333 """ @@ -138,6 +138,11 @@ def run_topic_ingest(topic_id: str) -> dict: # 获取精读配额配置 max_deep_reads = getattr(topic, "max_deep_reads_per_run", 2) + # 读取日期过滤配置 + enable_date_filter = getattr(topic, "enable_date_filter", False) + date_filter_days = getattr(topic, "date_filter_days", 7) + days_back = date_filter_days if enable_date_filter else 0 + last_error: str | None = None ids: list[str] = [] new_count: int = 0 @@ -151,6 +156,7 @@ def run_topic_ingest(topic_id: str) -> dict: max_results=topic.max_results_per_run, topic_id=topic.id, action_type=ActionType.auto_collect, + days_back=days_back, ) ids = result["inserted_ids"] new_count = result["new_count"] diff --git a/packages/ai/figure_service.py b/packages/ai/figure_service.py index cc79ac2..1b87e01 100644 --- a/packages/ai/figure_service.py +++ b/packages/ai/figure_service.py @@ -1,7 +1,7 @@ """ 图表/公式智能识别与解读服务 从 PDF 中提取 Figure/Table/公式区域,送 Vision 模型解读 -@author Bamzc +@author Color2333 """ from __future__ import annotations diff --git a/packages/ai/graph_service.py b/packages/ai/graph_service.py index 69eb0b1..2d095a4 100644 --- a/packages/ai/graph_service.py +++ b/packages/ai/graph_service.py @@ -1,6 +1,6 @@ """ 图谱分析服务 - 引用树、时间线、质量评估、演化分析、综述生成 -@author Bamzc +@author Color2333 """ from __future__ import annotations diff --git a/packages/ai/keyword_service.py b/packages/ai/keyword_service.py index afff069..b98aa96 100644 --- a/packages/ai/keyword_service.py +++ b/packages/ai/keyword_service.py @@ -1,6 +1,6 @@ """ AI 关键词建议服务 - 自然语言 → arXiv 搜索关键词 -@author Bamzc +@author Color2333 """ from __future__ import annotations diff --git a/packages/ai/pipelines.py b/packages/ai/pipelines.py index d1c47da..35b1754 100644 --- 
a/packages/ai/pipelines.py +++ b/packages/ai/pipelines.py @@ -1,6 +1,6 @@ """ 论文处理 Pipeline - 摄入 / 粗读 / 精读 / 向量化 / 参考文献导入 -@author Bamzc +@author Color2333 """ from __future__ import annotations @@ -88,7 +88,7 @@ def ingest_arxiv( topic_id: str | None = None, action_type: ActionType = ActionType.manual_collect, sort_by: str = "submittedDate", - + days_back: int = 7, ) -> tuple[int, list[str], int]: """搜索 arXiv 并入库,upsert 去重。返回 (total_count, inserted_ids, new_papers_count) @@ -119,12 +119,12 @@ def ingest_arxiv( needed = max_results - new_papers_count this_batch = min(batch_size, needed + 20) # 多抓 20 篇作为缓冲 - # 默认查询最近 30 天的论文 papers = self.arxiv.fetch_latest( query=query, max_results=this_batch, sort_by=sort_by, start=start, + days_back=days_back, ) total_fetched += len(papers) @@ -195,6 +195,7 @@ def ingest_arxiv_with_ids( topic_id: str | None = None, action_type: ActionType = ActionType.subscription_ingest, sort_by: str = "submittedDate", + days_back: int = 7, ) -> list[str]: """ingest_arxiv 的别名,返回 inserted_ids""" _, ids = self.ingest_arxiv( @@ -203,6 +204,7 @@ def ingest_arxiv_with_ids( topic_id=topic_id, action_type=action_type, sort_by=sort_by, + days_back=days_back, ) return ids @@ -213,6 +215,7 @@ def ingest_arxiv_with_stats( topic_id: str | None = None, action_type: ActionType = ActionType.subscription_ingest, sort_by: str = "submittedDate", + days_back: int = 7, ) -> dict: """ingest_arxiv 返回详细统计信息""" total_count, inserted_ids, new_count = self.ingest_arxiv( @@ -221,6 +224,7 @@ def ingest_arxiv_with_stats( topic_id=topic_id, action_type=action_type, sort_by=sort_by, + days_back=days_back, ) return { "total_count": total_count, diff --git a/packages/ai/prompts.py b/packages/ai/prompts.py index 3b34883..9a385dd 100644 --- a/packages/ai/prompts.py +++ b/packages/ai/prompts.py @@ -1,6 +1,6 @@ """ LLM Prompt 模板 -@author Bamzc +@author Color2333 """ diff --git a/packages/ai/rag_service.py b/packages/ai/rag_service.py index ebc1866..14b8393 100644 --- 
a/packages/ai/rag_service.py +++ b/packages/ai/rag_service.py @@ -1,6 +1,6 @@ """ RAG 检索增强生成服务 -@author Bamzc +@author Color2333 """ from __future__ import annotations diff --git a/packages/ai/reasoning_service.py b/packages/ai/reasoning_service.py index f190291..16d100b 100644 --- a/packages/ai/reasoning_service.py +++ b/packages/ai/reasoning_service.py @@ -1,7 +1,7 @@ """ 推理链深度分析服务 引导 LLM 进行分步推理,提供方法论推导链、实验验证链、创新性评估 -@author Bamzc +@author Color2333 """ from __future__ import annotations diff --git a/packages/ai/recommendation_service.py b/packages/ai/recommendation_service.py index 867ee14..d7f7b52 100644 --- a/packages/ai/recommendation_service.py +++ b/packages/ai/recommendation_service.py @@ -1,6 +1,6 @@ """ 推荐引擎 + 热点趋势检测 -@author Bamzc +@author Color2333 """ from __future__ import annotations diff --git a/packages/ai/task_manager.py b/packages/ai/task_manager.py index bc47b19..48ed317 100644 --- a/packages/ai/task_manager.py +++ b/packages/ai/task_manager.py @@ -1,6 +1,6 @@ """ 后台任务管理器 - 管理 wiki/brief 等耗时生成任务 -@author Bamzc +@author Color2333 """ from __future__ import annotations diff --git a/packages/ai/wiki_context.py b/packages/ai/wiki_context.py index d6d222a..28023ce 100644 --- a/packages/ai/wiki_context.py +++ b/packages/ai/wiki_context.py @@ -1,7 +1,7 @@ """ Wiki 生成上下文收集模块 从多源聚合富化上下文供 Wiki 生成使用 -@author Bamzc +@author Color2333 """ from __future__ import annotations diff --git a/packages/ai/writing_service.py b/packages/ai/writing_service.py index 9047a92..92710af 100644 --- a/packages/ai/writing_service.py +++ b/packages/ai/writing_service.py @@ -1,7 +1,7 @@ """ 学术写作助手服务 - 封装高质量写作 Prompt 模板 Prompt 模板来源:https://github.com/Leey21/awesome-ai-research-writing -@author Bamzc +@author Color2333 """ from __future__ import annotations diff --git a/packages/auth.py b/packages/auth.py index c67cd69..8b07344 100644 --- a/packages/auth.py +++ b/packages/auth.py @@ -3,6 +3,7 @@ @author Color2333 """ +import hmac from datetime import datetime, timedelta, timezone 
def authenticate_user(password: str) -> bool:
    """Check a submitted site password against the configured one.

    The comparison uses ``hmac.compare_digest`` so that its duration does
    not reveal where the two values first differ (timing-attack mitigation).
    Both sides are encoded to UTF-8 bytes first: ``compare_digest`` raises
    ``TypeError`` for ``str`` arguments containing non-ASCII characters,
    which would turn a non-ASCII password attempt into an unhandled error
    instead of a clean ``True``/``False``.

    Args:
        password: Plaintext password supplied by the client.

    Returns:
        ``True`` when the password equals ``settings.auth_password``;
        ``False`` when it differs or when no site password is configured.
    """
    settings = get_settings()
    if not settings.auth_password:
        return False
    return hmac.compare_digest(
        password.encode("utf-8"),
        settings.auth_password.encode("utf-8"),
    )
retry_limit: int | None = None + schedule_frequency: str | None = None schedule_time_utc: int | None = None enable_date_filter: bool | None = None date_filter_days: int | None = None @@ -111,9 +113,9 @@ class AgentChatRequest(BaseModel): """Agent 对话请求""" messages: list[AgentMessage] + conversation_id: str | None = None confirmed_action_id: str | None = None - # ---------- API Request Bodies ---------- diff --git a/packages/integrations/citation_provider.py b/packages/integrations/citation_provider.py index fede16f..8e745c5 100644 --- a/packages/integrations/citation_provider.py +++ b/packages/integrations/citation_provider.py @@ -1,7 +1,7 @@ """ 双源引用数据提供者 OpenAlex(10 req/s)为主力,Semantic Scholar 为兜底 -@author Bamzc +@author Color2333 """ from __future__ import annotations diff --git a/packages/integrations/llm_client.py b/packages/integrations/llm_client.py index 2d92bc4..f3d936d 100644 --- a/packages/integrations/llm_client.py +++ b/packages/integrations/llm_client.py @@ -1,7 +1,7 @@ """ LLM 提供者抽象层 - OpenAI / Anthropic / ZhipuAI / Pseudo 支持从数据库动态加载激活的 LLM 配置 -@author Bamzc +@author Color2333 """ from __future__ import annotations diff --git a/packages/integrations/notifier.py b/packages/integrations/notifier.py index 571731d..2ac023d 100644 --- a/packages/integrations/notifier.py +++ b/packages/integrations/notifier.py @@ -1,6 +1,6 @@ """ 通知服务 - 邮件发送 + HTML 存储 -@author Bamzc +@author Color2333 """ from __future__ import annotations diff --git a/packages/integrations/openalex_client.py b/packages/integrations/openalex_client.py index 974b13d..a70a2ef 100644 --- a/packages/integrations/openalex_client.py +++ b/packages/integrations/openalex_client.py @@ -1,7 +1,7 @@ """ OpenAlex API 客户端 高速率引用数据源(10 req/s, 100k/day),覆盖 4.7 亿论文 -@author Bamzc +@author Color2333 """ from __future__ import annotations diff --git a/packages/integrations/semantic_scholar_client.py b/packages/integrations/semantic_scholar_client.py index 8962a9f..a94ae86 100644 --- 
a/packages/integrations/semantic_scholar_client.py +++ b/packages/integrations/semantic_scholar_client.py @@ -1,7 +1,7 @@ """ Semantic Scholar API 客户端 连接复用 + 429 重试 + 日志 -@author Bamzc +@author Color2333 """ from __future__ import annotations diff --git a/packages/storage/db.py b/packages/storage/db.py index c265996..265f4b2 100644 --- a/packages/storage/db.py +++ b/packages/storage/db.py @@ -1,6 +1,6 @@ """ 数据库引擎和会话管理 -@author Bamzc +@author Color2333 """ from __future__ import annotations @@ -120,6 +120,20 @@ def run_migrations() -> None: "INTEGER", "21", ) + _safe_add_column( + conn, + "topic_subscriptions", + "enable_date_filter", + "BOOLEAN", + "0", + ) + _safe_add_column( + conn, + "topic_subscriptions", + "date_filter_days", + "INTEGER", + "7", + ) _safe_add_column(conn, "papers", "favorited", "BOOLEAN", "0") # 关键列索引加速 ORDER BY / WHERE 查询 _safe_create_index(conn, "ix_papers_created_at", "papers", "created_at") diff --git a/packages/storage/models.py b/packages/storage/models.py index 7b5e662..1eff248 100644 --- a/packages/storage/models.py +++ b/packages/storage/models.py @@ -1,6 +1,6 @@ """ SQLAlchemy ORM 模型定义 -@author Bamzc +@author Color2333 """ from datetime import UTC, date, datetime @@ -195,6 +195,7 @@ class TopicSubscription(Base): enabled: Mapped[bool] = mapped_column(nullable=False, default=True) max_results_per_run: Mapped[int] = mapped_column(nullable=False, default=20) retry_limit: Mapped[int] = mapped_column(nullable=False, default=2) + schedule_frequency: Mapped[str] = mapped_column(String(32), nullable=False, default="daily") schedule_time_utc: Mapped[int] = mapped_column(nullable=False, default=21) enable_date_filter: Mapped[bool] = mapped_column(nullable=False, default=False) # 是否启用日期过滤 date_filter_days: Mapped[int] = mapped_column(nullable=False, default=7) # 日期范围(最近 N 天) @@ -307,6 +308,26 @@ class AgentMessage(Base): ) +class AgentPendingAction(Base): + """Agent 待确认操作 - 持久化存储""" + + __tablename__ = "agent_pending_actions" + + id: 
Mapped[str] = mapped_column(String(36), primary_key=True) + conversation_id: Mapped[str | None] = mapped_column( + String(36), + ForeignKey("agent_conversations.id", ondelete="CASCADE"), + nullable=True, + index=True, + ) + tool_name: Mapped[str] = mapped_column(String(128), nullable=False) + tool_args: Mapped[dict] = mapped_column(JSON, nullable=False, default=dict) + tool_call_id: Mapped[str | None] = mapped_column(String(64), nullable=True) + conversation_state: Mapped[dict | None] = mapped_column(JSON, nullable=True) + created_at: Mapped[datetime] = mapped_column( + DateTime, default=_utcnow, nullable=False, index=True + ) + paper_id: Mapped[str | None] = mapped_column( String(36), ForeignKey("papers.id", ondelete="SET NULL"), diff --git a/packages/storage/repositories.py b/packages/storage/repositories.py index d0d9ba9..f03ca9f 100644 --- a/packages/storage/repositories.py +++ b/packages/storage/repositories.py @@ -1,11 +1,10 @@ """ 数据仓储层 -@author Bamzc +@author Color2333 """ from __future__ import annotations -import math from datetime import UTC, date, datetime, timedelta from uuid import UUID @@ -18,6 +17,7 @@ ActionPaper, AgentConversation, AgentMessage, + AgentPendingAction, AnalysisReport, Citation, CollectionAction, @@ -795,7 +795,8 @@ def upsert_topic( found.retry_limit = max(retry_limit, 0) found.schedule_frequency = schedule_frequency found.schedule_time_utc = max(0, min(23, schedule_time_utc)) - found.updated_at = datetime.now(UTC) + found.enable_date_filter = enable_date_filter + found.date_filter_days = max(1, date_filter_days) found.updated_at = datetime.now(UTC) self.session.flush() return found @@ -808,7 +809,7 @@ def upsert_topic( schedule_frequency=schedule_frequency, schedule_time_utc=max(0, min(23, schedule_time_utc)), enable_date_filter=enable_date_filter, - + date_filter_days=max(1, date_filter_days), ) self.session.add(topic) self.session.flush() @@ -1293,5 +1294,57 @@ def update_config(self, **kwargs) -> DailyReportConfig: for key, 
class AgentPendingActionRepository:
    """Repository persisting agent actions that await user confirmation.

    Every method only flushes the session; committing (or rolling back) is
    left to whoever owns the session.
    """

    def __init__(self, session: Session):
        self.session = session

    def create(
        self,
        action_id: str,
        tool_name: str,
        tool_args: dict,
        tool_call_id: str | None = None,
        conversation_id: str | None = None,
        conversation_state: dict | None = None,
    ) -> AgentPendingAction:
        """Insert a pending action and flush it to the database.

        Args:
            action_id: Primary key of the new row.
            tool_name: Name of the tool awaiting confirmation.
            tool_args: Arguments the tool will be called with.
            tool_call_id: Identifier of the originating tool call, if any.
            conversation_id: Owning conversation, if any.
            conversation_state: Snapshot needed to resume the conversation.

        Returns:
            The newly added ``AgentPendingAction`` instance.
        """
        action = AgentPendingAction(
            id=action_id,
            tool_name=tool_name,
            tool_args=tool_args,
            tool_call_id=tool_call_id,
            conversation_id=conversation_id,
            conversation_state=conversation_state,
        )
        self.session.add(action)
        self.session.flush()
        return action

    def get_by_id(self, action_id: str) -> AgentPendingAction | None:
        """Return the pending action with this primary key, or ``None``."""
        return self.session.get(AgentPendingAction, action_id)

    def delete(self, action_id: str) -> bool:
        """Delete one pending action.

        Returns:
            ``True`` if a row existed and was deleted, ``False`` otherwise.
        """
        action = self.get_by_id(action_id)
        if action:
            self.session.delete(action)
            self.session.flush()
            return True
        return False

    def cleanup_expired(self, ttl_seconds: int = 1800) -> int:
        """Bulk-delete pending actions created more than ``ttl_seconds`` ago.

        Returns:
            The ``rowcount`` reported by the DELETE statement.
        """
        cutoff = datetime.now(UTC) - timedelta(seconds=ttl_seconds)
        # Inside a method body the bare name ``delete`` resolves at module
        # level, not to the ``delete`` method above; this assumes SQLAlchemy's
        # ``delete`` construct is imported by the module -- TODO confirm.
        q = delete(AgentPendingAction).where(AgentPendingAction.created_at < cutoff)
        result = self.session.execute(q)
        self.session.flush()
        return result.rowcount
"E", # pycodestyle errors + "F", # pyflakes + "I", # isort + "UP", # pyupgrade + "B", # flake8-bugbear + "C4", # flake8-comprehensions + "SIM", # flake8-simplify + "TCH", # flake8-type-checking +] +ignore = [ + "E501", # line too long (handled by formatter) + "B008", # do not perform function calls in argument defaults + "B905", # zip without strict + "SIM108", # use ternary operator +] + +[tool.ruff.lint.isort] +known-first-party = ["apps", "packages"] + +[tool.ruff.format] +quote-style = "double" +indent-style = "space" +skip-magic-trailing-comma = false +line-ending = "auto" diff --git a/infra/backup.sh b/scripts/backup.sh similarity index 98% rename from infra/backup.sh rename to scripts/backup.sh index 4621a97..68eefce 100755 --- a/infra/backup.sh +++ b/scripts/backup.sh @@ -1,6 +1,6 @@ #!/bin/bash # PaperMind 数据备份脚本 -# @author Bamzc +# @author Color2333 # # 用法: 添加到 crontab: # 0 3 * * * /opt/papermind/backup.sh >> /opt/papermind/backups/backup.log 2>&1 diff --git a/scripts/build-desktop.sh b/scripts/build-desktop.sh index 503809f..4215e24 100755 --- a/scripts/build-desktop.sh +++ b/scripts/build-desktop.sh @@ -3,7 +3,7 @@ # 1. PyInstaller 打包 Python 后端 # 2. 安装 Tauri 前端依赖 # 3. 
#!/usr/bin/env python3
"""
PaperMind one-step development environment bootstrap.

Steps:
- check the Python version
- create a virtual environment
- install dependencies
- copy the environment-variable template
- initialise the database

Usage:
    python scripts/dev_setup.py
"""

import os
import shutil
import subprocess
import sys
from pathlib import Path

# Oldest interpreter version the project supports.
MIN_PYTHON = (3, 11)


# ANSI colour escape sequences for terminal output.
class Colors:
    RED = "\033[0;31m"
    GREEN = "\033[0;32m"
    YELLOW = "\033[0;33m"
    BLUE = "\033[0;34m"
    NC = "\033[0m"  # No Color


def print_step(step: str):
    """Print a step heading."""
    print(f"\n{Colors.BLUE}▶ {step}{Colors.NC}")


def print_success(msg: str):
    """Print a success message."""
    print(f"{Colors.GREEN}✓ {msg}{Colors.NC}")


def print_error(msg: str):
    """Print an error message to stderr."""
    print(f"{Colors.RED}✗ {msg}{Colors.NC}", file=sys.stderr)


def print_warning(msg: str):
    """Print a warning message."""
    print(f"{Colors.YELLOW}⚠ {msg}{Colors.NC}")


def run_command(cmd: list, check: bool = True) -> subprocess.CompletedProcess:
    """Run *cmd* with captured text output and return the completed process.

    Raises:
        subprocess.CalledProcessError: when ``check`` is true and the command
            exits non-zero. The captured stderr (or stdout) is echoed first,
            because captured output would otherwise never reach the user.
    """
    try:
        return subprocess.run(cmd, check=check, capture_output=True, text=True)
    except subprocess.CalledProcessError as exc:
        details = (exc.stderr or exc.stdout or "").strip()
        if details:
            print_error(details)
        raise


def _venv_python(project_root: Path) -> Path:
    """Return the virtualenv interpreter path (POSIX layout first, then Windows)."""
    posix_python = project_root / ".venv" / "bin" / "python"
    if posix_python.exists():
        return posix_python
    return project_root / ".venv" / "Scripts" / "python.exe"  # Windows


def check_python_version():
    """Exit with status 1 unless the running interpreter is at least MIN_PYTHON."""
    print_step("检查 Python 版本")

    version = sys.version_info
    if version < MIN_PYTHON:
        print_error(f"Python 版本过低: {version.major}.{version.minor}")
        print_error("需要 Python 3.11 或更高版本")
        sys.exit(1)

    print_success(f"Python 版本: {version.major}.{version.minor}.{version.micro}")


def create_venv(project_root: Path):
    """Create ``.venv`` under *project_root* unless it already exists."""
    venv_path = project_root / ".venv"

    print_step("创建虚拟环境")

    if venv_path.exists():
        print_warning("虚拟环境已存在,跳过创建")
        return

    run_command([sys.executable, "-m", "venv", str(venv_path)])
    print_success(f"虚拟环境创建成功: {venv_path}")


def install_dependencies(project_root: Path):
    """Upgrade pip and install the project in editable mode into the venv."""
    print_step("安装 Python 依赖")

    venv_python = _venv_python(project_root)

    # Upgrade pip first.
    run_command([str(venv_python), "-m", "pip", "install", "--upgrade", "pip"])

    # "." is resolved against the current working directory, which main()
    # sets to the project root before calling this function.
    run_command([str(venv_python), "-m", "pip", "install", "-e", ".[llm,pdf]"])

    print_success("依赖安装完成")


def setup_env_file(project_root: Path):
    """Copy ``.env.example`` to ``.env`` unless ``.env`` already exists.

    Exits with status 1 when the template is missing.
    """
    print_step("配置环境变量")

    env_example = project_root / ".env.example"
    env_file = project_root / ".env"

    if env_file.exists():
        print_warning(".env 文件已存在,跳过复制")
        return

    if not env_example.exists():
        print_error(".env.example 文件不存在")
        sys.exit(1)

    shutil.copy(env_example, env_file)
    print_success(".env 文件已创建")
    print_warning("请编辑 .env 文件,填写必要的配置项(如 LLM API Key)")


def init_database(project_root: Path):
    """Run ``scripts/local_bootstrap.py`` with the venv interpreter, if present.

    A failing bootstrap is reported as a warning rather than aborting setup.
    """
    print_step("初始化数据库")

    venv_python = _venv_python(project_root)
    bootstrap_script = project_root / "scripts" / "local_bootstrap.py"

    if not bootstrap_script.exists():
        print_warning("local_bootstrap.py 不存在,跳过数据库初始化")
        return

    result = run_command([str(venv_python), str(bootstrap_script)], check=False)

    if result.returncode == 0:
        print_success("数据库初始化完成")
    else:
        print_warning("数据库初始化失败,可能已存在")
        # Show the script's own error text so a real failure is not mistaken
        # for the harmless "already initialised" case.
        details = (result.stderr or "").strip()
        if details:
            print_warning(details)


def main():
    """Run every setup step in order from the project root."""
    print(f"""
{Colors.BLUE}╔══════════════════════════════════════════╗
║ PaperMind 开发环境初始化 ║
╚══════════════════════════════════════════╝{Colors.NC}
""")

    # The project root is the parent of the scripts/ directory.
    project_root = Path(__file__).parent.parent.absolute()
    os.chdir(project_root)

    try:
        check_python_version()
        create_venv(project_root)
        install_dependencies(project_root)
        setup_env_file(project_root)
        init_database(project_root)
    except subprocess.CalledProcessError as exc:
        # run_command has already printed the tool's output; finish with a
        # short summary and a non-zero exit status instead of a traceback.
        print_error(f"命令执行失败 (exit {exc.returncode}): {' '.join(map(str, exc.cmd))}")
        sys.exit(1)

    print(f"""
{Colors.GREEN}╔══════════════════════════════════════════╗
║ 初始化完成! ║
╚══════════════════════════════════════════╝{Colors.NC}

下一步:
  1. 编辑 .env 文件,填写 LLM API Key
  2. 激活虚拟环境:
     - macOS/Linux: source .venv/bin/activate
     - Windows: .venv\\Scripts\\activate
  3. 启动后端:
     uvicorn apps.api.main:app --reload --port 8000
  4. 启动前端:
     cd frontend && npm install && npm run dev

API 文档: http://localhost:8000/docs
""")


if __name__ == "__main__":
    main()