diff --git a/src/main/java/com/involutionhell/backend/community/service/ClassificationResult.java b/src/main/java/com/involutionhell/backend/community/service/ClassificationResult.java index d1cbfb8..f5fb51d 100644 --- a/src/main/java/com/involutionhell/backend/community/service/ClassificationResult.java +++ b/src/main/java/com/involutionhell/backend/community/service/ClassificationResult.java @@ -4,11 +4,13 @@ * DeepSeek 分类结果(M3)。 * * category 已由 LinkCategory.normalize() 保证合法; - * flags 对应 DeepSeek 返回的安全判定: + * flags 对应 DeepSeek 返回的安全/质量判定: * - nsfw:色情/暴力等不适宜内容 * - ad:纯商业推广软文(技术公告/版本更新等不算) * - flame:引战/情绪化内容 * - illegal:疑似违反中国法律法规(反动/颠覆/分裂/邪教/赌博/毒品等) + * - notResource:链接本身不是"可分享的内容资源"(表情包/贴纸/GIF/裸图片/ + * 登录墙/错误页/dev PR 通知页等),客户端 listener 拦不住的兜底 * * 任一 flag 为 true → worker 将 status 推到 FLAGGED(进人工复核)。 */ @@ -17,17 +19,18 @@ public record ClassificationResult( boolean nsfw, boolean ad, boolean flame, - boolean illegal + boolean illegal, + boolean notResource ) { - /** 是否命中任意安全 flag。 */ + /** 是否命中任意安全/质量 flag。 */ public boolean anyFlagSet() { - return nsfw || ad || flame || illegal; + return nsfw || ad || flame || illegal || notResource; } /** 降级结果:分类为 other,flags 全 false(网络/解析等**非内容过滤**原因的失败用)。 */ public static ClassificationResult fallback() { - return new ClassificationResult("other", false, false, false, false); + return new ClassificationResult("other", false, false, false, false, false); } /** @@ -36,6 +39,6 @@ public static ClassificationResult fallback() { * 本系统将 illegal 置为 true 让其走 FLAGGED 进人工复核,而不是 fallback 静默放行。 */ public static ClassificationResult blockedByContentFilter() { - return new ClassificationResult("other", false, false, false, true); + return new ClassificationResult("other", false, false, false, true, false); } } diff --git a/src/main/java/com/involutionhell/backend/community/service/ClassificationService.java b/src/main/java/com/involutionhell/backend/community/service/ClassificationService.java index 306eda0..5164982 100644 --- a/src/main/java/com/involutionhell/backend/community/service/ClassificationService.java +++ b/src/main/java/com/involutionhell/backend/community/service/ClassificationService.java @@ -53,7 +53,7 @@ public class ClassificationService { 中国大陆现行法律法规。根据输入信息,把链接分到以下分类之一: %s - 同时判断内容是否存在 4 类安全问题。对 nsfw/ad/flame 采用"宁松勿严" + 同时判断内容是否存在 5 类问题。对 nsfw/ad/flame/notResource 采用"宁松勿严" 策略(社群正常技术分享放行);对 illegal 必须严格,宁可误报。 - nsfw: 色情、裸露、血腥暴力、猎奇不适。仅当**明确**涉及时为 true。 @@ -67,6 +67,19 @@ public class ClassificationService { 新闻报道、个人作品集。 - flame: 明显引战 / 人身攻击 / 极端言论 / 刻意煽动对立。技术路线之争、 理性观点分歧**不算**。 + - notResource: 链接本身不是"可分享的内容资源"(不是色情/广告/引战, + 只是没有信息价值,不该上架到社群分享库)。任一命中即 true: + · 表情包 / 贴纸 / GIF(tenor / klipy / giphy / 微博表情等) + · 单张图片 / 截图 / 头像(孤立的纯图片页面,非文章配图) + · 视频/音频/媒体文件直链(路径以 .mp4 .mp3 .gif 等结尾) + · 登录墙 / 错误页 / 验证码 / 404 / 维护页(OG 抓不到正文) + · 内部 dev 通知页(GitHub PR/Issue/Commit、Jira 工单、CI 报告) + · 空白页 / 广告聚合页 / 跳转中转页 + **反例(全部 false)**:技术博客文章、论文、开源项目主页(README)、 + 新闻报道、文档教程、知乎/小红书/微博正常帖子、视频教程页 + (含正文/字幕的播放页,不是裸 .mp4 文件)。 + 注意:仓库主页(如 github.com/foo/bar)允许,dev 子路径 + (github.com/foo/bar/pull/123)才命中本规则。 - illegal: 疑似违反中国大陆法律法规的内容。任一命中即 true: · 反对宪法基本原则、颠覆国家政权、煽动分裂国家、破坏国家统一 · 攻击党和政府、宣扬港独 / 台独 / 藏独 / 疆独 @@ -80,7 +93,7 @@ public class ClassificationService { 技术讨论涉及敏感话题但论点中立且学术讨论 **不算** illegal。 严格只返回 JSON,不要任何解释、代码块标记(不要 ```json)或其他文字: - {"category": "", "nsfw": false, "ad": false, "flame": false, "illegal": false} + {"category": "", "nsfw": false, "ad": false, "flame": false, "illegal": false, "notResource": false} """; private final HttpClient httpClient; @@ -249,9 +262,10 @@ ClassificationResult parseResponse(String responseBody, String host) { boolean nsfw = result.path("nsfw").asBoolean(false); boolean ad = result.path("ad").asBoolean(false); boolean flame = result.path("flame").asBoolean(false); - // 旧模型可能不返回 illegal 字段,缺失时按 false 降级(不阻拦), - // 命中 nsfw/ad/flame 任一时已经会走 FLAGGED - boolean illegal = result.path("illegal").asBoolean(false); + // 旧模型可能不返回 illegal / notResource 字段,缺失时按 false 降级(不阻拦), + // 反正命中其它 flag 任一时已经会走 FLAGGED + boolean illegal = result.path("illegal").asBoolean(false); + boolean notResource = result.path("notResource").asBoolean(false); // normalize 兜底:非法 slug 转 other String category = LinkCategory.normalize(rawCategory); @@ -259,9 +273,9 @@ ClassificationResult parseResponse(String responseBody, String host) { log.warn("classification 返回非法分类,降级为 other: host={} raw={}", host, rawCategory); } - log.debug("classification 完成: host={} category={} nsfw={} ad={} flame={} illegal={}", - host, category, nsfw, ad, flame, illegal); - return new ClassificationResult(category, nsfw, ad, flame, illegal); + log.debug("classification 完成: host={} category={} nsfw={} ad={} flame={} illegal={} notResource={}", + host, category, nsfw, ad, flame, illegal, notResource); + return new ClassificationResult(category, nsfw, ad, flame, illegal, notResource); } catch (Exception e) { log.warn("classification 响应解析失败,降级: host={} error={}", host, e.getMessage()); diff --git a/src/main/java/com/involutionhell/backend/community/service/SharedLinkEnrichmentWorker.java b/src/main/java/com/involutionhell/backend/community/service/SharedLinkEnrichmentWorker.java index 223dee5..81b61e9 100644 --- a/src/main/java/com/involutionhell/backend/community/service/SharedLinkEnrichmentWorker.java +++ b/src/main/java/com/involutionhell/backend/community/service/SharedLinkEnrichmentWorker.java @@ -110,8 +110,8 @@ private void doEnrich(Long linkId) { if (cls.anyFlagSet()) { // 任一安全 flag 命中 → FLAGGED,进人工待审 finalStatus = SharedLinkStatus.FLAGGED; - log.info("enrichment 标记 FLAGGED: linkId={} nsfw={} ad={} flame={} illegal={}", - linkId, cls.nsfw(), cls.ad(), cls.flame(), cls.illegal()); + log.info("enrichment 标记 FLAGGED: linkId={} nsfw={} ad={} flame={} illegal={} notResource={}", + linkId, cls.nsfw(), cls.ad(), cls.flame(), cls.illegal(), cls.notResource()); } else { finalStatus = SharedLinkStatus.APPROVED; log.info("enrichment AI 放行 APPROVED: linkId={} host={}", linkId, host); @@ -119,10 +119,11 @@ private void doEnrich(Long linkId) { // ── 步骤 4:回填数据库 ─────────────────────────────────────────── Map flags = Map.of( - "nsfw", cls.nsfw(), - "ad", cls.ad(), - "flame", cls.flame(), - "illegal", cls.illegal() + "nsfw", cls.nsfw(), + "ad", cls.ad(), + "flame", cls.flame(), + "illegal", cls.illegal(), + "notResource", cls.notResource() ); sharedLinkService.enrich( @@ -157,7 +158,7 @@ private void tryFallbackStatus(Long linkId) { null, null, null, null, "enrichment worker 未捕获异常,降级", "other", - Map.of("nsfw", false, "ad", false, "flame", false), + Map.of("nsfw", false, "ad", false, "flame", false, "illegal", false, "notResource", false), SharedLinkStatus.PENDING_MANUAL ); log.info("enrichment 降级完成: linkId={} -> PENDING_MANUAL", linkId); diff --git a/src/test/java/com/involutionhell/backend/community/service/SharedLinkEnrichmentWorkerTests.java b/src/test/java/com/involutionhell/backend/community/service/SharedLinkEnrichmentWorkerTests.java index 92d28f4..3c972e7 100644 --- a/src/test/java/com/involutionhell/backend/community/service/SharedLinkEnrichmentWorkerTests.java +++ b/src/test/java/com/involutionhell/backend/community/service/SharedLinkEnrichmentWorkerTests.java @@ -74,7 +74,7 @@ void enrich_nonFlagged_doesNotFireWebhook() { when(ogFetchService.fetch(anyString())).thenReturn( new OgFetchResult("标题", null, null, null, null)); when(classificationService.classify(any(), any(), any())).thenReturn( - new ClassificationResult("other", false, false, false, false)); + new ClassificationResult("other", false, false, false, false, false)); worker.enrich(100L); @@ -91,7 +91,7 @@ void enrich_whitelistDomain_noFlags_statusBecomesApproved() { when(ogFetchService.fetch(anyString())).thenReturn( new OgFetchResult("标题", "描述", "https://cover.jpg", "某公众号", null)); when(classificationService.classify(anyString(), anyString(), anyString())).thenReturn( - new ClassificationResult("engineering", false, false, false, false)); + new ClassificationResult("engineering", false, false, false, false, false)); worker.enrich(1L); @@ -115,7 +115,7 @@ void enrich_nonWhitelistDomain_noFlags_statusBecomesApproved_afterSimplification when(ogFetchService.fetch(anyString())).thenReturn( new OgFetchResult("非白名单文章", null, null, null, null)); when(classificationService.classify(any(), any(), any())).thenReturn( - new ClassificationResult("other", false, false, false, false)); + new ClassificationResult("other", false, false, false, false, false)); worker.enrich(2L); @@ -136,7 +136,7 @@ void enrich_flaggedByAd_statusBecomesFlagged_regardlessOfWhitelist() { when(ogFetchService.fetch(anyString())).thenReturn( new OgFetchResult("限时特卖!", "买一送一", null, null, null)); when(classificationService.classify(any(), any(), any())).thenReturn( - new ClassificationResult("other", false, true, false, false)); // ad=true + new ClassificationResult("other", false, true, false, false, false)); // ad=true worker.enrich(3L); @@ -156,7 +156,7 @@ void enrich_nsfwFlag_statusBecomesFlagged() { when(ogFetchService.fetch(anyString())).thenReturn( new OgFetchResult("问题标题", null, null, null, null)); when(classificationService.classify(any(), any(), any())).thenReturn( - new ClassificationResult("lifestyle", true, false, false, false)); // nsfw=true + new ClassificationResult("lifestyle", true, false, false, false, false)); // nsfw=true worker.enrich(4L); @@ -178,7 +178,7 @@ void enrich_ogFetchFails_stillCompletesEnrichment() { when(ogFetchService.fetch(anyString())).thenReturn( OgFetchResult.failure("HTTP 403")); when(classificationService.classify(isNull(), isNull(), eq(host))).thenReturn( - new ClassificationResult("other", false, false, false, false)); + new ClassificationResult("other", false, false, false, false, false)); worker.enrich(5L); @@ -236,7 +236,7 @@ void enrich_flameFlag_flagsMapContainsCorrectValues() { when(ogFetchService.fetch(anyString())).thenReturn( new OgFetchResult("引战标题", null, null, null, null)); when(classificationService.classify(any(), any(), any())).thenReturn( - new ClassificationResult("industry", false, false, true, false)); // flame=true + new ClassificationResult("industry", false, false, true, false, false)); // flame=true worker.enrich(7L); @@ -251,4 +251,32 @@ void enrich_flameFlag_flagsMapContainsCorrectValues() { assertThat(flags.get("ad")).isFalse(); assertThat(flags.get("flame")).isTrue(); } + + // ── 场景 8:notResource=true → FLAGGED(兜底拦表情包/裸图片/dev URL) ──── + + @Test + void enrich_notResourceFlag_routesToFlagged() { + String host = "klipy.com"; + SharedLink link = stubLink(8L, "https://klipy.com/gifs/hello-1234", host); + when(sharedLinkService.findById(8L)).thenReturn(Optional.of(link)); + when(ogFetchService.fetch(anyString())).thenReturn( + new OgFetchResult(null, null, null, null, null)); + when(classificationService.classify(any(), any(), any())).thenReturn( + new ClassificationResult("other", false, false, false, false, true)); // notResource=true + + worker.enrich(8L); + + @SuppressWarnings("unchecked") + ArgumentCaptor> flagsCaptor = ArgumentCaptor.forClass(Map.class); + verify(sharedLinkService).enrich(eq(8L), + any(), any(), any(), any(), any(), + any(), flagsCaptor.capture(), eq(SharedLinkStatus.FLAGGED)); + + Map flags = flagsCaptor.getValue(); + assertThat(flags.get("nsfw")).isFalse(); + assertThat(flags.get("ad")).isFalse(); + assertThat(flags.get("flame")).isFalse(); + assertThat(flags.get("illegal")).isFalse(); + assertThat(flags.get("notResource")).isTrue(); + } }