Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,13 @@
* DeepSeek 分类结果(M3)。
*
* category 已由 LinkCategory.normalize() 保证合法;
* flags 对应 DeepSeek 返回的安全判定
* flags 对应 DeepSeek 返回的安全/质量判定
* - nsfw:色情/暴力等不适宜内容
* - ad:纯商业推广软文(技术公告/版本更新等不算)
* - flame:引战/情绪化内容
* - illegal:疑似违反中国法律法规(反动/颠覆/分裂/邪教/赌博/毒品等)
* - notResource:链接本身不是"可分享的内容资源"(表情包/贴纸/GIF/裸图片/
* 登录墙/错误页/dev PR 通知页等),客户端 listener 拦不住的兜底
*
* 任一 flag 为 true → worker 将 status 推到 FLAGGED(进人工复核)。
*/
Expand All @@ -17,17 +19,18 @@ public record ClassificationResult(
boolean nsfw,
boolean ad,
boolean flame,
boolean illegal
boolean illegal,
boolean notResource
) {

/** 是否命中任意安全 flag。 */
/** 是否命中任意安全/质量 flag。 */
public boolean anyFlagSet() {
return nsfw || ad || flame || illegal;
return nsfw || ad || flame || illegal || notResource;
}

/** 降级结果:分类为 other,flags 全 false(网络/解析等**非内容过滤**原因的失败用)。 */
public static ClassificationResult fallback() {
return new ClassificationResult("other", false, false, false, false);
return new ClassificationResult("other", false, false, false, false, false);
}

/**
Expand All @@ -36,6 +39,6 @@ public static ClassificationResult fallback() {
* 本系统将 illegal 置为 true 让其走 FLAGGED 进人工复核,而不是 fallback 静默放行。
*/
public static ClassificationResult blockedByContentFilter() {
return new ClassificationResult("other", false, false, false, true);
return new ClassificationResult("other", false, false, false, true, false);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ public class ClassificationService {
中国大陆现行法律法规。根据输入信息,把链接分到以下分类之一:
%s

同时判断内容是否存在 4 类安全问题。对 nsfw/ad/flame 采用"宁松勿严"
同时判断内容是否存在 5 类问题。对 nsfw/ad/flame/notResource 采用"宁松勿严"
策略(社群正常技术分享放行);对 illegal 必须严格,宁可误报。

- nsfw: 色情、裸露、血腥暴力、猎奇不适。仅当**明确**涉及时为 true。
Expand All @@ -67,6 +67,19 @@ public class ClassificationService {
新闻报道、个人作品集。
- flame: 明显引战 / 人身攻击 / 极端言论 / 刻意煽动对立。技术路线之争、
理性观点分歧**不算**。
- notResource: 链接本身不是"可分享的内容资源"(不是色情/广告/引战,
只是没有信息价值,不该上架到社群分享库)。任一命中即 true:
· 表情包 / 贴纸 / GIF(tenor / klipy / giphy / 微博表情等)
· 单张图片 / 截图 / 头像(孤立的纯图片页面,非文章配图)
· 视频/音频/媒体文件直链(路径以 .mp4 .mp3 .gif 等结尾)
· 登录墙 / 错误页 / 验证码 / 404 / 维护页(OG 抓不到正文)
· 内部 dev 通知页(GitHub PR/Issue/Commit、Jira 工单、CI 报告)
· 空白页 / 广告聚合页 / 跳转中转页
**反例(全部 false)**:技术博客文章、论文、开源项目主页(README)、
新闻报道、文档教程、知乎/小红书/微博正常帖子、视频教程页
(含正文/字幕的播放页,不是裸 .mp4 文件)。
注意:仓库主页(如 github.com/foo/bar)允许,dev 子路径
(github.com/foo/bar/pull/123)才命中本规则。
- illegal: 疑似违反中国大陆法律法规的内容。任一命中即 true:
· 反对宪法基本原则、颠覆国家政权、煽动分裂国家、破坏国家统一
· 攻击党和政府、宣扬港独 / 台独 / 藏独 / 疆独
Expand All @@ -80,7 +93,7 @@ public class ClassificationService {
技术讨论涉及敏感话题但论点中立且学术讨论 **不算** illegal。

严格只返回 JSON,不要任何解释、代码块标记(不要 ```json)或其他文字:
{"category": "<slug>", "nsfw": false, "ad": false, "flame": false, "illegal": false}
{"category": "<slug>", "nsfw": false, "ad": false, "flame": false, "illegal": false, "notResource": false}
""";

private final HttpClient httpClient;
Expand Down Expand Up @@ -249,19 +262,20 @@ ClassificationResult parseResponse(String responseBody, String host) {
boolean nsfw = result.path("nsfw").asBoolean(false);
boolean ad = result.path("ad").asBoolean(false);
boolean flame = result.path("flame").asBoolean(false);
// 旧模型可能不返回 illegal 字段,缺失时按 false 降级(不阻拦),
// 命中 nsfw/ad/flame 任一时已经会走 FLAGGED
boolean illegal = result.path("illegal").asBoolean(false);
// 旧模型可能不返回 illegal / notResource 字段,缺失时按 false 降级(不阻拦),
// 反正命中其它 flag 任一时已经会走 FLAGGED
boolean illegal = result.path("illegal").asBoolean(false);
boolean notResource = result.path("notResource").asBoolean(false);

// normalize 兜底:非法 slug 转 other
String category = LinkCategory.normalize(rawCategory);
if (!category.equals(rawCategory)) {
log.warn("classification 返回非法分类,降级为 other: host={} raw={}", host, rawCategory);
}

log.debug("classification 完成: host={} category={} nsfw={} ad={} flame={} illegal={}",
host, category, nsfw, ad, flame, illegal);
return new ClassificationResult(category, nsfw, ad, flame, illegal);
log.debug("classification 完成: host={} category={} nsfw={} ad={} flame={} illegal={} notResource={}",
host, category, nsfw, ad, flame, illegal, notResource);
return new ClassificationResult(category, nsfw, ad, flame, illegal, notResource);

} catch (Exception e) {
log.warn("classification 响应解析失败,降级: host={} error={}", host, e.getMessage());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -110,19 +110,20 @@ private void doEnrich(Long linkId) {
if (cls.anyFlagSet()) {
// 任一安全/质量 flag 命中 → FLAGGED,进人工待审
finalStatus = SharedLinkStatus.FLAGGED;
log.info("enrichment 标记 FLAGGED: linkId={} nsfw={} ad={} flame={} illegal={}",
linkId, cls.nsfw(), cls.ad(), cls.flame(), cls.illegal());
log.info("enrichment 标记 FLAGGED: linkId={} nsfw={} ad={} flame={} illegal={} notResource={}",
linkId, cls.nsfw(), cls.ad(), cls.flame(), cls.illegal(), cls.notResource());
} else {
finalStatus = SharedLinkStatus.APPROVED;
log.info("enrichment AI 放行 APPROVED: linkId={} host={}", linkId, host);
}

// ── 步骤 4:回填数据库 ───────────────────────────────────────────
Map<String, Boolean> flags = Map.of(
"nsfw", cls.nsfw(),
"ad", cls.ad(),
"flame", cls.flame(),
"illegal", cls.illegal()
"nsfw", cls.nsfw(),
"ad", cls.ad(),
"flame", cls.flame(),
"illegal", cls.illegal(),
"notResource", cls.notResource()
);

sharedLinkService.enrich(
Expand Down Expand Up @@ -157,7 +158,7 @@ private void tryFallbackStatus(Long linkId) {
null, null, null, null,
"enrichment worker 未捕获异常,降级",
"other",
Map.of("nsfw", false, "ad", false, "flame", false),
Map.of("nsfw", false, "ad", false, "flame", false, "illegal", false, "notResource", false),
SharedLinkStatus.PENDING_MANUAL
);
log.info("enrichment 降级完成: linkId={} -> PENDING_MANUAL", linkId);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ void enrich_nonFlagged_doesNotFireWebhook() {
when(ogFetchService.fetch(anyString())).thenReturn(
new OgFetchResult("标题", null, null, null, null));
when(classificationService.classify(any(), any(), any())).thenReturn(
new ClassificationResult("other", false, false, false, false));
new ClassificationResult("other", false, false, false, false, false));

worker.enrich(100L);

Expand All @@ -91,7 +91,7 @@ void enrich_whitelistDomain_noFlags_statusBecomesApproved() {
when(ogFetchService.fetch(anyString())).thenReturn(
new OgFetchResult("标题", "描述", "https://cover.jpg", "某公众号", null));
when(classificationService.classify(anyString(), anyString(), anyString())).thenReturn(
new ClassificationResult("engineering", false, false, false, false));
new ClassificationResult("engineering", false, false, false, false, false));

worker.enrich(1L);

Expand All @@ -115,7 +115,7 @@ void enrich_nonWhitelistDomain_noFlags_statusBecomesApproved_afterSimplification
when(ogFetchService.fetch(anyString())).thenReturn(
new OgFetchResult("非白名单文章", null, null, null, null));
when(classificationService.classify(any(), any(), any())).thenReturn(
new ClassificationResult("other", false, false, false, false));
new ClassificationResult("other", false, false, false, false, false));

worker.enrich(2L);

Expand All @@ -136,7 +136,7 @@ void enrich_flaggedByAd_statusBecomesFlagged_regardlessOfWhitelist() {
when(ogFetchService.fetch(anyString())).thenReturn(
new OgFetchResult("限时特卖!", "买一送一", null, null, null));
when(classificationService.classify(any(), any(), any())).thenReturn(
new ClassificationResult("other", false, true, false, false)); // ad=true
new ClassificationResult("other", false, true, false, false, false)); // ad=true

worker.enrich(3L);

Expand All @@ -156,7 +156,7 @@ void enrich_nsfwFlag_statusBecomesFlagged() {
when(ogFetchService.fetch(anyString())).thenReturn(
new OgFetchResult("问题标题", null, null, null, null));
when(classificationService.classify(any(), any(), any())).thenReturn(
new ClassificationResult("lifestyle", true, false, false, false)); // nsfw=true
new ClassificationResult("lifestyle", true, false, false, false, false)); // nsfw=true

worker.enrich(4L);

Expand All @@ -178,7 +178,7 @@ void enrich_ogFetchFails_stillCompletesEnrichment() {
when(ogFetchService.fetch(anyString())).thenReturn(
OgFetchResult.failure("HTTP 403"));
when(classificationService.classify(isNull(), isNull(), eq(host))).thenReturn(
new ClassificationResult("other", false, false, false, false));
new ClassificationResult("other", false, false, false, false, false));

worker.enrich(5L);

Expand Down Expand Up @@ -236,7 +236,7 @@ void enrich_flameFlag_flagsMapContainsCorrectValues() {
when(ogFetchService.fetch(anyString())).thenReturn(
new OgFetchResult("引战标题", null, null, null, null));
when(classificationService.classify(any(), any(), any())).thenReturn(
new ClassificationResult("industry", false, false, true, false)); // flame=true
new ClassificationResult("industry", false, false, true, false, false)); // flame=true

worker.enrich(7L);

Expand All @@ -251,4 +251,32 @@ void enrich_flameFlag_flagsMapContainsCorrectValues() {
assertThat(flags.get("ad")).isFalse();
assertThat(flags.get("flame")).isTrue();
}

// ── 场景 8:notResource=true → FLAGGED(兜底拦表情包/裸图片/dev URL) ────

/**
 * Scenario 8: when the classifier reports only {@code notResource=true}, the worker must
 * persist the link as FLAGGED and write all five flags to the stored map.
 */
@Test
void enrich_notResourceFlag_routesToFlagged() {
    final long linkId = 8L;
    final String host = "klipy.com";

    // Arrange: build the stubbed link first, on its own statement, so that any stubbing
    // done inside the helper is finished before the next when(...) call starts.
    SharedLink gifLink = stubLink(linkId, "https://klipy.com/gifs/hello-1234", host);
    OgFetchResult emptyOg = new OgFetchResult(null, null, null, null, null);
    // Only the last flag (notResource) is set.
    ClassificationResult notResourceOnly =
            new ClassificationResult("other", false, false, false, false, true);

    when(sharedLinkService.findById(linkId)).thenReturn(Optional.of(gifLink));
    when(ogFetchService.fetch(anyString())).thenReturn(emptyOg);
    when(classificationService.classify(any(), any(), any())).thenReturn(notResourceOnly);

    // Act
    worker.enrich(linkId);

    // Assert: the persisted status is FLAGGED; capture the flags map for inspection.
    @SuppressWarnings("unchecked")
    ArgumentCaptor<Map<String, Boolean>> persistedFlags = ArgumentCaptor.forClass(Map.class);
    verify(sharedLinkService).enrich(
            eq(linkId),
            any(), any(), any(), any(), any(),
            any(),
            persistedFlags.capture(),
            eq(SharedLinkStatus.FLAGGED));

    Map<String, Boolean> stored = persistedFlags.getValue();
    // Every other flag must be present and false ...
    for (String untouched : new String[] {"nsfw", "ad", "flame", "illegal"}) {
        assertThat(stored.get(untouched)).isFalse();
    }
    // ... and notResource is the one that triggered the FLAGGED routing.
    assertThat(stored.get("notResource")).isTrue();
}
}