Skip to content

Commit b5f72ec

Browse files
committed
feat(media): 并发刮削(默认 5)+ 单条落库
增强 TMDB 搜索:模糊匹配 + 多候选关键词;管理页面新增导入/导出媒体库(全部 / 按扫描路径);缩略图路径默认值改为 /imgs
1 parent 5a27d5f commit b5f72ec

6 files changed

Lines changed: 595 additions & 87 deletions

File tree

internal/bootstrap/data/setting.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -250,7 +250,8 @@ func InitialSettings() []model.SettingItem {
250250
{Key: conf.MediaDiscogsAPIURL, Value: "api.discogs.com", Type: conf.TypeString, Group: model.MEDIA, Flag: model.PRIVATE},
251251
{Key: conf.MediaStoreThumbnail, Value: "false", Type: conf.TypeBool, Group: model.MEDIA, Flag: model.PRIVATE},
252252
{Key: conf.MediaThumbnailMode, Value: "base64", Type: conf.TypeSelect, Options: "base64,local", Group: model.MEDIA, Flag: model.PRIVATE},
253-
{Key: conf.MediaThumbnailPath, Value: "/.thumbnail", Type: conf.TypeString, Group: model.MEDIA, Flag: model.PRIVATE},
253+
{Key: conf.MediaThumbnailPath, Value: "/imgs", Type: conf.TypeString, Group: model.MEDIA, Flag: model.PRIVATE},
254+
{Key: conf.MediaScrapeConcurrency, Value: "5", Type: conf.TypeNumber, Group: model.MEDIA, Flag: model.PRIVATE},
254255
}
255256
additionalSettingItems := tool.Tools.Items()
256257
// 固定顺序

internal/conf/const.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,7 @@ const (
170170
MediaThumbnailMode = "media_thumbnail_mode"
171171
MediaThumbnailPath = "media_thumbnail_path"
172172
MediaStoreThumbnail = "media_store_thumbnail"
173+
MediaScrapeConcurrency = "media_scrape_concurrency"
173174
)
174175

175176
const (

internal/db/media.go

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package db
33
import (
44
"encoding/json"
55
"strings"
6+
"time"
67

78
"github.com/OpenListTeam/OpenList/v4/internal/model"
89
"gorm.io/gorm"
@@ -387,3 +388,123 @@ func GetUnscrappedItems(mediaType model.MediaType, limit int) ([]model.MediaItem
387388
Find(&items).Error
388389
return items, err
389390
}
391+
392+
// ==================== 导入/导出 ====================
393+
394+
// ListAllMediaItems 列出指定媒体类型下的所有条目(不分页,用于导出)
395+
// mediaType 为空时返回全部类型的所有条目
396+
func ListAllMediaItems(mediaType model.MediaType) ([]model.MediaItem, error) {
397+
var items []model.MediaItem
398+
tx := db.Model(&model.MediaItem{})
399+
if mediaType != "" {
400+
tx = tx.Where("media_type = ?", mediaType)
401+
}
402+
err := tx.Order("id asc").Find(&items).Error
403+
return items, err
404+
}
405+
406+
// ListMediaItemsByScanPath 列出指定扫描路径下的所有条目(不分页,用于导出)
407+
func ListMediaItemsByScanPath(scanPathID uint) ([]model.MediaItem, error) {
408+
var items []model.MediaItem
409+
err := db.Where("scan_path_id = ?", scanPathID).Order("id asc").Find(&items).Error
410+
return items, err
411+
}
412+
413+
// ListAllMediaScanPaths 列出所有扫描路径(不区分类型,用于全量导出)
414+
func ListAllMediaScanPaths() ([]model.MediaScanPath, error) {
415+
var paths []model.MediaScanPath
416+
err := db.Order("id asc").Find(&paths).Error
417+
return paths, err
418+
}
419+
420+
// ImportMediaItems 批量导入媒体条目
421+
// 策略:按 (folder_path, file_name, album_name) 唯一键 upsert:
422+
// - 已存在则覆盖刮削字段(导入是用户主动行为,覆盖优先于保留)
423+
// - 不存在则新建
424+
//
425+
// 注意:导入数据中的 ID/CreatedAt/UpdatedAt 会被忽略,由数据库重新分配,
426+
// 避免和现有记录的主键冲突。
427+
func ImportMediaItems(items []model.MediaItem, scanPathIDOverride *uint) (created, updated int, err error) {
428+
for i := range items {
429+
it := items[i]
430+
// 清空主键和时间戳,让数据库重新生成
431+
it.ID = 0
432+
it.CreatedAt = time.Time{}
433+
it.UpdatedAt = time.Time{}
434+
it.DeletedAt = gorm.DeletedAt{}
435+
// 如果指定了覆盖的 scan_path_id(按扫描路径导入场景),则强制覆盖
436+
if scanPathIDOverride != nil {
437+
it.ScanPathID = *scanPathIDOverride
438+
}
439+
440+
var existing model.MediaItem
441+
result := db.Unscoped().Where(
442+
"folder_path = ? AND file_name = ? AND album_name = ?",
443+
it.FolderPath, it.FileName, it.AlbumName,
444+
).First(&existing)
445+
if result.Error == gorm.ErrRecordNotFound {
446+
if e := db.Create(&it).Error; e != nil {
447+
return created, updated, e
448+
}
449+
created++
450+
continue
451+
}
452+
if result.Error != nil {
453+
return created, updated, result.Error
454+
}
455+
// 覆盖更新
456+
it.ID = existing.ID
457+
it.CreatedAt = existing.CreatedAt
458+
it.DeletedAt = gorm.DeletedAt{}
459+
if e := db.Unscoped().Save(&it).Error; e != nil {
460+
return created, updated, e
461+
}
462+
updated++
463+
}
464+
return created, updated, nil
465+
}
466+
467+
// ImportMediaScanPaths 批量导入扫描路径
468+
// 策略:按 (media_type, path) 唯一对去重:
469+
// - 已存在则更新名称/标签等可编辑字段
470+
// - 不存在则新建并返回新ID
471+
//
472+
// 返回值 idMap:导入数据中的原始ID -> 数据库实际ID 的映射,
473+
// 供随后的 MediaItem 导入用于把 scan_path_id 重新指向新ID。
474+
func ImportMediaScanPaths(paths []model.MediaScanPath) (idMap map[uint]uint, created, updated int, err error) {
475+
idMap = make(map[uint]uint, len(paths))
476+
for i := range paths {
477+
p := paths[i]
478+
originalID := p.ID
479+
p.ID = 0
480+
p.CreatedAt = time.Time{}
481+
p.UpdatedAt = time.Time{}
482+
p.DeletedAt = gorm.DeletedAt{}
483+
484+
var existing model.MediaScanPath
485+
result := db.Where("media_type = ? AND path = ?", p.MediaType, p.Path).First(&existing)
486+
if result.Error == gorm.ErrRecordNotFound {
487+
if e := db.Create(&p).Error; e != nil {
488+
return idMap, created, updated, e
489+
}
490+
idMap[originalID] = p.ID
491+
created++
492+
continue
493+
}
494+
if result.Error != nil {
495+
return idMap, created, updated, result.Error
496+
}
497+
// 已存在:更新可编辑字段
498+
existing.Name = p.Name
499+
existing.PathMerge = p.PathMerge
500+
existing.TypeTag = p.TypeTag
501+
existing.ContentTags = p.ContentTags
502+
existing.EnableScrape = p.EnableScrape
503+
if e := db.Save(&existing).Error; e != nil {
504+
return idMap, created, updated, e
505+
}
506+
idMap[originalID] = existing.ID
507+
updated++
508+
}
509+
return idMap, created, updated, nil
510+
}

internal/media/scraper/tmdb.go

Lines changed: 165 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import (
99
"regexp"
1010
"strings"
1111
"time"
12+
"unicode/utf8"
1213

1314
"github.com/OpenListTeam/OpenList/v4/internal/model"
1415
)
@@ -19,6 +20,52 @@ var yearRegexp = regexp.MustCompile(`\b((?:19|20)\d{2})\b`)
1920
// chineseRegexp 匹配包含中文字符的片段
2021
var chineseRegexp = regexp.MustCompile(`[\p{Han}]`)
2122

23+
// noiseTokenRegexp matches tokens that carry no title information: language
// and subtitle tags, source and codec names, bit depth, dynamic range,
// resolution and edition markers. Matching is case-insensitive.
var noiseTokenRegexp = regexp.MustCompile(`(?i)(双语字幕|中字|国英|国粤|粤语|国语|英语|日语|韩语|HDTV|HR-HDTV|BluRay|BDRip|WEB-?DL|HDRip|DVDRip|REMUX|x264|x265|h264|h265|HEVC|AVC|AAC|AC3|DTS|FLAC|10bit|8bit|HDR|SDR|4K|2160P|1080P|720P|480P|完整版|未删减版)`)

// titleBracketRegexp matches one bracketed segment together with its content.
// Supported brackets: ASCII (), full-width （）, ASCII [] and 【】.
var titleBracketRegexp = regexp.MustCompile(`[\(（\[【][^\)）\]】]*[\)）\]】]`)

// titleWhitespaceRegexp matches a run of one or more whitespace characters.
var titleWhitespaceRegexp = regexp.MustCompile(`\s+`)

// titleSeparatorReplacer turns the separators commonly found in file names
// (".", "_", "-", "+") into spaces.
var titleSeparatorReplacer = strings.NewReplacer(".", " ", "_", " ", "-", " ", "+", " ")

// cnNumMap maps a single Chinese numeral character to its Arabic spelling.
// It is used to normalize titles such as "钢铁侠三" into "钢铁侠3".
var cnNumMap = map[string]string{
	"〇": "0", "零": "0", "一": "1", "二": "2", "三": "3", "四": "4",
	"五": "5", "六": "6", "七": "7", "八": "8", "九": "9", "十": "10",
}

// normalizeTitle prepares a title for fuzzy matching:
//   - bracketed segments (ASCII and full-width parentheses, [] and 【】) are
//     removed together with their content,
//   - release noise (see noiseTokenRegexp) is removed,
//   - the separators ".", "_", "-" and "+" become spaces,
//   - whitespace runs are collapsed and the result is trimmed.
//
// An empty input is returned unchanged.
func normalizeTitle(s string) string {
	if s == "" {
		return s
	}
	// The patterns and the replacer are built once at package level instead of
	// on every call.
	s = titleBracketRegexp.ReplaceAllString(s, " ")
	s = noiseTokenRegexp.ReplaceAllString(s, " ")
	s = titleSeparatorReplacer.Replace(s)
	s = titleWhitespaceRegexp.ReplaceAllString(s, " ")
	return strings.TrimSpace(s)
}

// cnNumToArabic rewrites Chinese numerals inside s as Arabic digits. This is a
// lightweight normalization, not a full number parser.
//
// Consecutive numeral characters are converted as one unit:
//   - a run without "十" is mapped digit by digit ("二〇一二" -> "2012"),
//   - a run of the form [X]十[Y], with X and Y being single non-zero digits,
//     is read as a number below 100 ("十" -> "10", "十二" -> "12",
//     "二十" -> "20", "二十三" -> "23"),
//   - any other run containing "十" falls back to the per-character mapping.
//
// All other characters are copied through unchanged.
func cnNumToArabic(s string) string {
	if s == "" {
		return s
	}
	runes := []rune(s)
	var b strings.Builder
	b.Grow(len(s))
	for i := 0; i < len(runes); {
		if !isCNNumeral(runes[i]) {
			b.WriteRune(runes[i])
			i++
			continue
		}
		// Collect the whole numeral run so "十二" is read as 12, not "10"+"2".
		j := i + 1
		for j < len(runes) && isCNNumeral(runes[j]) {
			j++
		}
		b.WriteString(cnNumeralRunToArabic(runes[i:j]))
		i = j
	}
	return b.String()
}

// isCNNumeral reports whether r is one of the numeral characters in cnNumMap.
func isCNNumeral(r rune) bool {
	_, ok := cnNumMap[string(r)]
	return ok
}

// cnNumeralRunToArabic converts a run consisting only of cnNumMap characters
// into Arabic digits, following the rules documented on cnNumToArabic.
func cnNumeralRunToArabic(run []rune) string {
	tenCount, tenAt := 0, -1
	for i, r := range run {
		if r == '十' {
			tenCount++
			tenAt = i
		}
	}

	if tenCount == 1 {
		before, after := run[:tenAt], run[tenAt+1:]
		if len(before) <= 1 && len(after) <= 1 {
			high, low := "1", "0"
			if len(before) == 1 {
				high = cnNumMap[string(before[0])]
			}
			if len(after) == 1 {
				low = cnNumMap[string(after[0])]
			}
			// A zero next to "十" ("零十", "十零") is not a well-formed number;
			// leave such runs to the per-character fallback below.
			if high != "0" && (len(after) == 0 || low != "0") {
				return high + low
			}
		}
	}

	var b strings.Builder
	for _, r := range run {
		b.WriteString(cnNumMap[string(r)])
	}
	return b.String()
}
68+
2269
// parsedVideoTitle 解析后的视频标题信息
2370
type parsedVideoTitle struct {
2471
EnglishTitle string // 英文标题(第一个中文片段之前、年份之前的部分)
@@ -165,13 +212,32 @@ type tmdbMovieDetail struct {
165212
}
166213

167214
// doTMDBSearch 执行一次TMDB搜索请求
168-
func (s *TMDBScraper) doTMDBSearch(query, year string) (*tmdbSearchResult, error) {
169-
searchURL := fmt.Sprintf("%s/search/multi?api_key=%s&query=%s&language=zh-CN&search_type=ngram",
170-
s.BaseURL, s.APIKey, url.QueryEscape(query))
215+
// endpoint 取值:multi / movie / tv
216+
// language 取值:zh-CN / en-US / 空(不传 language,TMDB 会按用户语言或英文)
217+
// 注意:search_type=ngram 是已废弃参数,不再使用;TMDB 默认即支持 substring 匹配
218+
func (s *TMDBScraper) doTMDBSearch(endpoint, query, year, language string) (*tmdbSearchResult, error) {
219+
if endpoint == "" {
220+
endpoint = "multi"
221+
}
222+
params := url.Values{}
223+
params.Set("api_key", s.APIKey)
224+
params.Set("query", query)
225+
params.Set("include_adult", "true")
226+
if language != "" {
227+
params.Set("language", language)
228+
}
171229
if year != "" {
172-
searchURL += "&year=" + year
230+
// movie 用 year / primary_release_year,tv 用 first_air_date_year
231+
switch endpoint {
232+
case "tv":
233+
params.Set("first_air_date_year", year)
234+
default:
235+
params.Set("year", year)
236+
}
173237
}
174238

239+
searchURL := fmt.Sprintf("%s/search/%s?%s", s.BaseURL, endpoint, params.Encode())
240+
175241
resp, err := s.client.Get(searchURL)
176242
if err != nil {
177243
return nil, fmt.Errorf("TMDB搜索请求失败: %w", err)
@@ -189,46 +255,117 @@ func (s *TMDBScraper) doTMDBSearch(query, year string) (*tmdbSearchResult, error
189255
return nil, fmt.Errorf("TMDB搜索结果解析失败(status=%d, url=%s): %w, body=%s",
190256
resp.StatusCode, searchURL, err, snippet)
191257
}
258+
// /search/multi 返回的结果带 media_type,/search/movie /search/tv 不带,需要补齐
259+
if endpoint == "movie" || endpoint == "tv" {
260+
for i := range result.Results {
261+
if result.Results[i].MediaType == "" {
262+
result.Results[i].MediaType = endpoint
263+
}
264+
}
265+
}
192266
return &result, nil
193267
}
194268

195-
// searchWithFallback 带降级重试的TMDB搜索
196-
// 策略:
197-
// 1. 有中文标题时,先用中文标题 + 年份搜索,再用中文标题不带年份搜索
198-
// 2. 有英文标题时,用英文标题 + 年份搜索,再用英文标题不带年份搜索
199-
// 3. 全部搜索失败才返回错误
200-
func (s *TMDBScraper) searchWithFallback(parsed parsedVideoTitle) (*tmdbSearchResult, error) {
201-
type searchAttempt struct {
202-
query string
203-
year string
269+
// searchAttempt holds the parameters of a single TMDB search request.
type searchAttempt struct {
	// endpoint selects the search endpoint: "multi", "movie" or "tv".
	endpoint string
	// query is the search keyword.
	query string
	// year is the release year filter; empty means no year filter.
	year string
	// language is the TMDB language code (e.g. "zh-CN", "en-US"); empty means
	// the language parameter is not sent.
	language string
}
276+
277+
// buildTitleCandidates 根据原始标题构造一组候选搜索词(按优先级返回)
278+
// 候选包含:原始 -> 归一化 -> 阿拉伯数字归一化 -> 拆分子词
279+
func buildTitleCandidates(title string) []string {
280+
if title == "" {
281+
return nil
282+
}
283+
seen := make(map[string]bool)
284+
var out []string
285+
add := func(s string) {
286+
s = strings.TrimSpace(s)
287+
if s == "" || seen[s] {
288+
return
289+
}
290+
seen[s] = true
291+
out = append(out, s)
292+
}
293+
294+
add(title)
295+
norm := normalizeTitle(title)
296+
add(norm)
297+
add(cnNumToArabic(norm))
298+
299+
// 若中文标题里夹杂了空格分隔的多个词,把每个非短词单独作为候选
300+
for _, w := range strings.Fields(norm) {
301+
if utf8.RuneCountInString(w) >= 2 {
302+
add(w)
303+
}
204304
}
305+
return out
306+
}
205307

308+
// searchWithFallback 带降级重试的TMDB搜索
309+
// 策略(按优先级,命中即停止):
310+
// 1. 中文标题候选 × {带年份, 不带年份} × {movie, tv, multi} × language=zh-CN
311+
// 2. 英文标题候选 × {带年份, 不带年份} × {movie, tv, multi} × language=en-US
312+
// 3. 中文标题候选 × multi × 不指定 language(最后兜底)
313+
func (s *TMDBScraper) searchWithFallback(parsed parsedVideoTitle) (*tmdbSearchResult, error) {
206314
var attempts []searchAttempt
207315

208-
// 中文标题优先
209-
if parsed.ChineseTitle != "" {
210-
if parsed.Year != "" {
211-
attempts = append(attempts, searchAttempt{parsed.ChineseTitle, parsed.Year})
316+
addGroup := func(title, lang string) {
317+
if title == "" {
318+
return
319+
}
320+
cands := buildTitleCandidates(title)
321+
// 同一个候选词,先尝试 movie + 年份,再 tv + 年份,再 movie 无年份,再 tv 无年份,最后 multi
322+
for _, q := range cands {
323+
if parsed.Year != "" {
324+
attempts = append(attempts,
325+
searchAttempt{"movie", q, parsed.Year, lang},
326+
searchAttempt{"tv", q, parsed.Year, lang},
327+
)
328+
}
329+
attempts = append(attempts,
330+
searchAttempt{"movie", q, "", lang},
331+
searchAttempt{"tv", q, "", lang},
332+
searchAttempt{"multi", q, "", lang},
333+
)
212334
}
213-
attempts = append(attempts, searchAttempt{parsed.ChineseTitle, ""})
214335
}
215336

216-
// 英文标题兜底
217-
if parsed.EnglishTitle != "" {
218-
if parsed.Year != "" {
219-
attempts = append(attempts, searchAttempt{parsed.EnglishTitle, parsed.Year})
337+
// 中文标题优先(zh-CN)
338+
addGroup(parsed.ChineseTitle, "zh-CN")
339+
// 英文标题兜底(en-US)
340+
addGroup(parsed.EnglishTitle, "en-US")
341+
// 最后再用中文标题不指定语言搜一次(兜底,TMDB 多语言别名匹配可能命中)
342+
if parsed.ChineseTitle != "" {
343+
for _, q := range buildTitleCandidates(parsed.ChineseTitle) {
344+
attempts = append(attempts, searchAttempt{"multi", q, "", ""})
220345
}
221-
attempts = append(attempts, searchAttempt{parsed.EnglishTitle, ""})
222346
}
223347

224348
if len(attempts) == 0 {
225349
return nil, fmt.Errorf("无法从文件名中提取有效标题")
226350
}
227351

352+
// 去重,避免重复请求
353+
type key struct{ ep, q, y, l string }
354+
done := make(map[key]bool)
355+
356+
var lastErr error
228357
for _, attempt := range attempts {
229-
result, err := s.doTMDBSearch(attempt.query, attempt.year)
358+
k := key{attempt.endpoint, attempt.query, attempt.year, attempt.language}
359+
if done[k] {
360+
continue
361+
}
362+
done[k] = true
363+
364+
result, err := s.doTMDBSearch(attempt.endpoint, attempt.query, attempt.year, attempt.language)
230365
if err != nil {
231-
return nil, err
366+
// 网络/解析错误不立即终止,记录后继续尝试下一个候选
367+
lastErr = err
368+
continue
232369
}
233370
if len(result.Results) > 0 {
234371
return result, nil
@@ -240,6 +377,9 @@ func (s *TMDBScraper) searchWithFallback(parsed parsedVideoTitle) (*tmdbSearchRe
240377
if titleInfo == "" {
241378
titleInfo = parsed.EnglishTitle
242379
}
380+
if lastErr != nil {
381+
return nil, fmt.Errorf("TMDB未找到匹配结果: %s (last err: %v)", titleInfo, lastErr)
382+
}
243383
return nil, fmt.Errorf("TMDB未找到匹配结果: %s", titleInfo)
244384
}
245385

0 commit comments

Comments
 (0)