diff --git a/internal/pkg/ffmpeg_helper/audio_info.go b/internal/pkg/ffmpeg_helper/audio_info.go index 78329af77..74460f448 100644 --- a/internal/pkg/ffmpeg_helper/audio_info.go +++ b/internal/pkg/ffmpeg_helper/audio_info.go @@ -14,6 +14,7 @@ type AudioInfo struct { startTime string language string FullPath string + Duration float64 } func NewAudioInfo(index int, codecName, codecType, timeBase, startTime, language string) *AudioInfo { @@ -24,6 +25,7 @@ func NewAudioInfo(index int, codecName, codecType, timeBase, startTime, language timeBase: timeBase, startTime: startTime, language: language, + Duration: 0, } } diff --git a/internal/pkg/ffmpeg_helper/ffmpeg_helper.go b/internal/pkg/ffmpeg_helper/ffmpeg_helper.go index 3372a8dc5..da1210675 100644 --- a/internal/pkg/ffmpeg_helper/ffmpeg_helper.go +++ b/internal/pkg/ffmpeg_helper/ffmpeg_helper.go @@ -2,6 +2,7 @@ package ffmpeg_helper import ( "bytes" + "errors" "fmt" "github.com/allanpk716/ChineseSubFinder/internal/common" "github.com/allanpk716/ChineseSubFinder/internal/logic/sub_parser/ass" @@ -49,7 +50,7 @@ func (f *FFMPEGHelper) GetFFMPEGInfo(videoFileFullPath string) (bool, *FFMPEGInf return false, nil, err } // 解析得到的字符串反馈 - bok, ffMPEGInfo := f.parseJsonString2GetFFMPEGInfo(videoFileFullPath, buf.String()) + bok, ffMPEGInfo := f.parseJsonString2GetFFProbeInfo(videoFileFullPath, buf.String()) if bok == false { return false, nil, nil } @@ -102,6 +103,33 @@ func (f *FFMPEGHelper) GetFFMPEGInfo(videoFileFullPath string) (bool, *FFMPEGInf return bok, ffMPEGInfo, nil } +func (f *FFMPEGHelper) GetAudioInfo(audioFileFullPath string) (bool, float64, error) { + + const args = "-v error -show_format -show_streams -print_format json -f s16le -ac 1 -ar 16000" + cmdArgs := strings.Fields(args) + cmdArgs = append(cmdArgs, audioFileFullPath) + cmd := exec.Command("ffprobe", cmdArgs...) + buf := bytes.NewBufferString("") + //指定输出位置 + cmd.Stderr = buf + cmd.Stdout = buf + err := cmd.Start() + if err != nil { + return false, 0, err + } + err = cmd.Wait() + if err != nil { + return false, 0, err + } + + bok, duration := f.parseJsonString2GetAudioInfo(buf.String()) + if bok == false { + return false, 0, errors.New("ffprobe get " + audioFileFullPath + " duration error") + } + + return true, duration, nil +} + // ExportAudioArgsByTimeRange 根据输入的时间轴导出音频分段信息 func (f *FFMPEGHelper) ExportAudioArgsByTimeRange(audioFullPath string, startTimeString, timeLeng, outAudioFullPath string) (string, error) { @@ -120,8 +148,8 @@ func (f *FFMPEGHelper) ExportAudioArgsByTimeRange(audioFullPath string, startTim return "", nil } -// parseJsonString2GetFFMPEGInfo 使用 ffprobe 获取视频的 stream 信息,从中解析出字幕和音频的索引 -func (f *FFMPEGHelper) parseJsonString2GetFFMPEGInfo(videoFileFullPath, inputFFProbeString string) (bool, *FFMPEGInfo) { +// parseJsonString2GetFFProbeInfo 使用 ffprobe 获取视频的 stream 信息,从中解析出字幕和音频的索引 +func (f *FFMPEGHelper) parseJsonString2GetFFProbeInfo(videoFileFullPath, inputFFProbeString string) (bool, *FFMPEGInfo) { streamsValue := gjson.Get(inputFFProbeString, "streams.#") if streamsValue.Exists() == false { @@ -206,6 +234,16 @@ func (f *FFMPEGHelper) parseJsonString2GetFFMPEGInfo(videoFileFullPath, inputFFP return true, ffmpegInfo } +// parseJsonString2GetAudioInfo 获取 pcm 音频的长度 +func (f *FFMPEGHelper) parseJsonString2GetAudioInfo(inputFFProbeString string) (bool, float64) { + + durationValue := gjson.Get(inputFFProbeString, "format.duration") + if durationValue.Exists() == false { + return false, 0 + } + return true, durationValue.Float() +} + // exportAudioAndSubtitles 导出音频和字幕文件 func (f *FFMPEGHelper) exportAudioAndSubtitles(subArgs, audioArgs []string) (string, error) { @@ -289,6 +327,7 @@ func (f *FFMPEGHelper) getAudioExportArgsByTimeRange(audioFullPath string, start /* ffmpeg.exe -ar 16000 -ac 1 -f s16le -i aa.pcm -ss 00:1:27 -t 28 -acodec pcm_s16le -f s16le -ac 1 -ar 16000 bb.pcm + ffmpeg.exe -i aa.srt -ss 00:1:27 -t 28 bb.srt */ var audioArgs = make([]string, 0) diff --git a/internal/pkg/ffmpeg_helper/ffmpeg_helper_test.go b/internal/pkg/ffmpeg_helper/ffmpeg_helper_test.go index b6368a4dd..2cc739ec7 100644 --- a/internal/pkg/ffmpeg_helper/ffmpeg_helper_test.go +++ b/internal/pkg/ffmpeg_helper/ffmpeg_helper_test.go @@ -60,13 +60,13 @@ func Test_parseJsonString2GetFFMPEGInfo(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - got, got1 := f.parseJsonString2GetFFMPEGInfo(tt.args.videoFileFullPath, tt.args.input) + got, got1 := f.parseJsonString2GetFFProbeInfo(tt.args.videoFileFullPath, tt.args.input) if got != tt.want { - t.Errorf("parseJsonString2GetFFMPEGInfo() got = %v, want %v", got, tt.want) + t.Errorf("parseJsonString2GetFFProbeInfo() got = %v, want %v", got, tt.want) } if len(got1.AudioInfoList) != tt.audios || len(got1.SubtitleInfoList) != tt.subs { - t.Fatal("parseJsonString2GetFFMPEGInfo result List < 1") + t.Fatal("parseJsonString2GetFFProbeInfo result List < 1") } }) } @@ -76,7 +76,7 @@ func TestFFMPEGHelper_ExportAudioArgsByTimeRange(t *testing.T) { audioFullPath := "C:\\Tmp\\Rick and Morty - S05E10\\英_1.pcm" startTimeString := "0:1:27" - timeLeng := "28" + timeLeng := "28.2" outAudioFullPath := "C:\\Tmp\\Rick and Morty - S05E10\\英_1_cut.pcm" f := NewFFMPEGHelper() @@ -87,3 +87,16 @@ func TestFFMPEGHelper_ExportAudioArgsByTimeRange(t *testing.T) { t.Fatal(err) } } + +func TestFFMPEGHelper_GetAudioInfo(t *testing.T) { + + audioFullPath := "C:\\Tmp\\Rick and Morty - S05E10\\英_1.pcm" + + f := NewFFMPEGHelper() + bok, duration, err := f.GetAudioInfo(audioFullPath) + if err != nil || bok == false { + t.Fatal(err) + } + + println(duration) +} diff --git a/internal/pkg/sub_timeline_fixer/fixer.go b/internal/pkg/sub_timeline_fixer/fixer.go index 27c903c51..a6f4675d3 100644 --- a/internal/pkg/sub_timeline_fixer/fixer.go +++ b/internal/pkg/sub_timeline_fixer/fixer.go @@ -2,8 +2,8 @@ package sub_timeline_fixer import ( "fmt" - "github.com/allanpk716/ChineseSubFinder/internal/common" "github.com/allanpk716/ChineseSubFinder/internal/pkg" + "github.com/allanpk716/ChineseSubFinder/internal/pkg/ffmpeg_helper" "github.com/allanpk716/ChineseSubFinder/internal/pkg/log_helper" "github.com/allanpk716/ChineseSubFinder/internal/pkg/vad" "github.com/allanpk716/ChineseSubFinder/internal/types/sub_timeline_fiexer" @@ -19,12 +19,14 @@ import ( ) type SubTimelineFixer struct { - fixerConfig sub_timeline_fiexer.SubTimelineFixerConfig + fixerConfig sub_timeline_fiexer.SubTimelineFixerConfig + ffmpegHelper *ffmpeg_helper.FFMPEGHelper } func NewSubTimelineFixer(fixerConfig sub_timeline_fiexer.SubTimelineFixerConfig) *SubTimelineFixer { return &SubTimelineFixer{ - fixerConfig: fixerConfig, + fixerConfig: fixerConfig, + ffmpegHelper: ffmpeg_helper.NewFFMPEGHelper(), } } @@ -67,12 +69,7 @@ func (s *SubTimelineFixer) FixSubTimeline(infoSrc *subparser.FileInfo, inOffsetT */ // 偏移时间 offsetTime := time.Duration(inOffsetTime*1000) * time.Millisecond - timeFormat := "" - if infoSrc.Ext == common.SubExtASS || infoSrc.Ext == common.SubExtSSA { - timeFormat = common.TimeFormatAss - } else { - timeFormat = common.TimeFormatSrt - } + timeFormat := infoSrc.GetTimeFormat() fixContent := infoSrc.Content for _, srcOneDialogue := range infoSrc.Dialogues { @@ -213,12 +210,7 @@ func (s *SubTimelineFixer) GetOffsetTimeV1(infoBase, infoSrc *subparser.FileInfo srcIndex++ } - timeFormat := "" - if infoBase.Ext == common.SubExtASS || infoBase.Ext == common.SubExtSSA { - timeFormat = common.TimeFormatAss - } else { - timeFormat = common.TimeFormatSrt - } + timeFormat := infoBase.GetTimeFormat() var startDiffTimeLineData = make([]opts.LineData, 0) var endDiffTimeLineData = make([]opts.LineData, 0) @@ -370,9 +362,52 @@ func (s *SubTimelineFixer) GetOffsetTimeV1(infoBase, infoSrc *subparser.FileInfo } // GetOffsetTimeV2 使用 VAD 检测语音是否有人声,输出连续的点标记,再通过 SimHash 进行匹配,找到最佳的偏移时间 -func (s *SubTimelineFixer) GetOffsetTimeV2(audioInfo vad.AudioInfo, infoSrc *subparser.FileInfo, staticLineFileSavePath string, debugInfoFileSavePath string) error { +func (s *SubTimelineFixer) GetOffsetTimeV2(audioInfo vad.AudioInfo, infoSrc *subparser.FileInfo, staticLineFileSavePath string, debugInfoFileSavePath string) (bool, float64, float64, error) { + + /* + 分割字幕成若干段,然后得到若干段的时间轴,将这些段从字幕文字转换成 VADInfo + 从上面若干段时间轴,把音频给分割成多段 + 然后使用 simhash 的进行比较,输出分析的曲线图等信息 + */ + + bok, duration, err := s.ffmpegHelper.GetAudioInfo(audioInfo.FileFullPath) + if err != nil || bok == false { + return false, 0, 0, err + } + + /* + 这里的字幕要求是完整的一个字幕 + 1. 抽取字幕的时间片段的时候,暂定,前 15% 和后 15% 要避开,前奏、主题曲、结尾曲 + 2. 将整个字幕,抽取连续 5 句对话为一个单元,提取时间片段信息 + */ + + timeFormat := infoSrc.GetTimeFormat() + for _, oneDialogueEx := range infoSrc.DialoguesEx { + + oneDialogueExTimeStart, err := time.Parse(timeFormat, oneDialogueEx.StartTime) + if err != nil { + return false, 0, 0, err + } + oneDialogueExTimeEnd, err := time.Parse(timeFormat, oneDialogueEx.EndTime) + if err != nil { + return false, 0, 0, err + } + + oneStart := pkg.Time2Number(oneDialogueExTimeStart) + oneEnd := pkg.Time2Number(oneDialogueExTimeEnd) + + if duration*0.15 > oneStart || duration*(1.0-0.15) < oneStart { + continue + } + if oneDialogueEx.ChLine == "" { + continue + } + + //baseCorpus = append(baseCorpus, oneDialogueEx.EnLine) + //baseDialogueFilterMap[len(baseCorpus)-1] = index + } - return nil + return false, -1, -1, nil } const FixMask = "-fix" diff --git a/internal/pkg/sub_timeline_fixer/fixer_test.go b/internal/pkg/sub_timeline_fixer/fixer_test.go index 611bbf6c8..b17ffee7b 100644 --- a/internal/pkg/sub_timeline_fixer/fixer_test.go +++ b/internal/pkg/sub_timeline_fixer/fixer_test.go @@ -7,6 +7,7 @@ import ( "github.com/allanpk716/ChineseSubFinder/internal/pkg" "github.com/allanpk716/ChineseSubFinder/internal/pkg/sub_helper" "github.com/allanpk716/ChineseSubFinder/internal/pkg/sub_parser_hub" + "github.com/allanpk716/ChineseSubFinder/internal/pkg/vad" "github.com/allanpk716/ChineseSubFinder/internal/types/sub_timeline_fiexer" "github.com/james-bowman/nlp" "github.com/james-bowman/nlp/measures/pairwise" @@ -43,7 +44,7 @@ func TestStopWordCounter(t *testing.T) { println(info.Name) } -func TestGetOffsetTime(t *testing.T) { +func TestGetOffsetTimeV1(t *testing.T) { testDataPath := "../../../TestData/FixTimeline" testRootDir, err := pkg.CopyTestData(testDataPath) if err != nil { @@ -385,3 +386,59 @@ func TestTFIDF(t *testing.T) { fmt.Printf("Matched '%s'", testCorpus[matched]) // Output: Matched 'The quick brown fox jumped over the lazy dog' } + +func TestSubTimelineFixer_GetOffsetTimeV2(t *testing.T) { + + subParserHub := sub_parser_hub.NewSubParserHub(ass.NewParser(), srt.NewParser()) + + type fields struct { + fixerConfig sub_timeline_fiexer.SubTimelineFixerConfig + } + type args struct { + audioInfo vad.AudioInfo + subFilePath string + staticLineFileSavePath string + debugInfoFileSavePath string + } + tests := []struct { + name string + fields fields + args args + want bool + want1 float64 + want2 float64 + wantErr bool + }{ + {name: "Rick and Morty - S05E10", args: args{audioInfo: vad.AudioInfo{FileFullPath: "C:\\Tmp\\Rick and Morty - S05E10\\英_1.pcm"}, subFilePath: "C:\\Tmp\\Rick and Morty - S05E10\\英_2.ass"}}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := &SubTimelineFixer{ + fixerConfig: tt.fields.fixerConfig, + } + + bok, fileInfo, err := subParserHub.DetermineFileTypeFromFile(tt.args.subFilePath) + if err != nil { + t.Fatal(err) + } + if bok == false { + t.Fatal("DetermineFileTypeFromFile == false") + } + + got, got1, got2, err := s.GetOffsetTimeV2(tt.args.audioInfo, fileInfo, tt.args.staticLineFileSavePath, tt.args.debugInfoFileSavePath) + if (err != nil) != tt.wantErr { + t.Errorf("GetOffsetTimeV2() error = %v, wantErr %v", err, tt.wantErr) + return + } + if got != tt.want { + t.Errorf("GetOffsetTimeV2() got = %v, want %v", got, tt.want) + } + if got1 != tt.want1 { + t.Errorf("GetOffsetTimeV2() got1 = %v, want %v", got1, tt.want1) + } + if got2 != tt.want2 { + t.Errorf("GetOffsetTimeV2() got2 = %v, want %v", got2, tt.want2) + } + }) + } +} diff --git a/internal/pkg/sub_timeline_fixer/sub_unit.go b/internal/pkg/sub_timeline_fixer/sub_unit.go new file mode 100644 index 000000000..103ae377e --- /dev/null +++ b/internal/pkg/sub_timeline_fixer/sub_unit.go @@ -0,0 +1,42 @@ +package sub_timeline_fixer + +import ( + "fmt" + "github.com/allanpk716/ChineseSubFinder/internal/pkg" + "github.com/allanpk716/ChineseSubFinder/internal/pkg/vad" + "time" +) + +type SubUnit struct { + StartTime time.Time + EndTime time.Time + vadList []vad.VADInfo +} + +func NewSubUnit() *SubUnit { + return &SubUnit{ + vadList: make([]vad.VADInfo, 0), + } +} + +func (s *SubUnit) Add(oneSubStartTime, oneSubEndTime time.Time) { + + if s.GetStartTimeNumber() == 0 { + s.StartTime = oneSubStartTime + } + s.EndTime = oneSubEndTime + // +} + +func (s SubUnit) GetStartTimeNumber() float64 { + return pkg.Time2Number(s.StartTime) +} + +func (s SubUnit) GetEndTimeNumber() float64 { + return pkg.Time2Number(s.EndTime) +} + +func (s SubUnit) GetFFMPEGCutRange() (string, string) { + return fmt.Sprintf("%d:%d:%d", s.StartTime.Hour(), s.StartTime.Minute(), s.StartTime.Second()), + fmt.Sprintf("%f", s.GetEndTimeNumber()-s.GetStartTimeNumber()) +} diff --git a/internal/pkg/util.go b/internal/pkg/util.go index 54c1fe503..e054b0eb8 100644 --- a/internal/pkg/util.go +++ b/internal/pkg/util.go @@ -18,6 +18,7 @@ import ( "runtime" "strconv" "strings" + "time" ) // NewHttpClient 新建一个 resty 的对象 @@ -444,3 +445,13 @@ func WriteStrings2File(desfilePath string, strings []string) error { } return nil } + +func Time2Number(inTime time.Time) float64 { + outSecend := 0.0 + outSecend += float64(inTime.Hour() * 60 * 60) + outSecend += float64(inTime.Minute() * 60) + outSecend += float64(inTime.Second()) + outSecend += float64(inTime.Nanosecond()) / 1000 / 1000 / 1000 + + return outSecend +} diff --git a/internal/pkg/vad/vad_helper.go b/internal/pkg/vad/vad_helper.go index f5ad03a2f..78267e65e 100644 --- a/internal/pkg/vad/vad_helper.go +++ b/internal/pkg/vad/vad_helper.go @@ -86,10 +86,11 @@ func GetVADInfoFromAudio(audioInfo AudioInfo) ([]VADInfo, error) { } // GetVADInfoFromSubtitle 分析字幕文件(暂时考虑的是外置的字幕),得到 VAD 分析信息,看样子是不支持并发的,只能单线程使用 -func GetVADInfoFromSubtitle(subFileInfo *subparser.FileInfo) ([]VADInfo, error) { +func GetVADInfoFromSubtitle(subFileInfo *subparser.FileInfo, startTime, endIndex int) ([]VADInfo, error) { var vadInfos = make([]VADInfo, 0) - + timeFormat := subFileInfo.GetTimeFormat() + println(timeFormat) for _, oneDialogueEx := range subFileInfo.DialoguesEx { // 考虑的是外置字幕,所以就应该是有中文的 diff --git a/internal/pkg/vad/vad_info.go b/internal/pkg/vad/vad_info.go index 7c11f7afe..785a6fd2a 100644 --- a/internal/pkg/vad/vad_info.go +++ b/internal/pkg/vad/vad_info.go @@ -39,7 +39,7 @@ func GetTimeRange(inVADInfos []VADInfo, starttime, timeRange int) []VADInfo { return outVADInfos } -// InsertVADInfo 得到的是 VAD 状态变换的节点,中间缺失了连续的 VAD 点信息,使用本函数可以进行插值 +// InsertVADInfo 整个函数待定,未必会实现。得到的是 VAD 状态变换的节点,中间缺失了连续的 VAD 点信息,使用本函数可以进行插值 func InsertVADInfo(inVADInfos []VADInfo, duration int) []VADInfo { var outVADInfos = make([]VADInfo, 0) diff --git a/internal/types/subparser/fileinfo.go b/internal/types/subparser/fileinfo.go index 380b98e34..7e37f5215 100644 --- a/internal/types/subparser/fileinfo.go +++ b/internal/types/subparser/fileinfo.go @@ -1,6 +1,7 @@ package subparser import ( + "github.com/allanpk716/ChineseSubFinder/internal/common" "github.com/allanpk716/ChineseSubFinder/internal/types/language" ) @@ -18,6 +19,14 @@ type FileInfo struct { OtherLines []string // 抽取出所有的第二语言对话,可能是英文、韩文、日文 } +func (f FileInfo) GetTimeFormat() string { + if f.Ext == common.SubExtASS || f.Ext == common.SubExtSSA { + return common.TimeFormatAss + } else { + return common.TimeFormatSrt + } +} + // OneDialogue 一句对话 type OneDialogue struct { StartTime string // 开始时间