Skip to content

Commit

Permalink
正在做 vad 时间轴校正功能
Browse files Browse the repository at this point in the history
Signed-off-by: allan716 <525223688@qq.com>
  • Loading branch information
allanpk716 committed Nov 3, 2021
1 parent 476935d commit 2fb5543
Show file tree
Hide file tree
Showing 10 changed files with 237 additions and 28 deletions.
2 changes: 2 additions & 0 deletions internal/pkg/ffmpeg_helper/audio_info.go
Expand Up @@ -14,6 +14,7 @@ type AudioInfo struct {
startTime string
language string
FullPath string
Duration float64
}

func NewAudioInfo(index int, codecName, codecType, timeBase, startTime, language string) *AudioInfo {
Expand All @@ -24,6 +25,7 @@ func NewAudioInfo(index int, codecName, codecType, timeBase, startTime, language
timeBase: timeBase,
startTime: startTime,
language: language,
Duration: 0,
}
}

Expand Down
45 changes: 42 additions & 3 deletions internal/pkg/ffmpeg_helper/ffmpeg_helper.go
Expand Up @@ -2,6 +2,7 @@ package ffmpeg_helper

import (
"bytes"
"errors"
"fmt"
"github.com/allanpk716/ChineseSubFinder/internal/common"
"github.com/allanpk716/ChineseSubFinder/internal/logic/sub_parser/ass"
Expand Down Expand Up @@ -49,7 +50,7 @@ func (f *FFMPEGHelper) GetFFMPEGInfo(videoFileFullPath string) (bool, *FFMPEGInf
return false, nil, err
}
// 解析得到的字符串反馈
bok, ffMPEGInfo := f.parseJsonString2GetFFMPEGInfo(videoFileFullPath, buf.String())
bok, ffMPEGInfo := f.parseJsonString2GetFFProbeInfo(videoFileFullPath, buf.String())
if bok == false {
return false, nil, nil
}
Expand Down Expand Up @@ -102,6 +103,33 @@ func (f *FFMPEGHelper) GetFFMPEGInfo(videoFileFullPath string) (bool, *FFMPEGInf
return bok, ffMPEGInfo, nil
}

// GetAudioInfo runs ffprobe on an audio file and reports its duration as the
// float64 value of the "format.duration" field in ffprobe's JSON output.
//
// The file is probed with the fixed input options "-f s16le -ac 1 -ar 16000".
//
// It returns (true, duration, nil) on success. On failure the bool is false,
// the duration is 0 and the error is non-nil: either ffprobe could not be run
// or exited with an error (the error is wrapped and ffprobe's stderr text is
// attached), or its JSON output carried no "format.duration" field.
func (f *FFMPEGHelper) GetAudioInfo(audioFileFullPath string) (bool, float64, error) {

	const args = "-v error -show_format -show_streams -print_format json -f s16le -ac 1 -ar 16000"
	cmdArgs := append(strings.Fields(args), audioFileFullPath)
	cmd := exec.Command("ffprobe", cmdArgs...)

	// Keep stdout (the JSON document) and stderr (diagnostics) apart so that
	// anything ffprobe prints as a message can never end up inside the JSON
	// that is parsed below.
	var stdout, stderr bytes.Buffer
	cmd.Stdout = &stdout
	cmd.Stderr = &stderr
	if err := cmd.Run(); err != nil {
		// Surface ffprobe's own explanation instead of only the exit status.
		if msg := strings.TrimSpace(stderr.String()); msg != "" {
			return false, 0, fmt.Errorf("ffprobe %q: %w: %s", audioFileFullPath, err, msg)
		}
		return false, 0, fmt.Errorf("ffprobe %q: %w", audioFileFullPath, err)
	}

	bok, duration := f.parseJsonString2GetAudioInfo(stdout.String())
	if !bok {
		return false, 0, errors.New("ffprobe get " + audioFileFullPath + " duration error")
	}

	return true, duration, nil
}

// ExportAudioArgsByTimeRange 根据输入的时间轴导出音频分段信息
func (f *FFMPEGHelper) ExportAudioArgsByTimeRange(audioFullPath string, startTimeString, timeLeng, outAudioFullPath string) (string, error) {

Expand All @@ -120,8 +148,8 @@ func (f *FFMPEGHelper) ExportAudioArgsByTimeRange(audioFullPath string, startTim
return "", nil
}

// parseJsonString2GetFFMPEGInfo 使用 ffprobe 获取视频的 stream 信息,从中解析出字幕和音频的索引
func (f *FFMPEGHelper) parseJsonString2GetFFMPEGInfo(videoFileFullPath, inputFFProbeString string) (bool, *FFMPEGInfo) {
// parseJsonString2GetFFProbeInfo 使用 ffprobe 获取视频的 stream 信息,从中解析出字幕和音频的索引
func (f *FFMPEGHelper) parseJsonString2GetFFProbeInfo(videoFileFullPath, inputFFProbeString string) (bool, *FFMPEGInfo) {

streamsValue := gjson.Get(inputFFProbeString, "streams.#")
if streamsValue.Exists() == false {
Expand Down Expand Up @@ -206,6 +234,16 @@ func (f *FFMPEGHelper) parseJsonString2GetFFMPEGInfo(videoFileFullPath, inputFFP
return true, ffmpegInfo
}

// parseJsonString2GetAudioInfo reads the audio length from ffprobe's JSON
// output by looking up the "format.duration" field.
//
// It returns (false, 0) when that field is absent, otherwise true together
// with the field converted to float64.
func (f *FFMPEGHelper) parseJsonString2GetAudioInfo(inputFFProbeString string) (bool, float64) {
	if result := gjson.Get(inputFFProbeString, "format.duration"); result.Exists() {
		return true, result.Float()
	}
	return false, 0
}

// exportAudioAndSubtitles 导出音频和字幕文件
func (f *FFMPEGHelper) exportAudioAndSubtitles(subArgs, audioArgs []string) (string, error) {

Expand Down Expand Up @@ -289,6 +327,7 @@ func (f *FFMPEGHelper) getAudioExportArgsByTimeRange(audioFullPath string, start
/*
ffmpeg.exe -ar 16000 -ac 1 -f s16le -i aa.pcm -ss 00:1:27 -t 28 -acodec pcm_s16le -f s16le -ac 1 -ar 16000 bb.pcm
ffmpeg.exe -i aa.srt -ss 00:1:27 -t 28 bb.srt
*/

var audioArgs = make([]string, 0)
Expand Down
21 changes: 17 additions & 4 deletions internal/pkg/ffmpeg_helper/ffmpeg_helper_test.go
Expand Up @@ -60,13 +60,13 @@ func Test_parseJsonString2GetFFMPEGInfo(t *testing.T) {

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got, got1 := f.parseJsonString2GetFFMPEGInfo(tt.args.videoFileFullPath, tt.args.input)
got, got1 := f.parseJsonString2GetFFProbeInfo(tt.args.videoFileFullPath, tt.args.input)
if got != tt.want {
t.Errorf("parseJsonString2GetFFMPEGInfo() got = %v, want %v", got, tt.want)
t.Errorf("parseJsonString2GetFFProbeInfo() got = %v, want %v", got, tt.want)
}

if len(got1.AudioInfoList) != tt.audios || len(got1.SubtitleInfoList) != tt.subs {
t.Fatal("parseJsonString2GetFFMPEGInfo result List < 1")
t.Fatal("parseJsonString2GetFFProbeInfo result List < 1")
}
})
}
Expand All @@ -76,7 +76,7 @@ func TestFFMPEGHelper_ExportAudioArgsByTimeRange(t *testing.T) {

audioFullPath := "C:\\Tmp\\Rick and Morty - S05E10\\英_1.pcm"
startTimeString := "0:1:27"
timeLeng := "28"
timeLeng := "28.2"
outAudioFullPath := "C:\\Tmp\\Rick and Morty - S05E10\\英_1_cut.pcm"

f := NewFFMPEGHelper()
Expand All @@ -87,3 +87,16 @@ func TestFFMPEGHelper_ExportAudioArgsByTimeRange(t *testing.T) {
t.Fatal(err)
}
}

// TestFFMPEGHelper_GetAudioInfo probes a PCM file with GetAudioInfo and logs
// the reported duration.
//
// NOTE(review): the input is an absolute path on a developer machine and
// GetAudioInfo shells out to ffprobe, so this test presumably only passes
// where both are present — confirm before relying on it in CI.
func TestFFMPEGHelper_GetAudioInfo(t *testing.T) {

	audioFullPath := "C:\\Tmp\\Rick and Morty - S05E10\\英_1.pcm"

	f := NewFFMPEGHelper()
	bok, duration, err := f.GetAudioInfo(audioFullPath)
	if err != nil {
		t.Fatal(err)
	}
	// Report the "false without error" case explicitly; t.Fatal(err) with a
	// nil error would print nothing useful.
	if !bok {
		t.Fatal("GetAudioInfo() returned false without an error")
	}

	t.Logf("duration: %v", duration)
}
69 changes: 52 additions & 17 deletions internal/pkg/sub_timeline_fixer/fixer.go
Expand Up @@ -2,8 +2,8 @@ package sub_timeline_fixer

import (
"fmt"
"github.com/allanpk716/ChineseSubFinder/internal/common"
"github.com/allanpk716/ChineseSubFinder/internal/pkg"
"github.com/allanpk716/ChineseSubFinder/internal/pkg/ffmpeg_helper"
"github.com/allanpk716/ChineseSubFinder/internal/pkg/log_helper"
"github.com/allanpk716/ChineseSubFinder/internal/pkg/vad"
"github.com/allanpk716/ChineseSubFinder/internal/types/sub_timeline_fiexer"
Expand All @@ -19,12 +19,14 @@ import (
)

// SubTimelineFixer bundles the fixer configuration with the ffmpeg helper
// used by the timeline-offset routines.
type SubTimelineFixer struct {
// fixerConfig is the configuration handed to NewSubTimelineFixer.
fixerConfig sub_timeline_fiexer.SubTimelineFixerConfig
fixerConfig sub_timeline_fiexer.SubTimelineFixerConfig
// ffmpegHelper is used by GetOffsetTimeV2 to read the audio duration.
ffmpegHelper *ffmpeg_helper.FFMPEGHelper
}

// NewSubTimelineFixer returns a SubTimelineFixer that stores fixerConfig and
// owns an FFMPEGHelper created with ffmpeg_helper.NewFFMPEGHelper.
func NewSubTimelineFixer(fixerConfig sub_timeline_fiexer.SubTimelineFixerConfig) *SubTimelineFixer {
return &SubTimelineFixer{
fixerConfig: fixerConfig,
fixerConfig: fixerConfig,
ffmpegHelper: ffmpeg_helper.NewFFMPEGHelper(),
}
}

Expand Down Expand Up @@ -67,12 +69,7 @@ func (s *SubTimelineFixer) FixSubTimeline(infoSrc *subparser.FileInfo, inOffsetT
*/
// 偏移时间
offsetTime := time.Duration(inOffsetTime*1000) * time.Millisecond
timeFormat := ""
if infoSrc.Ext == common.SubExtASS || infoSrc.Ext == common.SubExtSSA {
timeFormat = common.TimeFormatAss
} else {
timeFormat = common.TimeFormatSrt
}
timeFormat := infoSrc.GetTimeFormat()
fixContent := infoSrc.Content
for _, srcOneDialogue := range infoSrc.Dialogues {

Expand Down Expand Up @@ -213,12 +210,7 @@ func (s *SubTimelineFixer) GetOffsetTimeV1(infoBase, infoSrc *subparser.FileInfo
srcIndex++
}

timeFormat := ""
if infoBase.Ext == common.SubExtASS || infoBase.Ext == common.SubExtSSA {
timeFormat = common.TimeFormatAss
} else {
timeFormat = common.TimeFormatSrt
}
timeFormat := infoBase.GetTimeFormat()

var startDiffTimeLineData = make([]opts.LineData, 0)
var endDiffTimeLineData = make([]opts.LineData, 0)
Expand Down Expand Up @@ -370,9 +362,52 @@ func (s *SubTimelineFixer) GetOffsetTimeV1(infoBase, infoSrc *subparser.FileInfo
}

// GetOffsetTimeV2 is meant to use VAD to detect whether the audio contains human voice, emit
// continuous point markers, and then match them with SimHash to find the best offset time.
//
// NOTE(review): the body visible here is work in progress. It reads the audio duration through
// ffmpegHelper.GetAudioInfo, walks infoSrc.DialoguesEx and filters entries, but performs no
// VAD/SimHash matching yet; staticLineFileSavePath and debugInfoFileSavePath are not used.
// Any failure from GetAudioInfo or from parsing a dialogue timestamp is returned to the caller.
func (s *SubTimelineFixer) GetOffsetTimeV2(audioInfo vad.AudioInfo, infoSrc *subparser.FileInfo, staticLineFileSavePath string, debugInfoFileSavePath string) error {
func (s *SubTimelineFixer) GetOffsetTimeV2(audioInfo vad.AudioInfo, infoSrc *subparser.FileInfo, staticLineFileSavePath string, debugInfoFileSavePath string) (bool, float64, float64, error) {

/*
Split the subtitle into several segments and obtain the timeline of each segment; convert these segments from subtitle text into VADInfo
Using the timelines of those segments, cut the audio into multiple pieces
Then compare them with simhash and output the analysis charts and related information
*/

// Total audio length; used below to decide which dialogues to skip.
bok, duration, err := s.ffmpegHelper.GetAudioInfo(audioInfo.FileFullPath)
if err != nil || bok == false {
return false, 0, 0, err
}

/*
The subtitle used here is required to be one complete subtitle
1. When extracting time segments from the subtitle, tentatively avoid the first 15% and the last 15%: intro, theme song, ending song
2. Take every 5 consecutive dialogue lines of the whole subtitle as one unit and extract its time segment information
*/

timeFormat := infoSrc.GetTimeFormat()
for _, oneDialogueEx := range infoSrc.DialoguesEx {

oneDialogueExTimeStart, err := time.Parse(timeFormat, oneDialogueEx.StartTime)
if err != nil {
return false, 0, 0, err
}
oneDialogueExTimeEnd, err := time.Parse(timeFormat, oneDialogueEx.EndTime)
if err != nil {
return false, 0, 0, err
}

// Convert both timestamps with pkg.Time2Number so they can be compared with duration.
// NOTE(review): oneEnd is assigned but not read anywhere in the visible body.
oneStart := pkg.Time2Number(oneDialogueExTimeStart)
oneEnd := pkg.Time2Number(oneDialogueExTimeEnd)

// Skip dialogues that start before 15% or after 85% of the audio duration.
if duration*0.15 > oneStart || duration*(1.0-0.15) < oneStart {
continue
}
// Skip dialogues whose ChLine is empty.
if oneDialogueEx.ChLine == "" {
continue
}

//baseCorpus = append(baseCorpus, oneDialogueEx.EnLine)
//baseDialogueFilterMap[len(baseCorpus)-1] = index
}

return nil
return false, -1, -1, nil
}

const FixMask = "-fix"
59 changes: 58 additions & 1 deletion internal/pkg/sub_timeline_fixer/fixer_test.go
Expand Up @@ -7,6 +7,7 @@ import (
"github.com/allanpk716/ChineseSubFinder/internal/pkg"
"github.com/allanpk716/ChineseSubFinder/internal/pkg/sub_helper"
"github.com/allanpk716/ChineseSubFinder/internal/pkg/sub_parser_hub"
"github.com/allanpk716/ChineseSubFinder/internal/pkg/vad"
"github.com/allanpk716/ChineseSubFinder/internal/types/sub_timeline_fiexer"
"github.com/james-bowman/nlp"
"github.com/james-bowman/nlp/measures/pairwise"
Expand Down Expand Up @@ -43,7 +44,7 @@ func TestStopWordCounter(t *testing.T) {
println(info.Name)
}

func TestGetOffsetTime(t *testing.T) {
func TestGetOffsetTimeV1(t *testing.T) {
testDataPath := "../../../TestData/FixTimeline"
testRootDir, err := pkg.CopyTestData(testDataPath)
if err != nil {
Expand Down Expand Up @@ -385,3 +386,59 @@ func TestTFIDF(t *testing.T) {
fmt.Printf("Matched '%s'", testCorpus[matched])
// Output: Matched 'The quick brown fox jumped over the lazy dog'
}

// TestSubTimelineFixer_GetOffsetTimeV2 parses a subtitle file and feeds it,
// together with a PCM audio track, to GetOffsetTimeV2.
//
// NOTE(review): both inputs are absolute paths on a developer machine, so the
// test presumably only passes where those files exist — confirm before
// relying on it in CI.
func TestSubTimelineFixer_GetOffsetTimeV2(t *testing.T) {

	subParserHub := sub_parser_hub.NewSubParserHub(ass.NewParser(), srt.NewParser())

	type fields struct {
		fixerConfig sub_timeline_fiexer.SubTimelineFixerConfig
	}
	type args struct {
		audioInfo              vad.AudioInfo
		subFilePath            string
		staticLineFileSavePath string
		debugInfoFileSavePath  string
	}
	tests := []struct {
		name    string
		fields  fields
		args    args
		want    bool
		want1   float64
		want2   float64
		wantErr bool
	}{
		{
			name: "Rick and Morty - S05E10",
			args: args{
				audioInfo:   vad.AudioInfo{FileFullPath: "C:\\Tmp\\Rick and Morty - S05E10\\英_1.pcm"},
				subFilePath: "C:\\Tmp\\Rick and Morty - S05E10\\英_2.ass",
			},
			// GetOffsetTimeV2 currently finishes its non-error path with
			// "return false, -1, -1, nil"; expect exactly that until the
			// matching logic is implemented.
			want:  false,
			want1: -1,
			want2: -1,
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// Go through the constructor so that ffmpegHelper, which
			// GetOffsetTimeV2 calls, is initialised instead of left nil.
			s := NewSubTimelineFixer(tt.fields.fixerConfig)

			bok, fileInfo, err := subParserHub.DetermineFileTypeFromFile(tt.args.subFilePath)
			if err != nil {
				t.Fatal(err)
			}
			if !bok {
				t.Fatal("DetermineFileTypeFromFile == false")
			}

			got, got1, got2, err := s.GetOffsetTimeV2(tt.args.audioInfo, fileInfo, tt.args.staticLineFileSavePath, tt.args.debugInfoFileSavePath)
			if (err != nil) != tt.wantErr {
				t.Errorf("GetOffsetTimeV2() error = %v, wantErr %v", err, tt.wantErr)
				return
			}
			if got != tt.want {
				t.Errorf("GetOffsetTimeV2() got = %v, want %v", got, tt.want)
			}
			if got1 != tt.want1 {
				t.Errorf("GetOffsetTimeV2() got1 = %v, want %v", got1, tt.want1)
			}
			if got2 != tt.want2 {
				t.Errorf("GetOffsetTimeV2() got2 = %v, want %v", got2, tt.want2)
			}
		})
	}
}
42 changes: 42 additions & 0 deletions internal/pkg/sub_timeline_fixer/sub_unit.go
@@ -0,0 +1,42 @@
package sub_timeline_fixer

import (
"fmt"
"github.com/allanpk716/ChineseSubFinder/internal/pkg"
"github.com/allanpk716/ChineseSubFinder/internal/pkg/vad"
"time"
)

// SubUnit records the time span covered by the subtitle lines passed to Add,
// plus a list of vad.VADInfo values.
// NOTE(review): presumably one SubUnit stands for a run of consecutive
// dialogue lines that is later cut out of the audio — confirm with the caller.
type SubUnit struct {
	// StartTime and EndTime bound the unit; both are maintained by Add.
	StartTime, EndTime time.Time
	// vadList starts out empty (see NewSubUnit); nothing in this file appends to it.
	vadList []vad.VADInfo
}

// NewSubUnit returns a SubUnit with zero StartTime/EndTime and an empty,
// non-nil vadList.
func NewSubUnit() *SubUnit {
	unit := new(SubUnit)
	unit.vadList = []vad.VADInfo{}
	return unit
}

// Add extends the unit with one more subtitle line.
//
// The first line added fixes StartTime; every call moves EndTime to
// oneSubEndTime, the end of the line just added.
func (s *SubUnit) Add(oneSubStartTime, oneSubEndTime time.Time) {

	// "Not set yet" is the zero time.Time. Checking IsZero rather than
	// comparing the clock reading with 0 keeps a unit whose first line starts
	// at 00:00:00.000 from having its start overwritten by later lines.
	// (Times that time.Parse builds from a clock-only layout carry year 0 and
	// are therefore not the zero Time.)
	if s.StartTime.IsZero() {
		s.StartTime = oneSubStartTime
	}
	s.EndTime = oneSubEndTime
}

// GetStartTimeNumber returns StartTime converted to a float64 by
// pkg.Time2Number.
func (s SubUnit) GetStartTimeNumber() float64 {
	startNumber := pkg.Time2Number(s.StartTime)
	return startNumber
}

// GetEndTimeNumber returns EndTime converted to a float64 by
// pkg.Time2Number.
func (s SubUnit) GetEndTimeNumber() float64 {
	endNumber := pkg.Time2Number(s.EndTime)
	return endNumber
}

// GetFFMPEGCutRange returns the unit's span as two strings: the start
// position formatted as "H:M:S.mmm" and the length, i.e.
// GetEndTimeNumber()-GetStartTimeNumber(), formatted with %f.
// NOTE(review): presumably these are passed to ffmpeg as the -ss and -t
// values — confirm with the caller.
func (s SubUnit) GetFFMPEGCutRange() (string, string) {
	// Keep the millisecond part of the start position. The length is computed
	// from the full-precision start, so a start truncated to whole seconds
	// would move the window earlier and cut off the tail of the unit.
	startMs := s.StartTime.Nanosecond() / int(time.Millisecond)
	start := fmt.Sprintf("%d:%d:%d.%03d", s.StartTime.Hour(), s.StartTime.Minute(), s.StartTime.Second(), startMs)
	length := fmt.Sprintf("%f", s.GetEndTimeNumber()-s.GetStartTimeNumber())
	return start, length
}
11 changes: 11 additions & 0 deletions internal/pkg/util.go
Expand Up @@ -18,6 +18,7 @@ import (
"runtime"
"strconv"
"strings"
"time"
)

// NewHttpClient 新建一个 resty 的对象
Expand Down Expand Up @@ -444,3 +445,13 @@ func WriteStrings2File(desfilePath string, strings []string) error {
}
return nil
}

// Time2Number converts the time-of-day part of inTime (hour, minute, second
// and nanosecond) into a float64: hours*3600 + minutes*60 + seconds plus the
// nanoseconds divided by 1000 three times. The date part of inTime is ignored.
func Time2Number(inTime time.Time) float64 {
	hour, minute, second := inTime.Clock()
	wholeSeconds := float64(hour*60*60) + float64(minute*60) + float64(second)
	fraction := float64(inTime.Nanosecond()) / 1000 / 1000 / 1000

	return wholeSeconds + fraction
}

0 comments on commit 2fb5543

Please sign in to comment.