Skip to content

Commit

Permalink
正在做 vad 时间轴校正功能,添加字幕转 vadinfo 函数
Browse files Browse the repository at this point in the history
Signed-off-by: allan716 <525223688@qq.com>
  • Loading branch information
allanpk716 committed Nov 4, 2021
1 parent 2fb5543 commit b0bd8a4
Show file tree
Hide file tree
Showing 8 changed files with 137 additions and 72 deletions.
23 changes: 17 additions & 6 deletions internal/pkg/ffmpeg_helper/ffmpeg_helper.go
Expand Up @@ -130,22 +130,33 @@ func (f *FFMPEGHelper) GetAudioInfo(audioFileFullPath string) (bool, float64, er
return true, duration, nil
}

// ExportAudioArgsByTimeRange exports one audio segment selected by the given time range.
func (f *FFMPEGHelper) ExportAudioArgsByTimeRange(audioFullPath string, startTimeString, timeLeng, outAudioFullPath string) (string, error) {
// ExportAudioArgsByTimeRange exports one audio segment selected by the given
// time range, e.g. startTimeString "0:1:27" and timeLength "28.2".
// The output file is placed in the directory of audioFullPath and named
// <base>_<start>_<length><ext>; an existing file with that name is removed first.
// On success the first result is the output path. When running ffmpeg fails,
// the second result carries the string returned by execFFMPEG.
func (f *FFMPEGHelper) ExportAudioArgsByTimeRange(audioFullPath string, startTimeString, timeLength string) (string, string, error) {

// ':' and '.' are replaced so that the time values can be embedded in a file name.
outStartTimeString := strings.ReplaceAll(startTimeString, ":", "-")
outStartTimeString = strings.ReplaceAll(outStartTimeString, ".", "#")

outTimeLength := strings.ReplaceAll(timeLength, ".", "#")

// NOTE(review): ReplaceAll removes every occurrence of the extension inside the
// base name, not only the trailing one - strings.TrimSuffix may be what is intended.
frontName := strings.ReplaceAll(filepath.Base(audioFullPath), filepath.Ext(audioFullPath), "")

outAudioName := frontName + "_" + outStartTimeString + "_" + outTimeLength + filepath.Ext(audioFullPath)

var outAudioFullPath = filepath.Join(filepath.Dir(audioFullPath), outAudioName)

// Remove a stale output file from an earlier run before exporting again.
if pkg.IsFile(outAudioFullPath) == true {
err := os.Remove(outAudioFullPath)
if err != nil {
return "", err
return "", "", err
}
}
args := f.getAudioExportArgsByTimeRange(audioFullPath, startTimeString, timeLeng, outAudioFullPath)
args := f.getAudioExportArgsByTimeRange(audioFullPath, startTimeString, timeLength, outAudioFullPath)
execFFMPEG, err := f.execFFMPEG(args)
if err != nil {
return execFFMPEG, err
return "", execFFMPEG, err
}

return "", nil
return outAudioFullPath, "", nil
}

// parseJsonString2GetFFProbeInfo 使用 ffprobe 获取视频的 stream 信息,从中解析出字幕和音频的索引
Expand Down
3 changes: 1 addition & 2 deletions internal/pkg/ffmpeg_helper/ffmpeg_helper_test.go
Expand Up @@ -77,11 +77,10 @@ func TestFFMPEGHelper_ExportAudioArgsByTimeRange(t *testing.T) {
audioFullPath := "C:\\Tmp\\Rick and Morty - S05E10\\英_1.pcm"
startTimeString := "0:1:27"
timeLeng := "28.2"
outAudioFullPath := "C:\\Tmp\\Rick and Morty - S05E10\\英_1_cut.pcm"

f := NewFFMPEGHelper()

timeRange, err := f.ExportAudioArgsByTimeRange(audioFullPath, startTimeString, timeLeng, outAudioFullPath)
_, timeRange, err := f.ExportAudioArgsByTimeRange(audioFullPath, startTimeString, timeLeng)
if err != nil {
println(timeRange)
t.Fatal(err)
Expand Down
47 changes: 39 additions & 8 deletions internal/pkg/sub_timeline_fixer/fixer.go
Expand Up @@ -380,7 +380,8 @@ func (s *SubTimelineFixer) GetOffsetTimeV2(audioInfo vad.AudioInfo, infoSrc *sub
1. 抽取字幕的时间片段的时候,暂定,前 15% 和后 15% 要避开,前奏、主题曲、结尾曲
2. 将整个字幕,抽取连续 5 句对话为一个单元,提取时间片段信息
*/

subUnitList := make([]SubUnit, 0)
oneSubUnit := NewSubUnit()
timeFormat := infoSrc.GetTimeFormat()
for _, oneDialogueEx := range infoSrc.DialoguesEx {

Expand All @@ -393,21 +394,51 @@ func (s *SubTimelineFixer) GetOffsetTimeV2(audioInfo vad.AudioInfo, infoSrc *sub
return false, 0, 0, err
}

oneStart := pkg.Time2Number(oneDialogueExTimeStart)
oneEnd := pkg.Time2Number(oneDialogueExTimeEnd)
oneStart := pkg.Time2SecendNumber(oneDialogueExTimeStart)
//oneEnd := pkg.Time2SecendNumber(oneDialogueExTimeEnd)

if duration*0.15 > oneStart || duration*(1.0-0.15) < oneStart {
if duration*FrontAndEndPer > oneStart || duration*(1.0-FrontAndEndPer) < oneStart {
continue
}
if oneDialogueEx.ChLine == "" {
continue
// TODO V2 版本是时间轴校正,必须带有中文
//if oneDialogueEx.ChLine == "" {
// continue
//}
// 低于 5句对白,则添加
if oneSubUnit.GetDialogueCount() < SubUnitMaxCount {
oneSubUnit.AddAndInsert(oneDialogueExTimeStart, oneDialogueExTimeEnd)
} else {
subUnitList = append(subUnitList, *oneSubUnit)
oneSubUnit = NewSubUnit()
}
}

// 开始针对对白单元进行匹配
for _, subUnit := range subUnitList {
startTimeString, subLeng := subUnit.GetFFMPEGCutRange(ExpandTimeRange)

outAudioFPath, errString, err := s.ffmpegHelper.ExportAudioArgsByTimeRange(audioInfo.FileFullPath, startTimeString, subLeng)
if err != nil {
log_helper.GetLogger().Errorln("ExportAudioArgsByTimeRange", errString, err)
return false, 0, 0, err
}

audioVADInfos, err := vad.GetVADInfoFromAudio(vad.AudioInfo{
FileFullPath: outAudioFPath,
SampleRate: 16000,
BitDepth: 16,
})
if err != nil {
return false, 0, 0, err
}

//baseCorpus = append(baseCorpus, oneDialogueEx.EnLine)
//baseDialogueFilterMap[len(baseCorpus)-1] = index
println(len(audioVADInfos))
}

return false, -1, -1, nil
}

// FixMask is the "-fix" marker string.
// NOTE(review): presumably a suffix for fixed subtitle file names - confirm against callers.
const FixMask = "-fix"
// FrontAndEndPer is the fraction of the total duration that is skipped at both
// the beginning and the end: dialogues starting in those parts are ignored.
const FrontAndEndPer = 0.15
// SubUnitMaxCount is the number of dialogues collected into one SubUnit.
const SubUnitMaxCount = 5
const ExpandTimeRange = 1 // Extra audio to match before and after a subtitle time segment; the unit is minutes, i.e. plus/minus 1 minute.
5 changes: 5 additions & 0 deletions internal/pkg/sub_timeline_fixer/fixer_test.go
Expand Up @@ -424,6 +424,11 @@ func TestSubTimelineFixer_GetOffsetTimeV2(t *testing.T) {
if bok == false {
t.Fatal("DetermineFileTypeFromFile == false")
}
/*
这里发现一个梗,内置的英文字幕导出的时候,有可能需要合并多个 Dialogue,见
internal/pkg/sub_helper/sub_helper.go 中 MergeMultiDialogue4EngSubtitle 的实现
*/
sub_helper.MergeMultiDialogue4EngSubtitle(fileInfo)

got, got1, got2, err := s.GetOffsetTimeV2(tt.args.audioInfo, fileInfo, tt.args.staticLineFileSavePath, tt.args.debugInfoFileSavePath)
if (err != nil) != tt.wantErr {
Expand Down
73 changes: 66 additions & 7 deletions internal/pkg/sub_timeline_fixer/sub_unit.go
Expand Up @@ -4,39 +4,98 @@ import (
"fmt"
"github.com/allanpk716/ChineseSubFinder/internal/pkg"
"github.com/allanpk716/ChineseSubFinder/internal/pkg/vad"
"math"
"time"
)

// SubUnit groups several dialogues into one unit, together with the VAD
// samples derived from their time ranges.
type SubUnit struct {
// StartTime is assigned by Add / AddAndInsert while the unit's start is still zero.
StartTime time.Time
// EndTime is the end time of the most recently added dialogue.
EndTime time.Time
// vadList holds the VAD samples appended by Add / AddAndInsert.
vadList []vad.VADInfo
// subCount is the number of dialogues added so far.
subCount int
}

// NewSubUnit returns a new SubUnit with an empty, non-nil VAD list and a
// dialogue count of zero.
func NewSubUnit() *SubUnit {
return &SubUnit{
vadList: make([]vad.VADInfo, 0),
vadList: make([]vad.VADInfo, 0),
subCount: 0,
}
}

// Add appends one dialogue to the unit as two VAD samples: an active sample
// at the dialogue's start time and an inactive sample at its end time.
//
// StartTime is assigned while the unit's start is still zero; EndTime always
// follows the end of the dialogue added last.
func (s *SubUnit) Add(oneSubStartTime, oneSubEndTime time.Time) {
	if s.GetStartTimeNumber() == 0 {
		s.StartTime = oneSubStartTime
	}
	s.EndTime = oneSubEndTime

	// Speech is treated as active from the start of each dialogue until its
	// end. The active sample therefore has to carry the start of the dialogue
	// being added, not the start of the whole unit: the unit's start plus the
	// distance from the unit's start to this dialogue.
	dialogueStart := s.GetStartTimeNumber() + oneSubStartTime.Sub(s.StartTime).Seconds()
	dialogueEnd := s.GetEndTimeNumber()

	// Seconds are converted to nanoseconds for time.Duration.
	s.vadList = append(s.vadList,
		*vad.NewVADInfoBase(true, time.Duration(dialogueStart*math.Pow10(9))),
		*vad.NewVADInfoBase(false, time.Duration(dialogueEnd*math.Pow10(9))),
	)

	s.subCount++
}

// AddAndInsert appends one dialogue to the unit as a dense run of active VAD
// samples, one per sampling window, and first fills the gap between the
// previously added dialogue and this one with inactive samples.
//
// StartTime is assigned while the unit's start is still zero; EndTime always
// follows the end of the dialogue added last.
func (s *SubUnit) AddAndInsert(oneSubStartTime, oneSubEndTime time.Time) {
	// Sampling step in seconds (vad.FrameDuration divided by 1000).
	perWindows := float64(vad.FrameDuration) / 1000

	dialogueStart := pkg.Time2SecendNumber(oneSubStartTime)
	dialogueEnd := pkg.Time2SecendNumber(oneSubEndTime)

	// Not the first dialogue: the silence between the previous dialogue's end
	// and this dialogue's start is filled with inactive samples.
	if len(s.vadList) > 0 {
		previousEnd := s.GetEndTimeNumber()
		gap := dialogueStart - previousEnd
		for i := 0.0; i < gap; i += perWindows {
			s.vadList = append(s.vadList, *vad.NewVADInfoBase(false, time.Duration((previousEnd+i)*math.Pow10(9))))
		}
	}

	if s.GetStartTimeNumber() == 0 {
		s.StartTime = oneSubStartTime
	}
	s.EndTime = oneSubEndTime

	// The active samples cover this dialogue, so they are timed from this
	// dialogue's own start rather than from the start of the whole unit.
	speech := dialogueEnd - dialogueStart
	for i := 0.0; i < speech; i += perWindows {
		s.vadList = append(s.vadList, *vad.NewVADInfoBase(true, time.Duration((dialogueStart+i)*math.Pow10(9))))
	}

	s.subCount++
}

// GetDialogueCount reports how many dialogues have been added to this unit.
func (s SubUnit) GetDialogueCount() int {
	count := s.subCount
	return count
}

// GetStartTimeNumber returns the start time of this unit, in seconds.
func (s SubUnit) GetStartTimeNumber() float64 {
return pkg.Time2Number(s.StartTime)
return pkg.Time2SecendNumber(s.StartTime)
}

// GetEndTimeNumber returns the end time of this unit, in seconds.
func (s SubUnit) GetEndTimeNumber() float64 {
return pkg.Time2Number(s.EndTime)
return pkg.Time2SecendNumber(s.EndTime)
}

func (s SubUnit) GetFFMPEGCutRange() (string, string) {
return fmt.Sprintf("%d:%d:%d", s.StartTime.Hour(), s.StartTime.Minute(), s.StartTime.Second()),
fmt.Sprintf("%f", s.GetEndTimeNumber()-s.GetStartTimeNumber())
// GetTimelineRange returns the length from the unit's start to its end, in seconds.
func (s SubUnit) GetTimelineRange() float64 {
return s.GetEndTimeNumber() - s.GetStartTimeNumber()
}

// GetFFMPEGCutRange builds the two FFMPEG cut arguments for this unit: the
// start position formatted as "H:M:S.mmm" and the length of the cut in seconds.
//
// The window is widened by expandTimeRange minutes on both sides of the unit.
// When the widened window would begin before the start of the timeline, the
// start is clamped to 0:0:0.000. The length is always measured from the
// (possibly clamped) start to expandTimeRange minutes past the unit's end.
func (s SubUnit) GetFFMPEGCutRange(expandTimeRange int) (string, string) {
	expandSeconds := float64(expandTimeRange) * 60

	// The cut has to begin BEFORE the unit, so the margin is subtracted.
	cutStartSeconds := s.GetStartTimeNumber() - expandSeconds

	// The zero time.Time formats as 0:0:0.000 and is used when clamping.
	var tmpStartTime time.Time
	if cutStartSeconds < 0 {
		cutStartSeconds = 0
	} else {
		tmpStartTime = s.StartTime.Add(-time.Duration(expandTimeRange) * time.Minute)
	}

	// Unit length, plus the margin after the end, plus whatever was actually
	// gained in front of the unit (less than the full margin when clamped).
	cutLength := s.GetTimelineRange() + expandSeconds + (s.GetStartTimeNumber() - cutStartSeconds)

	// Milliseconds are zero padded: 5 ms has to be printed as ".005", because
	// ".5" would be read as half a second.
	milliseconds := tmpStartTime.Nanosecond() / int(time.Millisecond)

	return fmt.Sprintf("%d:%d:%d.%03d", tmpStartTime.Hour(), tmpStartTime.Minute(), tmpStartTime.Second(), milliseconds),
		fmt.Sprintf("%f", cutLength)
}
6 changes: 5 additions & 1 deletion internal/pkg/util.go
Expand Up @@ -446,7 +446,7 @@ func WriteStrings2File(desfilePath string, strings []string) error {
return nil
}

func Time2Number(inTime time.Time) float64 {
func Time2SecendNumber(inTime time.Time) float64 {
outSecend := 0.0
outSecend += float64(inTime.Hour() * 60 * 60)
outSecend += float64(inTime.Minute() * 60)
Expand All @@ -455,3 +455,7 @@ func Time2Number(inTime time.Time) float64 {

return outSecend
}

// Time2Duration converts the time-of-day part of inTime (hours, minutes,
// seconds and nanoseconds) into the time.Duration elapsed since 0:00:00.
// The date part of inTime is ignored.
//
// The value is assembled from the individual clock fields with integer
// arithmetic: casting a float count of seconds straight to time.Duration
// would be read as nanoseconds and would drop the fractional part.
func Time2Duration(inTime time.Time) time.Duration {
	return time.Duration(inTime.Hour())*time.Hour +
		time.Duration(inTime.Minute())*time.Minute +
		time.Duration(inTime.Second())*time.Second +
		time.Duration(inTime.Nanosecond())
}
19 changes: 0 additions & 19 deletions internal/pkg/vad/vad_helper.go
Expand Up @@ -4,7 +4,6 @@ import (
"bufio"
"errors"
"fmt"
"github.com/allanpk716/ChineseSubFinder/internal/types/subparser"
webRTCVAD "github.com/baabaaox/go-webrtcvad"
"io"
"os"
Expand Down Expand Up @@ -84,21 +83,3 @@ func GetVADInfoFromAudio(audioInfo AudioInfo) ([]VADInfo, error) {

return vadInfos, nil
}

// GetVADInfoFromSubtitle is meant to analyse a subtitle file (for now an
// external subtitle is assumed) and derive VAD information from it. According
// to the original author it does not appear to support concurrent use and
// should be called from a single thread only.
//
// At present it only prints the subtitle's time format and returns an empty,
// non-nil list; startTime and endIndex are not used.
func GetVADInfoFromSubtitle(subFileInfo *subparser.FileInfo, startTime, endIndex int) ([]VADInfo, error) {
	result := make([]VADInfo, 0)

	format := subFileInfo.GetTimeFormat()
	println(format)

	for _, dialogue := range subFileInfo.DialoguesEx {
		// External subtitles are expected to carry Chinese text; lines without
		// it are skipped. Lines with it are not processed yet either.
		if dialogue.ChLine != "" {
			continue
		}
	}

	return result, nil
}
33 changes: 4 additions & 29 deletions internal/pkg/vad/vad_info.go
Expand Up @@ -20,36 +20,11 @@ func NewVADInfo(frame, offset int, active bool, nowTime time.Duration) *VADInfo
}
}

// GetTimeRange returns a new slice holding the entries of inVADInfos whose
// Time lies within [starttime, starttime+timeRange], both bounds inclusive.
// The result is never nil.
//
// NOTE(review): the original comment describes the range in milliseconds, but
// the bounds are built with a plain time.Duration conversion, which counts
// nanoseconds - confirm the intended unit with the callers.
func GetTimeRange(inVADInfos []VADInfo, starttime, timeRange int) []VADInfo {
	lower, upper := time.Duration(starttime), time.Duration(starttime+timeRange)

	selected := make([]VADInfo, 0)
	for i := range inVADInfos {
		if at := inVADInfos[i].Time; at >= lower && at <= upper {
			selected = append(selected, inVADInfos[i])
		}
	}

	return selected
}

// InsertVADInfo is tentative and may never be implemented. The input holds only the points where the VAD state changes, without the continuous VAD samples in between; this function is meant to interpolate them.
func InsertVADInfo(inVADInfos []VADInfo, duration int) []VADInfo {

var outVADInfos = make([]VADInfo, 0)

// Find the first entry; everything before its start time is to be marked as VAD false.
if inVADInfos[0].Time != 0 {

// NewVADInfoBase returns a VADInfo with only the active flag and the
// timestamp set.
func NewVADInfoBase(active bool, nowTime time.Duration) *VADInfo {
return &VADInfo{
Active: active,
Time: nowTime,
}

return outVADInfos
}

const (
Expand Down

0 comments on commit b0bd8a4

Please sign in to comment.