Skip to content

Commit

Permalink
正在调试 V2 时间轴修正
Browse files Browse the repository at this point in the history
Signed-off-by: allan716 <525223688@qq.com>
  • Loading branch information
allanpk716 committed Nov 5, 2021
1 parent f6906c6 commit d37a58b
Show file tree
Hide file tree
Showing 7 changed files with 147 additions and 36 deletions.
42 changes: 37 additions & 5 deletions internal/pkg/ffmpeg_helper/ffmpeg_helper.go
Expand Up @@ -130,8 +130,8 @@ func (f *FFMPEGHelper) GetAudioInfo(audioFileFullPath string) (bool, float64, er
return true, duration, nil
}

// ExportAudioArgsByTimeRange 根据输入的时间轴导出音频分段信息 "0:1:27" "28.2"
func (f *FFMPEGHelper) ExportAudioArgsByTimeRange(audioFullPath string, startTimeString, timeLength string) (string, string, error) {
// ExportAudioAndSubArgsByTimeRange 根据输入的时间轴导出音频分段信息 "0:1:27" "28.2"
func (f *FFMPEGHelper) ExportAudioAndSubArgsByTimeRange(audioFullPath, subFullPath string, startTimeString, timeLength string) (string, string, string, error) {

outStartTimeString := strings.ReplaceAll(startTimeString, ":", "-")
outStartTimeString = strings.ReplaceAll(outStartTimeString, ".", "#")
Expand All @@ -141,22 +141,37 @@ func (f *FFMPEGHelper) ExportAudioArgsByTimeRange(audioFullPath string, startTim
frontName := strings.ReplaceAll(filepath.Base(audioFullPath), filepath.Ext(audioFullPath), "")

outAudioName := frontName + "_" + outStartTimeString + "_" + outTimeLength + filepath.Ext(audioFullPath)
outSubName := frontName + "_" + outStartTimeString + "_" + outTimeLength + common.SubExtSRT

var outAudioFullPath = filepath.Join(filepath.Dir(audioFullPath), outAudioName)
var outSubFullPath = filepath.Join(filepath.Dir(audioFullPath), outSubName)

// 导出音频
if my_util.IsFile(outAudioFullPath) == true {
err := os.Remove(outAudioFullPath)
if err != nil {
return "", "", err
return "", "", "", err
}
}
args := f.getAudioExportArgsByTimeRange(audioFullPath, startTimeString, timeLength, outAudioFullPath)
execFFMPEG, err := f.execFFMPEG(args)
if err != nil {
return "", execFFMPEG, err
return "", "", execFFMPEG, err
}
// 导出字幕
if my_util.IsFile(outSubFullPath) == true {
err := os.Remove(outSubFullPath)
if err != nil {
return "", "", "", err
}
}
args = f.getSubExportArgsByTimeRange(subFullPath, startTimeString, timeLength, outSubFullPath)
execFFMPEG, err = f.execFFMPEG(args)
if err != nil {
return "", "", execFFMPEG, err
}

return outAudioFullPath, "", nil
return outAudioFullPath, outSubFullPath, "", nil
}

// parseJsonString2GetFFProbeInfo 使用 ffprobe 获取视频的 stream 信息,从中解析出字幕和音频的索引
Expand Down Expand Up @@ -372,6 +387,23 @@ func (f *FFMPEGHelper) getAudioExportArgsByTimeRange(audioFullPath string, start
return audioArgs
}

// getSubExportArgsByTimeRange assembles the ffmpeg argument list used to cut a
// time window out of a subtitle file.
//
// The arguments correspond to a command line of the shape:
//
//	ffmpeg -i <subFullPath> -ss <startTimeString> -t <timeLength> <outSubFullPath>
//
// startTimeString and timeLength are placed after -ss and -t unchanged; this
// function performs no validation or reformatting of them.
func (f *FFMPEGHelper) getSubExportArgsByTimeRange(subFullPath string, startTimeString, timeLength, outSubFullPath string) []string {
	return []string{
		"-i", subFullPath,
		"-ss", startTimeString,
		"-t", timeLength,
		outSubFullPath,
	}
}

// addSubMapArg 构建字幕的导出参数
func (f *FFMPEGHelper) addSubMapArg(subArgs *[]string, index int, subSaveFullPath string) {
*subArgs = append(*subArgs, "-map")
Expand Down
3 changes: 2 additions & 1 deletion internal/pkg/ffmpeg_helper/ffmpeg_helper_test.go
Expand Up @@ -75,12 +75,13 @@ func Test_parseJsonString2GetFFMPEGInfo(t *testing.T) {
func TestFFMPEGHelper_ExportAudioArgsByTimeRange(t *testing.T) {

audioFullPath := "C:\\Tmp\\Rick and Morty - S05E10\\英_1.pcm"
subFullPath := "C:\\Tmp\\Rick and Morty - S05E10\\英_2.srt"
startTimeString := "0:1:27"
timeLeng := "28.2"

f := NewFFMPEGHelper()

_, timeRange, err := f.ExportAudioArgsByTimeRange(audioFullPath, startTimeString, timeLeng)
_, _, timeRange, err := f.ExportAudioAndSubArgsByTimeRange(audioFullPath, subFullPath, startTimeString, timeLeng)
if err != nil {
println(timeRange)
t.Fatal(err)
Expand Down
3 changes: 2 additions & 1 deletion internal/pkg/my_util/util.go
Expand Up @@ -10,6 +10,7 @@ import (
"github.com/go-resty/resty/v2"
"io"
"io/ioutil"
"math"
"net/http"
"os"
"os/exec"
Expand Down Expand Up @@ -457,5 +458,5 @@ func Time2SecendNumber(inTime time.Time) float64 {
}

// Time2Duration converts the float64 value that Time2SecendNumber produces for
// inTime into a time.Duration.
//
// time.Duration counts nanoseconds, so the value is scaled by time.Second
// before the conversion. Converting it directly would turn e.g. 87.5 into
// 87 nanoseconds instead of 87.5 seconds.
// NOTE(review): assumes Time2SecendNumber returns seconds — confirm against its definition.
func Time2Duration(inTime time.Time) time.Duration {
	return time.Duration(Time2SecendNumber(inTime) * float64(time.Second))
}
52 changes: 44 additions & 8 deletions internal/pkg/sub_timeline_fixer/fixer.go
Expand Up @@ -14,6 +14,7 @@ import (
"github.com/mndrix/tukey"
"gonum.org/v1/gonum/mat"
"os"
"path/filepath"
"strings"
"time"
)
Expand Down Expand Up @@ -319,7 +320,7 @@ func (s *SubTimelineFixer) GetOffsetTimeV1(infoBase, infoSrc *subparser.FileInfo
// 不为空的时候,生成调试文件
if staticLineFileSavePath != "" {
//staticLineFileSavePath = "bar.html"
err = SaveStaticLine(staticLineFileSavePath, infoBase.Name, infoSrc.Name,
err = SaveStaticLineV1(staticLineFileSavePath, infoBase.Name, infoSrc.Name,
per, oldMean, oldSd, newMean, newSd, xAxis,
startDiffTimeLineData, endDiffTimeLineData)
if err != nil {
Expand Down Expand Up @@ -415,11 +416,12 @@ func (s *SubTimelineFixer) GetOffsetTimeV2(audioInfo vad.AudioInfo, infoSrc *sub

// 开始针对对白单元进行匹配
for _, subUnit := range subUnitList {
startTimeString, subLeng := subUnit.GetFFMPEGCutRange(ExpandTimeRange)

outAudioFPath, errString, err := s.ffmpegHelper.ExportAudioArgsByTimeRange(audioInfo.FileFullPath, startTimeString, subLeng)
startTimeString, subLength := subUnit.GetFFMPEGCutRange(ExpandTimeRange)

outAudioFPath, _, errString, err := s.ffmpegHelper.ExportAudioAndSubArgsByTimeRange(audioInfo.FileFullPath, infoSrc.FileFullPath, startTimeString, subLength)
if err != nil {
log_helper.GetLogger().Errorln("ExportAudioArgsByTimeRange", errString, err)
log_helper.GetLogger().Errorln("ExportAudioAndSubArgsByTimeRange", errString, err)
return false, 0, 0, err
}

Expand All @@ -432,13 +434,47 @@ func (s *SubTimelineFixer) GetOffsetTimeV2(audioInfo vad.AudioInfo, infoSrc *sub
return false, 0, 0, err
}

println(len(audioVADInfos))
var subTimeLineData = make([]opts.LineData, 0)
var subxAxis = make([]string, 0)
var audioTimeLineData = make([]opts.LineData, 0)
var audioxAxis = make([]string, 0)

for _, vadInfo := range subUnit.VADList {

subTimeLineData = append(subTimeLineData, opts.LineData{Value: vadInfo.Active})
baseTime := subUnit.GetBaseTimeNumber()
subxAxis = append(subxAxis, fmt.Sprintf("%f", vadInfo.Time.Seconds()-baseTime))
}

outDir := filepath.Dir(outAudioFPath)
outBaseName := filepath.Base(outAudioFPath)
outBaseNameWithOutExt := strings.ReplaceAll(outBaseName, filepath.Ext(outBaseName), "")

subVADStaticLineFullPath := filepath.Join(outDir, outBaseNameWithOutExt+"_sub.html")

err = SaveStaticLineV2("Sub", subVADStaticLineFullPath, subxAxis, subTimeLineData)
if err != nil {
return false, 0, 0, err
}

for _, vadInfo := range audioVADInfos {

audioTimeLineData = append(audioTimeLineData, opts.LineData{Value: vadInfo.Active})
audioxAxis = append(audioxAxis, fmt.Sprintf("%f", vadInfo.Time.Seconds()))
}

audioVADStaticLineFullPath := filepath.Join(outDir, outBaseNameWithOutExt+"_audio.html")

err = SaveStaticLineV2("Audio", audioVADStaticLineFullPath, audioxAxis, audioTimeLineData)
if err != nil {
return false, 0, 0, err
}
}

return false, -1, -1, nil
}

const FixMask = "-fix"
const FrontAndEndPer = 0.15
const SubUnitMaxCount = 5
const ExpandTimeRange = 1 // 从字幕的时间轴片段需要向前和向后多匹配一部分的音频,这里定义的就是这个 range 以分钟为单位, 正负 1 分钟
const FrontAndEndPer = 0.15 // the first 15% and the last 15% are both excluded from recognition
const SubUnitMaxCount = 10 // NOTE(review): presumably the number of dialogue lines grouped into one sub unit; the previous comment said five, which does not match this value — confirm
const ExpandTimeRange = 0 // how much extra audio to match before and after a subtitle timeline segment, in minutes (plus/minus this many minutes)
28 changes: 27 additions & 1 deletion internal/pkg/sub_timeline_fixer/static_line.go
Expand Up @@ -7,7 +7,7 @@ import (
"os"
)

func SaveStaticLine(saveFPath string, infoBaseName, infoSrcName string,
func SaveStaticLineV1(saveFPath string, infoBaseName, infoSrcName string,
per, oldMean, OldSd, NewMean, NewSd float64, xAxis []string,
startDiffTimeLineData, endDiffTimeLineData []opts.LineData) error {
// 1.New 一个条形图对象
Expand Down Expand Up @@ -37,3 +37,29 @@ func SaveStaticLine(saveFPath string, infoBaseName, infoSrcName string,

return nil
}

// SaveStaticLineV2 renders a single-series line chart to an HTML file at
// saveFPath.
//
// name is the prefix of both the chart title and the series label
// ("<name> VAD"). xAxis supplies the x-axis labels and timeLineData the
// values of the series.
//
// It returns the first error encountered while creating the file, rendering
// the chart, or closing the file.
func SaveStaticLineV2(name, saveFPath string, xAxis []string, timeLineData []opts.LineData) error {
	label := name + " VAD"

	// 1. Create the line chart object.
	line := charts.NewLine()
	// 2. Set the title.
	line.SetGlobalOptions(charts.WithTitleOpts(opts.Title{
		Title: label,
	}))
	// 3. Attach the x axis and the data series.
	line.SetXAxis(xAxis).
		AddSeries(label, timeLineData)

	// 4. Render to HTML. The create error is checked before the handle is
	// used, so a failed Create never reaches Render or Close.
	outfile, err := os.Create(saveFPath)
	if err != nil {
		return err
	}
	if err = line.Render(outfile); err != nil {
		// The render failure is the error worth reporting; Close is best effort here.
		_ = outfile.Close()
		return err
	}
	// Report a failed Close instead of discarding it: the file was just
	// written, and Close is the last point at which an I/O error can surface.
	return outfile.Close()
}
53 changes: 34 additions & 19 deletions internal/pkg/sub_timeline_fixer/sub_unit.go
Expand Up @@ -9,30 +9,35 @@ import (
)

type SubUnit struct {
StartTime time.Time
EndTime time.Time
vadList []vad.VADInfo
baseTime time.Time // 这个是基础的时间,后续需要减去这个,不然与导出的片段字幕去对比会有一个起始时间的偏差
StartTime time.Time // 这个时间会减去 baseTime 再存储
EndTime time.Time // 这个时间会减去 baseTime 再存储
VADList []vad.VADInfo
subCount int
firstAdd bool
}

func NewSubUnit() *SubUnit {
return &SubUnit{
vadList: make([]vad.VADInfo, 0),
VADList: make([]vad.VADInfo, 0),
subCount: 0,
firstAdd: false,
}
}

// Add 添加一句对白进来
func (s *SubUnit) Add(oneSubStartTime, oneSubEndTime time.Time) {

if s.GetStartTimeNumber() == 0 {
s.StartTime = oneSubStartTime
if s.firstAdd == false {
s.baseTime = oneSubStartTime
s.StartTime = oneSubStartTime.Add(-my_util.Time2Duration(s.baseTime))
s.firstAdd = true
}
s.EndTime = oneSubEndTime
s.EndTime = oneSubEndTime.Add(-my_util.Time2Duration(s.baseTime))
// 每一句对白的开始就人为 VAD active 是 1,直到结束,才是 0
s.vadList = append(s.vadList, *vad.NewVADInfoBase(true, time.Duration(s.GetStartTimeNumber()*math.Pow10(9))))
s.VADList = append(s.VADList, *vad.NewVADInfoBase(true, time.Duration(s.GetStartTimeNumber()*math.Pow10(9))))

s.vadList = append(s.vadList, *vad.NewVADInfoBase(false, time.Duration(s.GetEndTimeNumber()*math.Pow10(9))))
s.VADList = append(s.VADList, *vad.NewVADInfoBase(false, time.Duration(s.GetEndTimeNumber()*math.Pow10(9))))

s.subCount++
}
Expand All @@ -42,24 +47,30 @@ func (s *SubUnit) AddAndInsert(oneSubStartTime, oneSubEndTime time.Time) {

perWindows := float64(vad.FrameDuration) / 1000
// 不是第一次添加,那么就需要把两句对白中间间隔的 active == false 的插入,插入间隙
if len(s.vadList) > 0 {
needAddRange := my_util.Time2SecendNumber(oneSubStartTime) - s.GetEndTimeNumber()
if len(s.VADList) > 0 {
dd := my_util.Time2Duration(s.baseTime)
tmpSubStartTime := oneSubStartTime.Add(-dd)
needAddRange := my_util.Time2SecendNumber(tmpSubStartTime) - s.GetEndTimeNumber()
for i := 0.0; i < needAddRange; {

s.vadList = append(s.vadList, *vad.NewVADInfoBase(false, time.Duration((s.GetEndTimeNumber()+i)*math.Pow10(9))))
s.VADList = append(s.VADList, *vad.NewVADInfoBase(false, time.Duration((s.GetEndTimeNumber()+i)*math.Pow10(9))))
i += perWindows
}
}

if s.GetStartTimeNumber() == 0 {
s.StartTime = oneSubStartTime
if s.firstAdd == false {
s.baseTime = oneSubStartTime
dd := my_util.Time2Duration(s.baseTime)
s.StartTime = oneSubStartTime.Add(-dd)
s.firstAdd = true
}
s.EndTime = oneSubEndTime

s.EndTime = oneSubEndTime.Add(-my_util.Time2Duration(s.baseTime))

needAddRange := my_util.Time2SecendNumber(oneSubEndTime) - my_util.Time2SecendNumber(oneSubStartTime)
for i := 0.0; i < needAddRange; {

s.vadList = append(s.vadList, *vad.NewVADInfoBase(true, time.Duration((s.GetStartTimeNumber()+i)*math.Pow10(9))))
s.VADList = append(s.VADList, *vad.NewVADInfoBase(true, time.Duration((s.GetStartTimeNumber()+i)*math.Pow10(9))))
i += perWindows
}

Expand All @@ -73,27 +84,31 @@ func (s SubUnit) GetDialogueCount() int {

// GetStartTimeNumber 获取这个单元的起始时间,单位是秒
func (s SubUnit) GetStartTimeNumber() float64 {
return my_util.Time2SecendNumber(s.StartTime)
return my_util.Time2SecendNumber(s.StartTime.Add(my_util.Time2Duration(s.baseTime)))
}

// GetEndTimeNumber 获取这个单元的结束时间,单位是秒
func (s SubUnit) GetEndTimeNumber() float64 {
return my_util.Time2SecendNumber(s.EndTime)
return my_util.Time2SecendNumber(s.EndTime.Add(my_util.Time2Duration(s.baseTime)))
}

// GetTimelineRange returns the length of this unit from start to end: the
// result of GetEndTimeNumber minus the result of GetStartTimeNumber. Both
// are documented as seconds, so the difference is in seconds as well.
func (s SubUnit) GetTimelineRange() float64 {
	finish, begin := s.GetEndTimeNumber(), s.GetStartTimeNumber()
	return finish - begin
}

// GetBaseTimeNumber returns the unit's baseTime converted to a float64 by
// my_util.Time2SecendNumber.
func (s SubUnit) GetBaseTimeNumber() float64 {
	base := s.baseTime
	return my_util.Time2SecendNumber(base)
}

// GetFFMPEGCutRange 这里会生成导出 FFMPEG 的参数字段,起始时间和结束的时间长度
func (s SubUnit) GetFFMPEGCutRange(expandTimeRange int) (string, string) {

var tmpStartTime time.Time
if s.GetStartTimeNumber()-float64(expandTimeRange)*60 < 0 {
tmpStartTime = time.Time{}
} else {
tmpStartTime = s.StartTime.Add(time.Duration(expandTimeRange) * time.Minute)
tmpStartTime = s.StartTime.Add(time.Duration(expandTimeRange) * time.Minute).Add(my_util.Time2Duration(s.baseTime))
}

return fmt.Sprintf("%d:%d:%d.%d", tmpStartTime.Hour(), tmpStartTime.Minute(), tmpStartTime.Second(), tmpStartTime.Nanosecond()/1000/1000),
Expand Down
2 changes: 1 addition & 1 deletion internal/pkg/vad/vad_info.go
Expand Up @@ -29,7 +29,7 @@ func NewVADInfoBase(active bool, nowTime time.Duration) *VADInfo {

const (
// Mode vad mode,VAD 的模式
Mode = 2
Mode = 1
// FrameDuration frame duration,分析的时间窗口
FrameDuration = 10
)

0 comments on commit d37a58b

Please sign in to comment.