Skip to content

Commit

Permalink
GetOffsetTimeV2 能用版本,校准的细节有待优化
Browse files Browse the repository at this point in the history
Signed-off-by: allan716 <525223688@qq.com>
  • Loading branch information
allanpk716 committed Nov 11, 2021
1 parent 5b96918 commit d5f2dcd
Show file tree
Hide file tree
Showing 4 changed files with 58 additions and 27 deletions.
3 changes: 3 additions & 0 deletions internal/pkg/regex_things/regex_things.go
Expand Up @@ -24,3 +24,6 @@ const regStringSRT = `(\d+)\n([\d:,]+)\s+-{2}\>\s+([\d:,]+)\n([\s\S]*?(\n{2}|$))

var ReMatchDialogueSRT = regexp.MustCompile(regStringSRT)
var ReMatchDialogueASS = regexp.MustCompile(regStringASS)

// RegOneSeasonSubFolderNameMatch matches the name of the whole-season subtitle
// cache folder kept under each video folder, e.g. Sub_S1E0 or Sub_S10E0.
// The season number may span several digits, hence `\d+` instead of a single `\d`.
var RegOneSeasonSubFolderNameMatch = regexp.MustCompile(`(?m)^Sub_S\d+E0`)
24 changes: 16 additions & 8 deletions internal/pkg/sub_helper/sub_helper.go
Expand Up @@ -7,16 +7,18 @@ import (
"github.com/allanpk716/ChineseSubFinder/internal/pkg/language"
"github.com/allanpk716/ChineseSubFinder/internal/pkg/log_helper"
"github.com/allanpk716/ChineseSubFinder/internal/pkg/my_util"
"github.com/allanpk716/ChineseSubFinder/internal/pkg/regex_things"
"github.com/allanpk716/ChineseSubFinder/internal/pkg/sub_parser_hub"
"github.com/allanpk716/ChineseSubFinder/internal/types/subparser"
"github.com/allanpk716/ChineseSubFinder/internal/types/supplier"
"github.com/go-rod/rod/lib/utils"
"io/ioutil"
"math"
"os"
"path/filepath"
"regexp"
"strconv"
"strings"
"time"
)

// OrganizeDlSubFiles 需要从汇总来是网站字幕中,解压对应的压缩包中的字幕出来
Expand Down Expand Up @@ -207,7 +209,7 @@ func SearchMatchedSubFileByDir(dir string) ([]string, error) {
fullPath := dir + pathSep + curFile.Name()
if curFile.IsDir() {
// 需要排除 Sub_S1E0、Sub_S2E0 这样的整季的字幕文件夹,这里仅仅是缓存,不会被加载的
matched := regOneSeasonSubFolderNameMatch.FindAllStringSubmatch(curFile.Name(), -1)
matched := regex_things.RegOneSeasonSubFolderNameMatch.FindAllStringSubmatch(curFile.Name(), -1)
if len(matched) > 0 {
continue
}
Expand Down Expand Up @@ -334,7 +336,7 @@ func DeleteOneSeasonSubCacheFolder(seriesDir string) error {
pathSep := string(os.PathSeparator)
for _, curFile := range files {
if curFile.IsDir() == true {
matched := regOneSeasonSubFolderNameMatch.FindAllStringSubmatch(curFile.Name(), -1)
matched := regex_things.RegOneSeasonSubFolderNameMatch.FindAllStringSubmatch(curFile.Name(), -1)
if matched == nil || len(matched) < 1 {
continue
}
Expand Down Expand Up @@ -376,6 +378,11 @@ func MergeMultiDialogue4EngSubtitle(inSubParser *subparser.FileInfo) {
目前看到的情况是前者的抽样率太低,需要使用后者的逻辑
*/
func GetVADINfoFromSub(infoSrc *subparser.FileInfo, FrontAndEndPer float64, SubUnitMaxCount int) ([]SubUnit, error) {
	// No per-dialogue shift is wanted here, so delegate to the offset-aware
	// variant with a zero offset.
	const noOffsetTime = 0.0
	return GetVADINfoFromSubNeedOffsetTime(infoSrc, FrontAndEndPer, SubUnitMaxCount, noOffsetTime)
}

// GetVADINfoFromSubNeedOffsetTime 跟上面的函数功能一致,只不过这里可以加一个每一句话固定的偏移时间
func GetVADINfoFromSubNeedOffsetTime(infoSrc *subparser.FileInfo, FrontAndEndPer float64, SubUnitMaxCount int, offsetTime float64) ([]SubUnit, error) {
if SubUnitMaxCount < 0 {
SubUnitMaxCount = 0
}
Expand Down Expand Up @@ -414,7 +421,12 @@ func GetVADINfoFromSub(infoSrc *subparser.FileInfo, FrontAndEndPer float64, SubU
}
// Fewer than SubUnitMaxCount dialogues in the current unit: add this one
if srcOneSubUnit.GetDialogueCount() < SubUnitMaxCount {
srcOneSubUnit.AddAndInsert(oneDialogueExTimeStart, oneDialogueExTimeEnd, i)
// 算上偏移
offsetTimeDuration := time.Duration(offsetTime * math.Pow10(9))
oneDialogueExTimeStart = oneDialogueExTimeStart.Add(offsetTimeDuration)
oneDialogueExTimeEnd = oneDialogueExTimeEnd.Add(offsetTimeDuration)
// 如果没有偏移就是 0
srcOneSubUnit.AddAndInsert(oneDialogueExTimeStart, oneDialogueExTimeEnd)
} else {
srcSubUnitList = append(srcSubUnitList, *srcOneSubUnit)
srcOneSubUnit = NewSubUnit()
Expand All @@ -427,7 +439,3 @@ func GetVADINfoFromSub(infoSrc *subparser.FileInfo, FrontAndEndPer float64, SubU

return srcSubUnitList, nil
}

var (
	// regOneSeasonSubFolderNameMatch matches whole-season subtitle cache folder
	// names such as Sub_S1E0 or Sub_S10E0; `\d+` allows multi-digit seasons.
	regOneSeasonSubFolderNameMatch = regexp.MustCompile(`(?m)^Sub_S\d+E0`)
)
10 changes: 6 additions & 4 deletions internal/pkg/sub_helper/sub_unit.go
Expand Up @@ -28,8 +28,8 @@ func NewSubUnit() *SubUnit {
}
}

// AddAndInsert 添加一句对白进来,并且填充中间的空白,间隔 10ms
func (s *SubUnit) AddAndInsert(oneSubStartTime, oneSubEndTime time.Time, index int) {
// AddAndInsert 添加一句对白进来,并且填充中间的空白,间隔 10ms。传入的时间是真实的时间
func (s *SubUnit) AddAndInsert(oneSubStartTime, oneSubEndTime time.Time) {

/*
这里有个比较有意思的细节,字幕拆分到 dialogue 的时候,可能连续的多个 dialogue 是时间轴连续的
Expand Down Expand Up @@ -193,7 +193,7 @@ func (s SubUnit) GetOffsetTimeNumber() float64 {

// GetFFMPEGCutRangeString 这里会生成导出 FFMPEG 的参数字段,起始时间和结束的时间长度
// 以当前的 VAD 信息为基准,正负 expandTimeRange(秒为单位) 来生成截取的片段时间轴信息
func (s SubUnit) GetFFMPEGCutRangeString(expandTimeRange float64) (string, string) {
func (s SubUnit) GetFFMPEGCutRangeString(expandTimeRange float64) (string, string, time.Time, float64) {

var tmpStartTime time.Time
if s.GetStartTimeNumber(true)-expandTimeRange < 0 {
Expand All @@ -205,7 +205,9 @@ func (s SubUnit) GetFFMPEGCutRangeString(expandTimeRange float64) (string, strin
}

return fmt.Sprintf("%d:%d:%d.%d", tmpStartTime.Hour(), tmpStartTime.Minute(), tmpStartTime.Second(), tmpStartTime.Nanosecond()/1000/1000),
fmt.Sprintf("%f", s.GetTimelineRange()+expandTimeRange)
fmt.Sprintf("%f", s.GetTimelineRange()+expandTimeRange),
tmpStartTime,
s.GetTimelineRange() + expandTimeRange
}

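// GetExpandRangeIndex exports the expanded start and end times; the extra parameters only apply to the whole subtitle range and are not applicable to a partial range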
Expand Down
48 changes: 33 additions & 15 deletions internal/pkg/sub_timeline_fixer/fixer.go
Expand Up @@ -380,16 +380,16 @@ func (s *SubTimelineFixer) GetOffsetTimeV2(infoBase, infoSrc *subparser.FileInfo
for _, srcSubUnit := range srcSubUnitList {

// 得到当前这个单元推算出来需要提取的字幕时间轴范围,这个是 Base Sub 使用的提取段
startTimeString, subLength := srcSubUnit.GetFFMPEGCutRangeString(ExpandTimeRange)
startTimeBaseString, subBaseLength, startTimeBaseTime, _ := srcSubUnit.GetFFMPEGCutRangeString(ExpandTimeRange)
// 导出当前的字幕文件适合与匹配的范围的临时字幕文件
nowTmpSubBaseFPath, errString, err := s.ffmpegHelper.ExportSubArgsByTimeRange(infoBase.FileFullPath, "base", startTimeString, subLength)
nowTmpSubBaseFPath, errString, err := s.ffmpegHelper.ExportSubArgsByTimeRange(infoBase.FileFullPath, "base", startTimeBaseString, subBaseLength)
if err != nil {
log_helper.GetLogger().Errorln("ExportSubArgsByTimeRange base", errString, err)
return false, 0, 0, err
}
// 导出当前的字幕文件适合与匹配的范围的临时字幕文件,这个是 Src Sub 使用的提取段
startTimeString, subLength = srcSubUnit.GetFFMPEGCutRangeString(0)
nowTmpSubSrcFPath, errString, err := s.ffmpegHelper.ExportSubArgsByTimeRange(infoSrc.FileFullPath, "src", startTimeString, subLength)
startTimeSrcString, subSrcLength, _, _ := srcSubUnit.GetFFMPEGCutRangeString(0)
nowTmpSubSrcFPath, errString, err := s.ffmpegHelper.ExportSubArgsByTimeRange(infoSrc.FileFullPath, "src", startTimeSrcString, subSrcLength)
if err != nil {
log_helper.GetLogger().Errorln("ExportSubArgsByTimeRange src", errString, err)
return false, 0, 0, err
Expand All @@ -403,6 +403,7 @@ func (s *SubTimelineFixer) GetOffsetTimeV2(infoBase, infoSrc *subparser.FileInfo
return false, 0, 0, errors.New("DetermineFileTypeFromFile == false")
}

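// Special case: the subtitle file read here was cut out on its own, so its timeline is offset by default; a different function is needed to take that offset into account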
nowTmpBaseSubUnitList, err := sub_helper.GetVADINfoFromSub(nowTmpSubBaseFileInfo, 0, 10000)
if err != nil {
return false, 0, 0, err
Expand Down Expand Up @@ -454,19 +455,36 @@ func (s *SubTimelineFixer) GetOffsetTimeV2(infoBase, infoSrc *subparser.FileInfo
correlationTM := treemap.NewWith(utils.Float64Comparator)
for i := 0; i < len(nowTmpBaseSubVADUnit.VADList); i++ {

correlation := CalculateCurveCorrelation(srcSubUnit.GetVADFloatSlice(), nowTmpBaseSubVADUnit.GetVADFloatSlice()[], len(srcSubUnit.VADList))
// 截取的长度是以当前 srcSubUnit 基准来判断的
// Works like a sliding window over the base VAD list
windowStartIndex := i
windowEndIndex := i + len(srcSubUnit.VADList)
if windowEndIndex >= len(nowTmpBaseSubVADUnit.VADList) {
break
}

correlation := CalculateCurveCorrelation(nowTmpBaseSubVADUnit.GetVADFloatSlice()[windowStartIndex:windowEndIndex], srcSubUnit.GetVADFloatSlice(), len(srcSubUnit.VADList))
correlationTM.Put(correlation, i)
println(fmt.Sprintf("%v - %v", i, correlation))
}
// 找到最大的数值和索引
_, tmpMaxIndex := correlationTM.Max() // tmpMaxCorrelation
tmpMaxCorrelation, tmpMaxIndex := correlationTM.Max() // tmpMaxCorrelation
if tmpMaxCorrelation == nil || tmpMaxIndex == nil {
continue
}
bok, nowBaseIndexTime := nowTmpBaseSubVADUnit.GetIndexTimeNumber(tmpMaxIndex.(int), true)
if bok == false {
continue
}
if tmpMaxCorrelation.(float64) <= MinCorrelation {
continue
}

nowSrcRealTime := srcSubUnit.GetStartTimeNumber(true)
// 时间差值
TimeDiffStart := nowBaseIndexTime - nowSrcRealTime
TimeDiffStart := nowBaseIndexTime + my_util.Time2SecendNumber(startTimeBaseTime) - nowSrcRealTime

println(fmt.Sprintf("%v <-> %v <-> %v", tmpMaxIndex, tmpMaxCorrelation, TimeDiffStart))

tmpStartDiffTime = append(tmpStartDiffTime, TimeDiffStart)
startDiffTimeList = append(startDiffTimeList, TimeDiffStart)
}
Expand Down Expand Up @@ -515,8 +533,8 @@ func (s *SubTimelineFixer) GetOffsetTimeV2(infoBase, infoSrc *subparser.FileInfo
newSd = oldSd
}

println(fmt.Sprintf("%v <-> %v <-> %v", oldMean, oldSd, per))
println(fmt.Sprintf("%v <-> %v <-> %v", newMean, newSd, per))
println(fmt.Sprintf("Old Mean: %v SD: %v Per: %v", oldMean, oldSd, per))
println(fmt.Sprintf("New Mean: %v SD: %v Per: %v", newMean, newSd, per))

return false, -1, -1, nil
}
Expand Down Expand Up @@ -547,7 +565,7 @@ func (s *SubTimelineFixer) GetOffsetTimeV3(audioInfo vad.AudioInfo, infoSrc *sub
// 开始针对对白单元进行匹配
for _, subUnit := range subUnitList {

startTimeString, subLength := subUnit.GetFFMPEGCutRangeString(ExpandTimeRange)
startTimeString, subLength, _, _ := subUnit.GetFFMPEGCutRangeString(ExpandTimeRange)
// 导出当前的音频文件适合与匹配的范围的临时音频文件
outAudioFPath, _, errString, err := s.ffmpegHelper.ExportAudioAndSubArgsByTimeRange(audioInfo.FileFullPath, infoSrc.FileFullPath, startTimeString, subLength)
if err != nil {
Expand Down Expand Up @@ -634,7 +652,7 @@ func (s *SubTimelineFixer) GetOffsetTimeV3(audioInfo vad.AudioInfo, infoSrc *sub
}

const FixMask = "-fix"
const FrontAndEndPer = 0.10 // Fraction skipped at the front and again at the end during recognition (0.10 = 10% each)
const SubUnitMaxCount = 20 // Maximum number of dialogue lines collected into one SubUnit
const ExpandTimeRange = 40 // Extra range matched before and after a subtitle timeline segment, in seconds (+/- 40 s)
const MinCorelation = 0.4 // Minimum accepted correlation (match score)
const FrontAndEndPer = 0.15 // Fraction skipped at the front and again at the end during recognition (0.15 = 15% each)
const SubUnitMaxCount = 50 // Maximum number of dialogue lines collected into one SubUnit
const ExpandTimeRange = 50 // Extra range matched before and after a subtitle timeline segment, in seconds (+/- 50 s)
const MinCorrelation = 0.8 // Minimum accepted correlation; a best match at or below this value is skipped

0 comments on commit d5f2dcd

Please sign in to comment.