Skip to content

Commit

Permalink
复现 ffsubsync 的 FFTAligner 逻辑ing
Browse files Browse the repository at this point in the history
Signed-off-by: allan716 <525223688@qq.com>
  • Loading branch information
allanpk716 committed Nov 24, 2021
1 parent 9dbc6eb commit 5fe6ad2
Show file tree
Hide file tree
Showing 8 changed files with 396 additions and 186 deletions.
12 changes: 6 additions & 6 deletions go.mod
Expand Up @@ -52,9 +52,9 @@ require (
github.com/tidwall/gjson v1.9.4
github.com/ulikunitz/xz v0.5.10 // indirect
github.com/ysmood/gson v0.7.0 // indirect
golang.org/x/net v0.0.0-20210614182718-04defd469f4e
golang.org/x/sys v0.0.0-20210616094352-59db8d763f22 // indirect
golang.org/x/text v0.3.6
golang.org/x/net v0.0.0-20211015210444-4f30a5c0130f
golang.org/x/sys v0.0.0-20211019181941-9d821ace8654 // indirect
golang.org/x/text v0.3.7
gonum.org/v1/gonum v0.9.3
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c // indirect
gopkg.in/errgo.v2 v2.1.0
Expand Down Expand Up @@ -94,9 +94,9 @@ require (
github.com/ysmood/goob v0.3.0 // indirect
github.com/ysmood/leakless v0.7.0 // indirect
go4.org v0.0.0-20200411211856-f5505b9728dd // indirect
golang.org/x/exp v0.0.0-20210220032938-85be41e4509f // indirect
golang.org/x/mod v0.3.1-0.20200828183125-ce943fd02449 // indirect
golang.org/x/tools v0.0.0-20201124115921-2c860bdd6e78 // indirect
golang.org/x/exp v0.0.0-20211123021643-48cbe7f80d7c // indirect
golang.org/x/mod v0.6.0-dev.0.20211013180041-c96bc1413d57 // indirect
golang.org/x/tools v0.1.8-0.20211029000441-d6a9af8af023 // indirect
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 // indirect
lukechampine.com/uint128 v1.1.1 // indirect
modernc.org/cc/v3 v3.33.7 // indirect
Expand Down
203 changes: 195 additions & 8 deletions go.sum

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions internal/pkg/sub_helper/key_features.go
Expand Up @@ -7,8 +7,8 @@ type KeyFeatures struct {
Small Feature // 小锯齿
}

func NewKeyFeatures(big, middle, small Feature) *KeyFeatures {
return &KeyFeatures{
func NewKeyFeatures(big, middle, small Feature) KeyFeatures {
return KeyFeatures{
big, middle, small,
}
}
Expand Down
37 changes: 13 additions & 24 deletions internal/pkg/sub_helper/sub_helper.go
Expand Up @@ -369,21 +369,20 @@ func MergeMultiDialogue4EngSubtitle(inSubParser *subparser.FileInfo) {
inSubParser.DialoguesEx = merger.Get()
}

// GetVADINfoFromSub 跟下面的 GetVADINfoFromSubNeedOffsetTimeWillInsert 函数功能一致
func GetVADINfoFromSub(infoSrc *subparser.FileInfo, FrontAndEndPer float64, SubUnitMaxCount int, insert bool, kf *KeyFeatures) ([]SubUnit, error) {
// GetVADInfoFeatureFromSub 跟下面的 GetVADInfoFeatureFromSubNeedOffsetTimeWillInsert 函数功能一致
func GetVADInfoFeatureFromSub(infoSrc *subparser.FileInfo, FrontAndEndPer float64, SubUnitMaxCount int, insert bool, kf KeyFeatures) ([]SubUnit, error) {

return GetVADINfoFromSubNeedOffsetTimeWillInsert(infoSrc, FrontAndEndPer, SubUnitMaxCount, 0, insert, kf)
return GetVADInfoFeatureFromSubNeedOffsetTimeWillInsert(infoSrc, FrontAndEndPer, SubUnitMaxCount, 0, insert, kf)
}

/*
GetVADINfoFromSubNeedOffsetTimeWillInsert 只不过这里可以加一个每一句话固定的偏移时间
GetVADInfoFeatureFromSubNeedOffsetTimeWillInsert 只不过这里可以加一个每一句话固定的偏移时间
这里的字幕要求是完整的一个字幕
1. 抽取字幕的时间片段的时候,暂定,前 15% 和后 15% 要避开,前奏、主题曲、结尾曲
2. 将整个字幕,抽取连续 5 句对话为一个单元,提取时间片段信息
3. 可能还有一个需求,默认的模式是每五句话一个单元,还有一种模式是每一句话向后找到连续的四句话组成一个单元,允许重叠
目前看到的情况是前者的抽样率太低,需要使用后者的逻辑
3. 这里抽取的是特征,也就有额外的逻辑去找这个特征(本程序内会描述为“钥匙”)
*/
func GetVADINfoFromSubNeedOffsetTimeWillInsert(infoSrc *subparser.FileInfo, SkipFrontAndEndPer float64, SubUnitMaxCount int, offsetTime float64, insert bool, kf *KeyFeatures) ([]SubUnit, error) {
func GetVADInfoFeatureFromSubNeedOffsetTimeWillInsert(infoSrc *subparser.FileInfo, SkipFrontAndEndPer float64, SubUnitMaxCount int, offsetTime float64, insert bool, kf KeyFeatures) ([]SubUnit, error) {
if SubUnitMaxCount < 0 {
SubUnitMaxCount = 0
}
Expand Down Expand Up @@ -447,21 +446,14 @@ func GetVADINfoFromSubNeedOffsetTimeWillInsert(infoSrc *subparser.FileInfo, Skip
// 然后重置
srcOneSubUnit = NewSubUnit()
// TODO 这里决定了插入数据的密度,有待测试
//i = i - SubUnitMaxCount
if kf == nil {
// 走原始的逻辑 i 的赋值逻辑跟之前一样,需要每一次进一步,也就是有重叠的部分出现
//i = i - SubUnitMaxCount + SubUnitMaxCount/5
//i = i - SubUnitMaxCount + SubUnitMaxCount/2
// i = i - SubUnitMaxCount
/*
确认
*/
if tmpNowMatchKey == false {
i = i - SubUnitMaxCount
} else {
if tmpNowMatchKey == false {
// 走原始的逻辑 i 的赋值逻辑跟之前一样,需要每一次进一步,也就是有重叠的部分出现
i = i - SubUnitMaxCount
} else {
// 判断了“钥匙”特征,且通过了
// i 需要跳过当前已经覆盖的段
i = i - SubUnitMaxCount + SubUnitMaxCount/2
}
i = i - SubUnitMaxCount/2
}
}
}
Expand All @@ -473,11 +465,8 @@ func GetVADINfoFromSubNeedOffsetTimeWillInsert(infoSrc *subparser.FileInfo, Skip
}

// IsMatchKey 是否符合“钥匙”的标准
func IsMatchKey(srcSubDialogueList []subparser.OneDialogueEx, kf *KeyFeatures) bool {
func IsMatchKey(srcSubDialogueList []subparser.OneDialogueEx, kf KeyFeatures) bool {

if kf == nil {
return false
}
/*
这里是设置主要依赖的还是数据源,源必须有足够的对白(暂定 50 句),才可能找到这么多信息
这里需要匹配的“钥匙”特征,先简单实现为 (这三个需要不交叉时间段)
Expand Down
19 changes: 13 additions & 6 deletions internal/pkg/sub_helper/sub_unit.go
Expand Up @@ -143,7 +143,7 @@ func (s *SubUnit) GetVADFloatSlice() []float64 {
if s.VADList[i].Active == true {
s.outVADFloats[i] = 1
} else {
s.outVADFloats[i] = 0
s.outVADFloats[i] = -1
}
}
}
Expand Down Expand Up @@ -272,9 +272,9 @@ func (s SubUnit) RealTimeToOffsetTime(realTime time.Time) time.Time {
}

// Save2Txt 导出为 float64 的内容
func (s SubUnit) Save2Txt(outFileFPath string) error {
func (s SubUnit) Save2Txt(outFileFPath string, oneLine bool) error {

file, err := os.OpenFile(outFileFPath, os.O_WRONLY|os.O_CREATE, 0666)
file, err := os.OpenFile(outFileFPath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0666)
if err != nil {
return err
}
Expand All @@ -286,9 +286,16 @@ func (s SubUnit) Save2Txt(outFileFPath string) error {
if s.VADList[i].Active == true {
active = 1.0
}
_, err = write.WriteString(fmt.Sprintf("%v\n", active))
if err != nil {
return err
if oneLine == true {
_, err = write.WriteString(fmt.Sprintf("%v", active))
if err != nil {
return err
}
} else {
_, err = write.WriteString(fmt.Sprintf("%v\n", active))
if err != nil {
return err
}
}
}
err = write.Flush()
Expand Down
67 changes: 67 additions & 0 deletions internal/pkg/sub_timeline_fixer/fft_aligner.go
@@ -0,0 +1,67 @@
package sub_timeline_fixer

import (
"gonum.org/v1/gonum/cmplxs"
"gonum.org/v1/gonum/dsp/fourier"
"gonum.org/v1/gonum/floats"
"math"
)

// FFTAligner groups the FFT-based alignment routines of this package.
// It carries no state, so the zero value is ready to use.
type FFTAligner struct{}

// fit computes the FFT-based cross-correlation of refFloats and subFloats,
// following the padding scheme used by ffsubsync's FFTAligner:
//
//   - sub is padded with zeros in FRONT (extra zeros + len(ref) of them),
//   - ref is padded with zeros at the BACK and the padded slice is reversed,
//   - both are brought to the same power-of-two length, transformed,
//     multiplied element-wise and transformed back.
//
// The correlation is currently only computed; nothing is returned yet
// (work in progress). Neither input slice is modified.
func (f FFTAligner) fit(refFloats, subFloats []float64) {
	refLen, subLen := len(refFloats), len(subFloats)
	if refLen+subLen == 0 {
		// Nothing to correlate; this also avoids Log2(0) = -Inf below.
		return
	}

	// Smallest power of two that can hold both sequences back to back.
	totalBits := math.Log2(float64(refLen) + float64(subLen))
	paddedLen := int(math.Pow(2, math.Ceil(totalBits)))
	// Number of additional zeros needed to reach the power-of-two length.
	extraZeros := paddedLen - refLen - subLen

	// One FFT instance sized for the padded length serves both transforms.
	fftIns := fourier.NewFFT(paddedLen)

	// ----------------------------------------------------------
	// sub: zeros first, samples last. make() already zero-fills, so only the
	// samples are copied. The samples start after extraZeros+refLen zeros;
	// using subLen here would misplace them whenever the lengths differ and
	// run out of range when sub is longer than ref.
	paddedSub := make([]float64, paddedLen)
	copy(paddedSub[extraZeros+refLen:], subFloats)
	subFT := fftIns.Coefficients(nil, paddedSub)

	// ----------------------------------------------------------
	// ref: samples first, zeros last, then the whole padded slice is
	// reversed, e.g. 0,1,1,0,0 -> 0,0,1,1,0.
	paddedRef := make([]float64, paddedLen)
	copy(paddedRef, refFloats)
	for i, j := 0, paddedLen-1; i < j; i, j = i+1, j-1 {
		paddedRef[i], paddedRef[j] = paddedRef[j], paddedRef[i]
	}
	refFT := fftIns.Coefficients(nil, paddedRef)

	// ----------------------------------------------------------
	// Element-wise product subFT * refFT, stored in refFT.
	cmplxs.Mul(refFT, subFT)
	// Inverse transform, then scale by 1/N to mirror numpy's ifft.
	convolve := fftIns.Sequence(nil, refFT)
	floats.Scale(1/float64(paddedLen), convolve)

	// Debug marker kept from the original work-in-progress implementation.
	println("d")
}

// float642comolex is an empty placeholder; it currently does nothing.
// NOTE(review): the name looks like a typo of "float64 to complex" — confirm
// before anything starts depending on it.
func float642comolex() {}
25 changes: 25 additions & 0 deletions internal/pkg/sub_timeline_fixer/fft_aligner_test.go
@@ -0,0 +1,25 @@
package sub_timeline_fixer

import "testing"

// TestFFTAligner_fit is a smoke test: it runs FFTAligner.fit on a small pair
// of +1/-1 sequences and only checks that the call completes without
// panicking (fit has no return value to assert on).
func TestFFTAligner_fit(t *testing.T) {
	cases := []struct {
		name string
		ref  []float64
		sub  []float64
	}{
		{
			name: "00",
			ref:  []float64{1, 1, 1, 1, -1, -1, 1},
			sub:  []float64{-1, 1, 1, -1, -1, 1, -1},
		},
	}

	for i := range cases {
		c := cases[i]
		t.Run(c.name, func(t *testing.T) {
			var aligner FFTAligner
			aligner.fit(c.ref, c.sub)
		})
	}
}

0 comments on commit 5fe6ad2

Please sign in to comment.