Skip to content

Commit

Permalink
新增,ffmpeg 导出 pcm 音频文件片段功能
Browse files Browse the repository at this point in the history
Signed-off-by: allan716 <525223688@qq.com>
  • Loading branch information
allanpk716 committed Nov 3, 2021
1 parent ff325ef commit 476935d
Show file tree
Hide file tree
Showing 5 changed files with 143 additions and 53 deletions.
Expand Up @@ -245,7 +245,7 @@ func (s SubTimelineFixerHelper) fixSubTimeline(enSubFile emby.SubInfo, containCh
if err != nil {
return false, nil, fixedSubName, err
}
bok, offsetTime, sd, err := s.subTimelineFixer.GetOffsetTime(infoBase, infoSrc, filepath.Join(cacheTmpPath, infoSrc.Name+"-bar.html"), filepath.Join(cacheTmpPath, infoSrc.Name+".log"))
bok, offsetTime, sd, err := s.subTimelineFixer.GetOffsetTimeV1(infoBase, infoSrc, filepath.Join(cacheTmpPath, infoSrc.Name+"-bar.html"), filepath.Join(cacheTmpPath, infoSrc.Name+".log"))
if offsetTime != 0 {
log_helper.GetLogger().Infoln(infoSrc.Name, "offset time is", fmt.Sprintf("%f", offsetTime), "s")
}
Expand Down
61 changes: 61 additions & 0 deletions internal/pkg/ffmpeg_helper/ffmpeg_helper.go
Expand Up @@ -102,6 +102,24 @@ func (f *FFMPEGHelper) GetFFMPEGInfo(videoFileFullPath string) (bool, *FFMPEGInf
return bok, ffMPEGInfo, nil
}

// ExportAudioArgsByTimeRange runs ffmpeg to export a segment of the audio file
// audioFullPath into outAudioFullPath.
//
// startTimeString and timeLeng are forwarded to getAudioExportArgsByTimeRange,
// which places them after ffmpeg's -ss and -t options (e.g. "00:1:27" and
// "28"). If outAudioFullPath already exists as a file it is deleted before
// ffmpeg is started.
//
// On success it returns ("", nil). If deleting the old output fails the error
// is returned with an empty string; if execFFMPEG fails, the string it
// produced (presumably ffmpeg's output — confirm in execFFMPEG) is returned
// together with the error.
func (f *FFMPEGHelper) ExportAudioArgsByTimeRange(audioFullPath string, startTimeString, timeLeng, outAudioFullPath string) (string, error) {

	// Clear any previous export first.
	// NOTE(review): presumably this keeps ffmpeg from stopping to ask about
	// overwriting the file — confirm.
	if pkg.IsFile(outAudioFullPath) {
		if rmErr := os.Remove(outAudioFullPath); rmErr != nil {
			return "", rmErr
		}
	}

	ffmpegArgs := f.getAudioExportArgsByTimeRange(audioFullPath, startTimeString, timeLeng, outAudioFullPath)
	if output, runErr := f.execFFMPEG(ffmpegArgs); runErr != nil {
		return output, runErr
	}

	return "", nil
}

// parseJsonString2GetFFMPEGInfo 使用 ffprobe 获取视频的 stream 信息,从中解析出字幕和音频的索引
func (f *FFMPEGHelper) parseJsonString2GetFFMPEGInfo(videoFileFullPath, inputFFProbeString string) (bool, *FFMPEGInfo) {

Expand Down Expand Up @@ -204,6 +222,7 @@ func (f *FFMPEGHelper) exportAudioAndSubtitles(subArgs, audioArgs []string) (str
return "", nil
}

// execFFMPEG 执行 ffmpeg 命令
func (f *FFMPEGHelper) execFFMPEG(cmds []string) (string, error) {

cmd := exec.Command("ffmpeg", cmds...)
Expand All @@ -223,6 +242,7 @@ func (f *FFMPEGHelper) execFFMPEG(cmds []string) (string, error) {
return "", nil
}

// getAudioAndSubExportArgs 构建从原始视频导出字幕、音频的 ffmpeg 的参数
func (f *FFMPEGHelper) getAudioAndSubExportArgs(videoFileFullPath string, ffmpegInfo *FFMPEGInfo) ([]string, []string) {

/*
Expand Down Expand Up @@ -263,12 +283,53 @@ func (f *FFMPEGHelper) getAudioAndSubExportArgs(videoFileFullPath string, ffmpeg
return audioArgs, subArgs
}

// getAudioExportArgsByTimeRange builds the ffmpeg argument list that exports a
// time range of a raw PCM audio file.
//
// audioFullPath is the input file; it is declared to ffmpeg as s16le, 1
// channel, 16000 Hz. startTimeString is the value given to -ss (for example
// "00:1:27") and timeLeng the value given to -t, i.e. how many seconds to take
// from that point on. outAudioFullPath is the output file, written as
// pcm_s16le / s16le, 1 channel, 16000 Hz.
//
// The resulting arguments correspond to:
//
//	ffmpeg.exe -ar 16000 -ac 1 -f s16le -i aa.pcm -ss 00:1:27 -t 28 -acodec pcm_s16le -f s16le -ac 1 -ar 16000 bb.pcm
func (f *FFMPEGHelper) getAudioExportArgsByTimeRange(audioFullPath string, startTimeString, timeLeng, outAudioFullPath string) []string {
	return []string{
		// Encoding of the audio file that is read.
		"-ar", "16000",
		"-ac", "1",
		"-f", "s16le",

		// Input file and the range to cut out of it.
		"-i", audioFullPath,
		"-ss", startTimeString,
		"-t", timeLeng,

		// Encoding of the audio file that is written.
		"-acodec", "pcm_s16le",
		"-f", "s16le",
		"-ac", "1",
		"-ar", "16000",

		outAudioFullPath,
	}
}

// addSubMapArg appends the ffmpeg arguments that export one subtitle stream to
// *subArgs: "-map", the stream selector "0:<index>", and the output path
// subSaveFullPath. The slice is updated in place through the pointer.
func (f *FFMPEGHelper) addSubMapArg(subArgs *[]string, index int, subSaveFullPath string) {
	streamSelector := fmt.Sprintf("0:%d", index)
	*subArgs = append(*subArgs, "-map", streamSelector, subSaveFullPath)
}

// addAudioMapArg 构建音频的导出参数
func (f *FFMPEGHelper) addAudioMapArg(subArgs *[]string, index int, audioSaveFullPath string) {
// -acodec pcm_s16le -f s16le -ac 1 -ar 16000
*subArgs = append(*subArgs, "-map")
Expand Down
16 changes: 16 additions & 0 deletions internal/pkg/ffmpeg_helper/ffmpeg_helper_test.go
Expand Up @@ -71,3 +71,19 @@ func Test_parseJsonString2GetFFMPEGInfo(t *testing.T) {
})
}
}

// TestFFMPEGHelper_ExportAudioArgsByTimeRange exports a 28 second segment,
// starting at 0:1:27, from a local PCM file and fails if the export returns an
// error.
//
// NOTE(review): the input and output paths are hard-coded to a Windows
// directory, so this test presumably only passes on a machine that has that
// fixture and an ffmpeg binary available — confirm before enabling it in CI.
func TestFFMPEGHelper_ExportAudioArgsByTimeRange(t *testing.T) {

	audioFullPath := "C:\\Tmp\\Rick and Morty - S05E10\\英_1.pcm"
	startTimeString := "0:1:27"
	timeLeng := "28"
	outAudioFullPath := "C:\\Tmp\\Rick and Morty - S05E10\\英_1_cut.pcm"

	f := NewFFMPEGHelper()

	output, err := f.ExportAudioArgsByTimeRange(audioFullPath, startTimeString, timeLeng, outAudioFullPath)
	if err != nil {
		// Report through the test log rather than the println builtin so the
		// returned text is attached to this test's output.
		t.Logf("ExportAudioArgsByTimeRange() returned: %s", output)
		t.Fatal(err)
	}
}
107 changes: 60 additions & 47 deletions internal/pkg/sub_timeline_fixer/fixer.go
Expand Up @@ -5,6 +5,7 @@ import (
"github.com/allanpk716/ChineseSubFinder/internal/common"
"github.com/allanpk716/ChineseSubFinder/internal/pkg"
"github.com/allanpk716/ChineseSubFinder/internal/pkg/log_helper"
"github.com/allanpk716/ChineseSubFinder/internal/pkg/vad"
"github.com/allanpk716/ChineseSubFinder/internal/types/sub_timeline_fiexer"
"github.com/allanpk716/ChineseSubFinder/internal/types/subparser"
"github.com/go-echarts/go-echarts/v2/opts"
Expand Down Expand Up @@ -57,8 +58,62 @@ func (s *SubTimelineFixer) StopWordCounter(inString string, per int) []string {
return stopWords
}

// GetOffsetTime 暂时只支持英文的基准字幕,源字幕必须是双语中英字幕
func (s *SubTimelineFixer) GetOffsetTime(infoBase, infoSrc *subparser.FileInfo, staticLineFileSavePath string, debugInfoFileSavePath string) (bool, float64, float64, error) {
// FixSubTimeline shifts every dialogue timestamp of infoSrc by inOffsetTime
// seconds and writes the corrected subtitle text to desSaveSubFileFullPath.
//
// The parsed dialogues supply the original StartTime/EndTime strings. Each one
// is parsed, moved by the offset, re-formatted with the same layout and
// substituted into infoSrc.Content. ASS and SSA files use
// common.TimeFormatAss; every other extension uses common.TimeFormatSrt.
// The offset is truncated to whole milliseconds.
//
// It returns the corrected content. If a timestamp cannot be parsed, or the
// destination file cannot be created, written or closed, it returns "" and the
// error.
//
// NOTE(review): a negative offset larger than a dialogue's timestamp would
// presumably wrap around midnight when re-formatted — confirm callers never
// pass one.
func (s *SubTimelineFixer) FixSubTimeline(infoSrc *subparser.FileInfo, inOffsetTime float64, desSaveSubFileFullPath string) (string, error) {

	// Offset to apply, converted from seconds.
	offsetTime := time.Duration(inOffsetTime*1000) * time.Millisecond

	timeFormat := common.TimeFormatSrt
	if infoSrc.Ext == common.SubExtASS || infoSrc.Ext == common.SubExtSSA {
		timeFormat = common.TimeFormatAss
	}

	// shift parses one timestamp and returns it moved by offsetTime.
	shift := func(stamp string) (string, error) {
		parsed, err := time.Parse(timeFormat, stamp)
		if err != nil {
			return "", err
		}
		return parsed.Add(offsetTime).Format(timeFormat), nil
	}

	// Collect every old -> new pair first and substitute them in ONE pass.
	// Replacing dialogue by dialogue would shift a timestamp twice whenever an
	// already shifted value equals the original timestamp of a later dialogue
	// (e.g. offset 2s with dialogues at 00:00:01 and 00:00:03).
	pairs := make([]string, 0, len(infoSrc.Dialogues)*4)
	for _, srcOneDialogue := range infoSrc.Dialogues {
		fixedStart, err := shift(srcOneDialogue.StartTime)
		if err != nil {
			return "", err
		}
		fixedEnd, err := shift(srcOneDialogue.EndTime)
		if err != nil {
			return "", err
		}
		pairs = append(pairs,
			srcOneDialogue.StartTime, fixedStart,
			srcOneDialogue.EndTime, fixedEnd,
		)
	}
	fixContent := strings.NewReplacer(pairs...).Replace(infoSrc.Content)

	dstFile, err := os.Create(desSaveSubFileFullPath)
	if err != nil {
		return "", err
	}
	if _, err = dstFile.WriteString(fixContent); err != nil {
		// The write error is the one worth reporting; closing is best effort.
		_ = dstFile.Close()
		return "", err
	}
	// A failed Close can mean the data never reached the disk, so report it.
	if err = dstFile.Close(); err != nil {
		return "", err
	}
	return fixContent, nil
}

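/*
The V1 subtitle timeline correction has special preconditions:
1. The video must contain English subtitles.
2. The external subtitle must be a bilingual Chinese subtitle (Simplified Chinese + English, or Traditional Chinese + English).
*/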

// GetOffsetTimeV1 暂时只支持英文的基准字幕,源字幕必须是双语中英字幕
func (s *SubTimelineFixer) GetOffsetTimeV1(infoBase, infoSrc *subparser.FileInfo, staticLineFileSavePath string, debugInfoFileSavePath string) (bool, float64, float64, error) {

var debugInfos = make([]string, 0)
// 构建基准语料库,目前阶段只需要考虑是 En 的就行了
Expand Down Expand Up @@ -314,52 +369,10 @@ func (s *SubTimelineFixer) GetOffsetTime(infoBase, infoSrc *subparser.FileInfo,
return true, newMean, newSd, nil
}

// FixSubTimeline 校正时间轴
func (s *SubTimelineFixer) FixSubTimeline(infoSrc *subparser.FileInfo, inOffsetTime float64, desSaveSubFileFullPath string) (string, error) {

/*
从解析的实例中,正常来说是可以匹配出所有的 Dialogue 对话的 Start 和 End time 的信息
然后找到对应的字幕的文件,进行文件内容的替换来做时间轴的校正
*/
// 偏移时间
offsetTime := time.Duration(inOffsetTime*1000) * time.Millisecond
timeFormat := ""
if infoSrc.Ext == common.SubExtASS || infoSrc.Ext == common.SubExtSSA {
timeFormat = common.TimeFormatAss
} else {
timeFormat = common.TimeFormatSrt
}
fixContent := infoSrc.Content
for _, srcOneDialogue := range infoSrc.Dialogues {
// GetOffsetTimeV2 使用 VAD 检测语音是否有人声,输出连续的点标记,再通过 SimHash 进行匹配,找到最佳的偏移时间
func (s *SubTimelineFixer) GetOffsetTimeV2(audioInfo vad.AudioInfo, infoSrc *subparser.FileInfo, staticLineFileSavePath string, debugInfoFileSavePath string) error {

timeStart, err := time.Parse(timeFormat, srcOneDialogue.StartTime)
if err != nil {
return "", err
}
timeEnd, err := time.Parse(timeFormat, srcOneDialogue.EndTime)
if err != nil {
return "", err
}

fixTimeStart := timeStart.Add(offsetTime)
fixTimeEnd := timeEnd.Add(offsetTime)

fixContent = strings.ReplaceAll(fixContent, srcOneDialogue.StartTime, fixTimeStart.Format(timeFormat))
fixContent = strings.ReplaceAll(fixContent, srcOneDialogue.EndTime, fixTimeEnd.Format(timeFormat))
}

dstFile, err := os.Create(desSaveSubFileFullPath)
if err != nil {
return "", err
}
defer func() {
_ = dstFile.Close()
}()
_, err = dstFile.WriteString(fixContent)
if err != nil {
return "", err
}
return fixContent, nil
return nil
}

const FixMask = "-fix"
10 changes: 5 additions & 5 deletions internal/pkg/sub_timeline_fixer/fixer_test.go
Expand Up @@ -306,20 +306,20 @@ func TestGetOffsetTime(t *testing.T) {
*/
sub_helper.MergeMultiDialogue4EngSubtitle(infoSrc)

bok, got, sd, err := s.GetOffsetTime(infoBase, infoSrc, tt.args.ch_enSubFile+"-bar.html", tt.args.ch_enSubFile+".log")
bok, got, sd, err := s.GetOffsetTimeV1(infoBase, infoSrc, tt.args.ch_enSubFile+"-bar.html", tt.args.ch_enSubFile+".log")
if (err != nil) != tt.wantErr {
t.Errorf("GetOffsetTime() error = %v, wantErr %v", err, tt.wantErr)
t.Errorf("GetOffsetTimeV1() error = %v, wantErr %v", err, tt.wantErr)
return
}

// 在一个正负范围内都可以接受
if got > tt.want-0.1 && got < tt.want+0.1 {

} else {
t.Errorf("GetOffsetTime() got = %v, want %v", got, tt.want)
t.Errorf("GetOffsetTimeV1() got = %v, want %v", got, tt.want)
}
//if got != tt.want {
// t.Errorf("GetOffsetTime() got = %v, want %v", got, tt.want)
// t.Errorf("GetOffsetTimeV1() got = %v, want %v", got, tt.want)
//}

if bok == true && got != 0 {
Expand All @@ -329,7 +329,7 @@ func TestGetOffsetTime(t *testing.T) {
}
}

println(fmt.Sprintf("GetOffsetTime: %fs SD:%f", got, sd))
println(fmt.Sprintf("GetOffsetTimeV1: %fs SD:%f", got, sd))
})
}
}
Expand Down

0 comments on commit 476935d

Please sign in to comment.