## 切分音频

In [1]:
from pydub import AudioSegment
import math
import os

# 指定FFmpeg的路径
ffmpeg_path = r"E:\Software\ffmpeg-master-latest-win64-gpl\bin"
os.environ["PATH"] += os.pathsep + ffmpeg_path

def split_audio(file_path, segment_length_ms):
    audio = AudioSegment.from_file(file_path)
    duration_ms = len(audio)
    segments_count = math.ceil(duration_ms / segment_length_ms)

    for i in range(segments_count):
        start_ms = i * segment_length_ms
        end_ms = min((i + 1) * segment_length_ms, duration_ms)
        segment = audio[start_ms:end_ms]
        segment.export(f"segment_{i}.mp3", format="mp3")

# 使用示例
file_path = "FC2PPV-2953018.mp3"  # 你的音频文件路径
segment_length_ms = 20 * 60 * 1000  # 每个片段的长度，这里设置为30秒
split_audio(file_path, segment_length_ms)

## 生成字幕

In [2]:
def generate_corrected_transcript(temperature, system_prompt, text):
    response = client.chat.completions.create(
        model="gpt-3.5-turbo-16k",
        temperature=temperature,
        messages=[
            {
                "role": "system",
                "content": system_prompt
            },
            {
                "role": "user",
                "content":  text
            }
        ],
    )
    # print(response)
    # print(type(response))
    return response.choices[0].message.content

In [16]:
from openai import OpenAI
import os

# os.environ['OPENAI_BASE_URL'] = 'https://www.api.rovy.me/v1'
os.environ['OPENAI_API_KEY'] = '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'

# 音频翻译成英文
from openai import OpenAI
client = OpenAI()

import glob

# 获取当前目录下所有以 'segment_' 开头的 .mp3 文件
segment_files = glob.glob('segment_*.mp3')

isTranslate = True
# 遍历文件列表
for filename in segment_files:
    with open(filename, "rb") as audio_file:
        translate = client.audio.translations.create(
            model="whisper-1",
            file=audio_file,
            response_format="srt",
        )
        # 根据音频文件名生成SRT文件名
    srt_filename = filename.replace('.mp3', '.srt')


    # 存储翻译结果到SRT文件
    with open(srt_filename, "w") as f:
        f.write(translate)

    print(f"Translation for {filename} saved to {srt_filename}")
    try:
        if isTranslate:
            system_prompt = "You are a helpful assistant for processing transcript. Your task is to translate the following text to Chinese. Additionally, pay attention to capturing the emotional exchanges and tone variations between characters, ensuring the translation is faithful to the source material while also conveying the same emotions and narrative atmosphere to an Chinese-speaking audience.The output must strictly follow the srt format."
            translate_cn = generate_corrected_transcript(0, system_prompt, translate)
            srt_filename_cn = filename.replace('.mp3', '_cn.srt')
            with open('./cn/' + srt_filename_cn, "w", encoding='UTF-8') as f:
                f.write(translate_cn)
                print(f"{srt_filename} -- 英文字幕转中文字幕成功")
    except:
        print(f"{srt_filename} -- 英文字幕转中文字幕失败")

Translation for segment_0.mp3 saved to segment_0.srt
<class 'openai.types.chat.chat_completion.ChatCompletion'>
segment_0.srt -- 英文字幕转中文字幕成功
Translation for segment_1.mp3 saved to segment_1.srt
<class 'openai.types.chat.chat_completion.ChatCompletion'>
segment_1.srt -- 英文字幕转中文字幕成功
Translation for segment_2.mp3 saved to segment_2.srt
<class 'openai.types.chat.chat_completion.ChatCompletion'>
segment_2.srt -- 英文字幕转中文字幕成功


## 合成字幕文件

In [23]:
import re
from datetime import datetime, timedelta
import glob

def str_to_timedelta(time_str):
    return datetime.strptime(time_str, '%H:%M:%S,%f') - datetime(1900, 1, 1)

def timedelta_to_str(time_delta):
    hours, remainder = divmod(time_delta.total_seconds(), 3600)
    minutes, seconds = divmod(remainder, 60)
    return "{:02}:{:02}:{:06.3f}".format(int(hours), int(minutes), seconds).replace('.', ',')

def adjust_subtitle_time(subtitle, time_offset):
    try:
        time_pattern = re.compile(r'(\d{2}:\d{2}:\d{2},\d{3}) --> (\d{2}:\d{2}:\d{2},\d{3})')
        start_time_str, end_time_str = time_pattern.search(subtitle).groups()
        start_time = str_to_timedelta(start_time_str) + time_offset
        end_time = str_to_timedelta(end_time_str) + time_offset
        adjusted_subtitle = re.sub(time_pattern, f'{timedelta_to_str(start_time)} --> {timedelta_to_str(end_time)}', subtitle)
        return adjusted_subtitle
    except:
        return subtitle

def merge_subtitles(subtitle_files, video_durations):
    merged_subtitle = ""
    time_offset = timedelta()
    subtitle_index = 1

    for subtitle_file, duration in zip(subtitle_files, video_durations):
        # print(subtitle_file, duration)
        with open(subtitle_file, 'r', encoding='utf-8') as f:  # Use UTF-8 encoding
            subtitles = f.read().strip().split('\n\n')
            for subtitle in subtitles:
                print(subtitle)
                if not subtitle.strip():
                    continue
                adjusted_subtitle = adjust_subtitle_time(subtitle, time_offset)
                adjusted_subtitle = re.sub(r'^\d+', str(subtitle_index), adjusted_subtitle, count=1)
                merged_subtitle += adjusted_subtitle + "\n\n"
                subtitle_index += 1
        time_offset += timedelta(seconds=duration)

    return merged_subtitle.strip()

# 示例使用 glob 查找文件和假定的视频持续时间
subtitle_files = glob.glob('segment*.srt')
subtitle_files.sort()  # 确保按顺序处理文件

video_durations = [20 * 60 for _ in subtitle_files]  # 假设每段视频20分钟

merged_subtitle = merge_subtitles(subtitle_files, video_durations)

# 保存合并后的字幕文件
with open('merged_subtitle-0207.srt', 'w', encoding='utf-8') as f:
    f.write(merged_subtitle)

if isTranslate:
    subtitle_files_cn = glob.glob('./cn/segment*.srt')
    subtitle_files_cn.sort()  # 确保按顺序处理文件

    video_durations_cn = [20 * 60 for _ in subtitle_files_cn]  # 假设每段视频20分钟
    # print(subtitle_files_cn)
    # print(video_durations_cn)
    merged_subtitle_cn = merge_subtitles(subtitle_files_cn, video_durations_cn)

    # 保存合并后的字幕文件
    with open('./cn/merged_subtitle-0207_cn.srt', 'w', encoding='utf-8') as f:
        f.write(merged_subtitle_cn)
try:
    if isTranslate:
        subtitle_files_cn = glob.glob('./cn/segment*.srt')
        subtitle_files_cn.sort()  # 确保按顺序处理文件

        video_durations_cn = [20 * 60 for _ in subtitle_files_cn]  # 假设每段视频20分钟

        merged_subtitle_cn = merge_subtitles(subtitle_files_cn, video_durations_cn)

        # 保存合并后的字幕文件
        with open('./cn/merged_subtitle-0207_cn.srt', 'w', encoding='utf-8') as f:
            f.write(merged_subtitle_cn)
except Exception as E:
    # 打印错误信息
    print(E)
    print(f"中文字幕合并失败")

1
00:00:00,000 --> 00:00:22,400
I'm.
2
00:00:22,400 --> 00:00:39,759
He has a brown nose.
3
00:00:39,759 --> 00:00:47,919
Now let's ask him a few questions.
4
00:00:47,919 --> 00:00:54,919
First, let's ask him how he feels about his new hair color.
5
00:00:54,919 --> 00:01:01,919
I'm not sure how he feels about his new hair color, so I'm going to ask him a few questions.
6
00:01:01,919 --> 00:01:06,919
First, let's ask him how he feels about his new hair color.
7
00:01:06,919 --> 00:01:07,919
Hello.
8
00:01:07,919 --> 00:01:08,919
What?
9
00:01:08,919 --> 00:01:09,919
Hello.
10
00:01:09,919 --> 00:01:10,919
Hello.
11
00:01:10,919 --> 00:01:18,919
You always look beautiful.
12
00:01:18,919 --> 00:01:20,919
Thank you.
13
00:01:20,919 --> 00:01:22,919
Do people say that a lot?
14
00:01:22,919 --> 00:01:24,919
No, they don't.
15
00:01:24,919 --> 00:01:25,919
They don't?
16
00:01:25,919 --> 00:01:26,919
No.
17
00:01:26,919 --> 00:01:33,919
What did you come here for today?
18
00:01:33,919 -