## 切分音频

In [19]:
from pydub import AudioSegment
import math
import os
import glob
import sys

# 指定FFmpeg的路径
ffmpeg_path = r"E:\Software\ffmpeg-master-latest-win64-gpl\bin"
os.environ["PATH"] += os.pathsep + ffmpeg_path

def split_audio(file_path, segment_length_ms):
    audio = AudioSegment.from_file(file_path)
    duration_ms = len(audio)
    segments_count = math.ceil(duration_ms / segment_length_ms)

    for i in range(segments_count):
        start_ms = i * segment_length_ms
        end_ms = min((i + 1) * segment_length_ms, duration_ms)
        segment = audio[start_ms:end_ms]
        segment.export(f"segment_{i}.mp3", format="mp3")

# 使用示例
# file_path = "name.mp3"  # 你的音频文件路径
# 获取当前目录下所有的.mp3文件
mp3_files = glob.glob('./*.mp3')

# 检查是否至少有一个.mp3文件
if mp3_files:
    # 只取第一个.mp3文件
    file_path = mp3_files[0]
    # 去掉文件路径前面的 './'
    # file_path = file_path.lstrip('./')
    # 打印输出，正在处理名为 file_path 的文件
    print(f"正在处理名为 {file_path} 的文件")
else:
    print("没有找到任何.mp3文件")
    file_path = None
    # 当需要停止程序时
    sys.exit()
segment_length_ms = 20 * 60 * 1000  # 每个片段的长度，这里设置为30秒
split_audio(file_path, segment_length_ms)

正在处理名为 .\rsrvr-015-1.mp3 的文件


## 生成字幕

In [20]:
def generate_corrected_transcript(temperature, system_prompt, text):
    response = client.chat.completions.create(
        model="gpt-3.5-turbo-16k",
        temperature=temperature,
        messages=[
            {
                "role": "system",
                "content": system_prompt
            },
            {
                "role": "user",
                "content":  text
            }
        ],
    )
    # print(response)
    # print(type(response))
    return response.choices[0].message.content

In [21]:
from openai import OpenAI
import os

# os.environ['OPENAI_BASE_URL'] = 'https://www.api.rovy.me/v1'
os.environ['OPENAI_API_KEY'] = 'sess-G6DmleCTMjGfHzTseKQzK7Id7yLmgdOkw8Byn2cH'

# 音频翻译成英文
from openai import OpenAI
client = OpenAI()

import glob

# 获取当前目录下所有以 'segment_' 开头的 .mp3 文件
segment_files = glob.glob('segment_*.mp3')

isTranslate = False
# 遍历文件列表
for filename in segment_files:
    with open(filename, "rb") as audio_file:
        translate = client.audio.translations.create(
            model="whisper-1",
            file=audio_file,
            response_format="srt",
        )
        # 根据音频文件名生成SRT文件名
    srt_filename = filename.replace('.mp3', '.srt')


    # 存储翻译结果到SRT文件
    with open(srt_filename, "w") as f:
        f.write(translate)

    print(f"Translation for {filename} saved to {srt_filename}")
    try:
        if isTranslate:
            system_prompt = "You are a helpful assistant for processing transcript. Your task is to translate the following text to Chinese. Additionally, pay attention to capturing the emotional exchanges and tone variations between characters, ensuring the translation is faithful to the source material while also conveying the same emotions and narrative atmosphere to an Chinese-speaking audience. Do not miss any content of the original text, translate everything. The output must strictly follow the srt format."
            translate_cn = generate_corrected_transcript(0, system_prompt, translate)
            srt_filename_cn = filename.replace('.mp3', '_cn.srt')
            with open('./cn/' + srt_filename_cn, "w", encoding='UTF-8') as f:
                f.write(translate_cn)
                print(f"{srt_filename} -- 英文字幕转中文字幕成功")
    except:
        print(f"{srt_filename} -- 英文字幕转中文字幕失败")

Translation for segment_0.mp3 saved to segment_0.srt
Translation for segment_1.mp3 saved to segment_1.srt


## 合成字幕文件

In [22]:
import re
from datetime import datetime, timedelta
import glob

def str_to_timedelta(time_str):
    return datetime.strptime(time_str, '%H:%M:%S,%f') - datetime(1900, 1, 1)

def timedelta_to_str(time_delta):
    hours, remainder = divmod(time_delta.total_seconds(), 3600)
    minutes, seconds = divmod(remainder, 60)
    return "{:02}:{:02}:{:06.3f}".format(int(hours), int(minutes), seconds).replace('.', ',')

def adjust_subtitle_time(subtitle, time_offset):
    try:
        time_pattern = re.compile(r'(\d{2}:\d{2}:\d{2},\d{3}) --> (\d{2}:\d{2}:\d{2},\d{3})')
        start_time_str, end_time_str = time_pattern.search(subtitle).groups()
        start_time = str_to_timedelta(start_time_str) + time_offset
        end_time = str_to_timedelta(end_time_str) + time_offset
        adjusted_subtitle = re.sub(time_pattern, f'{timedelta_to_str(start_time)} --> {timedelta_to_str(end_time)}', subtitle)
        return adjusted_subtitle
    except:
        return subtitle

def merge_subtitles(subtitle_files, video_durations):
    merged_subtitle = ""
    time_offset = timedelta()
    subtitle_index = 1

    for subtitle_file, duration in zip(subtitle_files, video_durations):
        # print(subtitle_file, duration)
        with open(subtitle_file, 'r', encoding='utf-8') as f:  # Use UTF-8 encoding
            print(subtitle_file)
            subtitles = f.read().strip().split('\n\n')
            for subtitle in subtitles:
                print(subtitle)
                if not subtitle.strip():
                    continue
                adjusted_subtitle = adjust_subtitle_time(subtitle, time_offset)
                adjusted_subtitle = re.sub(r'^\d+', str(subtitle_index), adjusted_subtitle, count=1)
                merged_subtitle += adjusted_subtitle + "\n\n"
                subtitle_index += 1
        time_offset += timedelta(seconds=duration)

    return merged_subtitle.strip()

# 示例使用 glob 查找文件和假定的视频持续时间
subtitle_files = glob.glob('segment*.srt')
subtitle_files.sort()  # 确保按顺序处理文件

video_durations = [20 * 60 for _ in subtitle_files]  # 假设每段视频20分钟

merged_subtitle = merge_subtitles(subtitle_files, video_durations)

# 去除'.mp3'后缀
file_name_without_extension = file_path.rsplit('.', 1)[0]

# 拼接新的文件路径
new_file_path = f"{file_name_without_extension}.srt"
# 保存合并后的字幕文件
with open(new_file_path, 'w', encoding='utf-8') as f:
    f.write(merged_subtitle)

# if isTranslate:
#     subtitle_files_cn = glob.glob('./cn/segment*.srt')
#     subtitle_files_cn.sort()  # 确保按顺序处理文件
#
#     video_durations_cn = [20 * 60 for _ in subtitle_files_cn]  # 假设每段视频20分钟
#     # print(subtitle_files_cn)
#     # print(video_durations_cn)
#     merged_subtitle_cn = merge_subtitles(subtitle_files_cn, video_durations_cn)
#
#     # 保存合并后的字幕文件
#     with open('./cn/merged_subtitle-0207_cn.srt', 'w', encoding='utf-8') as f:
#         f.write(merged_subtitle_cn)
try:
    if isTranslate:
        subtitle_files_cn = glob.glob('./cn/segment*.srt')
        subtitle_files_cn.sort()  # 确保按顺序处理文件

        video_durations_cn = [20 * 60 for _ in subtitle_files_cn]  # 假设每段视频20分钟

        merged_subtitle_cn = merge_subtitles(subtitle_files_cn, video_durations_cn)

        # 保存合并后的字幕文件
        # 去除'.mp3'后缀
        file_name_without_extension = file_path.rsplit('.', 1)[0]

        # 拼接新的文件路径
        new_file_path_cn = f"./cn/{file_name_without_extension}_cn.srt"
        # 保存合并后的字幕文件
        with open(new_file_path_cn, 'w', encoding='utf-8') as f:
            f.write(merged_subtitle_cn)
except Exception as E:
    # 打印错误信息
    print(E)
    print(f"中文字幕合并失败")

segment_0.srt
1
00:01:30,080 --> 00:01:31,500
Thank you Master!
2
00:01:51,120 --> 00:01:52,680
He fell down.
3
00:01:56,400 --> 00:01:58,779
Ah, it's about time to go up
4
00:02:01,000 --> 00:02:06,000
As usual, I will give you a service.
5
00:02:13,000 --> 00:02:18,000
Today, I will give you a service using my mouth
6
00:02:22,000 --> 00:02:25,000
Or a service using my eyes. Which do you prefer?
7
00:02:30,000 --> 00:02:35,000
Then, I will use my mouth today.
8
00:03:00,000 --> 00:03:02,000
Is it okay if I stroke your face?
9
00:03:30,000 --> 00:03:32,000
Okay.
10
00:04:00,000 --> 00:04:02,000
Is it okay if I stroke your nose?
11
00:04:30,000 --> 00:04:31,000
Okay.
12
00:05:00,000 --> 00:05:01,000
Close your eyes.
13
00:05:24,000 --> 00:05:25,000
Close your eyes.
14
00:05:30,000 --> 00:05:31,000
Close your eyes.
15
00:05:31,000 --> 00:05:32,000
Close your eyes.
16
00:05:32,000 --> 00:05:33,000
Close your eyes.
17
00:05:58,000 --> 00:06:00,000
Master's drool is delicious.
18
00:06:02,