In [3]:
import re
from datetime import datetime, timedelta

# 字幕时间格式转换工具
def str_to_timedelta(time_str):
    """Convert an SRT timestamp string (``HH:MM:SS,mmm``) to a ``timedelta``.

    ``strptime`` places the parsed time of day on its default date,
    1900-01-01; subtracting that same date leaves just the elapsed time
    since 00:00:00.
    """
    reference_date = datetime(1900, 1, 1)
    parsed = datetime.strptime(time_str, '%H:%M:%S,%f')
    return parsed - reference_date

def timedelta_to_str(time_delta):
    """Format a ``timedelta`` as an SRT timestamp ``HH:MM:SS,mmm``.

    The value is rounded to the nearest millisecond. Hours are zero-padded
    to at least two digits and are not wrapped at 24, so durations of a day
    or more are expressed purely in hours. A negative duration is rendered
    with a leading ``-`` followed by the timestamp of its absolute value.

    Args:
        time_delta: The duration to format.

    Returns:
        The formatted timestamp string.
    """
    # Work in integer microseconds so that no floating-point rounding can
    # produce an out-of-range field such as "60,000" seconds.
    total_us = (
        (time_delta.days * 86400 + time_delta.seconds) * 1_000_000
        + time_delta.microseconds
    )
    total_ms = (abs(total_us) + 500) // 1000
    # Only show a sign when the rounded value is actually non-zero.
    sign = '-' if total_us < 0 and total_ms else ''

    total_seconds, millis = divmod(total_ms, 1000)
    total_minutes, seconds = divmod(total_seconds, 60)
    hours, minutes = divmod(total_minutes, 60)
    return f'{sign}{hours:02d}:{minutes:02d}:{seconds:02d},{millis:03d}'

# 调整字幕时间
def adjust_subtitle_time(subtitle, time_offset):
    """Shift the timing line(s) of one SRT subtitle block by ``time_offset``.

    Args:
        subtitle: Text of a subtitle block containing a timing line of the
            form ``HH:MM:SS,mmm --> HH:MM:SS,mmm``.
        time_offset: ``timedelta`` added to both the start and end time.

    Returns:
        The block text with every timing line replaced by its shifted value.

    Raises:
        ValueError: If the block contains no timing line.
    """
    time_pattern = re.compile(r'(\d{2}:\d{2}:\d{2},\d{3}) --> (\d{2}:\d{2}:\d{2},\d{3})')

    def _shift(match):
        # Each match is shifted from its own times, so a block that happens
        # to hold several timing lines does not get them all overwritten
        # with the first line's values.
        start_time = str_to_timedelta(match.group(1)) + time_offset
        end_time = str_to_timedelta(match.group(2)) + time_offset
        return f'{timedelta_to_str(start_time)} --> {timedelta_to_str(end_time)}'

    adjusted_subtitle, replaced = time_pattern.subn(_shift, subtitle)
    if not replaced:
        # Fail with a message that identifies the offending block instead of
        # an AttributeError on a failed regex search.
        raise ValueError(f'no SRT timing line found in subtitle block: {subtitle!r}')
    return adjusted_subtitle

# 合并字幕文件
def merge_subtitles(subtitle_files, video_durations, encoding='latin1'):
    """Concatenate several SRT files into one continuously timed subtitle text.

    The timestamps of each file are shifted by the summed durations of all
    files before it, and the subtitle blocks are renumbered sequentially
    starting from 1 across the whole output.

    Args:
        subtitle_files: Paths of the SRT files, in playback order.
        video_durations: Duration in seconds of the video belonging to each
            file, in the same order. Files and durations are paired with
            ``zip``, so extra entries on either side are ignored.
        encoding: Text encoding used to read the files. Defaults to
            ``'latin1'``.
            NOTE: Latin-1 never fails to decode, but a UTF-8 file read this
            way has its non-ASCII characters garbled; pass ``'utf-8-sig'``
            for UTF-8 input.

    Returns:
        The merged SRT text, blocks separated by one blank line, without
        leading or trailing whitespace.
    """
    # Two or more consecutive newlines end a block; consuming the whole run
    # keeps a stray extra blank line from leaving a leading newline on the
    # next block (which would stop the index at its start being renumbered).
    block_separator = re.compile(r'\n{2,}')
    merged_blocks = []
    time_offset = timedelta()
    subtitle_index = 1

    for subtitle_file, duration in zip(subtitle_files, video_durations):
        with open(subtitle_file, 'r', encoding=encoding) as f:
            content = f.read()

        for subtitle in block_separator.split(content):
            subtitle = subtitle.strip()
            # Skip empty subtitle blocks.
            if not subtitle:
                continue
            adjusted_subtitle = adjust_subtitle_time(subtitle, time_offset)
            # Replace the block's leading index with the running counter.
            adjusted_subtitle = re.sub(r'^\d+', str(subtitle_index), adjusted_subtitle, count=1)
            merged_blocks.append(adjusted_subtitle)
            subtitle_index += 1

        # Subtitles of the next file start where this video ends.
        time_offset += timedelta(seconds=duration)

    return "\n\n".join(merged_blocks)

import glob

def _natural_sort_key(file_name):
    """Return a sort key that orders embedded numbers by value, not by text."""
    # re.split with a capturing group alternates text and digit runs, with
    # the digit runs always at odd positions.
    return [
        int(chunk) if position % 2 else chunk
        for position, chunk in enumerate(re.split(r'(\d+)', file_name))
    ]


# Find every segment*.srt file in the current directory.
subtitle_files = glob.glob('segment*.srt')
# Sort numerically aware: a plain string sort would place segment10.srt
# before segment2.srt and the time offsets would be applied out of order.
subtitle_files.sort(key=_natural_sort_key)

# Assume every video segment lasts 20 minutes (1200 seconds).
video_durations = [20 * 60 for _ in subtitle_files]

merged_subtitle = merge_subtitles(subtitle_files, video_durations)

# Save the merged subtitle file.
with open('merged_subtitle.srt', 'w', encoding='utf-8') as f:
    f.write(merged_subtitle)



In [1]:
import re
from datetime import datetime, timedelta
import glob

def str_to_timedelta(time_str):
    """Parse an ``HH:MM:SS,mmm`` subtitle timestamp into a ``timedelta``."""
    # strptime anchors the parsed time on 1900-01-01, so removing that date
    # leaves only the offset from midnight.
    moment = datetime.strptime(time_str, '%H:%M:%S,%f')
    midnight = datetime(1900, 1, 1)
    return moment - midnight

def timedelta_to_str(time_delta):
    """Format a ``timedelta`` as an SRT timestamp ``HH:MM:SS,mmm``.

    The value is rounded to the nearest millisecond, and the rounding
    carries into seconds, minutes and hours as needed. Hours are zero-padded
    to at least two digits and are not wrapped at 24. A negative duration is
    rendered with a leading ``-`` followed by the timestamp of its absolute
    value.

    Args:
        time_delta: The duration to format.

    Returns:
        The formatted timestamp string.
    """
    # Integer microseconds avoid float formatting rounding the seconds field
    # up to "60,000" (e.g. for 59.9996 s) without carrying into the minutes.
    total_us = (
        (time_delta.days * 86400 + time_delta.seconds) * 1_000_000
        + time_delta.microseconds
    )
    total_ms = (abs(total_us) + 500) // 1000
    # Only show a sign when the rounded value is actually non-zero.
    sign = '-' if total_us < 0 and total_ms else ''

    total_seconds, millis = divmod(total_ms, 1000)
    total_minutes, seconds = divmod(total_seconds, 60)
    hours, minutes = divmod(total_minutes, 60)
    return f'{sign}{hours:02d}:{minutes:02d}:{seconds:02d},{millis:03d}'

def adjust_subtitle_time(subtitle, time_offset):
    """Shift the timing line(s) of one SRT subtitle block by ``time_offset``.

    Args:
        subtitle: Text of a subtitle block containing a timing line of the
            form ``HH:MM:SS,mmm --> HH:MM:SS,mmm``.
        time_offset: ``timedelta`` added to both the start and end time.

    Returns:
        The block text with every timing line replaced by its shifted value.

    Raises:
        ValueError: If the block contains no timing line.
    """
    time_pattern = re.compile(r'(\d{2}:\d{2}:\d{2},\d{3}) --> (\d{2}:\d{2}:\d{2},\d{3})')

    def _shift(match):
        # Each match is shifted from its own times, so a block that happens
        # to hold several timing lines does not get them all overwritten
        # with the first line's values.
        start_time = str_to_timedelta(match.group(1)) + time_offset
        end_time = str_to_timedelta(match.group(2)) + time_offset
        return f'{timedelta_to_str(start_time)} --> {timedelta_to_str(end_time)}'

    adjusted_subtitle, replaced = time_pattern.subn(_shift, subtitle)
    if not replaced:
        # Fail with a message that identifies the offending block instead of
        # an AttributeError on a failed regex search.
        raise ValueError(f'no SRT timing line found in subtitle block: {subtitle!r}')
    return adjusted_subtitle

def merge_subtitles(subtitle_files, video_durations, encoding='utf-8-sig'):
    """Concatenate several SRT files into one continuously timed subtitle text.

    The timestamps of each file are shifted by the summed durations of all
    files before it, and the subtitle blocks are renumbered sequentially
    starting from 1 across the whole output.

    Args:
        subtitle_files: Paths of the SRT files, in playback order.
        video_durations: Duration in seconds of the video belonging to each
            file, in the same order. Files and durations are paired with
            ``zip``, so extra entries on either side are ignored.
        encoding: Text encoding used to read the files. The default
            ``'utf-8-sig'`` reads plain UTF-8 and additionally drops a
            leading byte-order mark, which would otherwise sit in front of
            the first block's index and prevent it from being renumbered.

    Returns:
        The merged SRT text, blocks separated by one blank line, without
        leading or trailing whitespace.
    """
    # Two or more consecutive newlines end a block; consuming the whole run
    # keeps a stray extra blank line from leaving a leading newline on the
    # next block (which would stop the index at its start being renumbered).
    block_separator = re.compile(r'\n{2,}')
    merged_blocks = []
    time_offset = timedelta()
    subtitle_index = 1

    for subtitle_file, duration in zip(subtitle_files, video_durations):
        with open(subtitle_file, 'r', encoding=encoding) as f:
            content = f.read()

        for subtitle in block_separator.split(content):
            subtitle = subtitle.strip()
            # Skip empty subtitle blocks.
            if not subtitle:
                continue
            adjusted_subtitle = adjust_subtitle_time(subtitle, time_offset)
            # Replace the block's leading index with the running counter.
            adjusted_subtitle = re.sub(r'^\d+', str(subtitle_index), adjusted_subtitle, count=1)
            merged_blocks.append(adjusted_subtitle)
            subtitle_index += 1

        # Subtitles of the next file start where this video ends.
        time_offset += timedelta(seconds=duration)

    return "\n\n".join(merged_blocks)

def _natural_sort_key(file_name):
    """Return a sort key that orders embedded numbers by value, not by text."""
    # re.split with a capturing group alternates text and digit runs, with
    # the digit runs always at odd positions.
    return [
        int(chunk) if position % 2 else chunk
        for position, chunk in enumerate(re.split(r'(\d+)', file_name))
    ]


# Example run: locate the segment files with glob and use an assumed,
# fixed duration for every video.
subtitle_files = glob.glob('segment*.srt')
# Sort numerically aware: a plain string sort would place segment10.srt
# before segment2.srt and the time offsets would be applied out of order.
subtitle_files.sort(key=_natural_sort_key)

video_durations = [20 * 60 for _ in subtitle_files]  # assume 20 minutes per segment

merged_subtitle = merge_subtitles(subtitle_files, video_durations)

# Save the merged subtitle file.
with open('merged_subtitle-0207.srt', 'w', encoding='utf-8') as f:
    f.write(merged_subtitle)