In [None]:
import os
import re

# Matches one SRT cue header: an optional sequence-number line followed by the
# "HH:MM:SS,mmm --> HH:MM:SS,mmm" timing line, up to and including its line
# break. The pattern is anchored to line starts (re.MULTILINE) so that digits
# or punctuation inside the subtitle text itself are never consumed.
_SRT_CUE_HEADER_RE = re.compile(
    r'^[ \t]*(?:\d+[ \t]*\n\s*)?'
    r'\d+:\d+:\d+[,.]\d+[ \t]*-->[ \t]*\d+:\d+:\d+[,.]\d+[ \t]*\n?',
    flags=re.MULTILINE,
)


def _clean_srt_text(content):
    """Strip SRT cue headers from ``content`` and normalise its whitespace."""
    text = _SRT_CUE_HEADER_RE.sub('', content)

    # Collapse runs of spaces, then drop the space left at the start of a line.
    # Doing this before the blank-line pass turns space-only lines into empty
    # lines so they are collapsed as well.
    text = re.sub(r' +', ' ', text)
    text = re.sub(r'^ ', '', text, flags=re.MULTILINE)

    # Remove extra line breaks, including a single leading one.
    text = re.sub(r'\n{2,}', '\n', text)
    text = re.sub(r'^\n', '', text)
    return text


def convert_srt_to_txt(srt_file_path, txt_file_path):
    
    """
    Convert a '.srt' subtitle file to a plain '.txt' file, with data cleaning.

    Sequence numbers and timing lines are removed, consecutive blank lines and
    repeated spaces are collapsed, and leading spaces on each line are dropped.
    The subtitle text itself (including trailing numbers and leading
    punctuation) is kept as-is.
    
    Arguments:
        srt_file_path (str): path of the '.srt' file to read (UTF-8, with or
            without a byte-order mark).
        txt_file_path (str): path of the '.txt' file to write (UTF-8).

    Returns:
        bool: True when the file was converted, False when any error occurred
        (the error is printed instead of being raised).
    """

    try:
        # 'utf-8-sig' decodes plain UTF-8 too, but also discards a leading BOM
        # that would otherwise end up as the first character of the output.
        with open(srt_file_path, 'r', encoding='utf-8-sig') as srt_file:
            content = srt_file.read()

        cleaned_content = _clean_srt_text(content)

        # Write the cleaned content to a new text file
        with open(txt_file_path, 'w', encoding='utf-8') as txt_file:
            txt_file.write(cleaned_content)

        return True
    except Exception as e:
        # Best-effort by design: the batch caller relies on the boolean result.
        print(f"Errors occurred when converting: {e}")
        return False

def batch_convert_srt_to_txt(srt_dir, txt_dir):
    
    """
    Batch convert every '.srt' file in a directory to a '.txt' file.

    Each '<name>.srt' found directly inside ``srt_dir`` is converted with
    ``convert_srt_to_txt`` and written to ``txt_dir`` as '<name>.txt'. One
    status line is printed per file.
    
    Arguments:
        srt_dir (str): directory that contains the '.srt' files.
        txt_dir (str): directory that receives the '.txt' files; it is
            created when it does not exist yet.

    Raises:
        OSError: if ``txt_dir`` cannot be created or ``srt_dir`` cannot be
            listed.
    """

    # exist_ok=True replaces the separate existence check, which could race
    # with another process creating the directory in between.
    os.makedirs(txt_dir, exist_ok=True)

    # Collect the '.srt' files in the directory. The extension is compared
    # case-insensitively, sub-directories are skipped, and the names are sorted
    # because os.listdir returns entries in arbitrary order.
    srt_files = sorted(
        name for name in os.listdir(srt_dir)
        if name.lower().endswith('.srt')
        and os.path.isfile(os.path.join(srt_dir, name))
    )

    for srt_file in srt_files:
        srt_file_path = os.path.join(srt_dir, srt_file)
        txt_name = os.path.splitext(srt_file)[0] + '.txt'
        txt_file_path = os.path.join(txt_dir, txt_name)

        if convert_srt_to_txt(srt_file_path, txt_file_path):
            print(f"Convert successfully：{srt_file} -> {txt_name}")
        else:
            print(f"Convert failed：{srt_file}")

In [None]:
# Execution: convert every '.srt' file in the source folder and write the
# cleaned '.txt' files to the destination folder.
batch_convert_srt_to_txt(
    srt_dir='C:/Users/Mouser/Desktop/srt_files',
    txt_dir='C:/Users/Mouser/Desktop/txt_files',
)

成功转换：01_1-0第一单元导读.srt -> 01_1-0第一单元导读.txt
成功转换：02_1-1春.srt -> 02_1-1春.txt
成功转换：03_1-2春.srt -> 03_1-2春.txt
成功转换：04_1-3春课后题.srt -> 04_1-3春课后题.txt
成功转换：05_1-4春阅读题型总结.srt -> 05_1-4春阅读题型总结.txt
成功转换：06_2-1济南的冬天.srt -> 06_2-1济南的冬天.txt
成功转换：07_2-2济南的冬天.srt -> 07_2-2济南的冬天.txt
成功转换：08_2-3济南的冬天课后题.srt -> 08_2-3济南的冬天课后题.txt
成功转换：09_2-4济南的冬天阅读题型总结.srt -> 09_2-4济南的冬天阅读题型总结.txt
成功转换：10_3-1雨的四季.srt -> 10_3-1雨的四季.txt
成功转换：11_3-2雨的四季.srt -> 11_3-2雨的四季.txt
成功转换：12_4-1中学古代诗歌如何学习.srt -> 12_4-1中学古代诗歌如何学习.txt
成功转换：13_4-2观沧海.srt -> 13_4-2观沧海.txt
成功转换：14_4-3闻王昌龄左迁龙标遥有此寄.srt -> 14_4-3闻王昌龄左迁龙标遥有此寄.txt
成功转换：15_4-4次北固山下.srt -> 15_4-4次北固山下.txt
成功转换：16_4-5天净沙秋思.srt -> 16_4-5天净沙秋思.txt
成功转换：17_4-6课后题.srt -> 17_4-6课后题.txt
成功转换：18_4-7阅读题型总结.srt -> 18_4-7阅读题型总结.txt
成功转换：19_5-1秋天的怀念.srt -> 19_5-1秋天的怀念.txt
成功转换：20_5-2秋天的怀念课后题.srt -> 20_5-2秋天的怀念课后题.txt
成功转换：21_5-3秋天的怀念阅读题型总结.srt -> 21_5-3秋天的怀念阅读题型总结.txt
成功转换：22_6-1散步.srt -> 22_6-1散步.txt
成功转换：23_6-2散步课后题.srt -> 23_6-2散步课后题.txt
成功转换：24_7-1散文诗二首金色花.srt -> 24_7-1散文诗二首金色花.txt
成功