In [5]:
import os
import textgrid
from tqdm.notebook import tqdm

# Base directory for all inputs and outputs. Note that this is the current
# working directory of the process (os.getcwd()), which is where the notebook
# is expected to be started from.
script_dir = os.getcwd()

# Resolve every path used by the script relative to the base directory:
#   textgrid_dir      - folder holding the raw input TextGrid files
#   out_dir           - folder that receives the converted TextGrid files
#   words_dict_file   - replacement table for the syllable ("words") tier
#   phones_dict_file  - replacement table for the phoneme ("phones") tier
textgrid_dir, out_dir, words_dict_file, phones_dict_file = (
    os.path.join(script_dir, entry_name)
    for entry_name in (
        'TextGrid_raw',
        'TextGrid',
        'words_rules.txt',
        'phones_rules.txt',
    )
)

# Start every run from an empty output directory: create it when it does not
# exist yet, otherwise delete each entry it contains. Entries are removed with
# os.remove, so the directory is expected to hold plain files only.
if not os.path.exists(out_dir):
    os.makedirs(out_dir)
else:
    for stale_name in os.listdir(out_dir):
        os.remove(os.path.join(out_dir, stale_name))
    
def _load_replace_rules(rules_path):
    """Read a tab-separated replacement table into a dict.

    Each non-blank line of the file must have exactly two tab-separated
    fields: the mark to look for and the mark to write in its place.
    Surrounding whitespace on the line is stripped before splitting.

    Args:
        rules_path: Path of the rules file to read.

    Returns:
        A dict mapping each source mark to its replacement, in file order.
        If a source mark appears more than once, the last line wins.

    Raises:
        ValueError: If a non-blank line does not have exactly two fields.
    """
    rules = {}
    # Decode explicitly instead of relying on the platform locale; the
    # "utf-8-sig" codec also drops a leading BOM, which would otherwise be
    # glued onto the first source mark and make that rule never match.
    with open(rules_path, 'r', encoding='utf-8-sig') as rules_file:
        for line_no, raw_line in enumerate(rules_file, start=1):
            entry = raw_line.strip()
            if not entry:
                continue
            fields = entry.split('\t')
            if len(fields) != 2:
                raise ValueError(
                    f'{rules_path}, line {line_no}: expected '
                    f'"source<TAB>replacement", got {entry!r}'
                )
            src, dest = fields
            rules[src] = dest
    return rules


# Load the replacement rules for the syllable ("words") tier.
words_replace_rules = _load_replace_rules(words_dict_file)
print(words_replace_rules)
# Load the replacement rules for the phoneme ("phones") tier.
phones_replace_rules = _load_replace_rules(phones_dict_file)
print(phones_replace_rules)

# Replacement counters: one entry per rule source mark, all starting at zero.
words_consonant_count = dict.fromkeys(words_replace_rules, 0)
phones_consonant_count = dict.fromkeys(phones_replace_rules, 0)
# Number of entries in the input directory whose name ends with ".TextGrid".
total_files = sum(1 for entry in os.listdir(textgrid_dir) if entry.endswith('.TextGrid'))

# Convert every .TextGrid file found in the input directory.
# The file list is filtered up front so that the progress bar advances once
# per processed file and its total matches the number of iterations.
textgrid_names = [name for name in os.listdir(textgrid_dir) if name.endswith('.TextGrid')]
for filename in tqdm(textgrid_names, desc='Processing'):
    textgrid_file = os.path.join(textgrid_dir, filename)
    tg = textgrid.TextGrid.fromFile(textgrid_file)

    # Locate the syllable ("words") and phoneme ("phones") tiers; the first
    # tier with a matching name is used, None when the file has no such tier.
    words_tier = next((tier for tier in tg if tier.name == 'words'), None)
    phones_tier = next((tier for tier in tg if tier.name == 'phones'), None)

    # Apply each tier's rules and update that tier's counters.
    for tier_name, tier, rules, counts in (
        ('words', words_tier, words_replace_rules, words_consonant_count),
        ('phones', phones_tier, phones_replace_rules, phones_consonant_count),
    ):
        if tier is None:
            # A missing tier is reported and skipped rather than aborting the
            # whole batch half-way through.
            print(f'Warning: {filename} has no "{tier_name}" tier; skipped replacement for it.')
            continue
        for interval in tier:
            original_mark = interval.mark
            # Single lookup on the ORIGINAL mark: each interval is replaced at
            # most once, so a rule's output is never fed into another rule
            # (and counted a second time).
            if original_mark in rules:
                counts[original_mark] += 1
                interval.mark = rules[original_mark]

    # Save the modified TextGrid under the same base name in the output directory.
    output_file = os.path.join(out_dir, f'{os.path.splitext(filename)[0]}.TextGrid')
    tg.write(output_file)

# Print how many times each rule was applied: the "words" rules first, then a
# blank line, then the "phones" rules.
print('完成如下转换次数:')
report_sections = (
    (words_replace_rules, words_consonant_count),
    (phones_replace_rules, phones_consonant_count),
)
for section_index, (section_rules, section_counts) in enumerate(report_sections):
    if section_index > 0:
        print()
    for source_mark, target_mark in section_rules.items():
        print(f'{source_mark}→{target_mark}: {section_counts[source_mark]}')


{'a': 'A', 'i': 'I', 'u': 'U', 'e': 'E', 'o': 'O', 'k_a': 'ka/', 'k_i': 'ki/', 'k_u': 'ku/', 'k_e': 'ke/', 'k_o': 'ko', 'ky_a': 'kya', 'ky_e': 'kye', 'ky_i': 'kyi', 'ky_o': 'kyo', 'ky_u': 'kyu', 's_a': 'sa/', 's_e': 'se/', 's_i': 'si/', 's_o': 'so', 's_u': 'su/', 'sh_a': 'sha/', 'sh_e': 'she/', 'sh_i': 'shi/', 'sh_o': 'sho', 'sh_u': 'shu/', 't_a': 'ta/', 't_e': 'te/', 't_i': 'ti/', 't_o': 'to', 't_u': 'tu/', 'h_a': 'ha/', 'h_e': 'he/', 'h_i': 'hi/', 'h_o': 'ho/', 'h_u': 'hu/', 'hy_a': 'hya', 'hy_e': 'hye', 'hy_i': 'hyi', 'hy_o': 'hyo', 'hy_u': 'hyu', 'g_a': 'ga/', 'g_e': 'ge/', 'g_i': 'gi/', 'g_o': 'go', 'g_u': 'gu/', 'gy_a': 'gya', 'gy_e': 'gye', 'gy_i': 'gyi', 'gy_u': 'gyu', 'gy_o': 'gyo', 'z_a': 'za/', 'z_e': 'ze/', 'z_i': 'zi/', 'z_o': 'zo', 'z_u': 'zu/', 'd_a': 'da/', 'd_e': 'de/', 'd_i': 'di/', 'd_o': 'do', 'd_u': 'du/', 'b_a': 'ba/', 'b_e': 'be/', 'b_i': 'bi/', 'b_o': 'bo/', 'b_u': 'bu/', 'p_a': 'pa/', 'p_e': 'pe/', 'p_i': 'pi/', 'p_o': 'po/', 'p_u': 'pu/', 'py_a': 'pya', 'py_e'

Processing:   0%|          | 0/513 [00:00<?, ?it/s]

完成如下转换次数:
a→A: 222
i→I: 750
u→U: 128
e→E: 164
o→O: 134
k_a→ka/: 361
k_i→ki/: 382
k_u→ku/: 241
k_e→ke/: 140
k_o→ko: 255
ky_a→kya: 4
ky_e→kye: 0
ky_i→kyi: 0
ky_o→kyo: 14
ky_u→kyu: 8
s_a→sa/: 131
s_e→se/: 108
s_i→si/: 0
s_o→so: 111
s_u→su/: 165
sh_a→sha/: 8
sh_e→she/: 0
sh_i→shi/: 273
sh_o→sho: 45
sh_u→shu/: 8
t_a→ta/: 343
t_e→te/: 342
t_i→ti/: 7
t_o→to: 307
t_u→tu/: 7
h_a→ha/: 81
h_e→he/: 9
h_i→hi/: 90
h_o→ho/: 42
h_u→hu/: 0
hy_a→hya: 1
hy_e→hye: 0
hy_i→hyi: 0
hy_o→hyo: 1
hy_u→hyu: 0
g_a→ga/: 230
g_e→ge/: 37
g_i→gi/: 22
g_o→go: 25
g_u→gu/: 26
gy_a→gya: 0
gy_e→gye: 0
gy_i→gyi: 0
gy_u→gyu: 2
gy_o→gyo: 1
z_a→za/: 21
z_e→ze/: 31
z_i→zi/: 0
z_o→zo: 10
z_u→zu/: 80
d_a→da/: 220
d_e→de/: 165
d_i→di/: 7
d_o→do: 141
d_u→du/: 4
b_a→ba/: 87
b_e→be/: 32
b_i→bi/: 59
b_o→bo/: 47
b_u→bu/: 49
p_a→pa/: 25
p_e→pe/: 1
p_i→pi/: 14
p_o→po/: 14
p_u→pu/: 28
py_a→pya: 0
py_e→pye: 0
py_i→pyi: 0
py_o→pyo: 0
py_u→pyu: 3
j_a→ja: 32
j_e→je: 0
j_i→ji/: 92
j_o→jo: 10
j_u→ju/: 8
f_a→fa/: 6
f_e→fe/: 1
f_i→fi/: 14
f_o→fo/