In [1]:
import os
from pydub import AudioSegment
import json
import tqdm
import yaml

# Installation directory of the game. Every other path below is derived from it, so the
# notebook can be pointed at another install by setting the L4D2_GAME_DIR environment
# variable instead of editing several literals.
GAME_DIR = os.environ.get('L4D2_GAME_DIR', r'D:\Download\steam\steam\steamapps\common\Left 4 Dead 2')
# Location of the survivor voice files relative to a content folder.
_VOICE_SUBDIR = os.path.join('sound', 'player', 'survivor', 'voice')

ROOT = os.path.join(GAME_DIR, 'left4dead2')
VOICE_ROOT = os.path.join(ROOT, _VOICE_SUBDIR)
CHARACTERS = ['coach', 'gambler', 'mechanic', 'producer']
# The DLC1 list is the base list followed by three additional characters.
DLC1_CHARACTER = CHARACTERS + ['biker', 'teengirl', 'manager']
DLC1_VOICE_ROOT = os.path.join(GAME_DIR, 'left4dead2_dlc1', _VOICE_SUBDIR)
DLC2_VOICE_ROOT = os.path.join(GAME_DIR, 'left4dead2_dlc2', _VOICE_SUBDIR)

# Per-character settings; the cells below read config[name]['mod_name'].
# NOTE(review): yaml.FullLoader resolves more tags than a plain data file needs. If
# voice.yaml only holds plain mappings/strings, yaml.safe_load(fp) is the safer choice.
with open('../config/voice.yaml', 'r', encoding='utf-8') as fp:
    config = yaml.load(fp, Loader=yaml.FullLoader)

def read_json(path: str) -> dict:
    """Parse the UTF-8 encoded JSON file at ``path`` and return its content."""
    with open(path, mode='r', encoding='utf-8') as handle:
        return json.load(handle)

def write_json(path: str, obj: dict):
    """Serialize ``obj`` as JSON into the file at ``path``, replacing any existing content.

    The file is written as UTF-8 with a four-space indent, and non-ASCII characters are
    stored as-is rather than as ``\\uXXXX`` escapes.
    """
    with open(path, mode='w', encoding='utf-8') as handle:
        json.dump(obj, handle, indent=4, ensure_ascii=False)

In [2]:
def check_empty(name: str):
    """Report the entries of a character's transcription whose text is still blank.

    Reads ``../transcription/<name>_zh.json``. Every key whose value is empty after
    stripping whitespace is written, mapped to an empty string, to ``empty.<name>.json``
    in the working directory; a one-line summary is printed either way.

    Args:
        name: character identifier; must be one of ``CHARACTERS``.
    """
    assert name in CHARACTERS
    transcription = read_json(f'../transcription/{name}_zh.json')
    # Collect the keys whose text is blank once surrounding whitespace is removed.
    empty = {
        key: ''
        for key in transcription
        if len(transcription[key].strip()) == 0
    }

    if not empty:
        print('no unaccomplished words found in {}'.format(name))
        return

    empty_path = f'empty.{name}.json'
    write_json(empty_path, empty)
    print('{} unaccomplished words found in {}, write them to {}'.format(len(empty), name, empty_path))

# Run the blank-transcription check once for each character in CHARACTERS.
for name in CHARACTERS:
    check_empty(name)

no unaccomplished words found in coach
no unaccomplished words found in gambler
no unaccomplished words found in mechanic
no unaccomplished words found in producer


In [3]:
def merge_json(name):
    """Fold ``./empty.<name>.json`` back into ``../transcription/<name>_zh.json``.

    When the cache file does not exist a message is printed and nothing else happens.
    Otherwise every entry of the cache overwrites the entry with the same key in the
    transcription, the result is rewritten with its keys in sorted order, and the cache
    file is deleted.
    """
    cache_path = f'./empty.{name}.json'
    if not os.path.exists(cache_path):
        print('no cache in {}, skip'.format(name))
        return

    transcription_path = f'../transcription/{name}_zh.json'
    merged = read_json(transcription_path)
    # Entries from the cache take precedence over the existing transcription.
    merged.update(read_json(cache_path))

    # Rebuild the mapping so the keys are stored in sorted order.
    ordered = {key: merged[key] for key in sorted(merged)}
    write_json(transcription_path, ordered)
    os.remove(cache_path)

# Merge the cached entries back into the transcription of each character in CHARACTERS.
for name in CHARACTERS:
    merge_json(name)

no cache in coach, skip
no cache in gambler, skip
no cache in mechanic, skip
no cache in producer, skip


# Check `dist` sample rate and clip length

In [6]:
def check_base_info(name: str, voice_root: 'str | None' = None, dist_name: str = 'dist'):
    """Compare every generated voice clip of one character against the game's original.

    Each file in ``<voice_root>/<name>`` is paired with the file of the same name under
    ``../<dist_name>/<mod_name>/sound/player/survivor/voice/<name>``, where ``mod_name``
    comes from ``config[name]``. The generated clip must have the same sample rate and
    must not be shorter than the original. Clips that run noticeably longer than the
    original are counted, written to ``log/<name>.exceed.json`` and summarised on stdout.

    NOTE(review): this notebook defines ``check_base_info`` three times, once per content
    folder, and each later definition shadows the earlier one. ``voice_root`` and
    ``dist_name`` let a single definition serve every folder. The log file name only
    contains the character name, so checks of the same character against different
    folders overwrite each other's log.

    Args:
        name: character folder name, also used as the key into ``config``.
        voice_root: folder holding the original per-character voice folders;
            ``None`` selects ``VOICE_ROOT``.
        dist_name: folder next to the notebook's parent that holds the generated mod.

    Raises:
        AssertionError: if the two folders hold a different number of entries, a
            generated file is missing, the sample rates differ, or a generated clip is
            shorter than its original.
    """
    if voice_root is None:
        voice_root = VOICE_ROOT
    target_folder = os.path.join(voice_root, name)
    result_folder = os.path.join('..', dist_name, config[name]['mod_name'], 'sound', 'player', 'survivor', 'voice', name)

    # List the original folder once and reuse the listing for the count check and the loop.
    target_files = os.listdir(target_folder)
    result_count = len(os.listdir(result_folder))
    assert len(target_files) == result_count, f"{name}: {len(target_files)} original files vs {result_count} generated files"

    os.makedirs('log', exist_ok=True)
    exceed_log = {}

    for wav_file in tqdm.tqdm(target_files):
        target_path = os.path.join(target_folder, wav_file)
        result_path = os.path.join(result_folder, wav_file)
        assert os.path.exists(result_path), f"{result_path} doesn't exist"

        target_audio: AudioSegment = AudioSegment.from_file(target_path)
        result_audio: AudioSegment = AudioSegment.from_file(result_path)

        target_sr = target_audio.frame_rate
        result_sr = result_audio.frame_rate
        # len() of a pydub AudioSegment is its duration in milliseconds.
        target_len = len(target_audio)
        result_len = len(result_audio)

        assert target_sr == result_sr, f"{wav_file} doesn't share the same sample rate: {target_sr}(expect) vs {result_sr}(fact)"
        assert target_len <= result_len, f"{wav_file} doesn't share the same length: {target_len}(expect) vs {result_len}(fact)"
        # Only originals of at least one second are considered, and that test runs first
        # so a zero-length original can never reach the division below. A relative
        # overrun of 20% or more is then reported.
        if target_len >= 1000 and (result_len - target_len) / target_len >= 0.2:
            exceed_log[wav_file] = { 'expect': target_len, 'fact': result_len }

    write_json(f'log/{name}.exceed.json', exceed_log)
    print('{} exceed count : {}'.format(name, len(exceed_log)))


# This cell checks the base-game folder (VOICE_ROOT / dist), so it iterates the base
# character list. The output recorded for this cell covers exactly these four names.
for name in CHARACTERS:
    print('check', name)
    check_base_info(name)

check coach


100%|██████████| 2420/2420 [00:00<00:00, 5197.99it/s]


coach exceed count : 0
check gambler


100%|██████████| 2547/2547 [00:00<00:00, 5986.58it/s]


gambler exceed count : 1
check mechanic


100%|██████████| 2616/2616 [00:00<00:00, 5561.38it/s]


mechanic exceed count : 3
check producer


100%|██████████| 2090/2090 [00:00<00:00, 5636.10it/s]

producer exceed count : 0





# Check `dlc1_dist` sample rate and clip length

In [2]:
def check_base_info(name: str, voice_root: 'str | None' = None, dist_name: str = 'dlc1_dist'):
    """Compare every generated DLC1 voice clip of one character against the original.

    Each file in ``<voice_root>/<name>`` is paired with the file of the same name under
    ``../<dist_name>/<mod_name>/sound/player/survivor/voice/<name>``, where ``mod_name``
    comes from ``config[name]``. The generated clip must have the same sample rate and
    must not be shorter than the original. Clips that run noticeably longer than the
    original are counted, written to ``log/<name>.exceed.json`` and summarised on stdout.

    NOTE(review): this notebook defines ``check_base_info`` three times, once per content
    folder, and each later definition shadows the earlier one. ``voice_root`` and
    ``dist_name`` let a single definition serve every folder. The log file name only
    contains the character name, so checks of the same character against different
    folders overwrite each other's log.

    Args:
        name: character folder name, also used as the key into ``config``.
        voice_root: folder holding the original per-character voice folders;
            ``None`` selects ``DLC1_VOICE_ROOT``.
        dist_name: folder next to the notebook's parent that holds the generated mod.

    Raises:
        AssertionError: if the two folders hold a different number of entries, a
            generated file is missing, the sample rates differ, or a generated clip is
            shorter than its original.
    """
    if voice_root is None:
        voice_root = DLC1_VOICE_ROOT
    target_folder = os.path.join(voice_root, name)
    result_folder = os.path.join('..', dist_name, config[name]['mod_name'], 'sound', 'player', 'survivor', 'voice', name)

    # List the original folder once and reuse the listing for the count check and the loop.
    target_files = os.listdir(target_folder)
    result_count = len(os.listdir(result_folder))
    assert len(target_files) == result_count, f"{name}: {len(target_files)} original files vs {result_count} generated files"

    os.makedirs('log', exist_ok=True)
    exceed_log = {}

    for wav_file in tqdm.tqdm(target_files):
        target_path = os.path.join(target_folder, wav_file)
        result_path = os.path.join(result_folder, wav_file)
        assert os.path.exists(result_path), f"{result_path} doesn't exist"

        target_audio: AudioSegment = AudioSegment.from_file(target_path)
        result_audio: AudioSegment = AudioSegment.from_file(result_path)

        target_sr = target_audio.frame_rate
        result_sr = result_audio.frame_rate
        # len() of a pydub AudioSegment is its duration in milliseconds.
        target_len = len(target_audio)
        result_len = len(result_audio)

        assert target_sr == result_sr, f"{wav_file} doesn't share the same sample rate: {target_sr}(expect) vs {result_sr}(fact)"
        assert target_len <= result_len, f"{wav_file} doesn't share the same length: {target_len}(expect) vs {result_len}(fact)"
        # Only originals of at least one second are considered, and that test runs first
        # so a zero-length original can never reach the division below. A relative
        # overrun of 20% or more is then reported.
        if target_len >= 1000 and (result_len - target_len) / target_len >= 0.2:
            exceed_log[wav_file] = { 'expect': target_len, 'fact': result_len }

    write_json(f'log/{name}.exceed.json', exceed_log)
    print('{} exceed count : {}'.format(name, len(exceed_log)))


# Check each character in DLC1_CHARACTER, printing the name before its check starts.
for name in DLC1_CHARACTER:
    print('check', name)
    check_base_info(name)

check coach


100%|██████████| 313/313 [00:01<00:00, 281.59it/s]


coach exceed count : 3
check gambler


100%|██████████| 421/421 [00:01<00:00, 311.45it/s]


gambler exceed count : 15
check mechanic


100%|██████████| 463/463 [00:01<00:00, 283.02it/s]


mechanic exceed count : 22
check producer


100%|██████████| 450/450 [00:01<00:00, 284.59it/s]


producer exceed count : 7
check biker


100%|██████████| 206/206 [00:00<00:00, 291.12it/s]


biker exceed count : 2
check teengirl


100%|██████████| 155/155 [00:00<00:00, 288.34it/s]


teengirl exceed count : 1
check manager


100%|██████████| 138/138 [00:00<00:00, 287.75it/s]

manager exceed count : 1





In [None]:
def check_base_info(name: str, voice_root: 'str | None' = None, dist_name: str = 'dlc2_dist'):
    """Compare every generated DLC2 voice clip of one character against the original.

    Each file in ``<voice_root>/<name>`` is paired with the file of the same name under
    ``../<dist_name>/<mod_name>/sound/player/survivor/voice/<name>``, where ``mod_name``
    comes from ``config[name]``. The generated clip must have the same sample rate and
    must not be shorter than the original. Clips that run noticeably longer than the
    original are counted, written to ``log/<name>.exceed.json`` and summarised on stdout.

    NOTE(review): this notebook defines ``check_base_info`` three times, once per content
    folder, and each later definition shadows the earlier one. ``voice_root`` and
    ``dist_name`` let a single definition serve every folder. The log file name only
    contains the character name, so checks of the same character against different
    folders overwrite each other's log.

    Args:
        name: character folder name, also used as the key into ``config``.
        voice_root: folder holding the original per-character voice folders;
            ``None`` selects ``DLC2_VOICE_ROOT``.
        dist_name: folder next to the notebook's parent that holds the generated mod.

    Raises:
        AssertionError: if the two folders hold a different number of entries, a
            generated file is missing, the sample rates differ, or a generated clip is
            shorter than its original.
    """
    if voice_root is None:
        voice_root = DLC2_VOICE_ROOT
    target_folder = os.path.join(voice_root, name)
    result_folder = os.path.join('..', dist_name, config[name]['mod_name'], 'sound', 'player', 'survivor', 'voice', name)

    # List the original folder once and reuse the listing for the count check and the loop.
    target_files = os.listdir(target_folder)
    result_count = len(os.listdir(result_folder))
    assert len(target_files) == result_count, f"{name}: {len(target_files)} original files vs {result_count} generated files"

    os.makedirs('log', exist_ok=True)
    exceed_log = {}

    for wav_file in tqdm.tqdm(target_files):
        target_path = os.path.join(target_folder, wav_file)
        result_path = os.path.join(result_folder, wav_file)
        assert os.path.exists(result_path), f"{result_path} doesn't exist"

        target_audio: AudioSegment = AudioSegment.from_file(target_path)
        result_audio: AudioSegment = AudioSegment.from_file(result_path)

        target_sr = target_audio.frame_rate
        result_sr = result_audio.frame_rate
        # len() of a pydub AudioSegment is its duration in milliseconds.
        target_len = len(target_audio)
        result_len = len(result_audio)

        assert target_sr == result_sr, f"{wav_file} doesn't share the same sample rate: {target_sr}(expect) vs {result_sr}(fact)"
        assert target_len <= result_len, f"{wav_file} doesn't share the same length: {target_len}(expect) vs {result_len}(fact)"
        # Only originals of at least one second are considered, and that test runs first
        # so a zero-length original can never reach the division below. A relative
        # overrun of 20% or more is then reported.
        if target_len >= 1000 and (result_len - target_len) / target_len >= 0.2:
            exceed_log[wav_file] = { 'expect': target_len, 'fact': result_len }

    write_json(f'log/{name}.exceed.json', exceed_log)
    print('{} exceed count : {}'.format(name, len(exceed_log)))


# DLC2_CHARACTER is not defined in any visible cell of this notebook, so the loop below
# would raise NameError on a fresh kernel. It is built here from the known character
# names that actually have a voice folder under DLC2_VOICE_ROOT.
# NOTE(review): assumes DLC2 uses a subset of the DLC1 character folders; replace with an
# explicit list if that is not the case.
DLC2_CHARACTER = [
    character for character in DLC1_CHARACTER
    if os.path.isdir(os.path.join(DLC2_VOICE_ROOT, character))
]

for name in DLC2_CHARACTER:
    print('check', name)
    check_base_info(name)