Extract meta info for the player voice lines from your Left 4 Dead 2 (L4D2) game folder.

Meta info recorded for each file:
- name of the wav file
- length of the audio (milliseconds, as reported by pydub)
- sample rate (Hz)

In [7]:
import os
from pydub import AudioSegment
import json
import tqdm

# Installation directory of the game. Set the L4D2_GAME_ROOT environment
# variable to point at your own installation instead of editing this cell;
# the fallback is the path the notebook was originally written against.
GAME_ROOT = os.environ.get('L4D2_GAME_ROOT', r'D:\Download\steam\steam\steamapps\common\Left 4 Dead 2')
ROOT = os.path.join(GAME_ROOT, 'left4dead2')
# Path components shared by the base game and both DLC voice folders.
_SURVIVOR_VOICE = ('sound', 'player', 'survivor', 'voice')
VOICE_ROOT = os.path.join(ROOT, *_SURVIVOR_VOICE)
NPC_VOICE_ROOT = os.path.join(ROOT, 'sound', 'npc')
DLC1_VOICE_ROOT = os.path.join(GAME_ROOT, 'left4dead2_dlc1', *_SURVIVOR_VOICE)
DLC2_VOICE_ROOT = os.path.join(GAME_ROOT, 'left4dead2_dlc2', *_SURVIVOR_VOICE)
# Sub-folder names that are looked up below VOICE_ROOT.
CHARACTERS = ['coach', 'gambler', 'mechanic', 'producer']
# Sub-folder names that are looked up below DLC1_VOICE_ROOT.
DLC1_CHARACTER = ['coach', 'gambler', 'mechanic', 'producer', 'biker', 'teengirl', 'manager']
# Sub-folder names that are looked up below DLC2_VOICE_ROOT.
DLC2_CHARACTER = ['biker', 'teengirl', 'manager', 'namvet']


def read_json(path: str) -> dict:
    """Parse the UTF-8 encoded JSON file at ``path`` and return its content."""
    with open(path, mode='r', encoding='utf-8') as handle:
        return json.loads(handle.read())
def write_json(path: str, obj: dict):
    """Serialize ``obj`` to ``path`` as indented, UTF-8 encoded JSON.

    Missing parent directories are created first, so writing into an output
    folder that does not exist yet does not fail with ``FileNotFoundError``.

    Args:
        path: destination file; an existing file is overwritten.
        obj: JSON-serializable object to store.
    """
    parent = os.path.dirname(path)
    if parent:  # a bare file name has no directory part to create
        os.makedirs(parent, exist_ok=True)
    with open(path, 'w', encoding='utf-8') as fp:
        # ensure_ascii=False writes non-ASCII text as-is instead of \uXXXX escapes.
        json.dump(obj, fp, ensure_ascii=False, indent=4)

In [2]:
def make_meta_info(name: str):
    """Collect length and sample rate of every audio file of one character.

    Reads the files in ``VOICE_ROOT/<name>`` and writes the result to
    ``../transcription/<name>.meta.json`` as
    ``{file_name: {'length': ..., 'sr': ...}}``.

    Args:
        name: character sub-folder; must be one of ``CHARACTERS``.

    Raises:
        AssertionError: if ``name`` is not listed in ``CHARACTERS``.
    """
    assert name in CHARACTERS
    meta_info = {}

    voice_folder = os.path.join(VOICE_ROOT, name)
    # os.listdir returns entries in arbitrary order; sorting keeps the
    # generated JSON stable between runs and machines.
    for wav_file in tqdm.tqdm(sorted(os.listdir(voice_folder))):
        wav_path = os.path.join(voice_folder, wav_file)
        if not os.path.isfile(wav_path):
            # Sub-directories cannot be decoded as audio; skip them.
            continue
        audio: AudioSegment = AudioSegment.from_file(wav_path)
        # len() of a pydub AudioSegment is its duration (milliseconds per the pydub docs).
        meta_info[wav_file] = {'length': len(audio), 'sr': audio.frame_rate}
    write_json('../transcription/{}.meta.json'.format(name), meta_info)

# Build the meta JSON for every character listed in CHARACTERS.
for name in CHARACTERS:
    make_meta_info(name)
    print('finish handle', name)

100%|██████████| 2420/2420 [00:00<00:00, 10040.95it/s]


finish handle coach


100%|██████████| 2547/2547 [00:00<00:00, 12845.97it/s]


finish handle gambler


100%|██████████| 2616/2616 [00:00<00:00, 11847.46it/s]


finish handle mechanic


100%|██████████| 2090/2090 [00:00<00:00, 11508.67it/s]

finish handle producer





In [12]:
def align_two_folders(name: str):
    """Make ``<name>_zh.json`` contain exactly the keys of ``<name>.meta.json``.

    Files present in the meta data but absent from the zh data are reported
    and added with an empty string; keys that only exist in the zh data are
    counted, reported and removed. The zh file is rewritten afterwards.

    Args:
        name: character name; must be one of ``CHARACTERS``.
    """
    assert name in CHARACTERS
    zh_path = '../transcription/{}_zh.json'.format(name)
    meta_data = read_json('../transcription/{}.meta.json'.format(name))
    zh_data = read_json(zh_path)

    # Work out both differences up front, before zh_data is modified.
    stale = set(zh_data) - set(meta_data)
    missing = [wav for wav in meta_data if wav not in zh_data]

    for wav in missing:
        print('{} not in zh_wavs'.format(wav))
        zh_data[wav] = ''

    print('find {} useless wav files'.format(len(stale)))
    for wav in stale:
        del zh_data[wav]

    write_json(zh_path, zh_data)

# Align the zh JSON of every character with its meta file.
for name in CHARACTERS:
    align_two_folders(name)

find 0 useless wav files
find 0 useless wav files
find 0 useless wav files
find 0 useless wav files


In [9]:
# Map every entry of the model config's 'speakers' list (as a string) to its
# position in that list and store the mapping as JSON.
obj = read_json('../model/config.json')
vits_characters = {str(code): i for i, code in enumerate(obj['speakers'])}

write_json('../config/vit_character.json', vits_characters)

In [2]:
def make_npc_meta(name):
    """Collect length and sample rate of every audio file of one NPC folder.

    Reads the files in ``NPC_VOICE_ROOT/<name>`` and writes the result to
    ``../transcription/npc/<name>.meta.json`` as
    ``{file_name: {'length': ..., 'sr': ...}}``.

    Args:
        name: sub-folder name below ``NPC_VOICE_ROOT``.
    """
    meta_info = {}

    voice_folder = os.path.join(NPC_VOICE_ROOT, name)
    # os.listdir returns entries in arbitrary order; sorting keeps the
    # generated JSON stable between runs and machines.
    for wav_file in tqdm.tqdm(sorted(os.listdir(voice_folder))):
        wav_path = os.path.join(voice_folder, wav_file)
        if not os.path.isfile(wav_path):
            # Sub-directories cannot be decoded as audio; skip them.
            continue
        audio: AudioSegment = AudioSegment.from_file(wav_path)
        # len() of a pydub AudioSegment is its duration (milliseconds per the pydub docs).
        meta_info[wav_file] = {'length': len(audio), 'sr': audio.frame_rate}
    write_json('../transcription/npc/{}.meta.json'.format(name), meta_info)

# Build the meta JSON for each of the listed NPC voice folders.
for name in ['whitaker', 'virgil', 'pilot', 'soldier', 'soldier1', 'soldier2', '05_military']:
    make_npc_meta(name)
    print('make {} meta info'.format(name))

100%|██████████| 117/117 [00:00<00:00, 212.03it/s]


make whitaker meta info


100%|██████████| 121/121 [00:00<00:00, 125.32it/s]


make virgil meta info


100%|██████████| 1/1 [00:00<00:00, 142.85it/s]


make pilot meta info


100%|██████████| 3/3 [00:00<00:00, 95.18it/s]


make soldier meta info


100%|██████████| 65/65 [00:00<00:00, 119.59it/s]


make soldier1 meta info


100%|██████████| 34/34 [00:00<00:00, 117.48it/s]


make soldier2 meta info


100%|██████████| 14/14 [00:00<00:00, 111.94it/s]

make 05_military meta info





In [4]:
def make_dlc1_meta(name):
    """Collect length and sample rate of every audio file of one DLC1 character.

    Reads the files in ``DLC1_VOICE_ROOT/<name>`` and writes the result to
    ``../transcription/dlc1/<name>.meta.json`` as
    ``{file_name: {'length': ..., 'sr': ...}}``.

    Args:
        name: character sub-folder below ``DLC1_VOICE_ROOT``.
    """
    meta_info = {}

    voice_folder = os.path.join(DLC1_VOICE_ROOT, name)
    # os.listdir returns entries in arbitrary order; sorting keeps the
    # generated JSON stable between runs and machines.
    for wav_file in tqdm.tqdm(sorted(os.listdir(voice_folder))):
        wav_path = os.path.join(voice_folder, wav_file)
        if not os.path.isfile(wav_path):
            # Sub-directories cannot be decoded as audio; skip them.
            continue
        audio: AudioSegment = AudioSegment.from_file(wav_path)
        # len() of a pydub AudioSegment is its duration (milliseconds per the pydub docs).
        meta_info[wav_file] = {'length': len(audio), 'sr': audio.frame_rate}
    write_json('../transcription/dlc1/{}.meta.json'.format(name), meta_info)

# Build the meta JSON for every character listed in DLC1_CHARACTER.
for name in DLC1_CHARACTER:
    print('make {} meta info'.format(name))
    make_dlc1_meta(name)

make coach meta info


100%|██████████| 313/313 [00:02<00:00, 118.70it/s]


make gambler meta info


100%|██████████| 421/421 [00:02<00:00, 152.00it/s]


make mechanic meta info


100%|██████████| 463/463 [00:03<00:00, 119.23it/s]


make producer meta info


100%|██████████| 450/450 [00:03<00:00, 145.14it/s]


make biker meta info


100%|██████████| 206/206 [00:02<00:00, 100.82it/s]


make teengirl meta info


100%|██████████| 155/155 [00:01<00:00, 115.86it/s]


make manager meta info


100%|██████████| 138/138 [00:01<00:00, 135.48it/s]


In [8]:
def make_dlc2_meta(name):
    """Collect length and sample rate of every audio file of one DLC2 character.

    Reads the files in ``DLC2_VOICE_ROOT/<name>`` and writes the result to
    ``../transcription/dlc2/<name>.meta.json`` as
    ``{file_name: {'length': ..., 'sr': ...}}``.

    Args:
        name: character sub-folder below ``DLC2_VOICE_ROOT``.
    """
    meta_info = {}

    voice_folder = os.path.join(DLC2_VOICE_ROOT, name)
    # os.listdir returns entries in arbitrary order; sorting keeps the
    # generated JSON stable between runs and machines.
    for wav_file in tqdm.tqdm(sorted(os.listdir(voice_folder))):
        wav_path = os.path.join(voice_folder, wav_file)
        if not os.path.isfile(wav_path):
            # Sub-directories cannot be decoded as audio; skip them.
            continue
        audio: AudioSegment = AudioSegment.from_file(wav_path)
        # len() of a pydub AudioSegment is its duration (milliseconds per the pydub docs).
        meta_info[wav_file] = {'length': len(audio), 'sr': audio.frame_rate}
    write_json('../transcription/dlc2/{}.meta.json'.format(name), meta_info)

# Build the meta JSON for every character listed in DLC2_CHARACTER.
for name in DLC2_CHARACTER:
    print('make {} meta info'.format(name))
    make_dlc2_meta(name)

make biker meta info


100%|██████████| 1483/1483 [00:04<00:00, 344.71it/s]


make teengirl meta info


100%|██████████| 1571/1571 [00:04<00:00, 382.54it/s]


make manager meta info


100%|██████████| 1222/1222 [00:03<00:00, 378.91it/s]


make namvet meta info


100%|██████████| 1214/1214 [00:00<00:00, 6864.31it/s]
