Skip to content

Commit

Permalink
Optimized speaker-based processing in subsets
Browse files Browse the repository at this point in the history
  • Loading branch information
mmcauliffe committed Apr 18, 2021
1 parent 751eda1 commit 83ccb5e
Show file tree
Hide file tree
Showing 18 changed files with 246 additions and 92 deletions.
4 changes: 3 additions & 1 deletion montreal_forced_aligner/command_line/align.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from montreal_forced_aligner.config import TEMP_DIR, align_yaml_to_config, load_basic_align
from montreal_forced_aligner.utils import get_available_acoustic_languages, get_pretrained_acoustic_path, \
get_available_dict_languages, get_dictionary_path
from montreal_forced_aligner.helper import setup_logger
from montreal_forced_aligner.helper import setup_logger, log_config
from montreal_forced_aligner.exceptions import ArgumentError


Expand All @@ -39,6 +39,8 @@ def align_corpus(args, unknown_args=None):
print('Cleaning old directory!')
shutil.rmtree(data_directory, ignore_errors=True)
logger = setup_logger(command, data_directory)
logger.debug('ALIGN CONFIG:')
log_config(logger, align_config)
if os.path.exists(conf_path):
with open(conf_path, 'r') as f:
conf = yaml.load(f, Loader=yaml.SafeLoader)
Expand Down
6 changes: 5 additions & 1 deletion montreal_forced_aligner/command_line/train_and_align.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from montreal_forced_aligner.aligner import TrainableAligner
from montreal_forced_aligner.config import TEMP_DIR, train_yaml_to_config, load_basic_train
from montreal_forced_aligner.utils import get_available_dict_languages, get_dictionary_path
from montreal_forced_aligner.helper import setup_logger
from montreal_forced_aligner.helper import setup_logger, log_config

from montreal_forced_aligner.exceptions import ArgumentError

Expand Down Expand Up @@ -38,6 +38,10 @@ def align_corpus(args, unknown_args=None):
print('Cleaning old directory!')
shutil.rmtree(data_directory, ignore_errors=True)
logger = setup_logger(command, data_directory)
logger.debug('TRAIN CONFIG:')
log_config(logger, train_config)
logger.debug('ALIGN CONFIG:')
log_config(logger, align_config)
if args.debug:
logger.warning('Running in DEBUG mode, may have impact on performance and disk usage.')
if os.path.exists(conf_path):
Expand Down
4 changes: 3 additions & 1 deletion montreal_forced_aligner/command_line/train_dictionary.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from montreal_forced_aligner.config import TEMP_DIR, align_yaml_to_config, load_basic_align
from montreal_forced_aligner.utils import get_available_acoustic_languages, get_pretrained_acoustic_path, \
get_available_dict_languages, get_dictionary_path
from montreal_forced_aligner.helper import setup_logger
from montreal_forced_aligner.helper import setup_logger, log_config
from montreal_forced_aligner.exceptions import ArgumentError


Expand All @@ -37,6 +37,8 @@ def train_dictionary(args):
print('Cleaning old directory!')
shutil.rmtree(data_directory, ignore_errors=True)
logger = setup_logger(command, data_directory)
logger.debug('ALIGN CONFIG:')
log_config(logger, align_config)
if os.path.exists(conf_path):
with open(conf_path, 'r') as f:
conf = yaml.load(f, Loader=yaml.SafeLoader)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from montreal_forced_aligner.dictionary import Dictionary
from montreal_forced_aligner.aligner import PretrainedAligner
from montreal_forced_aligner.config import TEMP_DIR, train_yaml_to_config, load_basic_train_ivector
from montreal_forced_aligner.helper import setup_logger
from montreal_forced_aligner.helper import setup_logger, log_config
from montreal_forced_aligner.utils import get_available_acoustic_languages, get_pretrained_acoustic_path, \
get_available_dict_languages, get_dictionary_path
from montreal_forced_aligner.models import AcousticModel
Expand Down Expand Up @@ -38,6 +38,10 @@ def train_ivector(args):
print('Cleaning old directory!')
shutil.rmtree(data_directory, ignore_errors=True)
logger = setup_logger(command, data_directory)
logger.debug('TRAIN CONFIG:')
log_config(logger, train_config)
logger.debug('ALIGN CONFIG:')
log_config(logger, align_config)

if os.path.exists(conf_path):
with open(conf_path, 'r') as f:
Expand Down
4 changes: 3 additions & 1 deletion montreal_forced_aligner/command_line/transcribe.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from montreal_forced_aligner.dictionary import Dictionary
from montreal_forced_aligner.transcriber import Transcriber
from montreal_forced_aligner.models import AcousticModel, LanguageModel, FORMAT
from montreal_forced_aligner.helper import setup_logger
from montreal_forced_aligner.helper import setup_logger, log_config
from montreal_forced_aligner.config import TEMP_DIR, transcribe_yaml_to_config, load_basic_transcribe, save_config
from montreal_forced_aligner.utils import get_available_acoustic_languages, get_pretrained_acoustic_path, \
get_available_lm_languages, get_pretrained_language_model_path, \
Expand Down Expand Up @@ -37,6 +37,8 @@ def transcribe_corpus(args):
print('Cleaning old directory!')
shutil.rmtree(data_directory, ignore_errors=True)
logger = setup_logger(command, data_directory)
logger.debug('TRANSCRIBE CONFIG:')
log_config(logger, transcribe_config)
os.makedirs(data_directory, exist_ok=True)
os.makedirs(args.output_directory, exist_ok=True)
os.makedirs(data_directory, exist_ok=True)
Expand Down
1 change: 1 addition & 0 deletions montreal_forced_aligner/config/align_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ def update(self, data):

def update_from_args(self, args):
super(AlignConfig, self).update_from_args(args)
self.feature_config.update_from_args(args)
if self.retry_beam <= self.beam:
self.retry_beam = self.beam * 4

Expand Down
18 changes: 9 additions & 9 deletions montreal_forced_aligner/corpus/align_corpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -374,12 +374,6 @@ def check_warnings(self):

if len(self.speak_utt_mapping) < self.num_jobs:
self.num_jobs = len(self.speak_utt_mapping)
if self.num_jobs < len(self.sample_rates.keys()):
self.num_jobs = len(self.sample_rates.keys())
msg = 'The number of jobs was set to {}, due to the different sample rates in the dataset. ' \
'If you would like to use fewer parallel jobs, ' \
'please resample all wav files to the same sample rate.'.format(self.num_jobs)
self.logger.warning(msg)

def save_text_file(self, file_name):
if self.segments:
Expand Down Expand Up @@ -617,6 +611,7 @@ def create_subset(self, subset, feature_config):
split_directory = self.split_directory()
subset_directory = os.path.join(self.output_directory, 'subset_{}'.format(subset))
subset_utt_path = os.path.join(subset_directory, 'included_utts.txt')
subset_speaker_path = os.path.join(subset_directory, 'included_speakers.txt')
if os.path.exists(subset_utt_path):
subset_utts = []
with open(subset_utt_path, 'r', encoding='utf8') as f:
Expand All @@ -638,7 +633,14 @@ def create_subset(self, subset, feature_config):
with open(subset_utt_path, 'w', encoding='utf8') as f:
for u in subset_utts:
f.write('{}\n'.format(u))
for i, s in enumerate(self.speakers):
subset_speakers = set()
for u in subset_utts:
subset_speakers.add(self.utt_speak_mapping[u])
subset_speakers = sorted(subset_speakers)
with open(subset_speaker_path, 'w') as f:
for s in subset_speakers:
f.write(s + '\n')
for i, s in enumerate(subset_speakers):
for fn in ['text.{}', 'text.{}.int', 'utt2spk.{}']:
sub_path = os.path.join(subset_directory, fn.format(i))
with open(os.path.join(split_directory, fn.format(i)), 'r', encoding='utf8') as inf, \
Expand All @@ -648,7 +650,6 @@ def create_subset(self, subset, feature_config):
if s[0] not in subset_utts:
continue
outf.write(line)
subset_speakers = []
sub_path = os.path.join(subset_directory, 'spk2utt.{}'.format(i))
with open(os.path.join(split_directory, 'spk2utt.{}'.format(i)), 'r', encoding='utf8') as inf, \
open(sub_path, 'w', encoding='utf8') as outf:
Expand All @@ -659,7 +660,6 @@ def create_subset(self, subset, feature_config):
if not filtered_utts:
continue
outf.write('{} {}\n'.format(speaker, ' '.join(filtered_utts)))
subset_speakers.append(speaker)
sub_path = os.path.join(subset_directory, 'cmvn.{}.scp'.format(i))
with open(os.path.join(split_directory, 'cmvn.{}.scp'.format(i)), 'r', encoding='utf8') as inf, \
open(sub_path, 'w', encoding='utf8') as outf:
Expand Down
2 changes: 2 additions & 0 deletions montreal_forced_aligner/corpus/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,8 @@ def parse_features_logs(self):

def speaker_utterance_info(self):
num_speakers = len(self.speak_utt_mapping.keys())
if not num_speakers:
raise CorpusError('Could not find any utterances in {}'.format(self.directory))
average_utterances = sum(len(x) for x in self.speak_utt_mapping.values()) / num_speakers
msg = 'Number of speakers in corpus: {}, average number of utterances per speaker: {}'.format(num_speakers,
average_utterances)
Expand Down
3 changes: 2 additions & 1 deletion montreal_forced_aligner/features/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from .processing import mfcc, compute_vad

from ..helper import thirdparty_binary, load_scp, save_speaker_groups
from ..config.base_config import BaseConfig


def make_safe(value):
Expand All @@ -13,7 +14,7 @@ def make_safe(value):
return str(value)


class FeatureConfig(object):
class FeatureConfig(BaseConfig):
"""
Class to store configuration information about MFCC generation
Expand Down
6 changes: 5 additions & 1 deletion montreal_forced_aligner/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from typing import Any, List, Tuple
import logging
import sys

import yaml
from .exceptions import ThirdpartyError, KaldiProcessingError

Labels = List[Any]
Expand Down Expand Up @@ -198,6 +198,10 @@ def setup_logger(identifier, output_directory):

return logger

def log_config(logger, config):
    """
    Write a YAML dump of a configuration object to the debug log

    Parameters
    ----------
    logger : logging.Logger
        Logger that receives the dump as a single DEBUG-level record
    config : object
        Configuration to serialize; it is passed unchanged to ``yaml.dump``
    """
    # The dump is only ever emitted at DEBUG level, so skip the serialization
    # work entirely when this logger would discard the record anyway.
    if not logger.isEnabledFor(logging.DEBUG):
        return
    logger.debug(yaml.dump(config))

def parse_logs(log_directory):
error_logs = []
Expand Down
17 changes: 12 additions & 5 deletions montreal_forced_aligner/multiprocessing/alignment.py
Original file line number Diff line number Diff line change
Expand Up @@ -551,6 +551,7 @@ def ali_to_textgrid_func(model_directory, word_path, split_directory, job_name,

subprocess.call([thirdparty_binary('nbest-to-ctm'),
'--frame-shift={}'.format(frame_shift),
'--precision=3',
'ark:' + aligned_path,
word_ctm_path],
stderr=log_file)
Expand All @@ -560,6 +561,7 @@ def ali_to_textgrid_func(model_directory, word_path, split_directory, job_name,
stderr=log_file)
nbest_proc = subprocess.Popen([thirdparty_binary('nbest-to-ctm'),
'--frame-shift={}'.format(frame_shift),
'--precision=3',
"ark:-", phone_ctm_path],
stdin=phone_proc.stdout,
stderr=log_file)
Expand Down Expand Up @@ -633,7 +635,7 @@ def convert_ali_to_textgrids(align_config, output_directory, model_directory, di
phone_ctm[k] = v
else:
phone_ctm[k].update(v)
ctm_to_textgrid(word_ctm, phone_ctm, output_directory, corpus, dictionary)
ctm_to_textgrid(word_ctm, phone_ctm, output_directory, corpus, dictionary, frameshift=frame_shift)


def tree_stats_func(directory, ci_phones, mdl, feature_string, ali_path, job_name):
Expand Down Expand Up @@ -685,10 +687,15 @@ def tree_stats(directory, align_directory, split_directory, ci_phones, speakers,
tree_accs = [os.path.join(directory, '{}.treeacc'.format(x)) for x in range(len(speakers))]
log_path = os.path.join(directory, 'log', 'sum_tree_acc.log')
with open(log_path, 'w', encoding='utf8') as log_file:
subprocess.call([thirdparty_binary('sum-tree-stats'), os.path.join(directory, 'treeacc')] +
tree_accs, stderr=log_file)
for f in tree_accs:
os.remove(f)
tmp_stats_path = os.path.join(directory, 'tempacc')
output_stats_path = os.path.join(directory, 'treeacc')
for af in tree_accs:
if not os.path.exists(output_stats_path):
os.rename(af, output_stats_path)
continue
subprocess.call([thirdparty_binary('sum-tree-stats'), tmp_stats_path, output_stats_path, af], stderr=log_file)
os.remove(af)
os.rename(tmp_stats_path, output_stats_path)


def convert_alignments_func(directory, align_directory, job_name):
Expand Down
15 changes: 12 additions & 3 deletions montreal_forced_aligner/textgrid.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,14 +70,23 @@ def ctm_to_textgrid(word_ctm, phone_ctm, out_directory, corpus, dictionary, fram
wordtier = IntervalTier(name='words', maxTime=maxtime)
phonetier = IntervalTier(name='phones', maxTime=maxtime)
phonetier_len = len(phone_ctm[k][speaker])
for interval in v:
for interval in sorted(v):
if maxtime - interval[1] < frameshift: # Fix rounding issues
interval[1] = maxtime
wordtier.add(*interval)
try:
wordtier.add(*interval)
except ValueError:

interval[0] = wordtier[-1].maxTime
wordtier.add(*interval)
for j, interval in enumerate(phone_ctm[k][speaker]):
if j == phonetier_len - 1: # sync last phone boundary to end of audio file
interval[1] = maxtime
phonetier.add(*interval)
try:
phonetier.add(*interval)
except ValueError:
interval[0] = phonetier[-1].maxTime
phonetier.add(*interval)
tg.append(wordtier)
tg.append(phonetier)
relative = corpus.file_directory_mapping[k]
Expand Down
59 changes: 45 additions & 14 deletions montreal_forced_aligner/trainers/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,15 @@ def align(self, subset, call_back=None):
align_data_directory = self.corpus.split_directory()
else:
align_data_directory = self.corpus.subset_directory(subset, self.feature_config)

subset_speaker_path = os.path.join(align_data_directory, 'included_speakers.txt')
if os.path.exists(subset_speaker_path):
speakers = []
with open(subset_speaker_path, 'r', encoding='utf8') as f:
for line in f:
speakers.append(line.strip())
else:
speakers = self.corpus.speakers
try:
log_dir = os.path.join(self.align_directory, 'log')
os.makedirs(log_dir, exist_ok=True)
Expand All @@ -222,10 +231,10 @@ def align(self, subset, call_back=None):
shutil.copyfile(os.path.join(self.train_directory, 'final.occs'),
os.path.join(self.align_directory, 'final.occs'))
compile_train_graphs(self.align_directory, self.dictionary.output_directory,
align_data_directory, self.corpus.speakers, self.corpus.num_jobs, self)
align_data_directory, speakers, self.corpus.num_jobs, self)
align('final', self.align_directory, align_data_directory,
self.dictionary.optional_silence_csl,
self.corpus.speakers, self.corpus.num_jobs, self, self.align_directory)
speakers, self.corpus.num_jobs, self, self.align_directory)
self.save(os.path.join(self.align_directory, 'acoustic_model.zip'))
except Exception as e:
with open(dirty_path, 'w'):
Expand Down Expand Up @@ -254,6 +263,15 @@ def train(self, call_back=None):
iters = tqdm(range(1, self.num_iterations))
else:
iters = range(1, self.num_iterations)

subset_speaker_path = os.path.join(self.data_directory, 'included_speakers.txt')
if os.path.exists(subset_speaker_path):
speakers = []
with open(subset_speaker_path, 'r', encoding='utf8') as f:
for line in f:
speakers.append(line.strip())
else:
speakers = self.corpus.speakers
try:
for i in iters:
model_path = os.path.join(self.train_directory, '{}.mdl'.format(i))
Expand All @@ -264,31 +282,40 @@ def train(self, call_back=None):
if i in self.realignment_iterations:
align(i, self.train_directory, self.data_directory,
self.dictionary.optional_silence_csl,
self.corpus.speakers, self.corpus.num_jobs, self)
speakers, self.corpus.num_jobs, self)
if self.debug:
compute_alignment_improvement(i, self, self.train_directory, self.corpus.speakers,
compute_alignment_improvement(i, self, self.train_directory, speakers,
self.corpus.num_jobs)
acc_stats(i, self.train_directory, self.data_directory, self.corpus.speakers, self.corpus.num_jobs,
acc_stats(i, self.train_directory, self.data_directory, speakers, self.corpus.num_jobs,
self)
log_path = os.path.join(self.log_directory, 'update.{}.log'.format(i))
with open(log_path, 'w') as logf:
with open(log_path, 'w') as log_file:
acc_files = [os.path.join(self.train_directory, '{}.{}.acc'.format(i, x))
for x in range(len(self.corpus.speakers))]
for x in range(len(speakers))]
tmp_stats_path = os.path.join(self.train_directory, 'temp.acc')
output_stats_path = os.path.join(self.train_directory, '{}.acc'.format(i))
for af in acc_files:
if not os.path.exists(output_stats_path):
os.rename(af, output_stats_path)
continue
sum_proc = subprocess.Popen([thirdparty_binary('gmm-sum-accs'),
tmp_stats_path,
output_stats_path, af],
stderr=log_file)
sum_proc.communicate()
os.remove(af)
os.rename(tmp_stats_path, output_stats_path)
est_proc = subprocess.Popen([thirdparty_binary('gmm-est'),
'--write-occs=' + occs_path,
'--mix-up=' + str(num_gauss), '--power=' + str(self.power),
model_path,
"{} - {}|".format(thirdparty_binary('gmm-sum-accs'),
' '.join(map(make_path_safe, acc_files))),
output_stats_path,
next_model_path],
stderr=logf)
stderr=log_file)
est_proc.communicate()
if not self.debug:
for f in acc_files:
os.remove(f)
if not os.path.exists(next_model_path):
raise (Exception('There was an error training in iteration {}, please check the logs.'.format(i)))
self.parse_log_directory(self.log_directory, i, self.corpus.speakers, call_back)
self.parse_log_directory(self.log_directory, i, speakers, call_back)
if i < self.final_gaussian_iteration:
num_gauss += self.gaussian_increment
shutil.copy(os.path.join(self.train_directory, '{}.mdl'.format(self.num_iterations)),
Expand All @@ -306,6 +333,10 @@ def train(self, call_back=None):
os.remove(os.path.join(self.train_directory, '{}.occs'.format(i)))
except FileNotFoundError:
pass
try:
os.remove(os.path.join(self.train_directory, '{}.acc'.format(i)))
except FileNotFoundError:
pass
except Exception as e:
with open(dirty_path, 'w'):
pass
Expand Down
Loading

0 comments on commit 83ccb5e

Please sign in to comment.