Skip to content

Commit

Permalink
Merge 63e7f24 into 61b7329
Browse files Browse the repository at this point in the history
  • Loading branch information
mmcauliffe committed Apr 20, 2021
2 parents 61b7329 + 63e7f24 commit bf10ecb
Show file tree
Hide file tree
Showing 26 changed files with 129 additions and 247 deletions.
1 change: 1 addition & 0 deletions .deepsource.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@ enabled = true

[analyzers.meta]
runtime_version = "3.x.x"
max_line_length = 120
14 changes: 14 additions & 0 deletions docs/source/changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,20 @@
Changelog
=========

2.0.0a12
--------

- Updated how sample rates are handled. MFA now generates features between 80 Hz and 7800 Hz and allows downsampling and
upsampling, so there will be no more errors or warnings about unsupported sample rates or speakers with different sample
rates
- Fixed a bug where some options for generating MFCCs weren't properly getting picked up (e.g., snip-edges)
- (EXPERIMENTAL) Added better support for varying frame shift. In :code:`mfa align`, you can now pass the flag :code:`--frame_shift 1` to align
with one-millisecond shifts between frames. Please note that this is still experimental: it increases computation
time significantly, and the correct values of :code:`self_loop_scale`, :code:`transition_scale`,
and :code:`acoustic_scale` for generating good alignments are not yet fully known.
- Fixed a bug in G2P training when a relative path is given for the output model
- Cleaned up validator output

2.0.0a11
--------

Expand Down
2 changes: 1 addition & 1 deletion montreal_forced_aligner/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
__ver_major__ = 2
__ver_minor__ = 0
__ver_patch__ = '0a11'
__ver_patch__ = '0a13'
__version__ = "{}.{}.{}".format(__ver_major__, __ver_minor__, __ver_patch__)

__all__ = ['aligner', 'command_line', 'models', 'corpus', 'config', 'dictionary', 'exceptions',
Expand Down
4 changes: 3 additions & 1 deletion montreal_forced_aligner/command_line/align.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from montreal_forced_aligner.config import TEMP_DIR, align_yaml_to_config, load_basic_align
from montreal_forced_aligner.utils import get_available_acoustic_languages, get_pretrained_acoustic_path, \
get_available_dict_languages, get_dictionary_path
from montreal_forced_aligner.helper import setup_logger
from montreal_forced_aligner.helper import setup_logger, log_config
from montreal_forced_aligner.exceptions import ArgumentError


Expand All @@ -39,6 +39,8 @@ def align_corpus(args, unknown_args=None):
print('Cleaning old directory!')
shutil.rmtree(data_directory, ignore_errors=True)
logger = setup_logger(command, data_directory)
logger.debug('ALIGN CONFIG:')
log_config(logger, align_config)
if os.path.exists(conf_path):
with open(conf_path, 'r') as f:
conf = yaml.load(f, Loader=yaml.SafeLoader)
Expand Down
3 changes: 2 additions & 1 deletion montreal_forced_aligner/command_line/g2p.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
from textgrid import TextGrid, IntervalTier

from montreal_forced_aligner.multiprocessing.corpus import parse_transcription
from montreal_forced_aligner.corpus.align_corpus import load_text, AlignableCorpus
from montreal_forced_aligner.corpus.align_corpus import AlignableCorpus
from montreal_forced_aligner.helper import load_text

from montreal_forced_aligner.g2p.generator import PyniniDictionaryGenerator as Generator
from montreal_forced_aligner.models import G2PModel
Expand Down
6 changes: 5 additions & 1 deletion montreal_forced_aligner/command_line/train_and_align.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from montreal_forced_aligner.aligner import TrainableAligner
from montreal_forced_aligner.config import TEMP_DIR, train_yaml_to_config, load_basic_train
from montreal_forced_aligner.utils import get_available_dict_languages, get_dictionary_path
from montreal_forced_aligner.helper import setup_logger
from montreal_forced_aligner.helper import setup_logger, log_config

from montreal_forced_aligner.exceptions import ArgumentError

Expand Down Expand Up @@ -38,6 +38,10 @@ def align_corpus(args, unknown_args=None):
print('Cleaning old directory!')
shutil.rmtree(data_directory, ignore_errors=True)
logger = setup_logger(command, data_directory)
logger.debug('TRAIN CONFIG:')
log_config(logger, train_config)
logger.debug('ALIGN CONFIG:')
log_config(logger, align_config)
if args.debug:
logger.warning('Running in DEBUG mode, may have impact on performance and disk usage.')
if os.path.exists(conf_path):
Expand Down
4 changes: 3 additions & 1 deletion montreal_forced_aligner/command_line/train_dictionary.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from montreal_forced_aligner.config import TEMP_DIR, align_yaml_to_config, load_basic_align
from montreal_forced_aligner.utils import get_available_acoustic_languages, get_pretrained_acoustic_path, \
get_available_dict_languages, get_dictionary_path
from montreal_forced_aligner.helper import setup_logger
from montreal_forced_aligner.helper import setup_logger, log_config
from montreal_forced_aligner.exceptions import ArgumentError


Expand All @@ -37,6 +37,8 @@ def train_dictionary(args):
print('Cleaning old directory!')
shutil.rmtree(data_directory, ignore_errors=True)
logger = setup_logger(command, data_directory)
logger.debug('ALIGN CONFIG:')
log_config(logger, align_config)
if os.path.exists(conf_path):
with open(conf_path, 'r') as f:
conf = yaml.load(f, Loader=yaml.SafeLoader)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from montreal_forced_aligner.dictionary import Dictionary
from montreal_forced_aligner.aligner import PretrainedAligner
from montreal_forced_aligner.config import TEMP_DIR, train_yaml_to_config, load_basic_train_ivector
from montreal_forced_aligner.helper import setup_logger
from montreal_forced_aligner.helper import setup_logger, log_config
from montreal_forced_aligner.utils import get_available_acoustic_languages, get_pretrained_acoustic_path, \
get_available_dict_languages, get_dictionary_path
from montreal_forced_aligner.models import AcousticModel
Expand Down Expand Up @@ -38,7 +38,10 @@ def train_ivector(args):
print('Cleaning old directory!')
shutil.rmtree(data_directory, ignore_errors=True)
logger = setup_logger(command, data_directory)

logger.debug('TRAIN CONFIG:')
log_config(logger, train_config)
logger.debug('ALIGN CONFIG:')
log_config(logger, align_config)
if os.path.exists(conf_path):
with open(conf_path, 'r') as f:
conf = yaml.load(f, Loader=yaml.SafeLoader)
Expand Down
4 changes: 3 additions & 1 deletion montreal_forced_aligner/command_line/transcribe.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from montreal_forced_aligner.dictionary import Dictionary
from montreal_forced_aligner.transcriber import Transcriber
from montreal_forced_aligner.models import AcousticModel, LanguageModel, FORMAT
from montreal_forced_aligner.helper import setup_logger
from montreal_forced_aligner.helper import setup_logger, log_config
from montreal_forced_aligner.config import TEMP_DIR, transcribe_yaml_to_config, load_basic_transcribe, save_config
from montreal_forced_aligner.utils import get_available_acoustic_languages, get_pretrained_acoustic_path, \
get_available_lm_languages, get_pretrained_language_model_path, \
Expand Down Expand Up @@ -37,6 +37,8 @@ def transcribe_corpus(args):
print('Cleaning old directory!')
shutil.rmtree(data_directory, ignore_errors=True)
logger = setup_logger(command, data_directory)
logger.debug('TRANSCRIBE CONFIG:')
log_config(logger, transcribe_config)
os.makedirs(data_directory, exist_ok=True)
os.makedirs(args.output_directory, exist_ok=True)
os.makedirs(data_directory, exist_ok=True)
Expand Down
1 change: 1 addition & 0 deletions montreal_forced_aligner/config/align_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ def update(self, data):

def update_from_args(self, args):
super(AlignConfig, self).update_from_args(args)
self.feature_config.update_from_args(args)
if self.retry_beam <= self.beam:
self.retry_beam = self.beam * 4

Expand Down
56 changes: 11 additions & 45 deletions montreal_forced_aligner/corpus/align_corpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@
import re
from collections import Counter
from textgrid import TextGrid, IntervalTier
from ..helper import load_text, output_mapping, save_groups, filter_scp, load_scp
from ..helper import output_mapping, save_groups, filter_scp, load_scp

from ..exceptions import SampleRateError, CorpusError, WavReadError, SampleRateMismatchError, \
from ..exceptions import CorpusError, WavReadError, \
BitDepthError, TextParseError, TextGridParseError

from .base import BaseCorpus, find_exts
Expand Down Expand Up @@ -65,11 +65,14 @@ def __init__(self, directory, output_directory,

self.loaded_from_temp = self._load_from_temp()
if not self.loaded_from_temp:
print('loading from source')
if self.use_mp:
self.logger.debug('Loading from source with multiprocessing')
self._load_from_source_mp()
else:
self.logger.debug('Loading from source without multiprocessing')
self._load_from_source()
else:
self.logger.debug('Successfully loaded from temporary files')
self.check_warnings()
self.find_best_groupings()

Expand Down Expand Up @@ -129,7 +132,6 @@ def _load_from_temp(self):
new_w = re.split(r"[-']", w)
self.word_counts.update(new_w + [w])
self.text_mapping[utt] = ' '.join(text)
self.sample_rates = {int(k): v if isinstance(v, list) else [v] for k,v in load_scp(sr_path).items()}
self.utt_wav_mapping = load_scp(wav_path)
self.wav_info = load_scp(wav_info_path, float)
self.utt_text_file_mapping = load_scp(text_file_path)
Expand Down Expand Up @@ -189,7 +191,7 @@ def _load_from_source_mp(self):
else:
self.no_transcription_files.append(wav_path)
continue
job_queue.put((file_name, wav_path, transcription_path, relative_path, self.speaker_characters, self.temp_directory))
job_queue.put((file_name, wav_path, transcription_path, relative_path, self.speaker_characters))
job_queue.join()
stopped.stop()
for p in procs:
Expand All @@ -204,7 +206,6 @@ def _load_from_source_mp(self):
utt_name = info['utt_name']
speaker_name = info['speaker_name']
wav_info = info['wav_info']
sr = wav_info['sample_rate']
if utt_name in self.utt_wav_mapping:
ind = 0
fixed_utt_name = utt_name
Expand All @@ -218,22 +219,19 @@ def _load_from_source_mp(self):
for w in words:
new_w = re.split(r"[-']", w)
self.word_counts.update(new_w + [w])
self.wav_files.append(file_name)
self.text_mapping[utt_name] = ' '.join(words)
self.utt_text_file_mapping[utt_name] = info['text_file']
self.speak_utt_mapping[speaker_name].append(utt_name)
self.utt_wav_mapping[utt_name] = info['wav_path']
self.sample_rates[sr].add(speaker_name)
self.utt_speak_mapping[utt_name] = speaker_name
self.file_directory_mapping[utt_name] = info['relative_path']
self.lab_count += 1
else:
wav_info = info['wav_info']
sr = wav_info['sample_rate']
file_name = info['recording_name']
self.wav_files.append(file_name)
self.speaker_ordering[file_name] = info['speaker_ordering']
for s in info['speaker_ordering']:
self.sample_rates[sr].add(s)
self.segments.update(info['segments'])
self.utt_wav_mapping.update(info['utt_wav_mapping'])
self.file_utt_mapping.update(info['file_utt_mapping'])
Expand All @@ -255,7 +253,7 @@ def _load_from_source_mp(self):
self.file_directory_mapping[fn] = info['relative_path']
self.tg_count += 1
self.wav_info[file_name] = [wav_info['num_channels'], wav_info['sample_rate'], wav_info['duration']]
for k in ['wav_read_errors', 'unsupported_sample_rate', 'unsupported_bit_depths',
for k in ['wav_read_errors', 'unsupported_bit_depths',
'decode_error_files', 'textgrid_read_errors']:
if hasattr(self, k):
if k in return_dict:
Expand All @@ -280,7 +278,6 @@ def _load_from_source(self):
utt_name = info['utt_name']
speaker_name = info['speaker_name']
wav_info = info['wav_info']
sr = wav_info['sample_rate']
if utt_name in self.utt_wav_mapping:
ind = 0
fixed_utt_name = utt_name
Expand All @@ -298,14 +295,11 @@ def _load_from_source(self):
self.utt_text_file_mapping[utt_name] = lab_path
self.speak_utt_mapping[speaker_name].append(utt_name)
self.utt_wav_mapping[utt_name] = wav_path
self.sample_rates[sr].add(speaker_name)
self.utt_speak_mapping[utt_name] = speaker_name
self.file_directory_mapping[utt_name] = relative_path
self.lab_count += 1
except WavReadError:
self.wav_read_errors.append(wav_path)
except SampleRateError:
self.unsupported_sample_rate.append(wav_path)
except BitDepthError:
self.unsupported_bit_depths.append(wav_path)
except TextParseError:
Expand All @@ -316,13 +310,10 @@ def _load_from_source(self):
tg_path = os.path.join(root, tg_name)
try:
info = parse_textgrid_file(file_name, wav_path, tg_path, relative_path,
self.speaker_characters, self.temp_directory)
self.speaker_characters)
wav_info = info['wav_info']
sr = wav_info['sample_rate']
self.wav_files.append(file_name)
self.speaker_ordering[file_name] = info['speaker_ordering']
for s in info['speaker_ordering']:
self.sample_rates[sr].add(s)
self.segments.update(info['segments'])
self.utt_wav_mapping.update(info['utt_wav_mapping'])
self.utt_text_file_mapping.update(info['utt_text_file_mapping'])
Expand All @@ -343,8 +334,6 @@ def _load_from_source(self):
self.tg_count += 1
except WavReadError:
self.wav_read_errors.append(wav_path)
except SampleRateError:
self.unsupported_sample_rate.append(wav_path)
except BitDepthError:
self.unsupported_bit_depths.append(wav_path)
except TextGridParseError as e:
Expand All @@ -358,30 +347,7 @@ def _load_from_source(self):

def check_warnings(self):
self.issues_check = self.ignored_utterances or self.no_transcription_files or \
self.textgrid_read_errors or self.unsupported_sample_rate or self.decode_error_files

bad_speakers = []
for speaker in self.speak_utt_mapping.keys():
count = 0
for k, v in self.sample_rates.items():
if speaker in v:
count += 1
if count > 1:
bad_speakers.append(speaker)
if bad_speakers:
msg = 'The following speakers had multiple speaking rates: {}. ' \
'Please make sure that each speaker has a consistent sampling rate.'.format(', '.join(bad_speakers))
self.logger.error(msg)
raise (SampleRateMismatchError(msg))

if len(self.speak_utt_mapping) < self.num_jobs:
self.num_jobs = len(self.speak_utt_mapping)
if self.num_jobs < len(self.sample_rates.keys()):
self.num_jobs = len(self.sample_rates.keys())
msg = 'The number of jobs was set to {}, due to the different sample rates in the dataset. ' \
'If you would like to use fewer parallel jobs, ' \
'please resample all wav files to the same sample rate.'.format(self.num_jobs)
self.logger.warning(msg)
self.textgrid_read_errors or self.decode_error_files

def save_text_file(self, file_name):
if self.segments:
Expand Down
42 changes: 13 additions & 29 deletions montreal_forced_aligner/corpus/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,10 @@ def __init__(self, directory, output_directory,
if os.path.exists(feat_path):
self.feat_mapping = load_scp(feat_path)

@property
def speakers(self):
    """Return the speaker names known to the corpus, sorted.

    The names are the keys of ``self.speak_utt_mapping``. A new sorted
    list is built on every access, so callers always see a stable order
    and can mutate the returned list without touching the mapping.

    Returns
    -------
    list
        Sorted keys of ``self.speak_utt_mapping``.
    """
    # Iterating a mapping yields its keys, so ``.keys()`` is unnecessary.
    return sorted(self.speak_utt_mapping)

def add_utterance(self, utterance, speaker, file, text, wav_file=None, seg=None):
if seg is not None:
self.segments[utterance] = seg
Expand Down Expand Up @@ -171,35 +175,15 @@ def delete_utterance(self, utterance):
del self.utt_wav_mapping[utterance]

def find_best_groupings(self):
num_sample_rates = len(self.sample_rates.keys())
jobs_per_sample_rate = {x: 1 for x in self.sample_rates.keys()}
remaining_jobs = self.num_jobs - num_sample_rates
while remaining_jobs > 0:
min_num = min(jobs_per_sample_rate.values())
addable = sorted([k for k, v in jobs_per_sample_rate.items() if v == min_num],
key=lambda x: -1 * len(self.sample_rates[x]))
jobs_per_sample_rate[addable[0]] += 1
remaining_jobs -= 1
self.speaker_groups = []
self.frequency_configs = []
job_num = 0
for k, v in jobs_per_sample_rate.items():
speakers = sorted(self.sample_rates[k])

groups = [[] for _ in range(v)]

configs = [(job_num + x, {'sample-frequency': k, 'low-freq': 20, 'high-freq': 7800}) for x in range(v)]
ind = 0
while speakers:
s = speakers.pop(0)
groups[ind].append(s)
ind += 1
if ind >= v:
ind = 0

job_num += v
self.speaker_groups.extend(groups)
self.frequency_configs.extend(configs)
if len(self.speakers) < self.num_jobs:
self.num_jobs = len(self.speakers)
self.speaker_groups = [[] for _ in range(self.num_jobs)]
job_ind = 0
for s in self.speakers:
self.speaker_groups[job_ind].append(s)
job_ind += 1
if job_ind == self.num_jobs:
job_ind = 0
self.groups = []
for x in self.speaker_groups:
g = []
Expand Down
Loading

0 comments on commit bf10ecb

Please sign in to comment.