2.2.12 #643
Merged (4 commits) on May 30, 2023
9 changes: 3 additions & 6 deletions .github/workflows/main.yml
@@ -8,10 +8,6 @@ on:
   repository_dispatch:
     types: rebuild

-
-env:
-  CACHE_NUMBER: 0 # increase to reset cache manually
-
 concurrency:
   group: run_tests-${{ github.ref }}
   cancel-in-progress: true
@@ -41,12 +37,13 @@ jobs:
           fetch-depth: 0

       - name: Install Conda environment with Micromamba
-        uses: mamba-org/provision-with-micromamba@main
+        uses: mamba-org/setup-micromamba@v1
         with:
           environment-file: environment.yml
           environment-name: mfa
-          extra-specs: |
+          create-args: >-
            python=3.9
+          cache-environment: true

       - name: Configure mfa
         shell: bash -l {0}
1 change: 1 addition & 0 deletions Dockerfile
@@ -5,6 +5,7 @@ RUN mkdir -p /mfa
 RUN mamba env create -p /env -f docker_environment.yaml && conda clean -afy

 COPY . /pkg
+RUN conda run -p /env python -m pip install speechbrain
 RUN conda run -p /env python -m pip install --no-deps /pkg

 RUN useradd -ms /bin/bash mfauser
7 changes: 7 additions & 0 deletions docs/source/changelog/changelog_2.2.rst
@@ -5,6 +5,13 @@
 2.2 Changelog
 *************

+2.2.12
+======
+
+- Re-established support for sqlite for most aspects of MFA (some functionality requires using PostgreSQL)
+- Added a configuration flag for `mfa configure --enable_use_postgres` and `mfa [command] ... --use_postgres` to use PostgreSQL as the database backend
+- Fixed a bug where adapted acoustic models would not contain all the necessary metadata to be used
+
 2.2.11
 ======

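As a rough illustration of the flags described in the 2.2.12 notes above (the command and paths are illustrative, mirroring the first-steps examples later in this diff; only the flag names come from the changelog):

    mfa configure --enable_use_postgres   # make the current profile default to the PostgreSQL backend
    mfa train ~/mfa_data/my_corpus ~/mfa_data/my_dictionary.txt ~/mfa_data/new_acoustic_model.zip --use_postgres   # opt in for a single run
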
2 changes: 1 addition & 1 deletion docs/source/first_steps/index.rst
@@ -178,7 +178,7 @@ Once we've validated the data, we can train an acoustic model (and output the al

 mfa train ~/mfa_data/my_corpus ~/mfa_data/my_dictionary.txt ~/mfa_data/new_acoustic_model.zip # Export just the trained acoustic model
 mfa train ~/mfa_data/my_corpus ~/mfa_data/my_dictionary.txt ~/mfa_data/my_corpus_aligned # Export just the training alignments
-mfa train ~/mfa_data/my_corpus ~/mfa_data/my_dictionary.txt ~/mfa_data/new_acoustic_model.zip ~/mfa_data/my_corpus_aligned # Export both trained model and alignments
+mfa train ~/mfa_data/my_corpus ~/mfa_data/my_dictionary.txt ~/mfa_data/new_acoustic_model.zip --output_directory ~/mfa_data/my_corpus_aligned # Export both trained model and alignments

 As for other commands, if your data is large, you'll likely want to increase the number of jobs that MFA uses. For that and more advanced configuration of the training command, see :ref:`train_acoustic_model`.

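Concretely, the advice above about increasing the number of jobs would look something like this (a sketch assuming MFA's --num_jobs option; the value 8 is arbitrary):

    mfa train ~/mfa_data/my_corpus ~/mfa_data/my_dictionary.txt ~/mfa_data/new_acoustic_model.zip --output_directory ~/mfa_data/my_corpus_aligned --num_jobs 8
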
1 change: 1 addition & 0 deletions environment.yml
@@ -26,6 +26,7 @@ dependencies:
   - sqlalchemy>=2.0
   - pgvector
   - pgvector-python
+  - sqlite
   - postgresql
   - psycopg2
   - click
85 changes: 55 additions & 30 deletions montreal_forced_aligner/abc.py
@@ -74,6 +74,9 @@ def __init__(self, args: MfaArguments):
         self.log_path = self.args.log_path

     def db_engine(self):
+        db_string = self.db_string
+        if not GLOBAL_CONFIG.current_profile.use_postgres:
+            db_string += "?mode=ro&nolock=1&uri=true"

         return sqlalchemy.create_engine(
             self.db_string,
@@ -216,7 +219,6 @@ def __init__(
         **kwargs,
     ):
         super().__init__(**kwargs)
-        self.db_backend = GLOBAL_CONFIG.database_backend

         self._db_engine = None
         self._db_path = None
@@ -228,7 +230,10 @@ def delete_database(self) -> None:
         Reset all schemas
         """

-        MfaSqlBase.metadata.drop_all(self.db_engine)
+        if GLOBAL_CONFIG.current_profile.use_postgres:
+            MfaSqlBase.metadata.drop_all(self.db_engine)
+        elif self.db_path.exists():
+            os.remove(self.db_path)

     def initialize_database(self) -> None:
         """
@@ -238,26 +243,29 @@ def initialize_database(self) -> None:
             return
         from montreal_forced_aligner.command_line.utils import check_databases

-        exist_check = True
-        try:
-            check_databases(self.identifier)
-        except Exception:
+        if GLOBAL_CONFIG.current_profile.use_postgres:
+            exist_check = True
             try:
-                subprocess.check_call(
-                    [
-                        "createdb",
-                        f"--host={GLOBAL_CONFIG.database_socket}",
-                        self.identifier,
-                    ],
-                    stderr=subprocess.DEVNULL,
-                    stdout=subprocess.DEVNULL,
-                )
+                check_databases(self.identifier)
             except Exception:
-                raise DatabaseError(
-                    f"There was an error connecting to the {GLOBAL_CONFIG.current_profile_name} MFA database server. "
-                    "Please ensure the server is initialized (mfa server init) or running (mfa server start)"
-                )
-            exist_check = False
+                try:
+                    subprocess.check_call(
+                        [
+                            "createdb",
+                            f"--host={GLOBAL_CONFIG.database_socket}",
+                            self.identifier,
+                        ],
+                        stderr=subprocess.DEVNULL,
+                        stdout=subprocess.DEVNULL,
+                    )
+                except Exception:
+                    raise DatabaseError(
+                        f"There was an error connecting to the {GLOBAL_CONFIG.current_profile_name} MFA database server. "
+                        "Please ensure the server is initialized (mfa server init) or running (mfa server start)"
+                    )
+                exist_check = False
+        else:
+            exist_check = self.db_path.exists()
         self.database_initialized = True
         if exist_check:
             if GLOBAL_CONFIG.current_profile.clean or getattr(self, "dirty", False):
@@ -267,11 +275,12 @@ def initialize_database(self) -> None:
                 return

         os.makedirs(self.output_directory, exist_ok=True)
-        with self.db_engine.connect() as conn:
-            conn.execute(sqlalchemy.text("CREATE EXTENSION IF NOT EXISTS vector"))
-            conn.execute(sqlalchemy.text("CREATE EXTENSION IF NOT EXISTS pg_trgm"))
-            conn.execute(sqlalchemy.text("CREATE EXTENSION IF NOT EXISTS pg_stat_statements"))
-            conn.commit()
+        if GLOBAL_CONFIG.current_profile.use_postgres:
+            with self.db_engine.connect() as conn:
+                conn.execute(sqlalchemy.text("CREATE EXTENSION IF NOT EXISTS vector"))
+                conn.execute(sqlalchemy.text("CREATE EXTENSION IF NOT EXISTS pg_trgm"))
+                conn.execute(sqlalchemy.text("CREATE EXTENSION IF NOT EXISTS pg_stat_statements"))
+                conn.commit()

         MfaSqlBase.metadata.create_all(self.db_engine)

@@ -338,9 +347,19 @@ def current_workflow(self) -> CorpusWorkflow:
             return wf

     @property
-    def db_string(self):
+    def db_path(self) -> Path:
+        """Connection path for sqlite database"""
+        return self.output_directory.joinpath(f"{self.identifier}.db")
+
+    @property
+    def db_string(self) -> str:
         """Connection string for the database"""
-        return f"postgresql+psycopg2://@/{self.identifier}?host={GLOBAL_CONFIG.database_socket}"
+        if GLOBAL_CONFIG.use_postgres:
+            return (
+                f"postgresql+psycopg2://@/{self.identifier}?host={GLOBAL_CONFIG.database_socket}"
+            )
+        else:
+            return f"sqlite:///{self.db_path}"

     def construct_engine(self, **kwargs) -> sqlalchemy.engine.Engine:
         """
@@ -358,10 +377,16 @@ def construct_engine(self, **kwargs) -> sqlalchemy.engine.Engine:
         :class:`~sqlalchemy.engine.Engine`
             SqlAlchemy engine
         """
+        db_string = self.db_string
+        if not GLOBAL_CONFIG.use_postgres:
+            if kwargs.pop("read_only", False):
+                db_string += "?mode=ro&nolock=1&uri=true"
+            kwargs["poolclass"] = sqlalchemy.NullPool
+        else:
+            kwargs["pool_size"] = 10
+            kwargs["max_overflow"] = 10
         e = sqlalchemy.create_engine(
-            self.db_string,
-            pool_size=10,
-            max_overflow=10,
+            db_string,
             logging_name="main_process_engine",
             **kwargs,
         ).execution_options(logging_token="main_process_engine")
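
Taken together, the abc.py changes make the database layer backend-aware: each sqlite corpus database lives in its own .db file, read-only sqlite engines get a query-string URI and a NullPool, and PostgreSQL keeps a pooled psycopg2 connection. Below is a minimal standalone sketch of that selection pattern, not MFA's actual code; the make_engine name and socket path are illustrative, while the pool sizes and the read-only query string are taken from the diff:

    import sqlalchemy
    from sqlalchemy.pool import NullPool

    def make_engine(identifier: str, use_postgres: bool, read_only: bool = False) -> sqlalchemy.engine.Engine:
        if use_postgres:
            # PostgreSQL: connect over a local socket directory and keep a sized connection pool
            db_string = f"postgresql+psycopg2://@/{identifier}?host=/tmp/mfa_sockets"
            return sqlalchemy.create_engine(db_string, pool_size=10, max_overflow=10)
        # sqlite: a single file per identifier, no connection pooling
        db_string = f"sqlite:///{identifier}.db"
        if read_only:
            # same read-only options the diff appends for sqlite engines
            db_string += "?mode=ro&nolock=1&uri=true"
        return sqlalchemy.create_engine(db_string, poolclass=NullPool)
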
1 change: 1 addition & 0 deletions montreal_forced_aligner/acoustic_modeling/base.py
@@ -643,6 +643,7 @@ def export_model(self, output_model_path: Path) -> None:
         )
         acoustic_model.add_meta_file(self)
         acoustic_model.add_model(self.working_directory)
+        acoustic_model.add_model(self.worker.phones_dir)
         acoustic_model.add_pronunciation_models(
             self.working_directory, self.worker.dictionary_base_names.values()
         )
1 change: 1 addition & 0 deletions montreal_forced_aligner/acoustic_modeling/trainer.py
@@ -750,6 +750,7 @@ def export_files(
             Flag for including the original text of the corpus files as a tier
         """
         self.align()
+        self.analyze_alignments()
         super(TrainableAligner, self).export_files(
             output_directory, output_format, include_original_text
         )
18 changes: 18 additions & 0 deletions montreal_forced_aligner/alignment/adapting.py
@@ -358,6 +358,23 @@ def meta(self) -> MetaDict:
             "train_date": str(datetime.now()),
             "features": self.feature_options,
             "phone_set_type": str(self.phone_set_type),
+            "dictionaries": {
+                "names": sorted(self.dictionary_base_names.values()),
+                "default": self.dictionary_base_names[self._default_dictionary_id],
+                "silence_word": self.silence_word,
+                "use_g2p": self.use_g2p,
+                "oov_word": self.oov_word,
+                "bracketed_word": self.bracketed_word,
+                "laughter_word": self.laughter_word,
+                "clitic_marker": self.clitic_marker,
+                "position_dependent_phones": self.position_dependent_phones,
+            },
+            "oov_phone": self.oov_phone,
+            "optional_silence_phone": self.optional_silence_phone,
+            "silence_probability": self.silence_probability,
+            "initial_silence_probability": self.initial_silence_probability,
+            "final_silence_correction": self.final_silence_correction,
+            "final_non_silence_correction": self.final_non_silence_correction,
         }
         return data

@@ -377,6 +394,7 @@ def export_model(self, output_model_path: Path) -> None:
         )
         acoustic_model.add_meta_file(self)
         acoustic_model.add_model(self.working_directory)
+        acoustic_model.add_model(self.phones_dir)
         if directory:
             os.makedirs(directory, exist_ok=True)
         basename, _ = os.path.splitext(output_model_path)
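
The keys added to meta above are the "necessary metadata" that the 2.2.12 changelog says adapted acoustic models were missing. For orientation, the exported metadata ends up shaped roughly like this (key names from the diff; every value below is a made-up placeholder rather than real MFA output):

    adapted_meta = {
        "phone_set_type": "ARPA",
        "dictionaries": {
            "names": ["english_us"],
            "default": "english_us",
            "silence_word": "<eps>",
            "use_g2p": False,
            "oov_word": "<unk>",
            "bracketed_word": "[bracketed]",
            "laughter_word": "[laughter]",
            "clitic_marker": "'",
            "position_dependent_phones": True,
        },
        "oov_phone": "spn",
        "optional_silence_phone": "sil",
        "silence_probability": 0.5,
        "initial_silence_probability": 0.5,
        "final_silence_correction": 0.5,
        "final_non_silence_correction": 0.5,
    }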