Black formatter examples (#291)
* Formatted the examples folder

* Fixing out of date black line-length limit
aviraljain99 committed Jun 17, 2022
1 parent 91a95fe commit 0672888
Showing 5 changed files with 140 additions and 128 deletions.
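The changes below are mechanical Black output: single-quoted strings become double-quoted, calls that overflow the line-length limit are split one argument per line with a trailing comma, and inline comments gain a second space before the #. As a rough sketch of the same transformation using Black's Python API (a hypothetical illustration, not part of the commit; the line length is assumed to be Black's default of 88, which this commit does not record):

import black

# Hypothetical example: feed one of the lines changed below through Black.
# Requires `pip install black`; the line-length setting is an assumption.
source = "logger.info(f'Using preset for {dataset_name}')\n"
mode = black.Mode(line_length=88)
print(black.format_str(source, mode=mode), end="")
# prints: logger.info(f"Using preset for {dataset_name}")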
75 changes: 39 additions & 36 deletions elpis/examples/cli/hft/create_dataset.py
@@ -12,67 +12,70 @@
 
 def main(dataset_name: str, reset: bool):
     presets = {
-        'abui': {
-            'dataset_dir': '/datasets/abui/transcribed',
-            'importer_method': 'tier_name',
-            'importer_value': 'Phrase',
+        "abui": {
+            "dataset_dir": "/datasets/abui/transcribed",
+            "importer_method": "tier_name",
+            "importer_value": "Phrase",
         },
-        'gk': {
-            'dataset_dir': '/datasets/gk',
-            'importer_method': 'tier_type',
-            'importer_value': 'tx',
+        "gk": {
+            "dataset_dir": "/datasets/gk",
+            "importer_method": "tier_type",
+            "importer_value": "tx",
         },
-        'timit': {
-            'dataset_dir': '/datasets/timit/training_data',
-            'importer_method': 'tier_name',
-            'importer_value': 'default',
-        }
+        "timit": {
+            "dataset_dir": "/datasets/timit/training_data",
+            "importer_method": "tier_name",
+            "importer_value": "default",
+        },
     }
-    logger.info(f'Using preset for {dataset_name}')
+    logger.info(f"Using preset for {dataset_name}")
     logger.info(presets[dataset_name])
 
     # Step 0
     # ======
     # Use or create the Elpis interface directory where all the associated files/objects are stored.
-    logger.info('Create interface')
-    elpis = Interface(path=Path('/state/of_origin'), use_existing=reset)
+    logger.info("Create interface")
+    elpis = Interface(path=Path("/state/of_origin"), use_existing=reset)
 
     # Step 1
     # ======
     # Select Engine
-    logger.info('Set engine')
+    logger.info("Set engine")
     from elpis.engines import ENGINES
-    elpis.set_engine(ENGINES['hft'])
+
+    elpis.set_engine(ENGINES["hft"])
 
     # Step 2
     # ======
     # Setup a dataset to to train data on.
     # Reuse dataset if it exists
-    logger.info(f'Current datasets {elpis.list_datasets()}')
+    logger.info(f"Current datasets {elpis.list_datasets()}")
     if dataset_name not in elpis.list_datasets():
-        logger.info(f'Making new dataset {dataset_name}')
+        logger.info(f"Making new dataset {dataset_name}")
         dataset = elpis.new_dataset(dataset_name)
         logger.info(f"Adding data from {presets[dataset_name]['dataset_dir']}")
-        dataset.add_directory(presets[dataset_name]['dataset_dir'], extensions=['eaf', 'wav'])
-        logger.info('Select importer')
-        dataset.auto_select_importer() # Selects Elan because of eaf file.
-        logger.info('Set setting')
-        dataset.importer.set_setting(presets[dataset_name]['importer_method'], presets[dataset_name]['importer_value'])
-        logger.info('Process data')
+        dataset.add_directory(presets[dataset_name]["dataset_dir"], extensions=["eaf", "wav"])
+        logger.info("Select importer")
+        dataset.auto_select_importer()  # Selects Elan because of eaf file.
+        logger.info("Set setting")
+        dataset.importer.set_setting(
+            presets[dataset_name]["importer_method"],
+            presets[dataset_name]["importer_value"],
+        )
+        logger.info("Process data")
         dataset.process()
     else:
-        logger.info(f'Use existing dataset {dataset_name}')
+        logger.info(f"Use existing dataset {dataset_name}")
 
 
-if __name__ == '__main__':
-    parser = argparse.ArgumentParser(description='Prepare a dataset.')
-    parser.add_argument('--name',
-                        default='abui',
-                        type=str,
-                        help='Which dataset to use?')
-    parser.add_argument('--reset',
-                        action='store_false',
-                        help='Reset state to create a new dataset with the given name.')
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Prepare a dataset.")
+    parser.add_argument("--name", default="abui", type=str, help="Which dataset to use?")
+    parser.add_argument(
+        "--reset",
+        action="store_false",
+        help="Reset state to create a new dataset with the given name.",
+    )
     args = parser.parse_args()
 
     main(dataset_name=args.name, reset=bool(args.reset))
89 changes: 48 additions & 41 deletions elpis/examples/cli/hft/train.py
@@ -11,52 +11,56 @@
 
 def main(dataset_name: str, reset: bool):
     presets = {
-        'abui': {
-            'dataset_dir': '/datasets/abui/transcribed',
-            'importer_method': 'tier_name',
-            'importer_value': 'Phrase',
-            'model_name': 'abui'
+        "abui": {
+            "dataset_dir": "/datasets/abui/transcribed",
+            "importer_method": "tier_name",
+            "importer_value": "Phrase",
+            "model_name": "abui",
         },
-        'timit': {
-            'dataset_dir': '/datasets/timit/training_data',
-            'importer_method': 'tier_name',
-            'importer_value': 'default',
-            'model_name': 'timit'
-        }
+        "timit": {
+            "dataset_dir": "/datasets/timit/training_data",
+            "importer_method": "tier_name",
+            "importer_value": "default",
+            "model_name": "timit",
+        },
     }
-    logger.info(f'Using preset for {dataset_name}')
+    logger.info(f"Using preset for {dataset_name}")
    logger.info(presets[dataset_name])
 
     # Step 0
     # ======
     # Use or create the Elpis interface directory where all the associated files/objects are stored.
-    logger.info('Create interface')
-    elpis = Interface(path=Path('/state/of_origin'), use_existing=reset)
+    logger.info("Create interface")
+    elpis = Interface(path=Path("/state/of_origin"), use_existing=reset)
 
     # Step 1
     # ======
     # Select Engine
-    logger.info('Set engine')
+    logger.info("Set engine")
     from elpis.engines import ENGINES
-    elpis.set_engine(ENGINES['hft'])
+
+    elpis.set_engine(ENGINES["hft"])
 
     # Step 2
     # ======
     # Setup a dataset to to train data on.
     # Reuse dataset if it exists
     if dataset_name not in elpis.list_datasets():
-        logger.info(f'Making new dataset {dataset_name}')
+        logger.info(f"Making new dataset {dataset_name}")
         dataset = elpis.new_dataset(dataset_name)
         logger.info(f"Adding data from {presets[dataset_name]['dataset_dir']}")
-        dataset.add_directory(presets[dataset_name]['dataset_dir'], extensions=['eaf', 'wav'])
-        logger.info('Select importer')
-        dataset.auto_select_importer() # Selects Elan because of eaf file.
-        logger.info('Set setting')
-        dataset.importer.set_setting(presets[dataset_name]['importer_method'], presets[dataset_name]['importer_value'])
-        logger.info('Process data')
+        dataset.add_directory(presets[dataset_name]["dataset_dir"], extensions=["eaf", "wav"])
+        logger.info("Select importer")
+        dataset.auto_select_importer()  # Selects Elan because of eaf file.
+        logger.info("Set setting")
+        dataset.importer.set_setting(
+            presets[dataset_name]["importer_method"],
+            presets[dataset_name]["importer_value"],
+        )
+        logger.info("Process data")
         dataset.process()
     else:
-        logger.info(f'Use existing dataset {dataset_name}')
+        logger.info(f"Use existing dataset {dataset_name}")
         dataset = elpis.get_dataset(dataset_name)
 
     # Step 3
@@ -67,29 +71,32 @@ def main(dataset_name: str, reset: bool):
     while model_name in elpis.list_models():
         i = i + 1
         model_name = f'{presets[dataset_name]["model_name"]}{i}'
-    logger.info(f'Making new model {model_name}')
+    logger.info(f"Making new model {model_name}")
     model = elpis.new_model(model_name)
-    logger.info(f'Made model {model.hash}')
+    logger.info(f"Made model {model.hash}")
     # TODO add model settings
-    logger.info('Linking dataset')
+    logger.info("Linking dataset")
     model.link_dataset(dataset)
-    if Path('/state/of_origin/models/latest').is_dir():
-        os.remove('/state/of_origin/models/latest')
-    os.symlink(f'/state/of_origin/models/{model.hash}', '/state/of_origin/models/latest', target_is_directory=True)
-    logger.add(f'/state/models/{model.hash}/train.log')
-    logger.info('Start training. This may take a while')
+    if Path("/state/of_origin/models/latest").is_dir():
+        os.remove("/state/of_origin/models/latest")
+    os.symlink(
+        f"/state/of_origin/models/{model.hash}",
+        "/state/of_origin/models/latest",
+        target_is_directory=True,
+    )
+    logger.add(f"/state/models/{model.hash}/train.log")
+    logger.info("Start training. This may take a while")
     model.train()
 
 
-if __name__ == '__main__':
-    parser = argparse.ArgumentParser(description='Prepare a dataset and train a model.')
-    parser.add_argument('--name',
-                        default='abui',
-                        type=str,
-                        help='Which dataset to use?')
-    parser.add_argument('--reset',
-                        action='store_false',
-                        help='Reset state to create a new dataset and model.')
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Prepare a dataset and train a model.")
+    parser.add_argument("--name", default="abui", type=str, help="Which dataset to use?")
+    parser.add_argument(
+        "--reset",
+        action="store_false",
+        help="Reset state to create a new dataset and model.",
+    )
     args = parser.parse_args()
 
     main(dataset_name=args.name, reset=bool(args.reset))
62 changes: 31 additions & 31 deletions elpis/examples/cli/hft/transcribe.py
@@ -14,64 +14,64 @@ def main(model_name: str, infer_path: str):
     # Step 0
     # ======
     # Use the Elpis interface directory where all the associated files/objects are stored.
-    logger.info('Create interface')
-    elpis = Interface(path=Path('/state/of_origin'), use_existing=True)
+    logger.info("Create interface")
+    elpis = Interface(path=Path("/state/of_origin"), use_existing=True)
 
     # Step 1
     # ======
     # Select Engine
-    logger.info('Set engine')
+    logger.info("Set engine")
     from elpis.engines import ENGINES
-    elpis.set_engine(ENGINES['hft'])
+
+    elpis.set_engine(ENGINES["hft"])
 
     # Step 2
     # ======
     # Load Model
-    logger.info(f'Get elpis model for {model_name}')
+    logger.info(f"Get elpis model for {model_name}")
     model = elpis.get_model(model_name)
 
     # Step 3
     # ======
     # Make a transcription interface and transcribe audio.
     i = 0
-    base_name = 'tx'
-    tx_name = f'{base_name}{i}'
+    base_name = "tx"
+    tx_name = f"{base_name}{i}"
     while tx_name in elpis.list_transcriptions():
         i = i + 1
-        tx_name = f'{base_name}{i}'
-    logger.info(f'Making new transcriber {tx_name}')
+        tx_name = f"{base_name}{i}"
+    logger.info(f"Making new transcriber {tx_name}")
     transcription = elpis.new_transcription(tx_name)
-    logger.info(f'Made transcriber {transcription.hash}')
-    logger.info('Linking model')
+    logger.info(f"Made transcriber {transcription.hash}")
+    logger.info("Linking model")
     transcription.link(model)
 
-    if Path('/state/of_origin/transcriptions/latest').is_dir():
-        os.remove('/state/of_origin/transcriptions/latest')
-    os.symlink(f'/state/of_origin/transcriptions/{transcription.hash}',
-               '/state/of_origin/transcriptions/latest',
-               target_is_directory=True)
+    if Path("/state/of_origin/transcriptions/latest").is_dir():
+        os.remove("/state/of_origin/transcriptions/latest")
+    os.symlink(
+        f"/state/of_origin/transcriptions/{transcription.hash}",
+        "/state/of_origin/transcriptions/latest",
+        target_is_directory=True,
+    )
 
-    logger.info(f'Load audio from {infer_path}')
-    with open(infer_path, 'rb') as infer_audio_file:
+    logger.info(f"Load audio from {infer_path}")
+    with open(infer_path, "rb") as infer_audio_file:
         transcription.prepare_audio(infer_audio_file)
 
-    logger.info('Transcribe')
+    logger.info("Transcribe")
     transcription.transcribe()
     logger.info(transcription.text())
 
 
-if __name__ == '__main__':
-    parser = argparse.ArgumentParser(description='Transcribe a file.')
-    parser.add_argument('--name',
-                        default='abui0',
-                        type=str,
-                        help='Which dataset to use?'
-                        )
-    parser.add_argument('--infer',
-                        default='/datasets/abui/untranscribed/audio.wav',
-                        type=str,
-                        help='Which file to transcribe?'
-                        )
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Transcribe a file.")
+    parser.add_argument("--name", default="abui0", type=str, help="Which dataset to use?")
+    parser.add_argument(
+        "--infer",
+        default="/datasets/abui/untranscribed/audio.wav",
+        type=str,
+        help="Which file to transcribe?",
+    )
     args = parser.parse_args()
 
     main(model_name=args.name, infer_path=args.infer)
29 changes: 15 additions & 14 deletions elpis/examples/cli/kaldi/train.py
@@ -21,28 +21,29 @@
 from pathlib import Path
 from loguru import logger
 
-DATASET_DIR = '/datasets/abui/transcribed'
-DATASET_NAME = 'ds'
-IMPORTER_METHOD = 'tier_name'
-IMPORTER_VALUE = 'Phrase'
-L2S_PATH = '/datasets/abui/letter_to_sound.txt'
-PRON_DICT_NAME = 'pd'
-MODEL_NAME = 'mx'
-TX_NAME = 'tx'
-INFER_FILE_PATH = '/datasets/abui/untranscribed/audio.wav'
+DATASET_DIR = "/datasets/abui/transcribed"
+DATASET_NAME = "ds"
+IMPORTER_METHOD = "tier_name"
+IMPORTER_VALUE = "Phrase"
+L2S_PATH = "/datasets/abui/letter_to_sound.txt"
+PRON_DICT_NAME = "pd"
+MODEL_NAME = "mx"
+TX_NAME = "tx"
+INFER_FILE_PATH = "/datasets/abui/untranscribed/audio.wav"
 
 # Step 0
 # ======
 # Create a Kaldi interface directory (where all the associated files/objects
 # will be stored).
-elpis = Interface(path=Path('/state/of_origin'), use_existing=True)
+elpis = Interface(path=Path("/state/of_origin"), use_existing=True)
 
 
 # Step 1
 # ======
 # Select Engine
 from elpis.engines import ENGINES
-engine = ENGINES['kaldi']
+
+engine = ENGINES["kaldi"]
 elpis.set_engine(engine)
 
 
@@ -53,8 +54,8 @@
 if DATASET_NAME not in elpis.list_datasets():
     logger.info("Making new dataset")
     dataset = elpis.new_dataset(DATASET_NAME)
-    dataset.add_directory(DATASET_DIR, extensions=['eaf', 'wav'])
-    dataset.auto_select_importer() # Selects Elan because of eaf file.
+    dataset.add_directory(DATASET_DIR, extensions=["eaf", "wav"])
+    dataset.auto_select_importer()  # Selects Elan because of eaf file.
     dataset.importer.set_setting(IMPORTER_METHOD, IMPORTER_VALUE)
     dataset.process()
 else:
@@ -87,7 +88,7 @@
     model.link_dataset(dataset)
     model.link_pron_dict(pron_dict)
     model.build_structure()
-    model.train() # may take a while
+    model.train()  # may take a while
 else:
     logger.info("Use existing model")
     model = elpis.get_model(MODEL_NAME)
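A formatting pass like this one is typically kept in place with a check step. A minimal sketch of such a check, using Black's Python API to mirror `black --check elpis/examples` (hypothetical, not part of this commit; the project's configured line length is assumed):

import black
from pathlib import Path

# List files Black would still reformat, without writing any changes.
mode = black.Mode(line_length=88)  # assumed project setting
for path in sorted(Path("elpis/examples").rglob("*.py")):
    try:
        black.format_file_contents(path.read_text(), fast=False, mode=mode)
        print(f"would reformat: {path}")
    except black.NothingChanged:
        pass  # file is already Black-formatted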
