Replace print statements with loguru library (#271)
* Move non-file prints over to logging

* migrate to loguru

* migrate to loguru

* Removed text length

* remove all two-argument logger calls

* remove all two-argument logger calls

* Add logger train.log

* Add logger train.log

* Redirect stderr and print out shell

* Revert

* Add Kaldi printouts to logs

* Add Kaldi printouts to logs

* Add Kaldi printouts to logs

* Add Kaldi printouts to logs

* Add Kaldi printouts to python console

* Add Kaldi printouts to python console

* Updated logging behaviour

* Revert to simpler behaviour

* Addressed PR comments

* Make (now longer) log output more readable

Co-authored-by: Ben Foley <ben@cbmm.io>
mattchrlw and benfoley committed Dec 10, 2021
1 parent 4e4fd57 commit 1f488f0
Showing 31 changed files with 11,279 additions and 10,867 deletions.
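The pattern this commit applies is the same everywhere: each bare print() becomes a call on loguru's shared logger, and a train.log sink captures the records for later inspection (the "Add logger train.log" step above). A minimal sketch of that setup follows; the sink path, level, and static_dir value are illustrative assumptions, not the exact Elpis configuration.

from loguru import logger

# Add a file sink alongside loguru's default stderr handler.
# "train.log" and the DEBUG level are assumptions for this sketch.
logger.add("train.log", level="DEBUG")

static_dir = "/js/build"  # hypothetical value, for illustration only

# Before: print('using static_dir:', static_dir)
logger.info(f"using static_dir: {static_dir}")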
18 changes: 10 additions & 8 deletions elpis/__init__.py
@@ -1,4 +1,7 @@
import os
+import logging
+from loguru import logger
+
from flask import redirect
from . import endpoints
from .app import Flask
@@ -18,6 +21,11 @@ def create_app(test_config=None):
# Variable to control the use of a proxy to support webpackdevserver
WEBPACK_DEV_SERVER_PROXY = os.environ.get("WEBPACK_DEV_SERVER_PROXY", None)

+log = logging.getLogger('werkzeug')
+log.setLevel(logging.DEBUG)
+# Prevent the HTTP request logs polluting more important train logs
+log.disabled = True
+
if WEBPACK_DEV_SERVER_PROXY:
app = Flask(__name__,
instance_relative_config=True,
@@ -38,20 +46,14 @@ def create_app(test_config=None):
# static_dir = static_dir_build
# else:
# static_dir = static_dir_watch
-print('using static_dir:', static_dir)
+logger.info(f'using static_dir: {static_dir}')
# Create a custom Flask instance defined in the app.py file. Same as a
# normal Flask class but with a specialised blueprint function.
app = Flask(__name__,
instance_relative_config=True,
static_folder=GUI_BUILD_DIR + static_dir,
static_url_path=static_dir)

-import logging
-log = logging.getLogger('werkzeug')
-log.setLevel(logging.DEBUG)
-# Prevent the HTTP request logs polluting more important train logs
-log.disabled = True
-
# When making this multi-user, the secret key would require to be a secure hash.
app.config.from_mapping(
SECRET_KEY='dev'
@@ -105,7 +107,7 @@ def index_file():
@app.route('/', defaults={'path': ''})
@app.route("/<path:path>")
def index(path):
-print('in index with:', path)
+logger.info(f'in index with: {path}')
if (WEBPACK_DEV_SERVER_PROXY):
# If we are running the webpack dev server,
# We proxy webpack requests through to the dev server
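The block that moves to the top of create_app silences werkzeug's per-request access log through the standard logging module, so HTTP noise stays out of the training output while the application's own records keep flowing through loguru. Condensed to its essentials, assuming nothing later re-enables the logger:

import logging

from loguru import logger

# werkzeug logs via the stdlib logging module, so it is silenced there;
# loguru is unaffected and keeps emitting the application's records.
logging.getLogger('werkzeug').disabled = True

logger.info('training output stays visible')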
7 changes: 4 additions & 3 deletions elpis/endpoints/model.py
@@ -1,6 +1,7 @@
from typing import Callable, Dict
from flask import request, current_app as app, jsonify
from ..blueprint import Blueprint
+from loguru import logger
import subprocess
from elpis.engines.common.objects.model import Model
from elpis.engines.common.errors import InterfaceError
@@ -33,7 +34,7 @@ def new():
interface = app.config['INTERFACE']
try:
model = interface.new_model(request.json["name"])
-print(f"New model created {model.name} {model.hash}")
+logger.info(f"New model created {model.name} {model.hash}")
except InterfaceError as e:
return jsonify({
"status": 500,
@@ -111,7 +112,7 @@ def build_data(model: Model):
@bp.route("/train", methods=['GET'])
def train():
def setup(model: Model):
-model.train(on_complete=lambda: print('Trained model!'))
+model.train(on_complete=lambda: logger.info('Trained model!'))

def build_data(model: Model):
return {
@@ -148,7 +149,7 @@ def results():
try:
results = model.get_train_results()
except FileNotFoundError:
-print("Results file not found.")
+logger.error("Results file not found.")
return jsonify(MISSING_LOG_RESPONSE)
data = {
"results": results
4 changes: 2 additions & 2 deletions elpis/endpoints/pron_dict.py
@@ -4,7 +4,7 @@
from elpis.engines import Interface
from elpis.engines.common.objects.pron_dict import PronDict
from elpis.engines.common.errors import InterfaceError
-
+from loguru import logger

bp = Blueprint("pron_dict", __name__, url_prefix="/pron-dict")

@@ -19,7 +19,7 @@ def new():
"status": 500,
"error": e.human_message
})
-print(f"****{request.json['name']}****")
+logger.info(f"****{request.json['name']}****")
dataset = interface.get_dataset(request.json['dataset_name'])
pron_dict.link(dataset)
app.config['CURRENT_PRON_DICT'] = pron_dict
7 changes: 4 additions & 3 deletions elpis/endpoints/transcription.py
@@ -1,11 +1,12 @@
from flask import request, current_app as app, jsonify
from ..blueprint import Blueprint
+from loguru import logger

from elpis.engines import Interface
from elpis.engines.common.objects.model import Model
from elpis.engines.common.objects.transcription import Transcription
from elpis.engines.common.utilities import hasher


bp = Blueprint("transcription", __name__, url_prefix="/transcription")

# TODO transcriptions have no name
@@ -17,7 +18,7 @@ def new():
transcription.link(model)
app.config['CURRENT_TRANSCRIPTION'] = transcription
file = request.files['file']
-transcription.prepare_audio(file, on_complete=lambda: print('Prepared audio file!'))
+transcription.prepare_audio(file, on_complete=lambda: logger.info('Prepared audio file!'))
data = {
"status": transcription.status,
"originalFilename": file.filename
@@ -31,7 +32,7 @@ def new():
@bp.route("/transcribe", methods=['GET'])
def transcribe():
transcription: Transcription = app.config['CURRENT_TRANSCRIPTION']
-transcription.transcribe(on_complete=lambda: print('Transcribed text!'))
+transcription.transcribe(on_complete=lambda: logger.info('Transcribed text!'))
data = {
"status": transcription.status,
"stage_status": transcription.stage_status
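Note that the transcription endpoints do not call the logger directly: the logger.info(...) call is wrapped in a lambda and handed over as a completion callback, so the message is only emitted once the background work finishes. A self-contained sketch of that pattern (prepare_audio below is a hypothetical stand-in, not the real Transcription method):

from loguru import logger

def prepare_audio(data: bytes, on_complete=None) -> None:
    # ... hypothetical audio processing would happen here ...
    if on_complete is not None:
        on_complete()  # fires only after the work is done

prepare_audio(b"\x00\x01", on_complete=lambda: logger.info('Prepared audio file!'))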
13 changes: 7 additions & 6 deletions elpis/engines/common/input/clean_json.py
@@ -21,6 +21,7 @@
import nltk
from argparse import ArgumentParser
from langid.langid import LanguageIdentifier, model
+from loguru import logger
from nltk.corpus import words
from typing import Dict, List, Set
from ..utilities import load_json_file, write_data_to_json_file
@@ -96,7 +97,7 @@ def are_words_valid(clean_words: List[str],

# Exclude utterance if > 10% english
if remove_english and len(clean_words) > 0 and english_word_count / len(clean_words) > 0.1:
-# print(round(english_word_count / len(clean_words)), trans, file=sys.stderr)
+# logger.debug(round(english_word_count / len(clean_words)), trans, file=sys.stderr)
return False

# Exclude utterance if langid thinks its english
@@ -203,14 +204,14 @@ def extract_additional_corpora(additional_corpus: str = '',
:param punctuation_to_collapse_by: punctuation marks to strip
:param punctuation_to_explode_by: punctuation marks to replace with spaces
"""
-print("corpus_txt", corpus_txt)
+logger.info(f"{corpus_txt=}")
if os.path.exists(corpus_txt):
write_mode = 'a' # append if already exists
else:
write_mode = 'w' # make a new file if not
with open(corpus_txt, write_mode) as corpus_txt_file:
if os.path.exists(additional_corpus):
-print(f"Extracting corpus examples from: {additional_corpus}")
+logger.info(f"Extracting corpus examples from: {additional_corpus}")
with open(additional_corpus, "r", encoding="utf-8", ) as file_:
for line in file_.readlines():
# clean the text along the way
@@ -222,7 +223,7 @@
line = line + '\n'
corpus_txt_file.writelines(line)
else:
-print(f"Provided additional text additional_corpus file path invalid: "
+logger.warning(f"Provided additional text additional_corpus file path invalid: "
f"{additional_corpus}")


@@ -280,7 +281,7 @@ def main() -> None:
dirty_json_data: List[Dict[str, str]] = load_json_file(arguments.infile)
outfile = arguments.outfile if arguments.outfile else sys.stdout

-print(f"Filtering dirty json data {arguments.infile}...")
+logger.info(f"Filtering dirty json data {arguments.infile}...")

filtered_data = clean_json_data(json_data=dirty_json_data,
remove_english=arguments.remove_english,
@@ -291,7 +292,7 @@ def main() -> None:
write_data_to_json_file(data=list(filtered_data),
file_name=outfile)

-print(f"Finished! Wrote {str(len(filtered_data))} transcriptions.")
+logger.info(f"Finished! Wrote {str(len(filtered_data))} transcriptions.")


if __name__ == "__main__":
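The replacement for print("corpus_txt", corpus_txt) leans on the self-documenting f-string specifier introduced in Python 3.8: f"{corpus_txt=}" renders the variable name together with its value, so the log line keeps the label the old two-argument print provided. For example, with a hypothetical path:

from loguru import logger

corpus_txt = "/tmp/corpus.txt"  # hypothetical path, for illustration

# Emits: corpus_txt='/tmp/corpus.txt' (name plus repr, via the = specifier)
logger.info(f"{corpus_txt=}")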
23 changes: 12 additions & 11 deletions elpis/engines/common/input/elan_to_json.py
@@ -13,6 +13,7 @@

import argparse
import glob
+from loguru import logger
import os
import sys
from typing import List, Dict, Tuple, Optional
@@ -67,15 +68,15 @@ def process_eaf(input_elan_file: str = '',
:return: a list of dictionaries, where each dictionary is an annotation
"""

-print(f"processing eaf {input_elan_file} using {tier_order} {tier_type} {tier_name}")
+logger.info(f"processing eaf {input_elan_file} using {tier_order} {tier_type} {tier_name}")

# Get paths to files
input_directory, full_file_name = os.path.split(input_elan_file)
file_name, extension = os.path.splitext(full_file_name)

# Look for wav file matching the eaf file in same directory
if os.path.isfile(os.path.join(input_directory, file_name + ".wav")):
-print("WAV file found for " + file_name, file=sys.stderr)
+logger.info(f"WAV file found for {file_name}")
else:
raise ValueError(f"WAV file not found for {full_file_name}. "
f"Please put it next to the eaf file in {input_directory}.")
@@ -101,38 +102,38 @@
# tier_order is 1-index but List indexing is 0-index
try:
tier_name = tier_names[tier_order - 1]
-print(f"using tier order {tier_order} to get tier name {tier_name}")
+logger.info(f"using tier order {tier_order} to get tier name {tier_name}")
except IndexError:
-print("couldn't find a tier")
+logger.warning("couldn't find a tier")
pass
else:
# else use tier type to get a tier name
if tier_type in tier_types:
-print(f"found tier type {tier_type}")
+logger.info(f"found tier type {tier_type}")
tier_names = input_eaf.get_tier_ids_for_linguistic_type(tier_type)
tier_name = tier_names[0]
if tier_name:
-print(f"found tier name {tier_name}")
+logger.info(f"found tier name {tier_name}")
else:
-print("tier type not found in this file")
+logger.warning("tier type not found in this file")

if tier_name in tier_names:
-print(f"using tier name {tier_name}")
+logger.info(f"using tier name {tier_name}")
annotations = input_eaf.get_annotation_data_for_tier(tier_name)

if annotations:
-print(f"annotations {annotations}")
+logger.info(f"annotations {annotations}")
annotations = sorted(annotations)
parameters: Dict[str, str] = input_eaf.get_parameters_for_tier(tier_name)
-print(f"parameters {parameters}")
+logger.info(f"parameters {parameters}")
speaker_id: str = parameters.get("PARTICIPANT", "")

for annotation in annotations:
start: str = annotation[0]
end: str = annotation[1]
annotation_text: str = annotation[2]

-print(f"annotation {annotation} {start} {end}")
+logger.info(f"annotation {annotation} {start} {end}")
obj = {
"audio_file_name": f"{file_name}.wav",
"transcript": annotation_text,
5 changes: 3 additions & 2 deletions elpis/engines/common/input/make_prn_dict.py
@@ -9,6 +9,7 @@
"""

import argparse
+from loguru import logger
import sys
from typing import List, Tuple, Set, TextIO

@@ -105,9 +106,9 @@ def generate_pronunciation_dictionary(word_list: str,
missing_characters=missing_characters)

for character in missing_characters:
-print(f"Unexpected character: {character}", file=sys.stderr)
+logger.warning(f"Unexpected character: {character}")

-print(f"Wrote lexicon to {pronunciation_dictionary}", file=sys.stderr)
+logger.info(f"Wrote lexicon to {pronunciation_dictionary}")


def main():
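Dropping file=sys.stderr from these calls is behaviour-preserving: loguru's single default handler writes to sys.stderr, so the warnings and the final status line still land on standard error, now with timestamps and level names attached. A quick sketch, assuming the default sink is still installed:

from loguru import logger

# loguru's default handler targets sys.stderr, so these replace
# print(..., file=sys.stderr) without changing the output stream.
logger.warning("Unexpected character: ŋ")    # hypothetical character
logger.info("Wrote lexicon to lexicon.txt")  # hypothetical path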
15 changes: 8 additions & 7 deletions elpis/engines/common/input/make_wordlist.py
@@ -12,6 +12,7 @@
"""

import argparse
+from loguru import logger
import os
import sys
from typing import List, Dict
@@ -27,7 +28,7 @@ def save_word_list(word_list: List[str], file_name: str) -> None:
with open(file_name, "w", encoding='utf-8') as f:
for word in word_list:
f.write(word + "\n",)
-print(f"Wrote word list to {file_name}")
+logger.info(f"Wrote word list to {file_name}")


def extract_word_list(json_data: List[Dict[str, str]]) -> List[str]:
@@ -55,12 +56,12 @@ def extract_additional_words(file_name: str) -> List[str]:
words = []
if os.path.exists(file_name):
with open(file_name, "r") as f:
-print(f"Extracting additional words from {file_name}")
+logger.info(f"Extracting additional words from {file_name}")
for line in f.readlines():
new_words = line.strip().split(" ")
words += [word for word in new_words]
else:
-print(f"WARNING: Additional word list file at {file_name} does not exist, skipping!")
+logger.warning(f"Additional word list file at {file_name} does not exist, skipping!")
return words


@@ -80,7 +81,7 @@ def generate_word_list(transcription_file: str,
"""
json_data: List[Dict[str, str]] = load_json_file(transcription_file)

-print("Extracting word list(s)...", flush=True, file=sys.stderr)
+logger.info("Extracting word list(s)...")

# Retrieve ELAN word data
word_list = extract_word_list(json_data)
@@ -97,9 +98,9 @@
# Remove duplicates
word_list = list(set(word_list))

-print(sorted(word_list))
+logger.debug(sorted(word_list))

-print(f"Writing wordlist to file...", flush=True, file=sys.stderr)
+logger.info(f"Writing wordlist to file...")
save_word_list(word_list, output_file)


@@ -135,7 +136,7 @@ def main():
additional_corpus_txt=arguments.additional_corpus_txt
)

-print("Done.", file=sys.stderr)
+logger.info("Done.")


if __name__ == '__main__':
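The wordlist changes also choose severity levels instead of mapping every print to logger.info: progress messages become INFO, the dump of the entire sorted word list drops to DEBUG, and the missing-file branch becomes WARNING, shedding its hand-written "WARNING:" prefix since loguru supplies the level itself. The mapping in miniature, with hypothetical values:

from loguru import logger

word_list = ["dog", "cat"]     # hypothetical data
file_name = "extra_words.txt"  # hypothetical path

logger.info("Extracting word list(s)...")  # progress: was print(..., file=sys.stderr)
logger.debug(sorted(word_list))            # bulk dump, demoted to DEBUG
logger.warning(f"Additional word list file at {file_name} does not exist, skipping!")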
3 changes: 2 additions & 1 deletion elpis/engines/common/input/split_on_silence.py
@@ -8,6 +8,7 @@
Nicholas Lambourne - (The University of Queensland, 2019)
"""

+from loguru import logger
from argparse import ArgumentParser
from pathlib import Path
from pydub import AudioSegment
@@ -51,7 +52,7 @@ def split_audio_file_on_silence(file_path: str,
audio_segment = silence + segment + silence
normalised_segment = match_target_amplitude(audio_segment, -20)
export_file_name = f"_file_{file_index}-part_{segment_index}.wav"
-print(f"Exporting {export_file_name}")
+logger.info(f"Exporting {export_file_name}")
normalised_segment.export(Path(output_directory, export_file_name))


3 changes: 2 additions & 1 deletion elpis/engines/common/input/vad.py
@@ -1,6 +1,7 @@
#!/usr/bin/python3

import librosa
+from loguru import logger
import numpy
from typing import Any, Dict, List, Tuple

@@ -15,7 +16,7 @@ def get_chunks(audio_path: str, method: str, parameter: float) -> List[Tuple[flo
"""
audio_data = read_audio_path(audio_path)
threshold = find_best_threshold(audio_data, method=method, parameter=parameter)
-print(f"""Top db = {audio_data["top db"]}, chosen threshold = {threshold} (method = {method})""")
+logger.info(f"Top db = {audio_data['top db']}, chosen threshold = {threshold} (method = {method})")
time_voice_sections = get_voice_sections(audio_data, threshold)
return time_voice_sections

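The vad.py change also untangles a quoting wrinkle: the old line needed a triple-quoted f-string because audio_data["top db"] reuses double quotes inside the replacement field, which is a syntax error in an ordinary double-quoted f-string before Python 3.12. Switching the subscript to single quotes lets a plain f-string carry the whole message:

from loguru import logger

audio_data = {"top db": 42.0}  # hypothetical values
threshold, method = 30.0, "default"

# Single quotes inside, double quotes outside: no clash, no triple quoting.
logger.info(f"Top db = {audio_data['top db']}, chosen threshold = {threshold} (method = {method})")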
