Skip to content

Commit

Permalink
Standardise logging headers (#278)
Browse files: browse the repository at this point in the history
  • Loading branch information
mattchrlw committed Dec 17, 2021
1 parent 1f488f0 commit 970bf02
Show file tree
Hide file tree
Showing 5 changed files with 21 additions and 21 deletions.
2 changes: 1 addition & 1 deletion elpis/endpoints/pron_dict.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -19,7 +19,7 @@ def new():
"status": 500,
"error": e.human_message
})
logger.info(f"****{request.json['name']}****")
logger.info(f"==== {request.json['name']} ====")
dataset = interface.get_dataset(request.json['dataset_name'])
pron_dict.link(dataset)
app.config['CURRENT_PRON_DICT'] = pron_dict
Expand Down
2 changes: 1 addition & 1 deletion elpis/engines/common/output/ctm_to_elan.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -131,7 +131,7 @@ def main() -> None:
wav_dictionary = wav_scp_to_dictionary(arguments.wav)
output_directory = Path(arguments.outdir)

logger.info("==== CTM to Elan args")
logger.info("==== CTM to Elan args ====")
logger.info(f"{segments_dictionary=}")
logger.info(f"{ctm_dictionary=}")
logger.info(f"{wav_dictionary=}")
Expand Down
20 changes: 10 additions & 10 deletions elpis/engines/hft/objects/model.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -398,7 +398,7 @@ def get_model(self):
ctc_zero_infinity=True)

def preprocess_dataset(self):
logger.info('=== Preprocessing Dataset')
logger.info('==== Preprocessing Dataset ====')
speech = self.prepare_speech()

def speech_file_to_array_fn(batch):
Expand All @@ -418,11 +418,11 @@ def speech_file_to_array_fn(batch):
remove_columns=self.hft_dataset['train'].column_names,
num_proc=self.data_args.preprocessing_num_workers,
)
logger.info('=== hft_dataset')
logger.info('==== hft_dataset ====')
logger.info(self.hft_dataset)

def prepare_dataset(self):
logger.info('=== Preparing Dataset')
logger.info('==== Preparing Dataset ====')
def prepare_dataset(batch):
assert (
len(set(batch['sampling_rate'])) == 1
Expand All @@ -441,7 +441,7 @@ def prepare_dataset(batch):
)

def prepare_speech(self):
logger.info('=== Preparing Speech')
logger.info('==== Preparing Speech ====')
speech = {}
audio_paths = set()
rejected_count = 0
Expand Down Expand Up @@ -543,9 +543,9 @@ def set_args(self, model_args, data_args, training_args):
self.model_args = model_args
self.data_args = data_args
self.training_args = training_args
logger.info(f'\n\n=== Model args\n {model_args}')
logger.info(f'\n\n=== Data args\n {data_args}')
logger.info(f'\n\n=== Training args\n {training_args}')
logger.info(f'\n\n==== Model args ====\n {model_args}')
logger.info(f'\n\n==== Data args ====\n {data_args}')
logger.info(f'\n\n==== Training args ====\n {training_args}')

def train(self, on_complete:Callable=None):
self.tb_writer = SummaryWriter(self.path / 'runs')
Expand All @@ -558,7 +558,7 @@ def train(self, on_complete:Callable=None):
set_seed(self.training_args.seed)

# 1. Tokenization
logger.info('=== Tokenizing')
logger.info('==== Tokenizing ====')
self._set_finished_training(False)
self._set_stage(TOKENIZATION)

Expand Down Expand Up @@ -632,12 +632,12 @@ def train(self, on_complete:Callable=None):
# 4. Evaluation
self._set_stage(EVALUATION)
if self.training_args.do_eval:
logger.info('=== Evaluate')
logger.info('==== Evaluate ====')
metrics = trainer.evaluate()
metrics['eval_samples'] = len(self.hft_dataset['dev'])
trainer.log_metrics('eval', metrics)
trainer.save_metrics('eval', metrics)
logger.info('=== Metrics')
logger.info('==== Metrics ====')
logger.info(metrics)
self.config['results'] = metrics

Expand Down
14 changes: 7 additions & 7 deletions elpis/engines/hft/objects/transcription.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -56,19 +56,19 @@ def __init__(self, **kwargs) -> None:
self.build_stage_status(stage_names)

def transcribe(self, on_complete: callable = None) -> None:
logger.info('=== Load processor and model')
logger.info('==== Load processor and model ====')
self._set_finished_transcription(False)
processor, model = self._get_wav2vec2_requirements()

# Load audio
self._set_stage(LOAD_AUDIO)
logger.info('=== Load audio')
logger.info('==== Load audio ====')
audio_input, sample_rate = self._load_audio(self.audio_file_path)
self._set_stage(LOAD_AUDIO, complete=True)

# Pad input values and return pt tensor
self._set_stage(PROCESS_INPUT)
logger.info('=== Process input')
logger.info('==== Process input ====')
input_values = processor(
audio_input, sampling_rate=HFTTranscription.SAMPLING_RATE, return_tensors='pt').input_values
self._set_stage(PROCESS_INPUT, msg='Processed input values')
Expand All @@ -86,15 +86,15 @@ def transcribe(self, on_complete: callable = None) -> None:
self._set_stage(TRANSCRIPTION, complete=True)

self._set_stage(SAVING)
logger.info('=== Save transcription')
logger.info('==== Save transcription ====')
self._save_transcription(transcription)

self._set_stage(SAVING, msg='Saved transcription, generating utterances')
# Utterances to be used creating elan files
logger.info('=== Generate utterances')
logger.info('==== Generate utterances ====')
utterances = self._generate_utterances(
processor, predicted_ids, input_values, transcription)
logger.info('=== Save utterances (elan and text)')
logger.info('==== Save utterances (elan and text) ====')
self._save_utterances(utterances)

self._set_stage(SAVING, complete=True)
Expand Down Expand Up @@ -210,7 +210,7 @@ def _load_audio(self, file: Path) -> Tuple:
return audio, sample_rate

def prepare_audio(self, audio: Path, on_complete: callable = None):
logger.info(f'=== Prepare audio {audio} {self.audio_file_path}')
logger.info(f'==== Prepare audio {audio} {self.audio_file_path} ====')
self._resample_audio_file(audio, self.audio_file_path)
if on_complete is not None:
on_complete()
Expand Down
4 changes: 2 additions & 2 deletions elpis/transformer/elan.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -111,10 +111,10 @@ def update_ui(file_paths: List[Path], ui):
_tier_names: Set[str] = set(ui['data']['tier_name']['options'])
tier_max_count = 0

logger.info('**** ui data')
logger.info('==== ui data ====')
logger.info(ui['data'])

logger.info('**** _tier_types')
logger.info('==== _tier_types ====')
logger.info(_tier_types)

eaf_paths = [p for p in file_paths if f'{p}'.endswith('.eaf')]
Expand Down

0 comments on commit 970bf02

Please sign in to comment.