Skip to content

Commit

Permalink
Ben transcribe audioname (#311)
Browse files (browse the repository at this point in the history)
Harry already approved it; I just closed it without merging, doh.
  • Loading branch information
benfoley committed Sep 21, 2022
1 parent 72325c9 commit a5df43c
Show file tree
Hide file tree
Showing 4 changed files with 11 additions and 8 deletions.
3 changes: 1 addition & 2 deletions docs/wiki/elpis-workshop.md
Original file line number Diff line number Diff line change
Expand Up @@ -286,8 +286,7 @@ The transcription text may be shown in a range of tones from black to light grey
![](assets/latest/145-transcribed-confidence.png)


Listen in Elan.
> If you are using your own audio, rename the audio to `audio.wav`.
Listen in Elan. You will need to move the audio file into the same location as the Elan file for Elan to link to it.

![](assets/latest/150-elan.png)

Expand Down
2 changes: 1 addition & 1 deletion elpis/engines/common/utilities/resampling.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from werkzeug.datastructures import FileStorage


ORIGINAL_SOUND_FILE_DIRECTORY = Path("/tmp/origial_sound_files/")
ORIGINAL_SOUND_FILE_DIRECTORY = Path("/tmp/original_sound_files/")


def load_audio(file: Path, target_sample_rate: int = None) -> Tuple[np.ndarray, int]:
Expand Down
12 changes: 8 additions & 4 deletions elpis/engines/hft/objects/transcription.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@ class HFTTranscription(BaseTranscription):
def __init__(self, **kwargs) -> None:
super().__init__(**kwargs)
# Setup paths
self.audio_file_path = self.path.joinpath("audio.wav")
self.audio_filename = None
self.audio_file_path = None
self.test_labels_path = self.path / "test-labels-path.txt"
self.text_path = self.path / "one-best-hypothesis.txt"
self.xml_path = self.path / "transcription.xml"
Expand Down Expand Up @@ -195,7 +196,7 @@ def _save_utterances(self, utterances) -> None:
"""
result = pympi.Elan.Eaf(author="elpis")

result.add_linked_file("audio.wav")
result.add_linked_file(self.audio_filename)
result.add_tier("default")

to_millis = lambda seconds: int(seconds * 1000)
Expand All @@ -205,8 +206,11 @@ def _save_utterances(self, utterances) -> None:

pympi.Elan.to_eaf(self.elan_path, result)

def prepare_audio(self, audio: Path, on_complete: callable = None):
logger.info(f"=== Prepare audio {audio} {self.audio_file_path}")
def prepare_audio(self, audio: FileStorage, on_complete: callable = None):
logger.info(f"=== Prepare audio for transcription {audio}")
self.audio_filename = audio.filename
self.audio_file_path = self.path.joinpath(self.audio_filename)

resampler.resample_from_file_storage(audio, self.audio_file_path, HFTModel.SAMPLING_RATE)
if on_complete is not None:
on_complete()
Expand Down
2 changes: 1 addition & 1 deletion elpis/test/test_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,6 @@ def pipeline_upto_step_4(pipeline_upto_step_3):
# Make a transcription interface and transcribe unseen audio to elan.
t = kaldi.new_transcription("transcription_w")
t.link(m)
t.transcribe_algin("/recordings/untranscribed/audio.wav")
t.transcribe_align("/recordings/untranscribed/audio.wav")

return (kaldi, ds, pd, m, t)

0 comments on commit a5df43c

Please sign in to comment.