Ben transcribe audioname (#311)

Harry already approved it; I just closed it without merging, doh.
CoEDL · Sep 21, 2022 · a5df43c · a5df43c
1 parent 72325c9
commit a5df43c
Show file tree

Hide file tree

Showing 4 changed files with 11 additions and 8 deletions.
diff --git a/docs/wiki/elpis-workshop.md b/docs/wiki/elpis-workshop.md
@@ -286,8 +286,7 @@ The transcription text may be shown in a range of tones from black to light grey
 ![](assets/latest/145-transcribed-confidence.png)
 
 
-Listen in Elan.
-> If you are using your own audio, rename the audio to `audio.wav`.
+Listen in Elan. You will need to move the audio file into the same location as the Elan file for Elan to link to it.
 
 ![](assets/latest/150-elan.png)
 

diff --git a/elpis/engines/common/utilities/resampling.py b/elpis/engines/common/utilities/resampling.py
@@ -8,7 +8,7 @@
 from werkzeug.datastructures import FileStorage
 
 
-ORIGINAL_SOUND_FILE_DIRECTORY = Path("/tmp/origial_sound_files/")
+ORIGINAL_SOUND_FILE_DIRECTORY = Path("/tmp/original_sound_files/")
 
 
 def load_audio(file: Path, target_sample_rate: int = None) -> Tuple[np.ndarray, int]:

diff --git a/elpis/engines/hft/objects/transcription.py b/elpis/engines/hft/objects/transcription.py
@@ -38,7 +38,8 @@ class HFTTranscription(BaseTranscription):
     def __init__(self, **kwargs) -> None:
         super().__init__(**kwargs)
         # Setup paths
-        self.audio_file_path = self.path.joinpath("audio.wav")
+        self.audio_filename = None
+        self.audio_file_path = None
         self.test_labels_path = self.path / "test-labels-path.txt"
         self.text_path = self.path / "one-best-hypothesis.txt"
         self.xml_path = self.path / "transcription.xml"
@@ -195,7 +196,7 @@ def _save_utterances(self, utterances) -> None:
         """
         result = pympi.Elan.Eaf(author="elpis")
 
-        result.add_linked_file("audio.wav")
+        result.add_linked_file(self.audio_filename)
         result.add_tier("default")
 
         to_millis = lambda seconds: int(seconds * 1000)
@@ -205,8 +206,11 @@ def _save_utterances(self, utterances) -> None:
 
         pympi.Elan.to_eaf(self.elan_path, result)
 
-    def prepare_audio(self, audio: Path, on_complete: callable = None):
-        logger.info(f"=== Prepare audio {audio} {self.audio_file_path}")
+    def prepare_audio(self, audio: FileStorage, on_complete: callable = None):
+        logger.info(f"=== Prepare audio for transcription {audio}")
+        self.audio_filename = audio.filename
+        self.audio_file_path = self.path.joinpath(self.audio_filename)
+
         resampler.resample_from_file_storage(audio, self.audio_file_path, HFTModel.SAMPLING_RATE)
         if on_complete is not None:
             on_complete()

diff --git a/elpis/test/test_pipeline.py b/elpis/test/test_pipeline.py
@@ -92,6 +92,6 @@ def pipeline_upto_step_4(pipeline_upto_step_3):
     # Make a transcription interface and transcribe unseen audio to elan.
     t = kaldi.new_transcription("transcription_w")
     t.link(m)
-    t.transcribe_algin("/recordings/untranscribed/audio.wav")
+    t.transcribe_align("/recordings/untranscribed/audio.wav")
 
     return (kaldi, ds, pd, m, t)