Skip to content

Commit

Permalink
Ronny3 issue 327 (#329)
Browse files Browse the repository at this point in the history
  • Loading branch information
benfoley committed Oct 17, 2022
1 parent da4663b commit 112a46d
Showing 1 changed file with 6 additions and 2 deletions.
8 changes: 6 additions & 2 deletions elpis/engines/hft/objects/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -542,6 +542,8 @@ def prepare_dataset(batch):
def prepare_speech(self):
logger.info("==== Preparing Speech ====")
speech = {}
temp_start = []
temp_end = []
audio_paths = set()
rejected_count = 0

Expand Down Expand Up @@ -578,7 +580,7 @@ def prepare_speech(self):
logger.info(
f"Resample from {sample_rate} to {HFTModel.SAMPLING_RATE} | "
f"{os.path.basename(path).rjust(20)} | "
f"{str(start_ms/1000).rjust(15)} : {str(stop_ms/1000).ljust(15)} | "
f"{str(start_ms / 1000).rjust(15)} : {str(stop_ms / 1000).ljust(15)} | "
f"{str(start_frame).rjust(15)} : {str(end_frame).ljust(15)}"
)
resampler = torchaudio.transforms.Resample(sample_rate, HFTModel.SAMPLING_RATE)
Expand All @@ -587,6 +589,8 @@ def prepare_speech(self):
# i.e. don't use the audio file path as the key
unique_key = f"{path}{start_ms}{stop_ms}"
speech[unique_key] = speech_array.squeeze().numpy()
temp_start.append(start_ms)
temp_end.append(stop_ms)
# For debugging/checking the dataset, generate an audio file for listening
# torchaudio.save(self.tmp_audio_path.joinpath(os.path.basename(path)), speech_array, HFTModel.SAMPLING_RATE)
else:
Expand All @@ -595,7 +599,7 @@ def prepare_speech(self):

# Remove rejected speech by keeping only the segments whose length met the required conditions
self.hft_dataset = self.hft_dataset.filter(
lambda x: f"{path}{start_ms}{stop_ms}" in speech.keys()
lambda x: x["start_ms"] in temp_start and x["stop_ms"] in temp_end
)
logger.info(
f"{rejected_count} files removed due to number of frames, zero wav or too short"
Expand Down

0 comments on commit 112a46d

Please sign in to comment.