Skip to content

Commit

Permalink
Fix failing RIR unit test with lhotse 1.24+ (#9444)
Browse files: browse the repository at this point in the history.
Signed-off-by: Jan Lasek <janek.lasek@gmail.com>
  • Loading branch information
pzelasko authored and janekl committed Jun 12, 2024
1 parent 8ce58ac commit 2379f2a
Showing 1 changed file with 117 additions and 27 deletions.
144 changes: 117 additions & 27 deletions tests/collections/common/test_lhotse_dataloading.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -158,9 +158,10 @@ def nemo_tarred_manifest_path(nemo_manifest_path: Path) -> Tuple[str, str]:
root = nemo_manifest_path.parent / "nemo_tar"
root.mkdir(exist_ok=True)

with TarWriter(f"{root}/audios_%01d.tar", shard_size=5) as tar_writer, SequentialJsonlWriter(
root / "tarred_audio_filepaths.jsonl"
) as mft_writer:
with (
TarWriter(f"{root}/audios_%01d.tar", shard_size=5) as tar_writer,
SequentialJsonlWriter(root / "tarred_audio_filepaths.jsonl") as mft_writer,
):
for idx, d in enumerate(load_jsonl(nemo_manifest_path)):
p = d["audio_filepath"]
name = Path(p).name
Expand Down Expand Up @@ -856,7 +857,7 @@ def test_lazy_nemo_iterator_with_offset_field(tmp_path: Path):
from nemo.collections.common.data.lhotse.nemo_adapters import LazyNeMoIterator

# Have to generate as INT16 to avoid quantization error after saving to 16-bit WAV
INT16MAX = 2 ** 15
INT16MAX = 2**15
expected_audio = np.random.randint(low=-INT16MAX - 1, high=INT16MAX, size=(16000,)).astype(np.float32) / INT16MAX
audio_path = str(tmp_path / "dummy.wav")
sf.write(audio_path, expected_audio, 16000)
Expand Down Expand Up @@ -904,7 +905,7 @@ def test_lazy_nemo_iterator_with_relative_paths(tmp_path: Path):
from nemo.collections.common.data.lhotse.nemo_adapters import LazyNeMoIterator

# Have to generate as INT16 to avoid quantization error after saving to 16-bit WAV
INT16MAX = 2 ** 15
INT16MAX = 2**15
expected_audio = np.random.randint(low=-INT16MAX - 1, high=INT16MAX, size=(16000,)).astype(np.float32) / INT16MAX
audio_path = str(tmp_path / "dummy.wav")
sf.write(audio_path, expected_audio, 16000)
Expand Down Expand Up @@ -950,7 +951,13 @@ def test_lhotse_cuts_resolve_relative_paths(tmp_path: Path):
CutSet([cut]).to_file(cuts_path)

config = OmegaConf.create(
{"cuts_path": cuts_path, "sample_rate": 16000, "use_lhotse": True, "num_workers": 0, "batch_size": 2,}
{
"cuts_path": cuts_path,
"sample_rate": 16000,
"use_lhotse": True,
"num_workers": 0,
"batch_size": 2,
}
)

dl = get_lhotse_dataloader_from_config(config=config, global_rank=0, world_size=1, dataset=_Identity())
Expand Down Expand Up @@ -981,13 +988,21 @@ def test_extended_data_input_cfg(cutset_shar_path, nemo_tarred_manifest_path_mul
"manifest_filepath": nemo_tarred_manifest_path_multi[0],
"tarred_audio_filepaths": nemo_tarred_manifest_path_multi[1],
"weight": 0.5,
"tags": {"language": "en", "modality": "audio", "dataset_name": "D1",},
"tags": {
"language": "en",
"modality": "audio",
"dataset_name": "D1",
},
},
{
"type": "lhotse_shar",
"shar_path": cutset_shar_path,
"weight": 0.5,
"tags": {"language": "en", "modality": "audio", "dataset_name": "D2",},
"tags": {
"language": "en",
"modality": "audio",
"dataset_name": "D2",
},
},
],
"sample_rate": 16000,
Expand Down Expand Up @@ -1031,17 +1046,27 @@ def test_extended_data_input_cfg_subgroup(cutset_shar_path, nemo_tarred_manifest
"manifest_filepath": nemo_tarred_manifest_path_multi[0],
"tarred_audio_filepaths": nemo_tarred_manifest_path_multi[1],
"weight": 0.5,
"tags": {"language": "en", "modality": "audio", "dataset_name": "D1",},
"tags": {
"language": "en",
"modality": "audio",
"dataset_name": "D1",
},
},
{
"type": "lhotse_shar",
"shar_path": cutset_shar_path,
"weight": 0.5,
"tags": {"language": "en", "modality": "audio", "dataset_name": "D2",},
"tags": {
"language": "en",
"modality": "audio",
"dataset_name": "D2",
},
},
],
"weight": 0.2,
"tags": {"group_name": "G1",},
"tags": {
"group_name": "G1",
},
},
{
"type": "group",
Expand All @@ -1052,16 +1077,26 @@ def test_extended_data_input_cfg_subgroup(cutset_shar_path, nemo_tarred_manifest
"manifest_filepath": nemo_tarred_manifest_path_multi[0],
"tarred_audio_filepaths": nemo_tarred_manifest_path_multi[1],
"weight": 0.5,
"tags": {"language": "en", "modality": "audio", "dataset_name": "D3",},
"tags": {
"language": "en",
"modality": "audio",
"dataset_name": "D3",
},
},
{
"type": "lhotse_shar",
"shar_path": cutset_shar_path,
"weight": 0.5,
"tags": {"language": "en", "modality": "audio", "dataset_name": "D4",},
"tags": {
"language": "en",
"modality": "audio",
"dataset_name": "D4",
},
},
],
"tags": {"group_name": "G2",},
"tags": {
"group_name": "G2",
},
},
],
"sample_rate": 16000,
Expand Down Expand Up @@ -1107,13 +1142,21 @@ def test_extended_data_input_cfg_yaml_path(tmp_path, cutset_shar_path, nemo_tarr
"manifest_filepath": str(nemo_tarred_manifest_path_multi[0]),
"tarred_audio_filepaths": str(nemo_tarred_manifest_path_multi[1]),
"weight": 0.5,
"tags": {"language": "en", "modality": "audio", "dataset_name": "D1",},
"tags": {
"language": "en",
"modality": "audio",
"dataset_name": "D1",
},
},
{
"type": "lhotse_shar",
"shar_path": str(cutset_shar_path),
"weight": 0.5,
"tags": {"language": "en", "modality": "audio", "dataset_name": "D2",},
"tags": {
"language": "en",
"modality": "audio",
"dataset_name": "D2",
},
},
]

Expand Down Expand Up @@ -1166,7 +1209,13 @@ def txt_es_path(tmp_path_factory):
def test_text_file_input(txt_en_path, txt_es_path):
config = OmegaConf.create(
{
"input_cfg": [{"type": "txt", "paths": txt_en_path, "language": "en",},],
"input_cfg": [
{
"type": "txt",
"paths": txt_en_path,
"language": "en",
},
],
"shuffle": True,
"num_workers": 0,
"batch_size": 4,
Expand Down Expand Up @@ -1312,13 +1361,17 @@ def test_multimodal_text_audio_dataloading(
"target_paths": es_paths,
"source_language": "en",
"target_language": "es",
"tags": {"modality": "text",},
"tags": {
"modality": "text",
},
},
{
"type": "nemo_tarred",
"manifest_filepath": manifest_filepath,
"tarred_audio_filepaths": tarred_audio_filepaths,
"tags": {"modality": "audio",},
"tags": {
"modality": "audio",
},
},
],
"shuffle": True,
Expand All @@ -1339,7 +1392,11 @@ def test_multimodal_text_audio_dataloading(
)

dl = get_lhotse_dataloader_from_config(
config=config, global_rank=0, world_size=1, dataset=Identity(), tokenizer=en_es_tokenizer,
config=config,
global_rank=0,
world_size=1,
dataset=Identity(),
tokenizer=en_es_tokenizer,
)

# Note: we use islice here because the dataloader will be infinite.
Expand Down Expand Up @@ -1402,7 +1459,12 @@ def test_dataloader_with_noise_nemo_json(cutset_path: Path, nemo_manifest_path:
"shard_seed": 0,
}
)
dl = get_lhotse_dataloader_from_config(config=config, global_rank=0, world_size=1, dataset=Identity(),)
dl = get_lhotse_dataloader_from_config(
config=config,
global_rank=0,
world_size=1,
dataset=Identity(),
)
batch = next(iter(dl))
assert isinstance(batch, CutSet)
assert len(batch) == 2
Expand All @@ -1426,7 +1488,12 @@ def test_dataloader_with_noise_lhotse_jsonl(cutset_path: Path):
"shard_seed": 0,
}
)
dl = get_lhotse_dataloader_from_config(config=config, global_rank=0, world_size=1, dataset=Identity(),)
dl = get_lhotse_dataloader_from_config(
config=config,
global_rank=0,
world_size=1,
dataset=Identity(),
)
batch = next(iter(dl))
assert isinstance(batch, CutSet)
assert len(batch) == 2
Expand All @@ -1443,15 +1510,23 @@ def test_dataloader_with_noise_nemo_tar(cutset_path: Path, nemo_tarred_manifest_
config = OmegaConf.create(
{
"cuts_path": str(cutset_path),
"noise_path": {"manifest_filepath": noise_json, "tarred_audio_filepaths": noise_tar,},
"noise_path": {
"manifest_filepath": noise_json,
"tarred_audio_filepaths": noise_tar,
},
"noise_mix_prob": 1.0,
"noise_snr": [-5.0, 5.0],
"batch_size": 2,
"seed": 0,
"shard_seed": 0,
}
)
dl = get_lhotse_dataloader_from_config(config=config, global_rank=0, world_size=1, dataset=Identity(),)
dl = get_lhotse_dataloader_from_config(
config=config,
global_rank=0,
world_size=1,
dataset=Identity(),
)
batch = next(iter(dl))
assert isinstance(batch, CutSet)
assert len(batch) == 2
Expand All @@ -1464,6 +1539,8 @@ def test_dataloader_with_noise_nemo_tar(cutset_path: Path, nemo_tarred_manifest_


def test_dataloader_with_synth_rir(cutset_path: Path):
from lhotse.augmentation import ReverbWithImpulseResponse

config = OmegaConf.create(
{
"cuts_path": str(cutset_path),
Expand All @@ -1474,7 +1551,12 @@ def test_dataloader_with_synth_rir(cutset_path: Path):
"shard_seed": 0,
}
)
dl = get_lhotse_dataloader_from_config(config=config, global_rank=0, world_size=1, dataset=Identity(),)
dl = get_lhotse_dataloader_from_config(
config=config,
global_rank=0,
world_size=1,
dataset=Identity(),
)
batch = next(iter(dl))
assert isinstance(batch, CutSet)
assert len(batch) == 4
Expand All @@ -1487,8 +1569,16 @@ def test_dataloader_with_synth_rir(cutset_path: Path):
cut = batch[2]
assert isinstance(cut, MonoCut)
assert isinstance(cut.recording.transforms, list) and len(cut.recording.transforms) == 1
assert cut.recording.transforms[0]["name"] == "ReverbWithImpulseResponse"
tfnm = cut.recording.transforms[0]
if isinstance(tfnm, dict): # lhotse<=1.23.0
assert tfnm["name"] == "ReverbWithImpulseResponse"
else: # lhotse>=1.24.0
assert isinstance(tfnm, ReverbWithImpulseResponse)
cut = batch[3]
assert isinstance(cut, MonoCut)
assert isinstance(cut.recording.transforms, list) and len(cut.recording.transforms) == 1
assert cut.recording.transforms[0]["name"] == "ReverbWithImpulseResponse"
tfnm = cut.recording.transforms[0]
if isinstance(tfnm, dict): # lhotse<=1.23.0
assert tfnm["name"] == "ReverbWithImpulseResponse"
else: # lhotse>=1.24.0
assert isinstance(tfnm, ReverbWithImpulseResponse)

0 comments on commit 2379f2a

Please sign in to comment.