Fix failing RIR unit test with lhotse 1.24+ (#9444)

Signed-off-by: Jan Lasek <janek.lasek@gmail.com>
NVIDIA · Jun 12, 2024 · 2379f2a
1 parent 8ce58ac
commit 2379f2a
Showing 1 changed file with 117 additions and 27 deletions.
diff --git a/tests/collections/common/test_lhotse_dataloading.py b/tests/collections/common/test_lhotse_dataloading.py
@@ -158,9 +158,10 @@ def nemo_tarred_manifest_path(nemo_manifest_path: Path) -> Tuple[str, str]:
     root = nemo_manifest_path.parent / "nemo_tar"
     root.mkdir(exist_ok=True)
 
-    with TarWriter(f"{root}/audios_%01d.tar", shard_size=5) as tar_writer, SequentialJsonlWriter(
-        root / "tarred_audio_filepaths.jsonl"
-    ) as mft_writer:
+    with (
+        TarWriter(f"{root}/audios_%01d.tar", shard_size=5) as tar_writer,
+        SequentialJsonlWriter(root / "tarred_audio_filepaths.jsonl") as mft_writer,
+    ):
         for idx, d in enumerate(load_jsonl(nemo_manifest_path)):
             p = d["audio_filepath"]
             name = Path(p).name
@@ -856,7 +857,7 @@ def test_lazy_nemo_iterator_with_offset_field(tmp_path: Path):
     from nemo.collections.common.data.lhotse.nemo_adapters import LazyNeMoIterator
 
     # Have to generate as INT16 to avoid quantization error after saving to 16-bit WAV
-    INT16MAX = 2 ** 15
+    INT16MAX = 2**15
     expected_audio = np.random.randint(low=-INT16MAX - 1, high=INT16MAX, size=(16000,)).astype(np.float32) / INT16MAX
     audio_path = str(tmp_path / "dummy.wav")
     sf.write(audio_path, expected_audio, 16000)
@@ -904,7 +905,7 @@ def test_lazy_nemo_iterator_with_relative_paths(tmp_path: Path):
     from nemo.collections.common.data.lhotse.nemo_adapters import LazyNeMoIterator
 
     # Have to generate as INT16 to avoid quantization error after saving to 16-bit WAV
-    INT16MAX = 2 ** 15
+    INT16MAX = 2**15
     expected_audio = np.random.randint(low=-INT16MAX - 1, high=INT16MAX, size=(16000,)).astype(np.float32) / INT16MAX
     audio_path = str(tmp_path / "dummy.wav")
     sf.write(audio_path, expected_audio, 16000)
@@ -950,7 +951,13 @@ def test_lhotse_cuts_resolve_relative_paths(tmp_path: Path):
     CutSet([cut]).to_file(cuts_path)
 
     config = OmegaConf.create(
-        {"cuts_path": cuts_path, "sample_rate": 16000, "use_lhotse": True, "num_workers": 0, "batch_size": 2,}
+        {
+            "cuts_path": cuts_path,
+            "sample_rate": 16000,
+            "use_lhotse": True,
+            "num_workers": 0,
+            "batch_size": 2,
+        }
     )
 
     dl = get_lhotse_dataloader_from_config(config=config, global_rank=0, world_size=1, dataset=_Identity())
@@ -981,13 +988,21 @@ def test_extended_data_input_cfg(cutset_shar_path, nemo_tarred_manifest_path_mul
                     "manifest_filepath": nemo_tarred_manifest_path_multi[0],
                     "tarred_audio_filepaths": nemo_tarred_manifest_path_multi[1],
                     "weight": 0.5,
-                    "tags": {"language": "en", "modality": "audio", "dataset_name": "D1",},
+                    "tags": {
+                        "language": "en",
+                        "modality": "audio",
+                        "dataset_name": "D1",
+                    },
                 },
                 {
                     "type": "lhotse_shar",
                     "shar_path": cutset_shar_path,
                     "weight": 0.5,
-                    "tags": {"language": "en", "modality": "audio", "dataset_name": "D2",},
+                    "tags": {
+                        "language": "en",
+                        "modality": "audio",
+                        "dataset_name": "D2",
+                    },
                 },
             ],
             "sample_rate": 16000,
@@ -1031,17 +1046,27 @@ def test_extended_data_input_cfg_subgroup(cutset_shar_path, nemo_tarred_manifest
                             "manifest_filepath": nemo_tarred_manifest_path_multi[0],
                             "tarred_audio_filepaths": nemo_tarred_manifest_path_multi[1],
                             "weight": 0.5,
-                            "tags": {"language": "en", "modality": "audio", "dataset_name": "D1",},
+                            "tags": {
+                                "language": "en",
+                                "modality": "audio",
+                                "dataset_name": "D1",
+                            },
                         },
                         {
                             "type": "lhotse_shar",
                             "shar_path": cutset_shar_path,
                             "weight": 0.5,
-                            "tags": {"language": "en", "modality": "audio", "dataset_name": "D2",},
+                            "tags": {
+                                "language": "en",
+                                "modality": "audio",
+                                "dataset_name": "D2",
+                            },
                         },
                     ],
                     "weight": 0.2,
-                    "tags": {"group_name": "G1",},
+                    "tags": {
+                        "group_name": "G1",
+                    },
                 },
                 {
                     "type": "group",
@@ -1052,16 +1077,26 @@ def test_extended_data_input_cfg_subgroup(cutset_shar_path, nemo_tarred_manifest
                             "manifest_filepath": nemo_tarred_manifest_path_multi[0],
                             "tarred_audio_filepaths": nemo_tarred_manifest_path_multi[1],
                             "weight": 0.5,
-                            "tags": {"language": "en", "modality": "audio", "dataset_name": "D3",},
+                            "tags": {
+                                "language": "en",
+                                "modality": "audio",
+                                "dataset_name": "D3",
+                            },
                         },
                         {
                             "type": "lhotse_shar",
                             "shar_path": cutset_shar_path,
                             "weight": 0.5,
-                            "tags": {"language": "en", "modality": "audio", "dataset_name": "D4",},
+                            "tags": {
+                                "language": "en",
+                                "modality": "audio",
+                                "dataset_name": "D4",
+                            },
                         },
                     ],
-                    "tags": {"group_name": "G2",},
+                    "tags": {
+                        "group_name": "G2",
+                    },
                 },
             ],
             "sample_rate": 16000,
@@ -1107,13 +1142,21 @@ def test_extended_data_input_cfg_yaml_path(tmp_path, cutset_shar_path, nemo_tarr
             "manifest_filepath": str(nemo_tarred_manifest_path_multi[0]),
             "tarred_audio_filepaths": str(nemo_tarred_manifest_path_multi[1]),
             "weight": 0.5,
-            "tags": {"language": "en", "modality": "audio", "dataset_name": "D1",},
+            "tags": {
+                "language": "en",
+                "modality": "audio",
+                "dataset_name": "D1",
+            },
         },
         {
             "type": "lhotse_shar",
             "shar_path": str(cutset_shar_path),
             "weight": 0.5,
-            "tags": {"language": "en", "modality": "audio", "dataset_name": "D2",},
+            "tags": {
+                "language": "en",
+                "modality": "audio",
+                "dataset_name": "D2",
+            },
         },
     ]
 
@@ -1166,7 +1209,13 @@ def txt_es_path(tmp_path_factory):
 def test_text_file_input(txt_en_path, txt_es_path):
     config = OmegaConf.create(
         {
-            "input_cfg": [{"type": "txt", "paths": txt_en_path, "language": "en",},],
+            "input_cfg": [
+                {
+                    "type": "txt",
+                    "paths": txt_en_path,
+                    "language": "en",
+                },
+            ],
             "shuffle": True,
             "num_workers": 0,
             "batch_size": 4,
@@ -1312,13 +1361,17 @@ def test_multimodal_text_audio_dataloading(
                     "target_paths": es_paths,
                     "source_language": "en",
                     "target_language": "es",
-                    "tags": {"modality": "text",},
+                    "tags": {
+                        "modality": "text",
+                    },
                 },
                 {
                     "type": "nemo_tarred",
                     "manifest_filepath": manifest_filepath,
                     "tarred_audio_filepaths": tarred_audio_filepaths,
-                    "tags": {"modality": "audio",},
+                    "tags": {
+                        "modality": "audio",
+                    },
                 },
             ],
             "shuffle": True,
@@ -1339,7 +1392,11 @@ def test_multimodal_text_audio_dataloading(
     )
 
     dl = get_lhotse_dataloader_from_config(
-        config=config, global_rank=0, world_size=1, dataset=Identity(), tokenizer=en_es_tokenizer,
+        config=config,
+        global_rank=0,
+        world_size=1,
+        dataset=Identity(),
+        tokenizer=en_es_tokenizer,
     )
 
     # Note: we use islice here because the dataloader will be infinite.
@@ -1402,7 +1459,12 @@ def test_dataloader_with_noise_nemo_json(cutset_path: Path, nemo_manifest_path:
             "shard_seed": 0,
         }
     )
-    dl = get_lhotse_dataloader_from_config(config=config, global_rank=0, world_size=1, dataset=Identity(),)
+    dl = get_lhotse_dataloader_from_config(
+        config=config,
+        global_rank=0,
+        world_size=1,
+        dataset=Identity(),
+    )
     batch = next(iter(dl))
     assert isinstance(batch, CutSet)
     assert len(batch) == 2
@@ -1426,7 +1488,12 @@ def test_dataloader_with_noise_lhotse_jsonl(cutset_path: Path):
             "shard_seed": 0,
         }
     )
-    dl = get_lhotse_dataloader_from_config(config=config, global_rank=0, world_size=1, dataset=Identity(),)
+    dl = get_lhotse_dataloader_from_config(
+        config=config,
+        global_rank=0,
+        world_size=1,
+        dataset=Identity(),
+    )
     batch = next(iter(dl))
     assert isinstance(batch, CutSet)
     assert len(batch) == 2
@@ -1443,15 +1510,23 @@ def test_dataloader_with_noise_nemo_tar(cutset_path: Path, nemo_tarred_manifest_
     config = OmegaConf.create(
         {
             "cuts_path": str(cutset_path),
-            "noise_path": {"manifest_filepath": noise_json, "tarred_audio_filepaths": noise_tar,},
+            "noise_path": {
+                "manifest_filepath": noise_json,
+                "tarred_audio_filepaths": noise_tar,
+            },
             "noise_mix_prob": 1.0,
             "noise_snr": [-5.0, 5.0],
             "batch_size": 2,
             "seed": 0,
             "shard_seed": 0,
         }
     )
-    dl = get_lhotse_dataloader_from_config(config=config, global_rank=0, world_size=1, dataset=Identity(),)
+    dl = get_lhotse_dataloader_from_config(
+        config=config,
+        global_rank=0,
+        world_size=1,
+        dataset=Identity(),
+    )
     batch = next(iter(dl))
     assert isinstance(batch, CutSet)
     assert len(batch) == 2
@@ -1464,6 +1539,8 @@ def test_dataloader_with_noise_nemo_tar(cutset_path: Path, nemo_tarred_manifest_
 
 
 def test_dataloader_with_synth_rir(cutset_path: Path):
+    from lhotse.augmentation import ReverbWithImpulseResponse
+
     config = OmegaConf.create(
         {
             "cuts_path": str(cutset_path),
@@ -1474,7 +1551,12 @@ def test_dataloader_with_synth_rir(cutset_path: Path):
             "shard_seed": 0,
         }
     )
-    dl = get_lhotse_dataloader_from_config(config=config, global_rank=0, world_size=1, dataset=Identity(),)
+    dl = get_lhotse_dataloader_from_config(
+        config=config,
+        global_rank=0,
+        world_size=1,
+        dataset=Identity(),
+    )
     batch = next(iter(dl))
     assert isinstance(batch, CutSet)
     assert len(batch) == 4
@@ -1487,8 +1569,16 @@ def test_dataloader_with_synth_rir(cutset_path: Path):
     cut = batch[2]
     assert isinstance(cut, MonoCut)
     assert isinstance(cut.recording.transforms, list) and len(cut.recording.transforms) == 1
-    assert cut.recording.transforms[0]["name"] == "ReverbWithImpulseResponse"
+    tfnm = cut.recording.transforms[0]
+    if isinstance(tfnm, dict):  # lhotse<=1.23.0
+        assert tfnm["name"] == "ReverbWithImpulseResponse"
+    else:  # lhotse>=1.24.0
+        assert isinstance(tfnm, ReverbWithImpulseResponse)
     cut = batch[3]
     assert isinstance(cut, MonoCut)
     assert isinstance(cut.recording.transforms, list) and len(cut.recording.transforms) == 1
-    assert cut.recording.transforms[0]["name"] == "ReverbWithImpulseResponse"
+    tfnm = cut.recording.transforms[0]
+    if isinstance(tfnm, dict):  # lhotse<=1.23.0
+        assert tfnm["name"] == "ReverbWithImpulseResponse"
+    else:  # lhotse>=1.24.0
+        assert isinstance(tfnm, ReverbWithImpulseResponse)