Cache using pickle (#148)
Signed-off-by: Jean-Francois Lafleche <jlafleche@nvidia.com>
Jean-Francois-Lafleche committed Feb 5, 2020
1 parent 9d77087 commit d48f4ea
Showing 4 changed files with 119 additions and 23 deletions.
27 changes: 12 additions & 15 deletions kaolin/helpers.py
@@ -177,50 +177,47 @@ def _get_hash(x):


 class Cache(object):
-    """Caches the results of the called function to disk.
+    """Caches the results of a function to disk.
     If already cached, data is returned from disk, otherwise,
-    the function called is executed.
+    the function is executed. Output tensors are always on CPU device.
     Args:
         func (Callable): Function whose output will be cached.
         cache_dir (str): Directory where objects will be cached. Defaults
             to 'cache'.
     """

-    def __init__(self, func: Callable, cache_dir: str = 'cache', cache_key: str = ''):
+    def __init__(self, func: Callable, cache_dir: [str, Path], cache_key: str):
         self.func = func
-        self.cache_dir = Path(cache_dir) / cache_key
+        self.cache_dir = Path(cache_dir) / str(cache_key)
         self.cache_dir.mkdir(parents=True, exist_ok=True)
         self.cached_ids = [p.stem for p in self.cache_dir.glob('*')]

-    def __call__(self, object_id: str, **kwargs):
+    def __call__(self, unique_id: str, **kwargs):
         """Execute self.func if not cached, otherwise, read data from disk.
         Args:
-            object_id (str): The object id with which to name the cache file.
+            unique_id (str): The unique id with which to name the cached file.
             **kwargs: The arguments to be passed to self.func.
         Returns:
             dict of {str: torch.Tensor}: Dictionary of tensors.
         """

-        fpath = self.cache_dir / '{0}.npz'.format(object_id)
+        fpath = self.cache_dir / f'{unique_id}.p'

         if not fpath.exists():
             output = self.func(**kwargs)
             self._write(output, fpath)
-            self.cached_ids.append(object_id)
+            self.cached_ids.append(unique_id)
         else:
             output = self._read(fpath)

-        return output
+        # Read file to move tensors to CPU.
+        return self._read(fpath)

     def _write(self, x, fpath):
-        """Write dictionary of numpy arrays to disk.
-        """
-        np_out = {k: t.data.cpu().numpy() for k, t in x.items()}
-        np.savez(fpath, **np_out)
+        torch.save(x, fpath)

     def _read(self, fpath):
-        np_in = np.load(fpath)
-        return {k: torch.from_numpy(arr) for k, arr in np_in.items()}
+        return torch.load(fpath, map_location='cpu')
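
With pickle, the cache round-trips arbitrary picklable outputs (dicts of tensors, meshes, voxel grids, point clouds) through torch.save/torch.load instead of flattening them to numpy arrays, and _read's map_location='cpu' guarantees tensors come back on CPU. A minimal usage sketch of the updated class (the center function and the 'obj_0' id are illustrative, not part of the commit):

import torch
from kaolin.helpers import Cache

def center(points):
    # Hypothetical expensive preprocessing step worth caching.
    return {'points': points - points.mean(dim=0)}

cache = Cache(func=center, cache_dir='cache', cache_key='center')

# First call: runs center() and pickles the result to cache/center/obj_0.p.
out = cache('obj_0', points=torch.rand(100, 3))

# Later calls with the same id skip the function and read the pickle back;
# tensors land on CPU regardless of the device they were produced on.
out = cache('obj_0')
assert out['points'].device.type == 'cpu'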
14 changes: 7 additions & 7 deletions tests/datasets/test_ShapeNet.py
@@ -79,7 +79,7 @@ def test_Images(categories):
     assert len(images) == 30644
     for obj in images:
         assert list(obj['data']['images'].shape) == [4, 137, 137]
-        assert os.path.isfile(obj['attributes']['name'] / 'rendering/00.png')
+        assert (Path(obj['attributes']['name']) / 'rendering/00.png').is_file()
         assert list(obj['data']['params']['cam_mat'].shape) == [3, 3]
         assert list(obj['data']['params']['cam_pos'].shape) == [3]
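
The isfile fix matters because obj['attributes']['name'] is presumably a plain str, and the / join operator only works once one operand is a pathlib.Path. A quick illustration (the path is made up):

from pathlib import Path

name = 'ShapeNet/03001627/chair_0001'      # plain str attribute
# name / 'rendering/00.png'                # TypeError: unsupported operand type(s)
p = Path(name) / 'rendering/00.png'        # works after wrapping in Path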

@@ -91,7 +91,7 @@ def test_Surface_Meshes():
         mode='Tri')
     assert len(surface_meshes) == 10
     assert surface_meshes.cache_dir.exists()
-    assert len(list(surface_meshes.cache_dir.rglob('*.npz'))) == 10
+    assert len(list(surface_meshes.cache_dir.rglob('*.p'))) == 10
     for smesh in surface_meshes:
         assert smesh['data']['vertices'].shape[0] > 0
         assert smesh['data']['faces'].shape[1] == 3
@@ -104,7 +104,7 @@ def test_Surface_Meshes():
         mode='Quad')
     assert len(surface_meshes) == 10
     assert surface_meshes.cache_dir.exists()
-    assert len(list(surface_meshes.cache_dir.rglob('*.npz'))) == 10
+    assert len(list(surface_meshes.cache_dir.rglob('*.p'))) == 10
     for smesh in surface_meshes:
         assert smesh['data']['vertices'].shape[0] > 0
         assert smesh['data']['faces'].shape[1] == 4
@@ -121,7 +121,7 @@ def test_Points():

     assert len(points) == 10
     assert points.cache_dir.exists()
-    assert len(list(points.cache_dir.rglob('*.npz'))) == 10
+    assert len(list(points.cache_dir.rglob('*.p'))) == 10
     for obj in points:
         assert set(obj['data']['points'].shape) == set([5000, 3])
         assert set(obj['data']['normals'].shape) == set([5000, 3])
@@ -135,7 +135,7 @@ def test_Points():

     assert len(points) == 10
     assert points.cache_dir.exists()
-    assert len(list(points.cache_dir.rglob('*.npz'))) == 10
+    assert len(list(points.cache_dir.rglob('*.p'))) == 10
     for obj in points:
         assert set(obj['data']['points'].shape) == set([5000, 3])
         assert set(obj['data']['normals'].shape) == set([5000, 3])
@@ -154,7 +154,7 @@ def test_SDF_Points():

     assert len(sdf_points) == 10
     assert sdf_points.cache_dir.exists()
-    assert len(list(sdf_points.cache_dir.rglob('*.npz'))) == 10
+    assert len(list(sdf_points.cache_dir.rglob('*.p'))) == 10
     for obj in sdf_points:
         assert set(obj['data']['sdf_points'].shape) == set([5000, 3])
         assert set(obj['data']['sdf_distances'].shape) == set([5000])
@@ -168,7 +168,7 @@ def test_SDF_Points():

     assert len(sdf_points) == 10
     assert sdf_points.cache_dir.exists()
-    assert len(list(sdf_points.cache_dir.rglob('*.npz'))) == 10
+    assert len(list(sdf_points.cache_dir.rglob('*.p'))) == 10
     for obj in sdf_points:
         assert set(obj['data']['occ_points'].shape) == set([5000, 3])
         assert set(obj['data']['occ_values'].shape) == set([5000])
2 changes: 1 addition & 1 deletion tests/datasets/test_usdfile.py
@@ -26,7 +26,7 @@ def test_usd_meshes():
     assert len(usd_dataset) == 1

     # test caching
-    assert len(list(Path(cache_dir).glob('**/*.npz'))) == 1
+    assert len(list(Path(cache_dir).glob('**/*.p'))) == 1
     shutil.rmtree('tests/datasets_eval/USDMeshes')

     # Tests below must be run with KitchenSet dataset
99 changes: 99 additions & 0 deletions tests/test_helpers.py
@@ -0,0 +1,99 @@
import os
import shutil

import pytest
import torch
import numpy as np

import kaolin as kal
from kaolin import helpers


CACHE_DIR = 'tests/cache'


@pytest.fixture(autouse=True)
def cleanup():
    """Cleanup after each test. """
    yield
    shutil.rmtree(CACHE_DIR)


@pytest.mark.parametrize('device', ['cpu', 'cuda'])
def test_cache_tensor(device):
    tensor = torch.ones(5, device=device)

    cache = helpers.Cache(func=lambda x: x, cache_dir=CACHE_DIR, cache_key='test')
    cache('tensor', x=tensor)

    # Make sure cache is created
    assert os.path.exists(os.path.join(CACHE_DIR, 'test', 'tensor.p'))

    # Confirm loaded tensor is correct and on CPU device
    loaded = cache('tensor')
    assert torch.all(loaded.eq(tensor.cpu()))

@pytest.mark.parametrize('device', ['cpu', 'cuda'])
def test_cache_dict(device):
    dictionary = {
        'a': torch.ones(5, device=device),
        'b': np.zeros(5),
    }

    cache = helpers.Cache(func=lambda x: x, cache_dir=CACHE_DIR, cache_key='test')
    cache('dictionary', x=dictionary)

    # Make sure cache is created
    assert os.path.exists(os.path.join(CACHE_DIR, 'test', 'dictionary.p'))

    # Confirm loaded dict is correct and on CPU device
    loaded = cache('dictionary')
    assert torch.all(loaded['a'].eq(dictionary['a'].cpu()))
    assert np.all(np.isclose(loaded['b'], dictionary['b']))

@pytest.mark.parametrize('device', ['cpu', 'cuda'])
def test_cache_mesh(device):
    vertices = torch.ones(10, 3, device=device)
    faces = torch.ones(20, 3, device=device, dtype=torch.long)
    mesh = kal.rep.TriangleMesh.from_tensors(vertices, faces)

    cache = helpers.Cache(func=lambda x: x, cache_dir=CACHE_DIR, cache_key='test')
    cache('mesh', x=mesh)

    # Make sure cache is created
    assert os.path.exists(os.path.join(CACHE_DIR, 'test', 'mesh.p'))

    # Confirm loaded mesh is correct and on CPU device
    loaded = cache('mesh')
    assert torch.all(loaded.vertices.eq(vertices.cpu()))
    assert torch.all(loaded.faces.eq(faces.cpu()))

@pytest.mark.parametrize('device', ['cpu', 'cuda'])
def test_cache_voxelgrid(device):
    voxels = torch.ones(3, 3, 3, device=device)
    voxelgrid = kal.rep.VoxelGrid(voxels)

    cache = helpers.Cache(func=lambda x: x, cache_dir=CACHE_DIR, cache_key='test')
    cache('voxelgrid', x=voxelgrid)

    # Make sure cache is created
    assert os.path.exists(os.path.join(CACHE_DIR, 'test', 'voxelgrid.p'))

    # Confirm loaded voxelgrid is correct and on CPU device
    loaded = cache('voxelgrid')
    assert torch.all(loaded.voxels.eq(voxels.cpu()))

@pytest.mark.parametrize('device', ['cpu', 'cuda'])
def test_cache_pointcloud(device):
    points = torch.ones(10, 3, device=device)
    pointcloud = kal.rep.PointCloud(points)

    cache = helpers.Cache(func=lambda x: x, cache_dir=CACHE_DIR, cache_key='test')
    cache('pointcloud', x=pointcloud)

    # Make sure cache is created
    assert os.path.exists(os.path.join(CACHE_DIR, 'test', 'pointcloud.p'))

    # Confirm loaded pointcloud is correct and on CPU device
    loaded = cache('pointcloud')
    assert torch.all(loaded.points.eq(points.cpu()))
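
These tests run every case on both 'cpu' and 'cuda', so they assume a CUDA device is present. One possible guard, not part of this commit, would skip the 'cuda' cases on CPU-only machines:

import pytest
import torch

# Hypothetical parametrization: skip the 'cuda' case when no GPU is present.
cuda = pytest.param('cuda', marks=pytest.mark.skipif(
    not torch.cuda.is_available(), reason='CUDA is not available'))

@pytest.mark.parametrize('device', ['cpu', cuda])
def test_cache_tensor(device):
    ...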
