Cache using pickle (#148)
Signed-off-by: Jean-Francois Lafleche <jlafleche@nvidia.com>
Jean-Francois-Lafleche committed Feb 5, 2020
1 parent 9d77087 commit d48f4ea
Showing 4 changed files with 119 additions and 23 deletions.
27 changes: 12 additions & 15 deletions kaolin/helpers.py
@@ -177,50 +177,47 @@ def _get_hash(x):


 class Cache(object):
-    """Caches the results of the called function to disk.
+    """Caches the results of a function to disk.
     If already cached, data is returned from disk, otherwise,
-    the function called is executed.
+    the function is executed. Output tensors are always on CPU device.
     Args:
         func (Callable): Function whose output will be cached.
         cache_dir (str): Directory where objects will be cached. Defaults
             to 'cache'.
     """

-    def __init__(self, func: Callable, cache_dir: str = 'cache', cache_key: str = ''):
+    def __init__(self, func: Callable, cache_dir: [str, Path], cache_key: str):
         self.func = func
-        self.cache_dir = Path(cache_dir) / cache_key
+        self.cache_dir = Path(cache_dir) / str(cache_key)
         self.cache_dir.mkdir(parents=True, exist_ok=True)
         self.cached_ids = [p.stem for p in self.cache_dir.glob('*')]

-    def __call__(self, object_id: str, **kwargs):
+    def __call__(self, unique_id: str, **kwargs):
         """Execute self.func if not cached, otherwise, read data from disk.
         Args:
-            object_id (str): The object id with which to name the cache file.
+            unique_id (str): The unique id with which to name the cached file.
             **kwargs: The arguments to be passed to self.func.
         Returns:
             dict of {str: torch.Tensor}: Dictionary of tensors.
         """

-        fpath = self.cache_dir / '{0}.npz'.format(object_id)
+        fpath = self.cache_dir / f'{unique_id}.p'

         if not fpath.exists():
             output = self.func(**kwargs)
             self._write(output, fpath)
-            self.cached_ids.append(object_id)
+            self.cached_ids.append(unique_id)
         else:
             output = self._read(fpath)

-        return output
+        # Read file to move tensors to CPU.
+        return self._read(fpath)

     def _write(self, x, fpath):
-        """Write dictionary of numpy arrays to disk.
-        """
-        np_out = {k: t.data.cpu().numpy() for k, t in x.items()}
-        np.savez(fpath, **np_out)
+        torch.save(x, fpath)

     def _read(self, fpath):
-        np_in = np.load(fpath)
-        return {k: torch.from_numpy(arr) for k, arr in np_in.items()}
+        return torch.load(fpath, map_location='cpu')
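
With pickle, the cache round-trips arbitrary picklable outputs (dicts of tensors, meshes, voxel grids, point clouds) through torch.save/torch.load instead of flattening them to numpy arrays, and _read's map_location='cpu' guarantees tensors come back on CPU. A minimal usage sketch of the updated class (the center function and the 'obj_0' id are illustrative, not part of the commit):

import torch
from kaolin.helpers import Cache

def center(points):
    # Hypothetical expensive preprocessing step worth caching.
    return {'points': points - points.mean(dim=0)}

cache = Cache(func=center, cache_dir='cache', cache_key='center')

# First call: runs center() and pickles the result to cache/center/obj_0.p.
out = cache('obj_0', points=torch.rand(100, 3))

# Later calls with the same id skip the function and read the pickle back;
# tensors land on CPU regardless of the device they were produced on.
out = cache('obj_0')
assert out['points'].device.type == 'cpu'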
14 changes: 7 additions & 7 deletions tests/datasets/test_ShapeNet.py
@@ -79,7 +79,7 @@ def test_Images(categories):
     assert len(images) == 30644
     for obj in images:
         assert list(obj['data']['images'].shape) == [4, 137, 137]
-        assert os.path.isfile(obj['attributes']['name'] / 'rendering/00.png')
+        assert (Path(obj['attributes']['name']) / 'rendering/00.png').is_file()
         assert list(obj['data']['params']['cam_mat'].shape) == [3, 3]
         assert list(obj['data']['params']['cam_pos'].shape) == [3]
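
The isfile fix matters because obj['attributes']['name'] is presumably a plain str, and the / join operator only works once one operand is a pathlib.Path. A quick illustration (the path is made up):

from pathlib import Path

name = 'ShapeNet/03001627/chair_0001'      # plain str attribute
# name / 'rendering/00.png'                # TypeError: unsupported operand type(s)
p = Path(name) / 'rendering/00.png'        # works after wrapping in Path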

@@ -91,7 +91,7 @@ def test_Surface_Meshes():
         mode='Tri')
     assert len(surface_meshes) == 10
     assert surface_meshes.cache_dir.exists()
-    assert len(list(surface_meshes.cache_dir.rglob('*.npz'))) == 10
+    assert len(list(surface_meshes.cache_dir.rglob('*.p'))) == 10
     for smesh in surface_meshes:
         assert smesh['data']['vertices'].shape[0] > 0
         assert smesh['data']['faces'].shape[1] == 3
@@ -104,7 +104,7 @@ def test_Surface_Meshes():
         mode='Quad')
     assert len(surface_meshes) == 10
     assert surface_meshes.cache_dir.exists()
-    assert len(list(surface_meshes.cache_dir.rglob('*.npz'))) == 10
+    assert len(list(surface_meshes.cache_dir.rglob('*.p'))) == 10
     for smesh in surface_meshes:
         assert smesh['data']['vertices'].shape[0] > 0
         assert smesh['data']['faces'].shape[1] == 4
@@ -121,7 +121,7 @@ def test_Points():

     assert len(points) == 10
     assert points.cache_dir.exists()
-    assert len(list(points.cache_dir.rglob('*.npz'))) == 10
+    assert len(list(points.cache_dir.rglob('*.p'))) == 10
     for obj in points:
         assert set(obj['data']['points'].shape) == set([5000, 3])
         assert set(obj['data']['normals'].shape) == set([5000, 3])
@@ -135,7 +135,7 @@ def test_Points():

     assert len(points) == 10
     assert points.cache_dir.exists()
-    assert len(list(points.cache_dir.rglob('*.npz'))) == 10
+    assert len(list(points.cache_dir.rglob('*.p'))) == 10
     for obj in points:
         assert set(obj['data']['points'].shape) == set([5000, 3])
         assert set(obj['data']['normals'].shape) == set([5000, 3])
@@ -154,7 +154,7 @@ def test_SDF_Points():

     assert len(sdf_points) == 10
     assert sdf_points.cache_dir.exists()
-    assert len(list(sdf_points.cache_dir.rglob('*.npz'))) == 10
+    assert len(list(sdf_points.cache_dir.rglob('*.p'))) == 10
     for obj in sdf_points:
         assert set(obj['data']['sdf_points'].shape) == set([5000, 3])
         assert set(obj['data']['sdf_distances'].shape) == set([5000])
@@ -168,7 +168,7 @@ def test_SDF_Points():

     assert len(sdf_points) == 10
     assert sdf_points.cache_dir.exists()
-    assert len(list(sdf_points.cache_dir.rglob('*.npz'))) == 10
+    assert len(list(sdf_points.cache_dir.rglob('*.p'))) == 10
     for obj in sdf_points:
         assert set(obj['data']['occ_points'].shape) == set([5000, 3])
         assert set(obj['data']['occ_values'].shape) == set([5000])
2 changes: 1 addition & 1 deletion tests/datasets/test_usdfile.py
@@ -26,7 +26,7 @@ def test_usd_meshes():
     assert len(usd_dataset) == 1

     # test caching
-    assert len(list(Path(cache_dir).glob('**/*.npz'))) == 1
+    assert len(list(Path(cache_dir).glob('**/*.p'))) == 1
     shutil.rmtree('tests/datasets_eval/USDMeshes')

     # Tests below must be run with KitchenSet dataset
99 changes: 99 additions & 0 deletions tests/test_helpers.py
@@ -0,0 +1,99 @@
import os
import shutil

import pytest
import torch
import numpy as np

import kaolin as kal
from kaolin import helpers


CACHE_DIR = 'tests/cache'


@pytest.fixture(autouse=True)
def cleanup():
    """Cleanup after each test. """
    yield
    shutil.rmtree(CACHE_DIR)


@pytest.mark.parametrize('device', ['cpu', 'cuda'])
def test_cache_tensor(device):
    tensor = torch.ones(5, device=device)

    cache = helpers.Cache(func=lambda x: x, cache_dir=CACHE_DIR, cache_key='test')
    cache('tensor', x=tensor)

    # Make sure cache is created
    assert os.path.exists(os.path.join(CACHE_DIR, 'test', 'tensor.p'))

    # Confirm loaded tensor is correct and on CPU device
    loaded = cache('tensor')
    assert torch.all(loaded.eq(tensor.cpu()))

@pytest.mark.parametrize('device', ['cpu', 'cuda'])
def test_cache_dict(device):
    dictionary = {
        'a': torch.ones(5, device=device),
        'b': np.zeros(5),
    }

    cache = helpers.Cache(func=lambda x: x, cache_dir=CACHE_DIR, cache_key='test')
    cache('dictionary', x=dictionary)

    # Make sure cache is created
    assert os.path.exists(os.path.join(CACHE_DIR, 'test', 'dictionary.p'))

    # Confirm loaded dict is correct and on CPU device
    loaded = cache('dictionary')
    assert torch.all(loaded['a'].eq(dictionary['a'].cpu()))
    assert np.all(np.isclose(loaded['b'], dictionary['b']))

@pytest.mark.parametrize('device', ['cpu', 'cuda'])
def test_cache_mesh(device):
    vertices = torch.ones(10, 3, device=device)
    faces = torch.ones(20, 3, device=device, dtype=torch.long)
    mesh = kal.rep.TriangleMesh.from_tensors(vertices, faces)

    cache = helpers.Cache(func=lambda x: x, cache_dir=CACHE_DIR, cache_key='test')
    cache('mesh', x=mesh)

    # Make sure cache is created
    assert os.path.exists(os.path.join(CACHE_DIR, 'test', 'mesh.p'))

    # Confirm loaded mesh is correct and on CPU device
    loaded = cache('mesh')
    assert torch.all(loaded.vertices.eq(vertices.cpu()))
    assert torch.all(loaded.faces.eq(faces.cpu()))

@pytest.mark.parametrize('device', ['cpu', 'cuda'])
def test_cache_voxelgrid(device):
    voxels = torch.ones(3, 3, 3, device=device)
    voxelgrid = kal.rep.VoxelGrid(voxels)

    cache = helpers.Cache(func=lambda x: x, cache_dir=CACHE_DIR, cache_key='test')
    cache('voxelgrid', x=voxelgrid)

    # Make sure cache is created
    assert os.path.exists(os.path.join(CACHE_DIR, 'test', 'voxelgrid.p'))

    # Confirm loaded voxelgrid is correct and on CPU device
    loaded = cache('voxelgrid')
    assert torch.all(loaded.voxels.eq(voxels.cpu()))

@pytest.mark.parametrize('device', ['cpu', 'cuda'])
def test_cache_pointcloud(device):
    points = torch.ones(10, 3, device=device)
    pointcloud = kal.rep.PointCloud(points)

    cache = helpers.Cache(func=lambda x: x, cache_dir=CACHE_DIR, cache_key='test')
    cache('pointcloud', x=pointcloud)

    # Make sure cache is created
    assert os.path.exists(os.path.join(CACHE_DIR, 'test', 'pointcloud.p'))

    # Confirm loaded pointcloud is correct and on CPU device
    loaded = cache('pointcloud')
    assert torch.all(loaded.points.eq(points.cpu()))
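
These tests run every case on both 'cpu' and 'cuda', so they assume a CUDA device is present. One possible guard, not part of this commit, would skip the 'cuda' cases on CPU-only machines:

import pytest
import torch

# Hypothetical parametrization: skip the 'cuda' case when no GPU is present.
cuda = pytest.param('cuda', marks=pytest.mark.skipif(
    not torch.cuda.is_available(), reason='CUDA is not available'))

@pytest.mark.parametrize('device', ['cpu', cuda])
def test_cache_tensor(device):
    ...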
