In [77]:
import numpy as np
import pandas as pd
import h5py
import os
import skimage.io as sio
import tqdm

In [2]:
# Dataset locations. The defaults reproduce the original hard-coded paths, so the
# notebook behaves exactly as before when no environment variable is set.
# Set SHAPENET_DATASET_DIR (and optionally SHAPENET_DATA_ROOT) to run the notebook
# on another machine without editing this cell.
dataset_dir = os.environ.get("SHAPENET_DATASET_DIR", "/home/shubham/datasets/")
data_root = os.environ.get(
    "SHAPENET_DATA_ROOT",
    # The trailing "" makes os.path.join keep the final path separator.
    os.path.join(dataset_dir, "subset", "ShapeNet", ""),
)

In [64]:
# Directory that receives the generated .h5 files. It is created on first run;
# exist_ok=True makes re-running this cell harmless.
save_path = os.path.join(dataset_dir, "hdf_data")
os.makedirs(save_path, exist_ok=True)

In [None]:
def save_dict_to_hdf5(dic, filename):
    """
    Write a (possibly nested) dictionary to an HDF5 file.

    The file at ``filename`` is opened in ``'w'`` mode and ``dic`` is written
    starting at the root group ``'/'``. The actual traversal is delegated to
    ``recursively_save_dict_contents_to_group``; see that function for the
    value types it accepts and the errors it raises.
    """
    root_group = '/'
    with h5py.File(filename, 'w') as out_file:
        recursively_save_dict_contents_to_group(out_file, root_group, dic)

def recursively_save_dict_contents_to_group(h5file, path, dic):
    """
    Store every entry of ``dic`` under ``path`` in ``h5file``.

    Nested dictionaries are written recursively: each one gets its own
    sub-path ``path + key + '/'``. Leaf values are assigned with
    ``h5file[path + key] = value``.

    Parameters
    ----------
    h5file : object supporting ``h5file[name] = value``
        The caller in this file passes an open, writable ``h5py.File``.
    path : str
        Prefix prepended to every key. It is concatenated as-is, so it is
        expected to end with ``'/'``.
    dic : dict
        Mapping of ``str`` keys to leaf values or nested dictionaries.
        Keys are joined to ``path`` with ``+``, so non-string keys raise
        ``TypeError``.

    Raises
    ------
    ValueError
        If a value is neither a dictionary nor one of the supported leaf
        types (NumPy arrays, NumPy numeric/boolean scalars, Python
        ``int``/``float``/``bool``, ``str`` and ``bytes``).
    """
    # np.number covers every NumPy integer/float/complex scalar (not only
    # int64/float64); int also covers bool because bool subclasses int.
    leaf_types = (np.ndarray, np.number, np.bool_, int, float, str, bytes)

    for key, item in dic.items():
        target = path + key
        if isinstance(item, dict):
            recursively_save_dict_contents_to_group(h5file, target + '/', item)
        elif isinstance(item, leaf_types):
            h5file[target] = item
        else:
            raise ValueError('Cannot save %s type'%type(item))

def load_dict_from_hdf5(filename):
    """
    Read an HDF5 file back into a (possibly nested) dictionary.

    The file at ``filename`` is opened read-only and its contents, starting
    at the root group ``'/'``, are converted by
    ``recursively_load_dict_contents_from_group``. The resulting dictionary
    is returned after the file has been closed.
    """
    with h5py.File(filename, 'r') as in_file:
        contents = recursively_load_dict_contents_from_group(in_file, '/')
    return contents

def recursively_load_dict_contents_from_group(h5file, path):
    """
    Convert the HDF5 group at ``path`` into a dictionary.

    Parameters
    ----------
    h5file : open ``h5py.File`` (or any object where ``h5file[path]`` yields
        a group exposing ``.items()``)
    path : str
        Path of the group to read. Sub-group paths are built as
        ``path + key + '/'``, so ``path`` is expected to end with ``'/'``.

    Returns
    -------
    dict
        One entry per member of the group: datasets are read fully into
        memory, sub-groups become nested dictionaries. Members that are
        neither a dataset nor a group are skipped.
    """
    ans = {}
    for key, item in h5file[path].items():
        # Use the public h5py.Dataset / h5py.Group names rather than the
        # private h5py._hl modules.
        if isinstance(item, h5py.Dataset):
            # ``item[()]`` reads the whole dataset. It replaces the
            # ``Dataset.value`` property, which was removed in h5py 3.0.
            ans[key] = item[()]
        elif isinstance(item, h5py.Group):
            ans[key] = recursively_load_dict_contents_from_group(h5file, path + key + '/')
    return ans

In [None]:
def load_data(path):
    """
    Load everything stored for one object directory into a dictionary.

    ``path`` must contain ``pointcloud.npz``, ``points.npz`` and a folder
    ``img_choy2016``. Inside that folder, files ending in ``.npz`` are read
    as camera data (if several exist, the last one in sorted order wins) and
    every other file is read as an image with ``skimage.io.imread``.

    Files are visited in sorted name order, so the order of the images is
    reproducible instead of depending on ``os.listdir``.
    NOTE(review): presumably the sorted order is what lines the images up
    with the entries of the camera file -- confirm against the dataset.

    Parameters
    ----------
    path : str
        Directory of a single object.

    Returns
    -------
    dict
        ``'images'``: ``np.asarray`` of the loaded images, 2-D (grayscale)
        images being expanded to three identical channels;
        ``'camera'``, ``'points'``, ``'pointcloud'``: plain dictionaries
        with the arrays of the corresponding ``.npz`` archives.

    Raises
    ------
    FileNotFoundError
        If one of the ``.npz`` files or the image folder is missing, or if
        the image folder contains no ``.npz`` camera file.
    """
    pointcloud = _npz_to_dict(os.path.join(path, "pointcloud.npz"))
    points = _npz_to_dict(os.path.join(path, "points.npz"))

    img_dir = os.path.join(path, "img_choy2016")
    img_data = []
    cam_data = None

    # os.listdir returns names in arbitrary order; sort for a stable result.
    for imx in sorted(os.listdir(img_dir)):
        current = os.path.join(img_dir, imx)
        if imx.endswith(".npz"):
            cam_data = _npz_to_dict(current)
        else:
            img_current = sio.imread(current)
            if img_current.ndim == 2:
                # Grayscale image: replicate the single channel three times.
                img_current = np.stack([img_current] * 3, axis=-1)
            img_data.append(img_current)

    if cam_data is None:
        # Previously this surfaced as an opaque TypeError from dict(None).
        raise FileNotFoundError(
            "No camera .npz file found in {}".format(img_dir)
        )

    return {
        'images': np.asarray(img_data),
        'camera': cam_data,
        'points': points,
        'pointcloud': pointcloud,
    }


def _npz_to_dict(npz_path):
    """Read every array of an ``.npz`` archive into a dict and close the file."""
    # np.load returns a lazily-reading NpzFile that keeps the archive open;
    # materialise all arrays while the context manager holds it open.
    with np.load(npz_path) as archive:
        return dict(archive)

In [76]:
# Convert object directories under data_root into one HDF5 file each, named
# "<cid>_<obx>.h5" inside save_path.
# The [:1] / [:10] slices restrict the run to the first entry of data_root and
# the first ten entries inside it, in whatever order os.listdir returns them.
for cid in os.listdir(data_root)[:1]:
    objs_path = os.path.join(data_root, cid)
    obj_list = os.listdir(objs_path)
    for obx in obj_list[:10]:
        current_path = os.path.join(objs_path, obx)
        new_filename = "{}_{}.h5".format(cid, obx)

        try:
            data_current = load_data(current_path)
            save_dict_to_hdf5(data_current, os.path.join(save_path, new_filename))
        except Exception as err:
            # Keep going on a per-object failure, but report the cause.
            # Catching Exception (not a bare except) lets KeyboardInterrupt
            # and SystemExit stop the loop.
            print("Error at {}-{}: {!r}".format(cid, obx, err))

/home/shubham/datasets/hdf_data/02933112_7238faf31667078b2ea98d69e91ba870.h5
/home/shubham/datasets/hdf_data/02933112_1f674f735abb7b1d75869f989849123f.h5
/home/shubham/datasets/hdf_data/02933112_6219b46946f62474c62bee40dcdc539.h5
/home/shubham/datasets/hdf_data/02933112_346419b6e2131dda5785f58f071c8c43.h5
/home/shubham/datasets/hdf_data/02933112_1175801334a9e410df3a1b0d597ce76e.h5
/home/shubham/datasets/hdf_data/02933112_6352c69907b70c0480fa521a9c7198a.h5
/home/shubham/datasets/hdf_data/02933112_809d5384f84d55273a11565e5be9cf53.h5
/home/shubham/datasets/hdf_data/02933112_12f1e4964078850cc7113d9e058b9db7.h5
/home/shubham/datasets/hdf_data/02933112_8fd43ffcc981f6eb14038d588fd1342f.h5
/home/shubham/datasets/hdf_data/02933112_4c8e95fe5fdbb125c59350d819542ec7.h5
