# Data processing

Processing the shapenet dataset to be compatible with if-net texture

## Locate the folders and identifiers

Shapenet has a different folder system, where each mesh is in a subfolder, in this first step, we find each unique identifier of the models and create new folders at the desired location

In [3]:
from pathlib import Path
import os
import numpy as np
from scipy.spatial import cKDTree as KDTree
import trimesh
import data_processing.utils as utils
import data_processing.mesharray as mesharray
import config.config_loader as cfg_loader
import glob

# The location of the original dataset
datasetLocation = Path("../../datasets/ShapeNet/03001627")
# The desired output location for the processed dataset
datasetOutFolder = Path("dataset/SHARP2020/shapenet")
# The config file
configFilePath = Path("config/SHARP2020/shapenet.yaml")

utils.make_dir_if_not_exist(datasetOutFolder)
utils.make_dir_if_not_exist(datasetOutFolder/ "test")
utils.make_dir_if_not_exist(datasetOutFolder/ "train")

Folder does not exist, creating the folder dataset/SHARP2020/shapenet/test
Folder does not exist, creating the folder dataset/SHARP2020/shapenet/train


In [4]:
# Looking for all obj files
path = datasetLocation / "**" / "*.obj"
print("Looking for all files that match: " + str(path))
files = glob.glob(str(path), recursive=True)
print("Found " + str(len(files)) + " files")

Looking for all files that match: ../../datasets/ShapeNet/03001627/**/*.obj
Found 6778 files


## Load the configuration

In [5]:
cfg = cfg_loader.load(configFilePath)

# shorthands
trainTestRatio =  cfg["preprocessing"]["trainTestRatio"]
bbox = cfg['data_bounding_box']
res = cfg['input_resolution']
num_points = cfg['input_points_number']
bbox_str = cfg['data_bounding_box_str']
grid_points = utils.create_grid_points_from_xyz_bounds(*bbox, res)
kdtree = KDTree(grid_points)

## Create the processed files

1) Load the trimesh scene
2) sample each submesh into a separate pointcloud
3) Combine them into one big material pointcloud
4) Resample to theset number of points
5) cut random holes in the point cloud by selecting nearest neighbours
6) Save the complete sampled shape as a npz file
    > id/id_normalized_color_samples100000_bbox-0.8,0.8,-0.15,2.1,-0.8,0.8.npz
7) save 4 variations in the same folder

In [6]:
nrOfFiles = 100 #len(files)
trainCases = int(np.round(nrOfFiles * trainTestRatio))

for i in range(nrOfFiles):
    print("Processing " + str(i) + "/" + str(nrOfFiles))
    objFile = files[i]
    newId = Path(objFile).parent.parent.name
    # Make the new destination folder for the files
    if(i < trainCases):
        newIdPath = datasetOutFolder / "train" / newId
    else:
        newIdPath = datasetOutFolder / "test" / newId
    utils.make_dir_if_not_exist(newIdPath)
    
    # Create a new mesharray object to parse the data
    meshArray = mesharray.MeshArray(
        id = newId, 
        bbox=bbox, 
        kdtree = kdtree,
        grid_points=grid_points, 
        num_points=num_points ).from_trimesh(objFile, colorMode = "singleChannel")

    # Save the full point cloud as a colored point cloud
    meshArray.savez(newIdPath / (newId + "_normalized_color_samples" + str(num_points) + "_bbox" + bbox_str + ".npz"))
    # Create a number of incomplete meshes and save them as voxelised point clouds for standardisation
    meshArray.filter_points(newIdPath, 4, 4, 2e-2)

Processing 0/100
Folder does not exist, creating the folder dataset/SHARP2020/shapenet/train/3a4d572dc16ac52e201e82f29479384c


Processing 1/100
Folder does not exist, creating the folder dataset/SHARP2020/shapenet/train/91b738d40201bf18a413908c0e169330
Processing 2/100
Folder does not exist, creating the folder dataset/SHARP2020/shapenet/train/477dfe89f1d5df337fa68300c57bff0a
Processing 3/100
Folder does not exist, creating the folder dataset/SHARP2020/shapenet/train/d4f5c3e3eab52d0a3334fb6668ccd834
Processing 4/100
Folder does not exist, creating the folder dataset/SHARP2020/shapenet/train/ccea874d869ff9a579368d1198f406e7
Processing 5/100
Folder does not exist, creating the folder dataset/SHARP2020/shapenet/train/a3aa7e473a8059133087f84b199fd297
Processing 6/100
Folder does not exist, creating the folder dataset/SHARP2020/shapenet/train/611a76dca411bf089579c5df0e7fa96
Processing 7/100
Folder does not exist, creating the folder dataset/SHARP2020/shapenet/train/2f2da13322d30ccaf4b6538438a0b930
Processing 8/100
Folder does not exist, creating the folder dataset/SHARP2020/shapenet/train/28a60e0e9adf7eb0a3340564b5

### Export to obj groundtruth

In [7]:
nrOfFiles = 100 #len(files)
trainCases = int(np.round(nrOfFiles * trainTestRatio))

for i in range(nrOfFiles):
    print("Processing " + str(i) + "/" + str(nrOfFiles))
    objFile = files[i]
    newId = Path(objFile).parent.parent.name
    # Make the new destination folder for the files
    if(i < trainCases):
        newIdPath = datasetOutFolder / "train" / newId
    else:
        newIdPath = datasetOutFolder / "test" / newId
    data = np.load(newIdPath / (newId + "_normalized_color_samples" + str(num_points) + "_bbox" + bbox_str + ".npz"))
    cloud = trimesh.PointCloud(data["points"])
    # Save the full point cloud as a colored point cloud
    cloud.export(newIdPath / (newId + "_normalized.obj"))

Processing 0/100
Processing 1/100
Processing 2/100
Processing 3/100
Processing 4/100
Processing 5/100
Processing 6/100
Processing 7/100
Processing 8/100
Processing 9/100
Processing 10/100
Processing 11/100
Processing 12/100
Processing 13/100
Processing 14/100
Processing 15/100
Processing 16/100
Processing 17/100
Processing 18/100
Processing 19/100
Processing 20/100
Processing 21/100
Processing 22/100
Processing 23/100
Processing 24/100
Processing 25/100
Processing 26/100
Processing 27/100
Processing 28/100
Processing 29/100
Processing 30/100
Processing 31/100
Processing 32/100
Processing 33/100
Processing 34/100
Processing 35/100
Processing 36/100
Processing 37/100
Processing 38/100
Processing 39/100
Processing 40/100
Processing 41/100
Processing 42/100
Processing 43/100
Processing 44/100
Processing 45/100
Processing 46/100
Processing 47/100
Processing 48/100
Processing 49/100
Processing 50/100
Processing 51/100
Processing 52/100
Processing 53/100
Processing 54/100
Processing 55/100
Pr

## Visualise the created files

The new files are compressed numpy arrays. We can convert them back to trimesh objects to visualise them

In [27]:
from pathlib import Path
import numpy as np

filePath = Path("dataset/SHARP2020/shapenet/test/1c17cc67b8c747c3febad4f49b26ec52/1c17cc67b8c747c3febad4f49b26ec52_normalized-partial-0_voxelized_colored_point_cloud_res128_points100000_bbox-1,1,-1,1,-1,1.npz")

data = np.load(filePath)
cloud = trimesh.PointCloud(data["points"])
cloud.export(str(filePath)[:-3] + "obj")
print(list(data.keys()))

['R', 'G', 'B', 'S', 'colored_point_cloud', 'bbox', 'res']


In [17]:
import trimesh


print(cloud)

<trimesh.PointCloud(vertices.shape=(100000, 3))>


In [11]:
partialPath = Path("dataset/SHARP2020/shapenet/train/2a56ff0143d4e69fe6fe3612af521500/2a56ff0143d4e69fe6fe3612af521500_normalized-partial-3_voxelized_colored_point_cloud_res128_points100000_bbox-1,1,-1,1,-1,1.npz")

data = np.load(partialPath)
print(list(data.keys()))


['R', 'G', 'B', 'S', 'colored_point_cloud', 'bbox', 'res']


In [16]:
print(np.unique(data["R"]))

[ -1   0 255]


In [39]:

cloud = trimesh.PointCloud(data["colored_point_cloud"])
print(cloud)

<trimesh.PointCloud(vertices.shape=(92013, 3))>


In [40]:
cloud.export(str(partialPath)[:-3] + "obj")


'# https://github.com/mikedh/trimesh\nv 0.27352216 -0.30919648 0.08457063\nv -0.08225055 -0.25447415 -0.17146989\nv 0.39105184 -0.47468908 -0.34531779\nv 0.04533117 0.20163388 0.44163138\nv -0.04707838 -0.28429441 0.32389421\nv -0.02380959 0.27275888 0.42353133\nv -0.06412140 -0.25447415 0.22616815\nv 0.24103890 -0.30099218 -0.21812667\nv 0.39012018 -0.10263776 -0.34840796\nv -0.14319382 -0.27383199 0.31183173\nv 0.41930325 -0.04631806 -0.13668358\nv 0.12497173 -0.03710514 0.39156698\nv -0.35346466 -0.26532579 0.32777430\nv 0.44460544 0.00412369 -0.15878773\nv -0.03097083 -0.32690021 0.30465667\nv -0.41817474 -0.01684007 -0.23468386\nv -0.32609155 -0.13733316 0.03315790\nv -0.32451104 -0.13681482 -0.03581742\nv 0.40630735 -0.10197672 0.10889223\nv 0.24938890 0.14119898 0.42325115\nv -0.29287866 -0.01693912 0.30821383\nv -0.37409175 0.03211211 -0.33493636\nv -0.19930467 -0.03590764 0.38712899\nv -0.38529294 -0.13499457 -0.27923137\nv 0.07272403 0.00419416 0.31159278\nv 0.24940919 0.2235