# Data processing

Processing the ShapeNet dataset so that it is compatible with IF-Net texture.

## Locate the folders and identifiers

ShapeNet uses a different folder layout, in which each mesh is stored in its own subfolder. In this first step, we find the unique identifier of each model and create a new folder for it at the desired output location.

In [1]:
from pathlib import Path
import os
import numpy as np
import generationtools as gnt
from scipy.spatial import cKDTree as KDTree
import trimesh

# The location of the original dataset
# (absolute, machine-specific path; presumably a single ShapeNet category folder — adjust per machine)
datasetLocation = Path("C:/Users/u0146408/Documents/Datasets/Shapenet/03001627")
# The desired output location for the processed dataset
# (relative path, so it is resolved against the notebook's working directory)
datasetOutFolder = Path("data/processed")

def make_dir_if_not_exist(path):
    """Create the directory ``path`` unless it already exists.

    Missing parent directories are created as well, so nested output
    locations can be passed directly.

    Args:
        path: Directory to create, given as a ``str`` or ``pathlib.Path``.
    """
    if not os.path.exists(path):
        # str() is needed here: "text" + Path raises TypeError.
        print("Folder does not exist, creating the folder " + str(path))
        # exist_ok guards against the folder appearing between check and create.
        os.makedirs(path, exist_ok=True)

# Make sure the output root exists first, then the two split folders inside it
for outSubFolder in (datasetOutFolder, datasetOutFolder / "test", datasetOutFolder / "train"):
    make_dir_if_not_exist(outSubFolder)

Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.


In [2]:
import glob

# Looking for all obj files, at any depth below the dataset folder
path = datasetLocation / "**" / "*.obj"
print(str(path))
# glob gives no ordering guarantee; sort so that index-based access to
# `files` (and any split derived from it) is the same on every run and machine
files = sorted(glob.glob(str(path), recursive=True))
print(len(files))

C:\Users\u0146408\Documents\Datasets\Shapenet\03001627\**\*.obj
6778


## Set the configuration

In [3]:
# Shared settings read by the cells below
testTrainRatio = 0.8   # fraction of the file list used to compute the split index
num_points = 100000    # number of points sampled per point cloud
res = 128              # grid resolution handed to the grid helper
bbox = [-7, 7, -1, 20, -7, 7]
bbox_str = str(bbox)

# Grid points for the bounding box above, plus a KD-tree over them so that
# sampled points can be snapped to their nearest grid point
grid_points = gnt.create_grid_points_from_xyz_bounds(*bbox, res)
kdtree = KDTree(grid_points)

## Create the processed files

1) Load the trimesh scene
2) Sample each submesh into a separate point cloud
3) Combine them into one big material point cloud
4) Resample to the set number of points
5) Cut random holes in the point cloud by selecting nearest neighbours
6) Save the complete sampled shape as an npz file
    > id/id_normalized_color_samples100000_bbox-0.8,0.8,-0.15,2.1,-0.8,0.8.npz
7) Save 4 variations in the same folder

In [4]:
class MeshArray ():
    """One sampled model, held in the layout that ``savez`` writes to disk.

    Attributes set by ``__init__`` (all default to ``None``):
        R, G, B: per-point channel values; ``from_trimesh`` stores the
            sub-mesh label of each point in all three.
        S: flat array with one flag per grid point.
        pcd: sampled points, one xyz row per point.
        bbox, res: grid settings stored next to the data.

    Other attributes:
        fullPcd: points sampled from the merged shape (kept in memory only).
        id: identifier used in the file names of the partial variants.
    """

    def __init__(self, R = None, G = None, B = None, S = None, pcd = None, bbox = None, res = None):
        self.R = R
        self.G = G
        self.B = B
        self.S = S
        self.pcd = pcd
        self.bbox = bbox
        self.res = res

        self.fullPcd = None
        self.id = None

    def from_trimesh(self, meshPath):
        """Load a mesh file and fill this object with sampled data.

        Reads the module-level ``num_points``, ``grid_points``, ``kdtree``,
        ``bbox`` and ``res``.

        Args:
            meshPath: Path of the mesh file, passed to ``trimesh.load``.

        Returns:
            This object, so the call can be chained after the constructor.

        Raises:
            ValueError: If the file loads as a scene without any geometry.
        """
        mesh = trimesh.load(meshPath)
        # check if the mesh has multiple materials (sub meshes)
        if isinstance(mesh, trimesh.Scene):
            subMeshes = mesh.dump()
            if len(subMeshes) == 0:
                raise ValueError("No geometry found in " + str(meshPath))
            # Sample every sub-mesh and tag its points with a 1-based label
            # in a fourth column.
            labelled = [
                np.hstack((sub.sample(num_points), np.full((num_points, 1), float(label))))
                for label, sub in enumerate(subMeshes, start=1)
            ]
            pointArray = np.concatenate(labelled, axis=0)
            # Bring the combined cloud back down to num_points rows.
            colored_point_cloud = pointArray[np.random.choice(len(pointArray), size=num_points, replace=False)]
        else:
            colored_point_cloud = np.hstack((mesh.sample(num_points), np.ones((num_points,1))))

        # encode uncolorized, complete shape of object (at inference time obtained from IF-Nets surface reconstruction)
        # encoding is done by sampling a pointcloud and voxelizing it (into discrete grid for 3D CNN usage)
        full_shape = gnt.as_mesh(mesh)
        shape_point_cloud = full_shape.sample(num_points)
        self.fullPcd = shape_point_cloud

        # Flag every grid point that is the nearest grid point of a sample.
        S = np.zeros(len(grid_points), dtype=np.int8)
        _, idx = kdtree.query(shape_point_cloud)
        S[idx] = 1

        self.S = S
        self.R = self.G = self.B = colored_point_cloud[:,3]
        self.pcd = colored_point_cloud[:,:3]

        # S was built on the module-level grid, so record that grid's settings
        # unless the caller supplied them; otherwise the npz would store None.
        if self.bbox is None:
            self.bbox = bbox
        if self.res is None:
            self.res = res

        return self

    def filter_points(self, outPath, nrOfVariants, nrOfHoles, dropout):
        """Write partial variants of the point cloud with holes cut out.

        Each variant is saved as ``<id>-partial-<nr>.npz`` inside ``outPath``.

        Args:
            outPath: Target folder as a ``pathlib.Path``.
            nrOfVariants: Number of variants to write.
            nrOfHoles: Passed on to ``gnt.shoot_holes``.
            dropout: Passed on to ``gnt.shoot_holes``.

        Raises:
            ValueError: If ``id`` has not been set.
        """
        if self.id is None:
            raise ValueError("MeshArray.id must be set before calling filter_points")

        for nr in range(nrOfVariants):
            filteredIndexes = gnt.shoot_holes(self.pcd, nrOfHoles, dropout)
            filteredPcd = np.delete(self.pcd, filteredIndexes, 0)
            filteredColor = np.delete(self.R, filteredIndexes, 0)
            np.savez(outPath / (self.id + "-partial-" + str(nr) + ".npz"), R=filteredColor, G=filteredColor,B=filteredColor, S=self.S,  colored_point_cloud=filteredPcd, bbox = self.bbox, res = self.res)

    def savez(self, out_file):
        """Save the complete sample to ``out_file`` with ``np.savez``.

        Written keys: ``R``, ``G``, ``B``, ``S``, ``colored_point_cloud``,
        ``bbox`` and ``res``.
        """
        np.savez(out_file, R=self.R, G=self.G,B=self.B, S=self.S,  colored_point_cloud=self.pcd, bbox = self.bbox, res = self.res)

In [6]:

# Split index derived from the ratio (computed here but not used in this cell)
testCases = int(np.round(len(files) * testTrainRatio))

# single example: convert only the first file that was found
objFile = files[0]
# The identifier is the name of the folder two levels above the obj file
objFolder = Path(objFile).parent
newId = objFolder.parent.name

# Make the new destination folder for the files
newIdPath = datasetOutFolder / "train" / newId
make_dir_if_not_exist(newIdPath)

# Convert the obj to a npz file
meshArray = MeshArray().from_trimesh(objFile)
meshArray.id = newId
completeFile = newIdPath / (newId + ".npz")
meshArray.savez(completeFile)
# Then write 4 partial variants next to it (4 holes each, dropout 2e-2)
meshArray.filter_points(newIdPath, 4, 4, 2e-2)
    



In [None]:
from pathlib import Path
import os
import numpy as np
from scipy.spatial import cKDTree as KDTree
import trimesh

# The location of the original dataset
# (relative path, so it is resolved against the notebook's working directory)
datasetLocation = Path("../../datasets/ShapeNet/03001627")
# The desired output location for the processed dataset
# (nested relative path; "train" and "test" folders are created below it)
datasetOutFolder = Path("dataset/SHARP2020/shapenet")

def make_dir_if_not_exist(path):
    """Create the directory ``path`` unless it already exists.

    Missing parent directories are created too, so a nested output location
    works on a fresh checkout.

    Args:
        path: Directory to create, given as a ``str`` or ``pathlib.Path``.
    """
    if not os.path.exists(path):
        print("Folder does not exist, creating the folder " + str(path))
        # makedirs creates intermediate folders as well (os.mkdir would fail);
        # exist_ok guards against the folder appearing between check and create.
        os.makedirs(path, exist_ok=True)

# Make sure the output root exists first, then the two split folders inside it
for outSubFolder in (datasetOutFolder, datasetOutFolder / "test", datasetOutFolder / "train"):
    make_dir_if_not_exist(outSubFolder)

In [None]:
import glob

# Looking for all obj files, at any depth below the dataset folder
path = datasetLocation / "**" / "*.obj"
print(str(path))
# glob gives no ordering guarantee; sort so that the index-based train/test
# split further down assigns the same models on every run and machine
files = sorted(glob.glob(str(path), recursive=True))
print(len(files))

In [None]:
# Show one entry of the file list to inspect the path layout
# (raises IndexError when fewer than 1001 files were found)
print(files[1000])

In [None]:
def as_mesh(scene_or_mesh):
    """Return a single mesh for either a ``trimesh.Scene`` or a mesh.

    A scene is flattened by rebuilding every geometry from its vertices and
    faces only and concatenating the results; anything else is returned
    unchanged.
    """
    if not isinstance(scene_or_mesh, trimesh.Scene):
        return scene_or_mesh

    # Only vertices and faces are copied over for each geometry in the scene.
    parts = [
        trimesh.Trimesh(vertices=geom.vertices, faces=geom.faces)
        for geom in scene_or_mesh.geometry.values()
    ]
    return trimesh.util.concatenate(parts)

def create_grid_points_from_xyz_bounds(min_x, max_x, min_y, max_y ,min_z, max_z, res):
    """Return the points of a regular ``res`` x ``res`` x ``res`` grid.

    Each axis is sampled with ``res`` evenly spaced values between its two
    bounds (both included).

    Returns:
        Array of shape ``(res**3, 3)`` with one xyz row per grid point; the
        z coordinate varies fastest and the x coordinate slowest.
    """
    bounds = ((min_x, max_x), (min_y, max_y), (min_z, max_z))
    axes = [np.linspace(low, high, res) for low, high in bounds]
    grid_x, grid_y, grid_z = np.meshgrid(*axes, indexing='ij', sparse=False)
    # Flatten each coordinate grid and put the three of them side by side.
    return np.column_stack((grid_x.ravel(), grid_y.ravel(), grid_z.ravel()))

import numbers

def shoot_holes(vertices, n_holes, dropout, mask_faces=None, faces=None,
                rng=None):
    """Generate a partial shape by cutting holes of random location and size.

    Each hole is created by selecting a random point as the center and removing
    the k nearest-neighboring points around it.

    Args:
        vertices: The array of vertices of the mesh.
        n_holes (int or (int, int)): Number of holes to create, or bounds from
            which to randomly draw the number of holes (the upper bound is
            exclusive).
        dropout (float or (float, float)): Proportion of points (with respect
            to the total number of points) in each hole, or bounds from which
            to randomly draw the proportions (a different proportion is drawn
            for each hole).
        mask_faces: A boolean mask on the faces. 1 to keep, 0 to ignore. If
                    set, the centers of the holes are sampled only on the
                    non-masked regions.
        faces: The array of faces of the mesh. Required only when `mask_faces`
               is set.
        rng: (optional) An initialised np.random.Generator object. If None, a
             default Generator is created.

    Returns:
        array: Sorted, unique indices (into `vertices`) of the points defining
        the holes. Empty when no hole contains at least one point.
    """
    if rng is None:
        rng = np.random.default_rng()

    vertices = np.asarray(vertices)

    if not isinstance(n_holes, numbers.Integral):
        n_holes_min, n_holes_max = n_holes
        n_holes = rng.integers(n_holes_min, n_holes_max)

    if mask_faces is not None:
        valid_vertex_indices = np.unique(faces[mask_faces > 0])
        valid_vertices = vertices[valid_vertex_indices]
    else:
        valid_vertices = vertices

    n_vertices = len(valid_vertices)
    no_holes = np.empty(0, dtype=np.intp)
    if n_holes <= 0 or n_vertices == 0:
        return no_holes

    # Select random hole centers.
    center_indices = rng.choice(n_vertices, size=n_holes)
    centers = valid_vertices[center_indices]

    if isinstance(dropout, numbers.Number):
        # The neighbour count must be an integer, not the raw float product.
        hole_sizes = [int(round(n_vertices * dropout))] * n_holes
    else:
        low, high = (int(bound) for bound in n_vertices * np.asarray(dropout))
        # rng.integers needs low < high; tiny inputs can collapse the range.
        hole_sizes = rng.integers(low, max(high, low + 1), size=n_holes)

    # Identify the points indices making up the holes.
    vertex_tree = KDTree(vertices, leafsize=200)
    max_hole_size = len(vertices)
    to_crop = []
    for center, size in zip(centers, hole_sizes):
        # Asking for more neighbours than there are points would yield
        # out-of-range indices, so cap the request.
        k = min(int(size), max_hole_size)
        if k < 1:
            continue
        _, indices = vertex_tree.query(center, k=k)
        # For k == 1 the query returns a scalar, which cannot be concatenated.
        to_crop.append(np.atleast_1d(indices))

    if not to_crop:
        return no_holes
    return np.unique(np.concatenate(to_crop))

In [None]:
# shorthands
# Shared settings read by the cells below
trainTestRatio = 0.8   # fraction of the file list that goes to the train split
num_points = 100000    # number of points sampled per point cloud
res = 128              # number of grid samples along each axis
bbox = [-7, 7, -1, 20, -7, 7]   # min_x, max_x, min_y, max_y, min_z, max_z
bbox_str = str(bbox)

# All grid points inside the bounding box, plus a KD-tree over them so that
# sampled points can be snapped to their nearest grid point
grid_points = create_grid_points_from_xyz_bounds(*bbox, res)
kdtree = KDTree(grid_points)

## Create the processed files

1) Load the trimesh scene
2) Sample each submesh into a separate point cloud
3) Combine them into one big material point cloud
4) Resample to the set number of points
5) Cut random holes in the point cloud by selecting nearest neighbours
6) Save the complete sampled shape as an npz file
    > id/id_normalized_color_samples100000_bbox-0.8,0.8,-0.15,2.1,-0.8,0.8.npz
7) Save 4 variations in the same folder

In [None]:
class MeshArray ():
    """One sampled model, held in the layout that ``savez`` writes to disk.

    Attributes set by ``__init__`` (all default to ``None``):
        R, G, B: per-point channel values; ``from_trimesh`` stores the
            sub-mesh label of each point in all three.
        S: flat array with one flag per grid point.
        pcd: sampled points, one xyz row per point.
        bbox, res: grid settings stored next to the data.

    Other attributes:
        fullPcd: points sampled from the merged shape (kept in memory only).
        id: identifier used in the file names of the partial variants.
    """

    def __init__(self, R = None, G = None, B = None, S = None, pcd = None, bbox = None, res = None):
        self.R = R
        self.G = G
        self.B = B
        self.S = S
        self.pcd = pcd
        self.bbox = bbox
        self.res = res

        self.fullPcd = None
        self.id = None

    def from_trimesh(self, meshPath):
        """Load a mesh file and fill this object with sampled data.

        Reads the module-level ``num_points``, ``grid_points``, ``kdtree``,
        ``bbox`` and ``res``.

        Args:
            meshPath: Path of the mesh file, passed to ``trimesh.load``.

        Returns:
            This object, so the call can be chained after the constructor.

        Raises:
            ValueError: If the file loads as a scene without any geometry.
        """
        mesh = trimesh.load(meshPath)
        # check if the mesh has multiple materials (sub meshes)
        if isinstance(mesh, trimesh.Scene):
            subMeshes = mesh.dump()
            if len(subMeshes) == 0:
                raise ValueError("No geometry found in " + str(meshPath))
            # Sample every sub-mesh and tag its points with a 1-based label
            # in a fourth column.
            labelled = [
                np.hstack((sub.sample(num_points), np.full((num_points, 1), float(label))))
                for label, sub in enumerate(subMeshes, start=1)
            ]
            pointArray = np.concatenate(labelled, axis=0)
            # Bring the combined cloud back down to num_points rows.
            colored_point_cloud = pointArray[np.random.choice(len(pointArray), size=num_points, replace=False)]
        else:
            colored_point_cloud = np.hstack((mesh.sample(num_points), np.ones((num_points,1))))

        # encode uncolorized, complete shape of object (at inference time obtained from IF-Nets surface reconstruction)
        # encoding is done by sampling a pointcloud and voxelizing it (into discrete grid for 3D CNN usage)
        full_shape = as_mesh(mesh)
        shape_point_cloud = full_shape.sample(num_points)
        self.fullPcd = shape_point_cloud

        # Flag every grid point that is the nearest grid point of a sample.
        S = np.zeros(len(grid_points), dtype=np.int8)
        _, idx = kdtree.query(shape_point_cloud)
        S[idx] = 1

        self.S = S
        self.R = self.G = self.B = colored_point_cloud[:,3]
        self.pcd = colored_point_cloud[:,:3]

        # S was built on the module-level grid, so record that grid's settings
        # unless the caller supplied them; otherwise the npz would store None.
        if self.bbox is None:
            self.bbox = bbox
        if self.res is None:
            self.res = res

        return self

    def filter_points(self, outPath, nrOfVariants, nrOfHoles, dropout):
        """Write partial variants of the point cloud with holes cut out.

        Each variant is saved as ``<id>-partial-<nr>.npz`` inside ``outPath``.

        Args:
            outPath: Target folder as a ``pathlib.Path``.
            nrOfVariants: Number of variants to write.
            nrOfHoles: Number of holes per variant (or bounds), passed on to
                ``shoot_holes``.
            dropout: Proportion of points per hole (or bounds), passed on to
                ``shoot_holes``.

        Raises:
            ValueError: If ``id`` has not been set.
        """
        if self.id is None:
            raise ValueError("MeshArray.id must be set before calling filter_points")

        for nr in range(nrOfVariants):
            filteredIndexes = shoot_holes(self.pcd, nrOfHoles, dropout)
            filteredPcd = np.delete(self.pcd, filteredIndexes, 0)
            filteredColor = np.delete(self.R, filteredIndexes, 0)
            np.savez(outPath / (self.id + "-partial-" + str(nr) + ".npz"), R=filteredColor, G=filteredColor,B=filteredColor, S=self.S,  colored_point_cloud=filteredPcd, bbox = self.bbox, res = self.res)

    def savez(self, out_file):
        """Save the complete sample to ``out_file`` with ``np.savez``.

        Written keys: ``R``, ``G``, ``B``, ``S``, ``colored_point_cloud``,
        ``bbox`` and ``res``.
        """
        np.savez(out_file, R=self.R, G=self.G,B=self.B, S=self.S,  colored_point_cloud=self.pcd, bbox = self.bbox, res = self.res)

In [None]:
nrOfFiles = len(files)
# Files with an index below this value go to "train", the rest to "test"
trainCases = int(np.round(nrOfFiles * trainTestRatio))

for i, objFile in enumerate(files):
    print("Processing " + str(i) + "/" + str(nrOfFiles))
    # The identifier is the name of the folder two levels above the obj file
    newId = Path(objFile).parent.parent.name
    # Make the new destination folder for the files
    splitName = "train" if i < trainCases else "test"
    newIdPath = datasetOutFolder / splitName / newId
    make_dir_if_not_exist(newIdPath)
    # Convert the obj to a npz file
    meshArray = MeshArray().from_trimesh(objFile)
    meshArray.id = newId
    meshArray.savez(newIdPath / (newId + ".npz"))
    # Write 4 partial variants next to it (4 holes each, dropout 2e-2)
    meshArray.filter_points(newIdPath, 4, 4, 2e-2)
    
