# Data processing

Process the ShapeNet dataset so that it is compatible with IF-Net Texture.

## Locate the folders and identifiers

ShapeNet uses a different folder layout, in which each mesh lives in its own subfolder. In this first step, we find the unique identifier of each model and create new folders at the desired output location.

In [28]:
from pathlib import Path
import os
import numpy as np
from scipy.spatial import cKDTree as KDTree
import trimesh
import data_processing.utils as utils
import data_processing.mesharray as mesharray
import config.config_loader as cfg_loader
import glob

# Root folder of the original dataset that is read
datasetLocation = Path("../../datasets/ShapeNet/03001627")
# Root folder that receives the processed dataset
datasetOutFolder = Path("dataset/SHARP2020/shapenet")
# Configuration file loaded further down in the notebook
configFilePath = Path("projects/if-net_texture/config/SHARP2020/shapenet.yaml")

# Create the output root first, then one folder per split.
for outDir in (datasetOutFolder, datasetOutFolder / "test", datasetOutFolder / "train"):
    utils.make_dir_if_not_exist(outDir)

In [29]:
# Recursively look for every .obj file below the dataset root.
path = datasetLocation / "**" / "*.obj"
print("Looking for all files that match: " + str(path))
# glob returns matches in arbitrary (filesystem-dependent) order. Sort them so
# that any index-based selection of these files is reproducible between runs
# and machines.
files = sorted(glob.glob(str(path), recursive=True))
print("Found " + str(len(files)) + " files")

../../datasets/ShapeNet/03001627/**/*.obj
6778


## Load the configuration

In [31]:
# Read the configuration file
cfg = cfg_loader.load(configFilePath)

# Shorthands for the configuration entries used by the rest of the notebook
trainTestRatio = cfg["preprossesing"]["trainTestRatio"]
bbox, res, num_points, bbox_str = (
    cfg[entry]
    for entry in (
        'data_bounding_box',
        'input_resolution',
        'input_points_number',
        'data_bounding_box_str',
    )
)

# Grid points built from the bounding box and resolution, plus a KD-tree over
# them so sampled points can be mapped to their nearest grid point.
grid_points = utils.create_grid_points_from_xyz_bounds(*bbox, res)
kdtree = KDTree(grid_points)

## Create the processed files

1) Load the trimesh scene
2) Sample each submesh into a separate point cloud
3) Combine them into one big material point cloud
4) Resample to the set number of points
5) Cut random holes in the point cloud by selecting nearest neighbours
6) Save the complete sampled shape as a npz file
    > id/id_normalized_color_samples100000_bbox-0.8,0.8,-0.15,2.1,-0.8,0.8.npz
7) Save 4 variations in the same folder

In [34]:
class MeshArray ():
    """Sampled, labelled point cloud of one mesh plus its voxel occupancy.

    The methods rely on the notebook-level globals ``num_points``,
    ``grid_points`` and ``kdtree`` as well as on the ``utils`` and ``trimesh``
    modules.

    Attributes:
        R, G, B: per-point label values. ``from_trimesh`` stores the same
            array (the 1-based sub-mesh index of every point) in all three.
        S: int8 array with one entry per grid point; 1 where a sampled
            surface point has that grid point as nearest neighbour.
        pcd: (num_points, 3) array with the sampled point coordinates.
        bbox, res: stored unchanged and written into the saved files.
        fullPcd: point cloud sampled from the complete shape.
        id: identifier used as the prefix of the file names written by
            ``filter_points``.
    """

    def __init__(self, R = None, G = None, B = None, S = None, pcd = None, bbox = None, res = None, id = None):
        """Store the given arrays and settings; every argument is optional.

        ``id`` is a trailing optional argument so that existing positional
        callers keep working. It must be set before ``filter_points`` is
        called.
        """
        self.R = R
        self.G = G
        self.B = B
        self.S = S
        self.pcd = pcd
        self.bbox = bbox
        self.res = res

        self.fullPcd = None
        self.id = id

    @staticmethod
    def _normalization_matrix(extents, centroid):
        """Return the 4x4 matrix that centres a mesh and scales it to unit size.

        A point ``x`` is mapped to ``scale * (x - centroid)`` with
        ``scale = 1 / max(extents)``.

        Raises:
            ValueError: if the largest extent is not strictly positive.
        """
        largest = float(np.max(extents))
        if not largest > 0:
            raise ValueError("cannot normalize a mesh whose largest extent is not positive")
        scale = 1.0 / largest

        matrix = np.identity(4)
        matrix[:3, :3] *= scale
        # The translation has to be expressed in the scaled frame and negated,
        # otherwise the centroid is moved further away instead of to the origin.
        matrix[:3, 3] = -scale * np.asarray(centroid, dtype=float).reshape(3)
        return matrix

    def from_trimesh(self, meshPath):
        """Load a mesh file, normalise it and fill this object from it.

        The mesh is centred on its centroid and scaled so that its largest
        extent is 1. When the loaded object is a ``trimesh.Scene``, every
        sub-mesh is sampled with ``num_points`` points labelled with the
        1-based sub-mesh index, and ``num_points`` of those are then drawn
        without replacement. Otherwise the single mesh is sampled and every
        point gets label 1.

        Args:
            meshPath: path handed to ``trimesh.load``.

        Returns:
            This object, to allow call chaining.
        """
        mesh = trimesh.load(meshPath)
        mesh.apply_transform(self._normalization_matrix(mesh.extents, mesh.centroid))

        # check if the mesh has multiple materials (sub meshes)
        if isinstance(mesh, trimesh.Scene):
            labelledSamples = [
                np.hstack((sub.sample(num_points), np.full((num_points, 1), float(label))))
                for label, sub in enumerate(mesh.dump(), start=1)
            ]
            pointArray = np.vstack(labelledSamples)
            keep = np.random.choice(len(pointArray), size=num_points, replace=False)
            colored_point_cloud = pointArray[keep]
        else:
            colored_point_cloud = np.hstack((mesh.sample(num_points), np.ones((num_points, 1))))

        # encode uncolorized, complete shape of object (at inference time obtained from IF-Nets surface reconstruction)
        # encoding is done by sampling a pointcloud and voxelizing it (into discrete grid for 3D CNN usage)
        full_shape = utils.as_mesh(mesh)
        shape_point_cloud = full_shape.sample(num_points)
        self.fullPcd = shape_point_cloud

        # Mark the grid point nearest to every sampled surface point as occupied.
        S = np.zeros(len(grid_points), dtype=np.int8)
        _, idx = kdtree.query(shape_point_cloud)
        S[idx] = 1

        self.S = S
        # All three channels share the same label array.
        self.R = self.G = self.B = colored_point_cloud[:, 3]
        self.pcd = colored_point_cloud[:, :3]

        return self

    def filter_points(self, outPath, nrOfVariants, nrOfHoles, dropout):
        """Write ``nrOfVariants`` partial versions of the point cloud.

        For every variant a ``.txt`` marker file containing its own path is
        written, points are removed from the cloud, the remaining labels are
        assigned to their nearest grid points (-1 where no point maps), and
        the result is saved as a ``.npz`` file. An ``.npz`` file that already
        exists is left untouched.

        Args:
            outPath: ``pathlib.Path`` of the folder that receives the files.
            nrOfVariants: number of partial variants to create.
            nrOfHoles, dropout: forwarded to ``utils.shoot_holes``; presumably
                the number of holes and a random drop-out ratio (confirm
                against ``utils``).

        Raises:
            ValueError: if ``self.id`` has not been set.
        """
        if self.id is None:
            raise ValueError("MeshArray.id must be set before calling filter_points")

        for nr in range(nrOfVariants):
            stem = self.id + "_normalized-partial-" + str(nr)

            # Marker file whose content is its own path.
            markerPath = outPath / (stem + ".txt")
            markerPath.write_text(str(markerPath))

            # Check the exact file that is written below, so the guard
            # actually prevents overwriting an existing variant.
            npzPath = outPath / (stem + "_voxelized_colored_point_cloud_res128_points100000_bbox-1,1,-1,1,-1,1.npz")
            if npzPath.exists():
                continue

            # Indexes returned by shoot_holes are removed from points and labels alike.
            filteredIndexes = utils.shoot_holes(self.pcd, nrOfHoles, dropout)
            filteredPcd = np.delete(self.pcd, filteredIndexes, 0)
            filteredColor = np.delete(self.R, filteredIndexes, 0)

            # -1 marks grid points that no remaining point maps to.
            R = np.full(len(grid_points), -1, dtype=np.int16)
            _, idx = kdtree.query(filteredPcd)
            R[idx] = filteredColor
            G = R.copy()
            B = R.copy()

            np.savez(npzPath, R=R, G=G, B=B, S=self.S, colored_point_cloud=filteredPcd, bbox=self.bbox, res=self.res)

    def savez(self, out_file):
        """Save the complete sampled cloud to ``out_file`` as an ``.npz`` file.

        The file holds ``points`` (the coordinates), ``grid_coords`` (from
        ``utils.to_grid_sample_coords`` for the stored bounding box) and
        ``colors`` (the label column repeated three times).
        """
        col = self.R.reshape((-1, 1))
        colors = np.hstack((col, col, col))
        np.savez(out_file, points=self.pcd, grid_coords=utils.to_grid_sample_coords(self.pcd, self.bbox), colors=colors)

### Create the incomplete point clouds

In [35]:
# Number of models to process. Capped at the number of files that were found so
# that the loop below can never index past the end of the list.
nrOfFiles = min(100, len(files)) #len(files)
# The first trainCases models go to the train split, the rest to test.
trainCases = int(np.round(nrOfFiles * trainTestRatio))

for i, objFile in enumerate(files[:nrOfFiles]):
    print("Processing " + str(i) + "/" + str(nrOfFiles))
    # The identifier is the name of the folder two levels above the obj file.
    newId = Path(objFile).parent.parent.name
    # Make the new destination folder for the files
    splitName = "train" if i < trainCases else "test"
    newIdPath = datasetOutFolder / splitName / newId
    utils.make_dir_if_not_exist(newIdPath)
    # Convert the obj to a npz file
    meshArray = mesharray.MeshArray(
        id = newId, 
        bbox=bbox, 
        kdtree = kdtree,
        grid_points=grid_points, 
        num_points=num_points ).from_trimesh(objFile)

    meshArray.savez(newIdPath / (newId + "_normalized_color_samples" + str(num_points) + "_bbox" + bbox_str + ".npz"))
    # Presumably 4 variants with 4 holes each and a drop-out of 2e-2 - confirm
    # against the signature in data_processing.mesharray.
    meshArray.filter_points(newIdPath, 4, 4, 2e-2)
    



Processing 0/100
Processing 1/100
Processing 2/100
Processing 3/100
Processing 4/100
Processing 5/100
Processing 6/100
Processing 7/100
Processing 8/100
Processing 9/100
Processing 10/100
Processing 11/100
Processing 12/100
Processing 13/100
Processing 14/100
Processing 15/100
Processing 16/100
Processing 17/100
Processing 18/100
Processing 19/100
Processing 20/100
Processing 21/100
Processing 22/100
Processing 23/100
Processing 24/100
Processing 25/100
Processing 26/100
Processing 27/100
Processing 28/100
Processing 29/100
Processing 30/100
Processing 31/100
Processing 32/100
Processing 33/100
Processing 34/100
Processing 35/100
Processing 36/100
Processing 37/100
Processing 38/100
Processing 39/100
Processing 40/100
Processing 41/100
Processing 42/100
Processing 43/100
Processing 44/100
Processing 45/100
Processing 46/100
Processing 47/100
Processing 48/100
Processing 49/100
Processing 50/100
Processing 51/100
Processing 52/100
Processing 53/100
Processing 54/100
Processing 55/100
Pr

In [27]:
import numpy as np

# Location of the split file, written relative to the repository root
path = 'projects/if-net_texture/dataset/SHARP2020/shapenet/split.npz'
# Skip the first 24 characters (the "projects/if-net_texture/" prefix) before loading.
split = np.load(path[24:])
print(split.files)

# Show every stored array together with its unique entries.
for name in split.files:
    entries = split[name]
    print(name)
    print(entries)
    print(np.unique(entries))

['train', 'test', 'val']
train
['dataset/SHARP2020/shapenet/train/4e26eab28703c12bdd5f3f2440a93d21/4e26eab28703c12bdd5f3f2440a93d21_normalized-partial-1.txt'
 'dataset/SHARP2020/shapenet/train/477dfe89f1d5df337fa68300c57bff0a/477dfe89f1d5df337fa68300c57bff0a_normalized-partial-3.txt'
 'dataset/SHARP2020/shapenet/train/451458fe90bd0f4144f23566597ec464/451458fe90bd0f4144f23566597ec464_normalized-partial-0.txt'
 'dataset/SHARP2020/shapenet/train/66c791cf5f1e61a09753496ba23f2183/66c791cf5f1e61a09753496ba23f2183_normalized-partial-3.txt'
 'dataset/SHARP2020/shapenet/train/3bb8e6e640c32a7c36b0f2a1430e993a/3bb8e6e640c32a7c36b0f2a1430e993a_normalized-partial-1.txt'
 'dataset/SHARP2020/shapenet/train/bc80b0b638f8a4e61a54bcb8e47577d6/bc80b0b638f8a4e61a54bcb8e47577d6_normalized-partial-2.txt'
 'dataset/SHARP2020/shapenet/train/4566839a362c5c223ec13b32c4d64f06/4566839a362c5c223ec13b32c4d64f06_normalized-partial-3.txt'
 'dataset/SHARP2020/shapenet/train/c2a7da6a1e1f896a301e9490bfb35bc0/c2a7da6a1e1f