# Data processing

Processing the shapenet dataset to be compatible with if-net texture

## Locate the folders and identifiers

Shapenet has a different folder system, where each mesh is in a subfolder, in this first step, we find each unique identifier of the models and create new folders at the desired location

In [12]:
from pathlib import Path
import os
import numpy as np
from scipy.spatial import cKDTree as KDTree
import trimesh
import data_processing.utils as utils
import data_processing.mesharray as mesharray
import config.config_loader as cfg_loader
import glob

# The location of the original dataset
datasetLocation = Path("../../datasets/ShapeNet/03001627")
# The desired output location for the processed dataset
datasetOutFolder = Path("dataset/SHARP2020/shapenet100")
# The config file
configFilePath = Path("config/SHARP2020/shapenet100.yaml")

utils.make_dir_if_not_exist(datasetOutFolder)
utils.make_dir_if_not_exist(datasetOutFolder/ "test")
utils.make_dir_if_not_exist(datasetOutFolder/ "train")

In [8]:
# Looking for all obj files
path = datasetLocation / "**" / "*.obj"
print("Looking for all files that match: " + str(path))
files = glob.glob(str(path), recursive=True)
print("Found " + str(len(files)) + " files")

Looking for all files that match: ../../datasets/ShapeNet/03001627/**/*.obj
Found 6778 files


## Load the configuration

In [13]:
cfg = cfg_loader.load(configFilePath)

# shorthands
trainTestRatio =  cfg["preprocessing"]["trainTestRatio"]
bbox = cfg['data_bounding_box']
res = cfg['input_resolution']
num_points = cfg['input_points_number']
bbox_str = cfg['data_bounding_box_str']
grid_points = utils.create_grid_points_from_xyz_bounds(*bbox, res)
kdtree = KDTree(grid_points)

## Create the processed files

1) Load the trimesh scene
2) sample each submesh into a separate pointcloud
3) Combine them into one big material pointcloud
4) Resample to theset number of points
5) cut random holes in the point cloud by selecting nearest neighbours
6) Save the complete sampled shape as a npz file
    > id/id_normalized_color_samples100000_bbox-0.8,0.8,-0.15,2.1,-0.8,0.8.npz
7) save 4 variations in the same folder

In [14]:
nrOfFiles = 100#len(files)
trainCases = int(np.round(nrOfFiles * trainTestRatio))

for i in range(0,nrOfFiles):
    print("Processing " + str(i) + "/" + str(nrOfFiles))
    objFile = files[i]
    newId = Path(objFile).parent.parent.name
    # Make the new destination folder for the files
    if(i < trainCases):
        newIdPath = datasetOutFolder / "train" / newId
    else:
        newIdPath = datasetOutFolder / "test" / newId
    utils.make_dir_if_not_exist(newIdPath)
    
    # Create a new mesharray object to parse the data
    meshArray = mesharray.MeshArray(
        id = newId, 
        bbox=bbox, 
        kdtree = kdtree,
        grid_points=grid_points, 
        num_points=num_points ).from_trimesh(objFile, colorMode = "singleChannel")

    # Save the full point cloud as a colored point cloud
    meshArray.savez(newIdPath / (newId + "_normalized_color_samples" + str(num_points) + "_bbox" + bbox_str + ".npz"))
    # Create a number of incomplete meshes and save them as voxelised point clouds for standardisation
    meshArray.filter_points(newIdPath, 4, 4, 0.02)

Processing 0/100
Folder does not exist, creating the folder dataset/SHARP2020/shapenet100/train/3a4d572dc16ac52e201e82f29479384c
Processing 1/100
Folder does not exist, creating the folder dataset/SHARP2020/shapenet100/train/91b738d40201bf18a413908c0e169330
Processing 2/100
Folder does not exist, creating the folder dataset/SHARP2020/shapenet100/train/477dfe89f1d5df337fa68300c57bff0a
Processing 3/100
Folder does not exist, creating the folder dataset/SHARP2020/shapenet100/train/d4f5c3e3eab52d0a3334fb6668ccd834
Processing 4/100
Folder does not exist, creating the folder dataset/SHARP2020/shapenet100/train/ccea874d869ff9a579368d1198f406e7
Processing 5/100
Folder does not exist, creating the folder dataset/SHARP2020/shapenet100/train/a3aa7e473a8059133087f84b199fd297
Processing 6/100
Folder does not exist, creating the folder dataset/SHARP2020/shapenet100/train/611a76dca411bf089579c5df0e7fa96
Processing 7/100
Folder does not exist, creating the folder dataset/SHARP2020/shapenet100/train/2f

### Export to obj groundtruth

In [16]:
nrOfFiles = 100
trainCases = int(np.round(nrOfFiles * trainTestRatio))

for i in range(0,nrOfFiles):
    print("Processing " + str(i) + "/" + str(nrOfFiles))
    objFile = files[i]
    newId = Path(objFile).parent.parent.name
    # Make the new destination folder for the files
    if(i < trainCases):
        newIdPath = datasetOutFolder / "train" / newId
    else:
        newIdPath = datasetOutFolder / "test" / newId
    data = np.load(newIdPath / (newId + "_normalized_color_samples" + str(num_points) + "_bbox" + bbox_str + ".npz"))
    cloud = trimesh.PointCloud(data["points"])
    # Save the full point cloud as a colored point cloud
    cloud.export(newIdPath / (newId + "_normalized.obj"))

Processing 0/100
Processing 1/100
Processing 2/100
Processing 3/100
Processing 4/100
Processing 5/100
Processing 6/100
Processing 7/100
Processing 8/100
Processing 9/100
Processing 10/100
Processing 11/100
Processing 12/100
Processing 13/100
Processing 14/100
Processing 15/100
Processing 16/100
Processing 17/100
Processing 18/100
Processing 19/100
Processing 20/100
Processing 21/100
Processing 22/100
Processing 23/100
Processing 24/100
Processing 25/100
Processing 26/100
Processing 27/100
Processing 28/100
Processing 29/100
Processing 30/100
Processing 31/100
Processing 32/100
Processing 33/100
Processing 34/100
Processing 35/100
Processing 36/100
Processing 37/100
Processing 38/100
Processing 39/100
Processing 40/100
Processing 41/100
Processing 42/100
Processing 43/100
Processing 44/100
Processing 45/100
Processing 46/100
Processing 47/100
Processing 48/100
Processing 49/100
Processing 50/100
Processing 51/100
Processing 52/100
Processing 53/100
Processing 54/100
Processing 55/100
Pr

## Visualise the created files

The new files are compressed numpy arrays. We can convert them back to trimesh objects to visualise them

In [8]:
from pathlib import Path
import numpy as np

filePath = Path("dataset/SHARP2020/shapenet/test/1c17cc67b8c747c3febad4f49b26ec52/1c17cc67b8c747c3febad4f49b26ec52_normalized-partial-0_voxelized_colored_point_cloud_res128_points100000_bbox-1,1,-1,1,-1,1.npz")

data = np.load(filePath)
print(list(data.keys()))
cloud = trimesh.PointCloud(data["colored_point_cloud"])
cloud.export(str(filePath)[:-3] + "obj")
print(list(data.keys()))

['R', 'G', 'B', 'S', 'colored_point_cloud', 'bbox', 'res']
['R', 'G', 'B', 'S', 'colored_point_cloud', 'bbox', 'res']


In [20]:

cloud = trimesh.Trimesh(data["colored_point_cloud"]) #trimesh.PointCloud(data["colored_point_cloud"])
print(cloud)
#trimesh.visual.color.ColorVisuals(vertex_colors=np.vstack((data['R'],data['G'],data['B'])).T)
cloud.visual.vertex_colors = colors
#cloud.colors = np.vstack((data['R'],data['G'],data['B'])).T
print(cloud.visual.vertex_colors)
print (cloud.visual.kind)

<trimesh.Trimesh(vertices.shape=(92000, 3), faces.shape=(0, 3))>
[[102 102 102 255]
 [102 102 102 255]
 [102 102 102 255]
 ...
 [102 102 102 255]
 [102 102 102 255]
 [102 102 102 255]]
None


In [None]:
cloud.show()

In [21]:
cloud.export(str(filePath)[:-3] + "obj", include_color=True)


'# https://github.com/mikedh/trimesh\nv -0.21486716 -0.28419898 -0.09518495\nv 0.38782956 -0.22199036 -0.13290124\nv -0.27956463 -0.28445346 -0.13885872\nv -0.22169085 -0.27978161 -0.36132871\nv -0.38147035 -0.53154038 0.36781053\nv -0.38335064 -0.14084562 -0.21659811\nv 0.39750773 -0.18891424 0.06319945\nv 0.36493348 0.12687172 0.40969452\nv 0.29238335 -0.32640094 0.30507197\nv -0.35317036 0.00612658 0.15240543\nv -0.35354824 -0.09352318 -0.09297506\nv 0.25355373 -0.26049430 -0.26123364\nv -0.17269640 -0.29855005 -0.35082976\nv 0.44343257 0.04731032 0.28316245\nv -0.27026479 -0.27459649 -0.35960537\nv 0.31611707 -0.32487125 0.26026154\nv 0.00803251 -0.28404899 -0.36089661\nv 0.44033088 0.22676009 0.42268099\nv -0.26073687 -0.13584191 -0.32108254\nv 0.40840205 -0.55307843 0.37437089\nv -0.35700206 -0.58449394 0.40127649\nv -0.40263766 -0.07756692 0.09777141\nv -0.24697181 -0.30644568 -0.05962725\nv 0.30045710 -0.13712949 0.01103789\nv -0.20221489 -0.13589668 -0.30850418\nv 0.28708315 -

### The minimal loss

In [1]:
import numpy as np
path = "experiments/shapenet/val_min.npy"

file = np.load(path)

print(file)

[1019.           65.81086356]


## Evaluation

We evaluate the resulting objs by comparing them against the ground truth

### Single object

In [27]:
import trimesh
from pathlib import Path
import numpy as np

# define the ground truth path
testGtFolderPath = Path("dataset/SHARP2020/shapenet/test/")
testResultsFolderPath = Path("experiments/shapenet_channel/evaluation_283/")

objectId = "1c17cc67b8c747c3febad4f49b26ec52"

# Load the ground truth
gtFilePath = testGtFolderPath / objectId / (objectId + "_normalized_color_samples100000_bbox-1,1,-1,1,-1,1.npz")
gtMesh = np.load(gtFilePath)

# Load the completed mesh
meshNr = 0
partialMesh = trimesh.load(testResultsFolderPath / objectId / (objectId + "_normalized-partial-" + str(meshNr) + "_color_reconstruction.obj"))

# Process the vertex colors to math the ground truth
partialMesh.visual.vertex_colors
partialColors = partialMesh.visual.vertex_colors[:,:3]
partialColors[partialColors > 128] = 255
partialColors[partialColors <= 128] = 0

# Compare the color values
partialColors = np.asarray(partialColors)
gtColors = np.asarray(gtMesh["colors"]).astype(int)
totalMatches = np.sum(np.all(np.equal(partialColors, gtColors), axis=1))

percentage = totalMatches/gtMesh["points"].shape[0]

print(percentage)

### Whole folder

In [50]:
import trimesh
from pathlib import Path
import numpy as np
import os

# define the ground truth path
testGtFolderPath = Path("dataset/SHARP2020/shapenet/test/")
testResultsFolderPath = Path("experiments/shapenet_channel/evaluation_283/")
nrOfIncompleteMeshes = 4
correctnessDict = {"id" : "Score"}

for root, dirs, files in os.walk(testGtFolderPath, topdown=False):
    for name in dirs:
        objectId = name

        # Load the ground truth
        gtFilePath = testGtFolderPath / objectId / (objectId + "_normalized_color_samples100000_bbox-1,1,-1,1,-1,1.npz")
        gtMesh = np.load(gtFilePath)
        partialArray = np.zeros(nrOfIncompleteMeshes)
        for meshNr in range(nrOfIncompleteMeshes):
        # Load the completed mesh
            partialMesh = trimesh.load(testResultsFolderPath / objectId / (objectId + "_normalized-partial-" + str(meshNr) + "_color_reconstruction.obj"))
            # Process the vertex colors to math the ground truth
            partialMesh.visual.vertex_colors
            partialColors = partialMesh.visual.vertex_colors[:,:3]
            partialColors[partialColors > 128] = 255
            partialColors[partialColors <= 128] = 0

            # Compare the color values
            partialColors = np.asarray(partialColors)
            gtColors = np.asarray(gtMesh["colors"]).astype(int)
            totalMatches = np.sum(np.all(np.equal(partialColors, gtColors), axis=1))

            percentage = totalMatches/gtMesh["points"].shape[0]
            partialArray[meshNr] = percentage
            print(percentage)
        
        correctnessDict[objectId] = partialArray

0.63666
0.6406
0.64015
0.64004
0.65848
0.6598
0.66472
0.65444
1.0
1.0
1.0
1.0
0.58215
0.57237
0.57307
0.57089
0.71941
0.71661
0.71582
0.7213
0.9343
0.93743
0.93954
0.94056
0.79608
0.79524
0.79791
0.80203
0.47957
0.48128
0.47735
0.47526
0.47426
0.47644
0.48184
0.48076
0.93572
0.94059
0.94172
0.93103
0.8954
0.89856
0.89528
0.88903
0.99679
0.98622
0.99089
0.99761
0.91854
0.92031
0.91823
0.91799
1.0
1.0
1.0
1.0
0.60153
0.62226
0.60332
0.61614
0.62481
0.62976
0.62878
0.62501
0.95236
0.95191
0.94993
0.9509
0.71732
0.71022
0.72415
0.71589
0.36567
0.35818
0.36721
0.36384
0.48052
0.48641
0.48405
0.48419
0.95232
0.9542
0.95838
0.9584
0.49375
0.48969
0.49374
0.49135
0.99579
0.9966
0.99173
0.9958
0.48392
0.482
0.48088
0.47405
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
0.87788
0.87472
0.86724
0.87467
0.90453
0.90039
0.90132
0.90493
0.99692
0.98787
0.99674
0.99612
0.67356
0.66897
0.65898
0.67195
0.59735
0.59729
0.59612
0.5912
0.47237
0.47592
0.47404
0.47317
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
0.73356
0.70387
0.719

In [58]:
averageArray = {"id": "value"}

for id in correctnessDict:
    if(id != "id"):
        averageArray[id] = np.average(np.asarray(correctnessDict[id]))

In [59]:
import csv

with open('if-net-texture-evaluation-average.csv', 'w') as f:  # You will need 'wb' mode in Python 2.x
    w = csv.DictWriter(f, averageArray.keys())
    w.writeheader()
    w.writerow(averageArray)