In [1]:
import os
import re
import open3d as o3d
import numpy as np
import h5py
import random

Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.


In [2]:
#Print grid in readable format
def pretty_print(grid):
    for row in grid:
        print(row)

#from path of description file get the coupling grid, model data and 
#remove models with error
def process_file(path):
    with open(path,'r') as f:
        #don't consider first two lines
        f.readline()           #segmenti x x x
        f.readline()           #rotazioni x x x

        line=f.readline()      #numero pezzi x  

        #get number of fragments
        number=int(re.findall("\d+",line)[0])

        #coupling matrix
        grid=[]
        for i in range(number):
            line=f.readline()
            ret=re.findall('-1|0|1',line)
            grid.append(list(map(int,ret)))

        #model data
        model_names=[]
        non_valid_indeces=[]
        for m in range(number):
            f.readline()        #blank line
            name=f.readline()   #model name
            mesh=f.readline()   #mesh n
            f.readline()        #external n
            f.readline()        #internal n

            #if mesh=0 I don't consider the element
            mesh_n=int(mesh.rstrip().split(" ")[1]) 
            if mesh_n!=0: 
                #try create file name
                file_name= name.rstrip().replace(".","_")
                model_names.append(file_name)          
            else:
                #saving indices to remove later
                non_valid_indeces.append(m)

        #removing elements from grid
        #sorted in reverse to avoid wrong index
        for index in sorted(non_valid_indeces, reverse=True):
            #remove row
            del grid[index]
            #remove columns
            for row in grid:
                del row[index]
        
        return grid,model_names


In [3]:
#get a set of fragments (i.e. a subfolder)
def get_set(folder,verbose=True):
    folder_path=os.path.join(main_path,folder)
    models_file=[]

    #files are either text files or models
    for file in os.listdir(folder_path):
        if '.txt' in file:
            description_file=file
        elif '.stl' in file:
            #not used, they are not in the same order of the file
            models_file.append(file)

    #parsing description file
    full_description_file=os.path.join(folder_path,description_file)
    grid,model_names=process_file(full_description_file)

    #get path of models of current set
    model_prefix=folder.replace("generatedTest_","")
    complete_models_path=[]
    for i in range(len(model_names)):
        #e.g. 2021_11_29_10_00_40_Cube_001.stl
        correct_model_name=f"{model_prefix}_{model_names[i]}.stl"
        #saving only names, could be useful but now not used
        model_names[i]=correct_model_name

        complete_models_path.append(os.path.join(folder_path,correct_model_name))

    #create set and put into list of sets
    set={"models":complete_models_path,"grid":grid}
    
    if verbose:
        print("A set: \n")
        print("Models: ",set["models"])
        print("Grid: ")
        pretty_print(set["grid"])

    return set

In [4]:
#Create all the possible pairs of fragments
#duplicates are not considered, e.g.: (i,j) - (j,i)
def create_pairs(num):
    lista = []
    for i in range(num):
        for j in range(i+1, num):
            lista.append((i, j))

    return lista

print(create_pairs(5))


[(0, 1), (0, 2), (0, 3), (0, 4), (1, 2), (1, 3), (1, 4), (2, 3), (2, 4), (3, 4)]


In [5]:
def create_dataset(main_path,num_points,max_elements=-1):
    
    num_elements=0
    num_pairs=0
    num_total_pairs=0
    folder_index = 0

    all_data=[]
    all_labels=[]
    sets=[]
    print("Creating dataset...")
    print(f"Each point cloud is sampled with {num_points} points")
    num_adjacent=0

    

    #with h5py.File('random.hdf5', 'w') as f:
        #f.create_dataset("data", (num_pairs,2,num_points,3))


    for folder in os.listdir(main_path):
        if max_elements>0 and num_elements>max_elements:
            print("maximum number of objects reached, finishing dataset")
            break

        #check only folders, not files
        if '.' not in folder:
            print(folder_index, " ", folder)
            folder_index+=1
            fragment_set=get_set(folder,verbose=False)  #set of fragments of one model
            num_elements+=len(fragment_set["models"])
            sets.append(fragment_set)
            
            meshes=[]
            for path in fragment_set["models"]:
                meshes.append(o3d.io.read_triangle_mesh(path))

            #get indeces of pairs
            pairs=create_pairs(len(fragment_set["models"]))


            #shuffle dati 
            random.shuffle(pairs)

            #get total number of adjacent pairs of fragmetns
            #i.e. number of 1 in the grid
            grid = np.array(fragment_set["grid"])
            unique, counts = np.unique(grid, return_counts=True)
            dic = dict(zip(unique, counts))
            #num_zeros = dic[0]
            num_ones = dic[1]

            #considerare solo a*N coppie non adiacenti
            max_not_adjacent_pairs=1*num_ones
            num_not_adjacent=0

            for pair in pairs:
                num_total_pairs+=1
                idx1,idx2=pair
                label=fragment_set["grid"][idx1][idx2]

                if label==1:
                    num_adjacent+=1

                if label==0:
                    num_not_adjacent+=1
                
                #if adjacent pair or limit not exceeded
                if label==1 or (label==0 and num_not_adjacent<max_not_adjacent_pairs):
                    num_pairs+=1

                    #get path of the two fragments
                    #path1=fragment_set["models"][idx1]
                    #path2=fragment_set["models"][idx2]

                    #generate pointclouds of fragmetns
                    #mesh1 = o3d.io.read_triangle_mesh(path1)
                    #mesh2 = o3d.io.read_triangle_mesh(path2)

                    mesh1=meshes[idx1]
                    mesh2=meshes[idx2]


                    pointcloud1 = mesh1.sample_points_poisson_disk(num_points)
                    pointcloud2 = mesh2.sample_points_poisson_disk(num_points)

                    #generate pair and add to dataset
                    pair=[ np.asarray(pointcloud1.points) ,  np.asarray(pointcloud2.points) ]

                    all_data.append(pair)
                    all_labels.append(label)
            
            print(f"currently dataset contains {num_pairs} pairs")


            #with h5py.File('data.hdf5', 'w') as f: 

                #f.close()

    print(f"Dataset contains {num_elements} fragments --> {num_total_pairs} total pairs ")
    print("considering only: ",num_pairs," pairs, of which ", num_adjacent, "are adjacent")
    return np.array(all_data),np.array(all_labels)



#used in class ModelNet40
#for now we don't use it
def translate_pointcloud(pointcloud):
    xyz1 = np.random.uniform(low=2./3., high=3./2., size=[3])
    xyz2 = np.random.uniform(low=-0.2, high=0.2, size=[3])
       
    translated_pointcloud = np.add(np.multiply(pointcloud, xyz1), xyz2).astype('float32')
    return translated_pointcloud         

def load_dataset(path):
    #carica dal file il dataset
    return


In [6]:
#here i save all the sets of fragments
sets=[]

#root folder
main_path="produzione_29112021"

num_points=100

In [7]:
create_dataset(main_path,num_points)

Creating dataset...
Each point cloud is sampled with 100 points
0   generatedTest_2021_11_29_10_00_40
currently dataset contains 28 pairs
1   generatedTest_2021_11_29_10_01_59
currently dataset contains 56 pairs
2   generatedTest_2021_11_29_10_02_12
currently dataset contains 84 pairs
3   generatedTest_2021_11_29_10_02_50
currently dataset contains 374 pairs
4   generatedTest_2021_11_29_10_02_58
currently dataset contains 631 pairs
5   generatedTest_2021_11_29_10_03_27
currently dataset contains 912 pairs
6   generatedTest_2021_11_29_10_03_51
currently dataset contains 1739 pairs
7   generatedTest_2021_11_29_10_04_09
currently dataset contains 2464 pairs
8   generatedTest_2021_11_29_10_04_25
currently dataset contains 3186 pairs
9   generatedTest_2021_11_29_10_05_19


KeyboardInterrupt: 