In [1]:
import os
import re
import open3d as o3d
import numpy as np
import h5py
import random
import time

Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.


In [2]:
#Print grid in readable format
def pretty_print(grid):
    for row in grid:
        print(row)

#from path of description file get the coupling grid, model data and 
#remove models with error
def process_file(path):
    with open(path,'r') as f:
        #don't consider first two lines
        f.readline()           #segmenti x x x
        f.readline()           #rotazioni x x x

        line=f.readline()      #numero pezzi x  

        #get number of fragments
        number=int(re.findall("\d+",line)[0])

        #coupling matrix
        grid=[]
        for i in range(number):
            line=f.readline()
            ret=re.findall('-1|0|1',line)
            grid.append(list(map(int,ret)))

        #model data
        model_names=[]
        non_valid_indeces=[]
        for m in range(number):
            f.readline()        #blank line
            name=f.readline()   #model name
            mesh=f.readline()   #mesh n
            f.readline()        #external n
            f.readline()        #internal n

            #if mesh=0 I don't consider the element
            mesh_n=int(mesh.rstrip().split(" ")[1]) 
            if mesh_n!=0: 
                #try create file name
                file_name= name.rstrip().replace(".","_")
                model_names.append(file_name)          
            else:
                #saving indices to remove later
                non_valid_indeces.append(m)

        #removing elements from grid
        #sorted in reverse to avoid wrong index
        for index in sorted(non_valid_indeces, reverse=True):
            #remove row
            del grid[index]
            #remove columns
            for row in grid:
                del row[index]
        
        return grid,model_names


In [3]:
#get a set of fragments (i.e. a subfolder)
def get_set(folder,verbose=True):
    folder_path=os.path.join(main_path,folder)
    models_file=[]

    #files are either text files or models
    for file in os.listdir(folder_path):
        if '.txt' in file:
            description_file=file
        elif '.stl' in file:
            #not used, they are not in the same order of the file
            models_file.append(file)

    #parsing description file
    full_description_file=os.path.join(folder_path,description_file)
    grid,model_names=process_file(full_description_file)

    #get path of models of current set
    model_prefix=folder.replace("generatedTest_","")
    complete_models_path=[]
    for i in range(len(model_names)):
        #e.g. 2021_11_29_10_00_40_Cube_001.stl
        correct_model_name=f"{model_prefix}_{model_names[i]}.stl"
        #saving only names, could be useful but now not used
        model_names[i]=correct_model_name

        complete_models_path.append(os.path.join(folder_path,correct_model_name))

    #create set and put into list of sets
    set={"models":complete_models_path,"grid":grid}
    
    if verbose:
        print("A set: \n")
        print("Models: ",set["models"])
        print("Grid: ")
        pretty_print(set["grid"])

    return set

In [4]:
#Create all the possible pairs of fragments
#duplicates are not considered, e.g.: (i,j) - (j,i)
def create_pairs(num):
    lista = []
    for i in range(num):
        for j in range(i+1, num):
            lista.append((i, j))

    return lista

print(create_pairs(5))


[(0, 1), (0, 2), (0, 3), (0, 4), (1, 2), (1, 3), (1, 4), (2, 3), (2, 4), (3, 4)]


In [98]:
def count_dataset_pairs(main_path,max_elements=-1,alpha=1):
    num_total_pairs=0
    
    #get total number of pairs we consider:
    for folder in os.listdir(main_path):
        #check only folders, not files
        if '.' not in folder:
            fragment_set=get_set(folder,verbose=False)  #set of fragments of one model

            #get total number of adjacent pairs of fragments
            #i.e. number of 1 in the grid
            grid = np.array(fragment_set["grid"])
            unique, counts = np.unique(grid, return_counts=True)
            dic = dict(zip(unique, counts))
            #divide by two because we consider half of the pairs, not both of (i,j),(j,i)
            num_zeros = int(dic[0]/2)
            num_ones = int(dic[1]/2)

            #considerare solo alpha*N coppie non adiacenti
            max_not_adjacent_pairs=alpha*num_ones
            estimated_num_pairs=int(min(max_not_adjacent_pairs,num_zeros)+num_ones)
            num_total_pairs+=estimated_num_pairs

    if max_elements>0:
        num_total_pairs=min(max_elements,num_total_pairs)
    print("dataset will contain:", num_total_pairs," total pairs")
    return num_total_pairs
   


In [124]:
def create_dataset(main_path,num_points,max_elements=-1,alpha=1):
    
    num_elements=0
    num_pairs=0
    num_total_pairs=0
    folder_index = 0
    tot_num_adjacent=0

    sets=[]

    

    print("Creating dataset...")
    dataset_total_pairs=count_dataset_pairs(main_path,max_elements,alpha)

    dataset_file=None
    dataset_file_name=f"dataset_{dataset_total_pairs}pairs_{num_points}points_{int(time.time())}.hdf5"

    try: 
        dataset_file.close()
        os.remove(dataset_file_name)
    except:
        print("file dataset not found, creating")
    

    dataset_file=h5py.File(dataset_file_name, 'w')
    dataset_data=dataset_file.create_dataset("data", (dataset_total_pairs,2,num_points,3))
    dataset_label=dataset_file.create_dataset("labels", (dataset_total_pairs,),dtype='i')
    
    
    
    #with h5py.File(dataset_file_name, 'w') as f:
        #f.create_dataset("data", (dataset_total_pairs,2,num_points,3))
        #f.create_dataset("labels", (dataset_total_pairs,),dtype='i')
        #f.close()


    total_dataset_start_time=time.time()

    print(f"Each point cloud is sampled with {num_points} points\n\n")
    for folder in os.listdir(main_path):
        #check only folders, not files
        if '.' not in folder:

            set_data=[]
            set_labels=[]

            folder_index+=1
            set_start_time=time.time()
            print(f"\n\nStarting set {folder_index}: folder - {folder}")            

            fragment_set=get_set(folder,verbose=False)  #set of fragments of one model
            num_elements+=len(fragment_set["models"])
            sets.append(fragment_set)
            
            #save first pointcloud of meshes
            set_pointcloud=[]
            for path in fragment_set["models"]:
                mesh=o3d.io.read_triangle_mesh(path)
                set_pointcloud.append(mesh.sample_points_poisson_disk(num_points))

            #get indeces of pairs
            pairs=create_pairs(len(fragment_set["models"]))

            #shuffle to get random pairs not in order
            random.shuffle(pairs)

            #get total number of adjacent pairs of fragments
            #i.e. number of 1 in the grid
            grid = np.array(fragment_set["grid"])
            unique, counts = np.unique(grid, return_counts=True)
            dic = dict(zip(unique, counts))
            #divide by two because we consider half of the pairs, not both of (i,j),(j,i)
            num_zeros = int(dic[0]/2)
            num_ones = int(dic[1]/2)

            #considerare solo a*N coppie non adiacenti
            max_not_adjacent_pairs=alpha*num_ones

            estimated_num_pairs=int(min(max_not_adjacent_pairs,num_zeros)+num_ones)

            print("Set stats: ")
            print(f"  --number of fragments: {len(fragment_set['models'])}")
            print(f"  --total adjacent pairs: {num_ones}; total not adjacent pairs: {num_zeros}")
            print(f"  --dataset for set will contain: {estimated_num_pairs} pairs")
            print(f"     --> adj: {num_ones}    n-adj: {int(min(max_not_adjacent_pairs,num_zeros))}")

            #some stats
            set_num_not_adjacent=0
            set_num_adjacent=0
            current_set_pairs=0
            current_set_not_adj=0
            
            for pair in pairs:
                #if limit of maximum pairs is not exceeded
                if max_elements<0 or num_pairs<=max_elements:
                        
                    num_total_pairs+=1
                    idx1,idx2=pair
                    label=fragment_set["grid"][idx1][idx2]
                    
                    #total number of not adjacent
                    if label==0:
                        set_num_not_adjacent+=1
                    else:
                        set_num_adjacent+=1
                        tot_num_adjacent+=1
                    
                    #if adjacent pair or not adjacent pairs limit not exceeded
                    if label==1 or (label==0 and set_num_not_adjacent<=max_not_adjacent_pairs):

                        #add stats for number of fragments in dataset and current set
                        num_pairs+=1
                        current_set_pairs+=1

                        #add stats for adjacent fragment in dataset
                        if label==0: current_set_not_adj+=1

                        pointcloud1=set_pointcloud[idx1]
                        pointcloud2=set_pointcloud[idx2]

                        #generate pair and add to dataset
                        pointcloud_pair=[ np.asarray(pointcloud1.points) ,  np.asarray(pointcloud2.points) ]

                        set_data.append(pointcloud_pair)
                        set_labels.append(label)

                        
                        #print(f"Completed: {(current_set_pairs/estimated_num_pairs) *100}%",end='')
                        #print('\r', end='')

            set_elapsed_time=time.time()-set_start_time
            print("Set completed in: %.3f seconds" % (set_elapsed_time),end=": ")
            print(f"added {current_set_pairs} pairs --> adj: {set_num_adjacent}   n_adj: {current_set_not_adj}")
            print(f"currently dataset contains {num_pairs} pairs",end=". ")
            print()

            dataset_data[num_pairs-current_set_pairs:num_pairs]=set_data
            dataset_label[num_pairs-current_set_pairs:num_pairs]=set_labels

            #with h5py.File(dataset_file_name, 'w') as f: 
                #f["data"][num_pairs-current_set_pairs:num_pairs]=all_data
                #f["labels"][num_pairs-current_set_pairs:num_pairs]=all_labels
                #f.close()

    dataset_file.close()
    total_dataset_time=time.time()-total_dataset_start_time
    print(f"Dataset contains {num_elements} fragments --> {num_total_pairs} total pairs ")
    print("we consider only: ",num_pairs," pairs, of which ", tot_num_adjacent, "are adjacent")
    print("total time: %.3f seconds" % (total_dataset_time))
    #return np.array(all_data),np.array(all_labels)



#used in class ModelNet40
#for now we don't use it
def translate_pointcloud(pointcloud):
    xyz1 = np.random.uniform(low=2./3., high=3./2., size=[3])
    xyz2 = np.random.uniform(low=-0.2, high=0.2, size=[3])
       
    translated_pointcloud = np.add(np.multiply(pointcloud, xyz1), xyz2).astype('float32')
    return translated_pointcloud         

def load_dataset(path):
    #carica dal file il dataset
    return


In [125]:
#here i save all the sets of fragments
sets=[]

#root folder
main_path="produzione_29112021"

num_points=100

In [126]:
#some tests
arr = np.random.randn(100)

f=h5py.File("ciao.hdf5", 'w')
dset = f.create_dataset("default", (1000,))
dset[10:20] = arr[50:60]
f.close()

f=h5py.File("ciao.hdf5", 'w')
f["default"][10:20] = arr[50:60]
f.close()

f=h5py.File("ciao.hdf5", 'r')
print(f["default"][10:20])
f.close()


OSError: Unable to create file (unable to truncate a file which is already open)

In [127]:
create_dataset(main_path,num_points)


Creating dataset...
dataset will contain: 90810  total pairs
file dataset not found, creating
Each point cloud is sampled with 100 points




Starting set 1: folder - generatedTest_2021_11_29_10_00_40
Set stats: 
  --number of fragments: 8
  --total adjacent pairs: 19; total not adjacent pairs: 9
  --dataset for set will contain: 28 pairs
     --> adj: 19    n-adj: 9
Set completed in: 0.227 seconds: added 28 pairs --> adj: 19   n_adj: 9
currently dataset contains 28 pairs. 


Starting set 2: folder - generatedTest_2021_11_29_10_01_59
Set stats: 
  --number of fragments: 8
  --total adjacent pairs: 19; total not adjacent pairs: 9
  --dataset for set will contain: 28 pairs
     --> adj: 19    n-adj: 9
Set completed in: 0.241 seconds: added 28 pairs --> adj: 19   n_adj: 9
currently dataset contains 56 pairs. 


Starting set 3: folder - generatedTest_2021_11_29_10_02_12
Set stats: 
  --number of fragments: 8
  --total adjacent pairs: 18; total not adjacent pairs: 10
  --dataset for set wil