# Testing codes for loading and equalizing the YCB data set 

Hyper-paramters and packages 

In [32]:
# Hyper-parameters 

nb_classes = 16
rand_seed = 42
one_hot = True
test_train_ratio = 0.2
verbose = 2
seq_len = 5
nb_samples = 100000
joint_dist_lim = 0.005
nb_tot_samp_class = 10000

In [3]:
# Data set location 
data_set_loc_str = r"C:\Users\phili\Documents\GitHub\DexterousManipulation\generations\DATA_SET_YCB_filtered"

In [4]:
import numpy as np
import random
import os 
import tensorflow as tf 
import time 
from sklearn.model_selection import train_test_split

Functions from previous codes 

In [5]:
def load_non_zero_grasps(data_set_loc_str, nb_samples, nb_classes, lim):

    x_min = -0.15
    x_max = 0.15
    y_min = -0.15
    y_max = 0.15 
    z_min = 0.13
    z_max = 0.35
    gripper_min = 0 
    gripper_max = 0.041664
     
    non_zero_metrics_list = [] 
    non_zero_hands_list = [] 
    for index in range(nb_classes):
        non_zero_metrics_list.append([])
        non_zero_hands_list.append([])
        
    for filename in os.listdir(data_set_loc_str): 
        file_data = np.load(data_set_loc_str + "/" + filename)
        
        file_metrics = file_data["metric"].astype(np.float64)
        file_hand_info = file_data["hand"].astype(np.float64)
        file_obj_classes = file_data["obj"].astype(np.float64)
        
        for metric_index in range(len(file_metrics)):
            if file_metrics[metric_index, 0] == 1.0 and file_metrics[metric_index,1] > lim:
                curr_class = int(file_obj_classes[metric_index,0])
                
                non_zero_metrics_list[curr_class].append((file_metrics[metric_index,1] - gripper_min) / (gripper_max - gripper_min))
                tmp_hand_info = file_hand_info[metric_index]
                tmp_hand_info[0] = (tmp_hand_info[0] - x_min) / (x_max - x_min)
                tmp_hand_info[1] = (tmp_hand_info[1] - y_min) / (y_max - y_min)
                tmp_hand_info[2] = (tmp_hand_info[2] - z_min) / (z_max - z_min)
                
                non_zero_hands_list[curr_class].append(tmp_hand_info)
                
    return non_zero_metrics_list, non_zero_hands_list

In [6]:
def load_non_zero_seq(data_set_loc_str, nb_samples, test_train_ratio, rand_seed, verbose, seq_len, nb_classes, lim):
    if verbose >= 1:
        print("Loading Data.")
    start_time = time.time()
    non_zero_metrics_list, non_zero_hands_list = load_non_zero_grasps(data_set_loc_str, nb_samples, nb_classes, lim)
    end_time = time.time()
    if verbose >= 1:
        print("Time taken to load data: ", end_time - start_time)
        
    inputs = [] 
    outputs = []
    
    # Parsing all buckets 
    for bucket_index in range(nb_classes):
        bucket_offset = 0 
        while (bucket_offset + seq_len - 1  < len(non_zero_metrics_list[bucket_index])):
            # Build the sequence 
            tmp_buffer = [] 
            for seq_index in range(seq_len): 
                # Fusing the data 
                tmp_list = [] 
                tmp_list.append(non_zero_metrics_list[bucket_index][bucket_offset + seq_index])
                for hand_index in range(13): 
                    tmp_list.append(non_zero_hands_list[bucket_index][bucket_offset + seq_index][hand_index])
                tmp_buffer.append(tmp_list.copy())
            
            inputs.append(tmp_buffer.copy())
            outputs.append(tf.one_hot(bucket_index, nb_classes, dtype=np.float64).numpy().tolist())
            bucket_offset += seq_len 
            
        
    # Shuffling the data 
    X_train, X_test, y_train, y_test = train_test_split(inputs, outputs, test_size=test_train_ratio, random_state=rand_seed)
    
    if verbose >= 1:
        print("Size of Train set: ", len(X_train))
        print("Size of Test set: ", len(X_test))
        
    return X_train, X_test, y_train, y_test

New functions 

In [7]:
def load_non_zero_fused_grasps(data_set_loc_str, nb_classes, lim):

    x_min = -0.15
    x_max = 0.15
    y_min = -0.15
    y_max = 0.15 
    z_min = 0.13
    z_max = 0.35
    gripper_min = 0 
    gripper_max = 0.041664
     
    non_zero_grasps_list = []
    for index in range(nb_classes):
        non_zero_grasps_list.append([])
        
    for filename in os.listdir(data_set_loc_str): 
        file_data = np.load(data_set_loc_str + "/" + filename)
        
        file_metrics = file_data["metric"].astype(np.float64)
        file_hand_info = file_data["hand"].astype(np.float64)
        file_obj_classes = file_data["obj"].astype(np.float64)
        
        for metric_index in range(len(file_metrics)):
            if file_metrics[metric_index, 0] == 1.0 and file_metrics[metric_index,1] > lim:
                curr_class = int(file_obj_classes[metric_index,0])
                tmp_list = [] 
                
                tmp_list.append((file_metrics[metric_index,1] - gripper_min) / (gripper_max - gripper_min))
                tmp_hand_info = file_hand_info[metric_index]
                tmp_hand_info[0] = (tmp_hand_info[0] - x_min) / (x_max - x_min)
                tmp_hand_info[1] = (tmp_hand_info[1] - y_min) / (y_max - y_min)
                tmp_hand_info[2] = (tmp_hand_info[2] - z_min) / (z_max - z_min)
                
                for tmp_index in range(13): 
                    tmp_list.append(tmp_hand_info[tmp_index])
                
                non_zero_grasps_list[curr_class].append(tmp_list.copy())
                
    return non_zero_grasps_list

In [8]:
def load_zero_fused_grasps_buckets(data_set_loc_str, nb_classes, lim, nb_tot_samp_class): 
    x_min = -0.15
    x_max = 0.15
    y_min = -0.15
    y_max = 0.15 
    z_min = 0.13
    z_max = 0.35
    gripper_min = 0 
    gripper_max = 0.041664
     
    zero_grasps_list = []
    for index in range(nb_classes):
        zero_grasps_list.append([])
        
    for filename in os.listdir(data_set_loc_str): 
        file_data = np.load(data_set_loc_str + "/" + filename)
        
        file_metrics = file_data["metric"].astype(np.float64)
        file_hand_info = file_data["hand"].astype(np.float64)
        file_obj_classes = file_data["obj"].astype(np.float64)
        
        counters = [] 
        for tmp_index in range(nb_classes): 
            counters.append(len(zero_grasps_list[tmp_index]))
            
        enough_samples = True 
        for tmp_index in range(nb_classes): 
            if counters[tmp_index] < nb_tot_samp_class:
                # not enough samples 
                enough_samples = False 
        if enough_samples: 
            break 
        
        for metric_index in range(len(file_metrics)):
            if file_metrics[metric_index, 0] == 1.0 and file_metrics[metric_index,1] < lim:
                curr_class = int(file_obj_classes[metric_index,0])
                tmp_list = [] 
                
                tmp_list.append((file_metrics[metric_index,1] - gripper_min) / (gripper_max - gripper_min))
                tmp_hand_info = file_hand_info[metric_index]
                tmp_hand_info[0] = (tmp_hand_info[0] - x_min) / (x_max - x_min)
                tmp_hand_info[1] = (tmp_hand_info[1] - y_min) / (y_max - y_min)
                tmp_hand_info[2] = (tmp_hand_info[2] - z_min) / (z_max - z_min)
                
                for tmp_index in range(13): 
                    tmp_list.append(tmp_hand_info[tmp_index])
                
                zero_grasps_list[curr_class].append(tmp_list.copy())
                
    return zero_grasps_list

In [40]:
def equalize_class_buckets(non_zero_grasps_list, zero_grasps_list, nb_tot_samp_class, nb_classes):
    equalized_grasps = non_zero_grasps_list.copy()
    for class_index in range(nb_classes): 
        curr_len = len(non_zero_grasps_list[class_index])
        samp_to_complete = nb_tot_samp_class - curr_len
        # Clip, if necessary 
        if samp_to_complete < 0: 
            equalized_grasps[class_index] = equalized_grasps[class_index][:nb_tot_samp_class]
        else: 
            for samp_index in range(samp_to_complete): 
                equalized_grasps[class_index].append(zero_grasps_list[class_index][samp_index])
    # Shuffle the data per class 
    for class_index in range(nb_classes): 
        random.shuffle(equalized_grasps[class_index])
    return equalized_grasps

In [10]:
def load_total_YCB_eq_data_seq(data_set_loc_str, nb_samples, nb_classes, joint_dist_lim, seq_len, nb_tot_samp_class, test_train_ratio, rand_seed, verbose): 
    if verbose >= 1:
        print("Loading Data.")
    start_time = time.time()
    
    non_zero_grasps_list = load_non_zero_fused_grasps(data_set_loc_str, nb_classes, joint_dist_lim)
    zero_grasps_list = load_zero_fused_grasps_buckets(data_set_loc_str, nb_classes, joint_dist_lim, nb_tot_samp_class)
    equalized_grasps = equalize_class_buckets(non_zero_grasps_list, zero_grasps_list, nb_tot_samp_class, nb_classes)
    
    end_time = time.time()
    if verbose >= 1:
        print("Time taken to load data: ", end_time - start_time)
        
    inputs = [] 
    outputs = []
    
    # Parsing all buckets 
    for bucket_index in range(nb_classes):
        bucket_offset = 0 
        while (bucket_offset + seq_len - 1  < len(non_zero_metrics_list[bucket_index])):
            # Build the sequence 
            tmp_buffer = [] 
            for seq_index in range(seq_len): 
                # Fusing the data 
                tmp_buffer.append(equalized_grasps[bucket_index][bucket_offset + seq_index].copy())
            
            inputs.append(tmp_buffer.copy())
            outputs.append(tf.one_hot(bucket_index, nb_classes, dtype=np.float64).numpy().tolist())
            bucket_offset += seq_len 
            
        
    # Shuffling the data 
    X_train, X_test, y_train, y_test = train_test_split(inputs, outputs, test_size=test_train_ratio, random_state=rand_seed)
    
    if verbose >= 1:
        print("Size of Train set: ", len(X_train))
        print("Size of Test set: ", len(X_test))
        
    return X_train, X_test, y_train, y_test
    

Test codes for equalizing 

In [52]:
non_zero_metrics_list, non_zero_hands_list = load_non_zero_grasps(data_set_loc_str, nb_samples, nb_classes, joint_dist_lim)

In [33]:
non_zero_grasps_list = load_non_zero_fused_grasps(data_set_loc_str, nb_classes, joint_dist_lim)

In [34]:
zero_grasps_list = load_zero_fused_grasps_buckets(data_set_loc_str, nb_classes, joint_dist_lim, nb_tot_samp_class)

In [41]:
equalized_grasps = equalize_class_buckets(non_zero_grasps_list, zero_grasps_list, nb_tot_samp_class, nb_classes)

In [77]:
X_train, X_test, y_train, y_test = load_total_YCB_eq_data_seq(data_set_loc_str, nb_samples, nb_classes, joint_dist_lim, seq_len, nb_tot_samp_class, test_train_ratio, rand_seed, verbose)

Loading Data.
Time taken to load data:  55.12330365180969
Size of Train set:  7800
Size of Test set:  1951


In [36]:
print(equalized_grasps[1][3])

[0.34534918164373724, 0.4333641429742177, 0.39342081050078076, 0.5504633675922047, 0.0, -0.7448044419288635, 0.6672828793525696, 0.0, 0.6672828793525696, 0.7448044419288635, -1.0, 0.0, -4.371139183945161e-08, 1.0]


In [42]:
print(len(equalized_grasps[0]))

10000


In [43]:
counters = [] 
for class_index in range(nb_classes): 
    counters.append(len(equalized_grasps[class_index]))

In [44]:
print(counters)
print(max(counters))

[10000, 10000, 10000, 10000, 10000, 10000, 10000, 10000, 10000, 10000, 10000, 10000, 10000, 10000, 10000, 10000]
10000


In [18]:
print(zero_grasps_list[1][0])

[0.0017974316897477844, 0.16824208696683246, 0.6753605902194977, 0.07357561317357149, 0.0, 0.3725760877132416, -0.9280017018318176, 0.0, -0.9280017018318176, -0.3725760877132416, -1.0, 0.0, -4.371138828673793e-08, 1.0]


In [19]:
print(len(zero_grasps_list[0]))

20005


In [20]:
counters = [] 
for class_index in range(nb_classes): 
    counters.append(len(zero_grasps_list[class_index]))

In [21]:
print(counters)
print(max(counters))

[20005, 28294, 29189, 31977, 33734, 34323, 33175, 33848, 33253, 34078, 26529, 30754, 31736, 32530, 32390, 33226]
34323


In [22]:
print(len(non_zero_grasps_list[0]))

20000


In [27]:
counters = [] 
for class_index in range(nb_classes): 
    counters.append(len(non_zero_grasps_list[class_index]))

In [28]:
print(counters)
print(max(counters))

[2223, 3496, 4480, 2495, 3518, 2802, 1863, 2173, 2197, 2423, 1586, 15780, 10946, 4795, 4800, 8012]
15780


In [30]:
tmp_test_list = non_zero_grasps_list[11]
print(len(tmp_test_list))

15780


In [31]:
tmp_test_list = tmp_test_list[:10000]
print(len(tmp_test_list))

10000
