In [1]:
import json
import matplotlib.pyplot as plt
import os
import numpy as np

In [2]:
# files
base = "../dataset/0701/" 
vision = base + "vision/"
llm = base + "llm/"
labels = base + "labels/"
end_state = base +"end_sim_state/"

print( vision, llm, labels, end_state)

../dataset/0701/vision/ ../dataset/0701/llm/ ../dataset/0701/labels/ ../dataset/0701/end_sim_state/


In [3]:
num_files_cycle = len([f for f in os.listdir(end_state) if os.path.isfile(os.path.join(end_state, f))])
print(num_files_cycle)

193


In [4]:
num_files_vision= len([f for f in os.listdir(vision) if os.path.isfile(os.path.join(vision, f))])
num_files_labels= len([f for f in os.listdir(labels) if os.path.isfile(os.path.join(labels, f))])
print(f"Number of vision Files: {num_files_vision}, Number of label files: {num_files_labels}")
to_dataset_samples = []
discard_samples = []
distance_thresh = 0.05
held_zub = 0.1
held_zlb = 0.02

Number of vision Files: 408, Number of label files: 408


In [5]:
def euclidean_distance(pos1, pos2):
    return np.linalg.norm(np.array(pos1) - np.array(pos2))

In [6]:
previous_sim_id = 0
iter_counter = 0

clean_dataset_action_counts = {
    "pick": 0,
    "insert": 0,
    "lock": 0,
    "putdown": 0
}
for v in range(num_files_vision):
    with open(f"{vision}sample_{v}.json", "r") as vision_in:
        vision_file = json.load(vision_in)
    wires = vision_file["wires"]

    # Checks the z coordinate for on table wires
    for wire in wires:
        if wire["state"] == "on_table" and wire["position"][2] > 0.01 or wire["position"][2] < 0.0:
            discard_samples.append([v, current_sim_id])
        else:
            clean_dataset_action_counts["pick"] += 1
    terminals = vision_file["terminals"]
    current_sim_id = vision_file["metadata"]["simulation_id"] # Save Sim cycle ID from vision file
    with open(f"{labels}sample_{v}.json", "r") as label_in:
        label_file = json.load(label_in)
    target_wire_id = label_file["target_wire"]["ID"]
    target_terminal = label_file["target_terminal"]["name"]
    target_wire_coords = wires[target_wire_id]["position"]
    target_terminal_coords = terminals[target_terminal]["position"]

    # Checks the distance wire-terminal for "inserted" wire
    if label_file["correct_action"] == "lock":
        distance_wire_terminal = euclidean_distance(target_wire_coords, target_terminal_coords)
        if distance_wire_terminal <= distance_thresh and target_wire_coords[2] < held_zlb:
            clean_dataset_action_counts["lock"] += 1
            continue
        else:
            if [v, current_sim_id] not in discard_samples:
                discard_samples.append([v, current_sim_id])

    # Checks the z coordinate for "held" wire
    elif label_file["correct_action"] == "insert":
        if target_wire_coords[2] > held_zub or target_wire_coords[2] < held_zlb and [v, current_sim_id] not in discard_samples:
            print(f"\n\nCHeck this sample: z {v}")
            discard_samples.append([v, current_sim_id])
        else:
            clean_dataset_action_counts["insert"] += 1
            continue
print(discard_samples)
print(len(discard_samples))
print(f"Percentage of usable data samples: {((num_files_vision-len(discard_samples)) * 100) / num_files_vision:.2f}%")



CHeck this sample: z 15


CHeck this sample: z 62


CHeck this sample: z 114


CHeck this sample: z 173


CHeck this sample: z 243


CHeck this sample: z 321


CHeck this sample: z 353


CHeck this sample: z 378


CHeck this sample: z 388
[[9, 9], [10, 10], [11, 10], [12, 11], [13, 11], [14, 11], [15, 12], [16, 12], [24, 15], [24, 15], [24, 15], [25, 16], [25, 16], [32, 19], [33, 20], [40, 24], [41, 25], [42, 25], [62, 33], [63, 33], [66, 34], [68, 36], [69, 37], [88, 44], [88, 44], [88, 44], [89, 45], [89, 45], [89, 45], [90, 45], [90, 45], [90, 45], [91, 45], [95, 48], [96, 49], [97, 49], [104, 52], [104, 52], [105, 53], [105, 53], [106, 53], [106, 53], [114, 57], [138, 68], [139, 69], [148, 73], [158, 78], [158, 78], [159, 79], [159, 79], [173, 85], [174, 85], [177, 86], [178, 86], [179, 87], [180, 88], [181, 88], [181, 88], [182, 89], [182, 89], [183, 89], [183, 89], [188, 91], [189, 92], [203, 99], [203, 99], [204, 100], [205, 100], [209, 101], [210, 102], [211, 102], [224, 110]

In [7]:
sample_ids = []
cycle_ids = []
for sample in discard_samples:
    sample_ids.append(sample[0])
    cycle_ids.append(sample[1])
print(len(sample_ids), len(cycle_ids))

144 144


In [8]:
import shutil 
source_directory_base = "../dataset/0701/"
destination_directory_base = "../dataset/0701_clean/"
parent_dirs = ["vision/", "llm/", "labels/"]
for parent_d in parent_dirs:
    source_directory = source_directory_base + parent_d
    destination_directory = destination_directory_base + parent_d
    counter = 0
    for f in range(num_files_vision):
        if f not in sample_ids:
            old_file_name = f"sample_{f}.json"
            new_file_name = f"sample_{counter}.json"
            counter += 1
            source_path = os.path.join(source_directory, old_file_name)
            destination_path = os.path.join(destination_directory, new_file_name)
            try:
                shutil.copy(source_path, destination_path)
                print(f"File '{old_file_name}' copied successfully from '{source_directory}' to '{destination_directory}'.")
            except FileNotFoundError:
                print(f"Error: Source file '{source_path}' not found.")
            except Exception as e:
                print(f"An error occurred: {e}")


File 'sample_0.json' copied successfully from '../dataset/0701/vision/' to '../dataset/0701_clean/vision/'.
File 'sample_1.json' copied successfully from '../dataset/0701/vision/' to '../dataset/0701_clean/vision/'.
File 'sample_2.json' copied successfully from '../dataset/0701/vision/' to '../dataset/0701_clean/vision/'.
File 'sample_3.json' copied successfully from '../dataset/0701/vision/' to '../dataset/0701_clean/vision/'.
File 'sample_4.json' copied successfully from '../dataset/0701/vision/' to '../dataset/0701_clean/vision/'.
File 'sample_5.json' copied successfully from '../dataset/0701/vision/' to '../dataset/0701_clean/vision/'.
File 'sample_6.json' copied successfully from '../dataset/0701/vision/' to '../dataset/0701_clean/vision/'.
File 'sample_7.json' copied successfully from '../dataset/0701/vision/' to '../dataset/0701_clean/vision/'.
File 'sample_8.json' copied successfully from '../dataset/0701/vision/' to '../dataset/0701_clean/vision/'.
File 'sample_17.json' copied

In [9]:
directory = destination_directory_base + parent_dirs[0]
num_files_clean = len([f for f in os.listdir(directory) if os.path.isfile(os.path.join(directory, f))])
num_files_clean

290

In [10]:
with open("../dataset/0701_clean/meta/action_counts.json", "w") as write_file:
    json.dump(clean_dataset_action_counts, write_file)