# Object spawning

In [1]:
from lxml import etree

Function to create an empty world

In [2]:
def create_digital_tree_world(world_name='name'):
    """Build the base SDF document of a digital-tree world.

    The document has an ``<sdf version='1.6'>`` root holding one ``<world>``
    element named ``world_name``. The world is filled, in order, with the
    fixed fragments listed below: light, physics, scene, spherical
    coordinates, state, gui, gravity, magnetic field, atmosphere and the
    ``digital_tree`` model.

    Parameters
    ----------
    world_name : str
        Value of the ``name`` attribute of the ``<world>`` element.

    Returns
    -------
    lxml.etree._Element
        The ``<sdf>`` root element.
    """
    sdf_root = etree.Element("sdf", version='1.6')
    world_element = etree.SubElement(sdf_root, 'world', name=world_name)

    # Each entry is a standalone XML document that becomes one child of <world>.
    # NOTE(review): the <state> fragment hard-codes world_name='default'
    # regardless of the `world_name` argument -- confirm this is intended.
    world_fragments = (
        """
        <light name='sun' type='directional'>
            <cast_shadows>1</cast_shadows>
            <pose frame=''>0 0 10 0 -0 0</pose>
            <diffuse>1 1 1 1</diffuse>
            <specular>0.2 0.2 0.2 1</specular>
            <attenuation>
                <range>1000</range>
                <constant>0.9</constant>
                <linear>0.01</linear>
                <quadratic>0.001</quadratic>
            </attenuation>
            <direction>-0.5 -0.5 -1</direction>
        </light>
        """,
        """
        <physics name='default_physics' default='0' type='ode'>
            <max_step_size>0.001</max_step_size>
            <real_time_factor>1</real_time_factor>
            <real_time_update_rate>1000</real_time_update_rate>
        </physics>
        """,
        """ 
        <scene>
            <ambient>1 1 1 1</ambient>
            <background>0.0 0.0 0.0 1</background>
            <shadows>0</shadows>
        </scene>
        """,
        """ 
        <spherical_coordinates>
            <surface_model>EARTH_WGS84</surface_model>
            <latitude_deg>0.00</latitude_deg>
            <longitude_deg>0.00</longitude_deg>
            <elevation>0</elevation>
            <heading_deg>0</heading_deg>
        </spherical_coordinates>
        """,
        """ 
        <state world_name='default'>
            <sim_time>203 490000000</sim_time>
            <real_time>168 470627290</real_time>
            <wall_time>1596490486 155571625</wall_time>
            <iterations>167975</iterations>
            <light name='sun'>
                <pose frame=''>0 0 10 0 -0 0</pose>
            </light>
            <light type="directional" name="light1">
                <pose>0 0 50 0 0 0</pose>
                <diffuse>0.5 0.5 0.5 1</diffuse>
                <specular>1 1 1 0</specular>
                <direction>1 0 0.5</direction>
            </light>
         
            <model name='digital_tree'>
                <pose frame=''>0 0 0 0 0 1.57</pose>
                <scale>1 1 1</scale>
                <link name='digital_tree_link'>
                <!-- TO MODIFY THE POSITION OF THE OBJ ON THE LOCAL MAP USE THIS POSE -->
                <pose frame=''> 0 0 0 0 0 3.14</pose>
                <velocity>0 0 0 0 0 0</velocity>
                <acceleration>0 0 0 0 0 0</acceleration>
                <wrench>0 0 0 0 -0 0</wrench>
                </link>
            </model>
        </state>
        """,
        """ 
        <gui fullscreen='0'>
            <camera name='user_camera'>
                <pose frame=''>0 5 1 0 0 -1.57</pose>
                <view_controller>orbit</view_controller>
                <projection_type>perspective</projection_type>
            </camera>
        </gui>
        """,
        """
        <gravity>0 0 -9.8</gravity>
        """,
        """
        <magnetic_field>6e-06 2.3e-05 -4.2e-05</magnetic_field>
        """,
        """
        <atmosphere type='adiabatic'/>
        """,
        """ 
        <model name='digital_tree'>
                <link name='digital_tree_link'>
                    <pose frame=''>0 0 0 0 -0 0</pose>
                    <inertial>
                    <pose frame=''>0 0 0 0 -0 0</pose>
                    <mass>50</mass>
                    <inertia>
                        <ixx>1000</ixx>
                        <ixy>1000</ixy>
                        <ixz>1000</ixz>
                        <iyy>1000</iyy>
                        <iyz>1000</iyz>
                        <izz>1000</izz>
                    </inertia>
                    </inertial>
                    <collision name='digital_tree_link_collision'>
                    <pose frame=''>0 0 0 0 -0 0</pose>
                    <geometry>
                        <mesh>
                        <scale>1 1 1</scale>
                        <uri>model://digital_tree/meshes/digital_tree.dae</uri>
                        </mesh>
                    </geometry>
                    <max_contacts>10</max_contacts>
                    <surface>
                        <contact>
                        <ode/>
                        </contact>
                        <bounce/>
                        <friction>
                        <torsional>
                            <ode/>
                        </torsional>
                        <ode/>
                        </friction>
                    </surface>
                    </collision>
                    <visual name='digital_tree_link_visual'>
                    <pose frame=''>0 0 0 0 -0 0</pose>
                    <geometry>
                        <mesh>
                        <scale>1 1 1</scale>
                        <uri>model://digital_tree/meshes/digital_tree.dae</uri>
                        </mesh>
                    </geometry>
                    </visual>
                    <self_collide>0</self_collide>
                    <enable_wind>0</enable_wind>
                    <kinematic>0</kinematic>
                </link>
                <static>1</static>
                <pose frame=''>0 0 0 0 -0 0</pose>
            </model>
        """,
    )

    # Parse every fragment and attach it to <world>, preserving list order
    world_element.extend(etree.fromstring(fragment) for fragment in world_fragments)

    return sdf_root


function to update the world `<state>` tag with the spawned object (apple, tennis ball)

In [3]:
def create_xml_state_model(objID, xyz_up=(0, 0, 0), scale=(1, 1, 1)):
    """Return the ``<model>`` XML snippet that goes inside the world ``<state>``.

    Parameters
    ----------
    objID : object
        Identifier of the spawned object. Its string form is used as the
        model name and, with a ``_link`` suffix, as the link name.
    xyz_up : sequence of 3 numbers
        Values written as the first three components of the link pose.
    scale : sequence of 3 numbers
        Values written into the ``<scale>`` element.

    Returns
    -------
    str
        XML text for a single ``<model>`` element.
    """
    # Local import keeps this cell runnable on its own.
    from xml.sax.saxutils import escape

    # The id ends up inside single-quoted XML attributes, so markup characters
    # and quotes must be escaped or the snippet would not parse.
    safe_id = escape(str(objID), {"'": "&apos;", '"': "&quot;"})

    return f"""
          <model name='{safe_id}'>
            <pose frame=''>0 0 0 0 0 0</pose>
            <scale>{scale[0]} {scale[1]} {scale[2]}</scale>
            <link name='{safe_id}_link'>
              <!-- TO MODIFY THE POSITION OF THE OBJ ON THE LOCAL MAP USE THIS POSE -->
              <pose frame=''> {xyz_up[0]} {xyz_up[1]} {xyz_up[2]} 0 0 0</pose>
              <velocity>0 0 0 0 0 0</velocity>
              <acceleration>0 0 0 0 0 0</acceleration>
              <wrench>0 0 0 0 -0 0</wrench>
            </link>
          </model>"""

Functions to update the digital tree: compute the scale factors of a spawned object and build its `<model>` definition

In [4]:
def obj_scale(size=(1, 1, 1), model_size=(0.060, 0.060, 0.060)):
    """Compute per-axis scale factors that resize a mesh to ``size``.

    Parameters
    ----------
    size : sequence of 3 numbers
        Target object size along x, y and z.
    model_size : sequence of 3 numbers, optional
        Size of the unscaled mesh along x, y and z, in the same unit as
        ``size``. Defaults to 0.060 on every axis.

    Returns
    -------
    tuple
        ``(k_x, k_y, k_z)`` where each factor is ``size / model_size`` for
        that axis, rounded to two decimals.
    """
    return tuple(round(size[axis] / model_size[axis], 2) for axis in range(3))

In [5]:
def update_tree_model(objID, ObjTag, mass, rgba=(0.51, 0.75, 0, 1)):
    """Return the ``<model>`` XML definition of a spawned object.

    Parameters
    ----------
    objID : object
        Identifier of the object. Its string form names the model, and
        prefixes the link, collision and visual names.
    ObjTag : str
        Object class; selects the mesh. Supported: ``'Tennis_ball'``, ``'Apple'``.
    mass : number
        Value written into ``<mass>``.
    rgba : sequence of 4 numbers
        Written into the ``<ambient>`` material colour only; diffuse,
        specular and emissive are fixed values.

    Returns
    -------
    str
        XML text for a single ``<model>`` element.

    Raises
    ------
    KeyError
        If ``ObjTag`` has no registered mesh.
    """
    # Local import keeps this cell runnable on its own.
    from xml.sax.saxutils import escape

    # Mesh location (appended to the "model:" scheme) for each supported tag
    meshes_collection = {
        'Tennis_ball': '//tennis_ball/meshes/TennisBall.dae',
        'Apple': '//apple/meshes/apple.dae',
    }
    if ObjTag not in meshes_collection:
        raise KeyError(
            f"Unsupported ObjTag {ObjTag!r}; expected one of {sorted(meshes_collection)}"
        )
    mesh_uri = f"model:{meshes_collection[ObjTag]}"

    # The id ends up inside single-quoted XML attributes, so markup characters
    # and quotes must be escaped or the snippet would not parse.
    safe_id = escape(str(objID), {"'": "&apos;", '"': "&quot;"})

    return f"""
            <model name='{safe_id}'>
                <link name='{safe_id}_link'>
                    <pose frame=''>0 0 0 0 -0 0</pose>
                    <inertial>
                    <pose frame=''>0 0 0 0 -0 0</pose>
                    <mass>{mass}</mass>
                    <inertia>
                        <ixx>1000</ixx>
                        <ixy>1000</ixy>
                        <ixz>1000</ixz>
                        <iyy>1000</iyy>
                        <iyz>1000</iyz>
                        <izz>1000</izz>
                    </inertia>
                    </inertial>
                    <collision name='{safe_id}_link_collision'>
                    <pose frame=''>0 0 0 0 -0 0</pose>
                    <geometry>
                        <mesh>
                        <scale>1 1 1</scale>
                        <uri>{mesh_uri}</uri>
                        </mesh>
                    </geometry>
                    <max_contacts>10</max_contacts>
                    <surface>
                        <contact>
                        <ode/>
                        </contact>
                        <bounce/>
                        <friction>
                        <torsional>
                            <ode/>
                        </torsional>
                        <ode/>
                        </friction>
                    </surface>
                    </collision>
                    <visual name='{safe_id}_link_visual'>
                    <pose frame=''>0 0 0 0 -0 0</pose>
                    <geometry>
                        <mesh>
                        <scale>1 1 1</scale>
                        <uri>{mesh_uri}</uri>
                        </mesh>
                    </geometry>
                    <material>
                        <ambient>{rgba[0]} {rgba[1]} {rgba[2]} {rgba[3]}</ambient>
                        <diffuse>0.7 0.9 0 1</diffuse>
                        <specular>0.2 0.2 0.2 64</specular>
                        <emissive>0.1 0 0.1 1</emissive>
                    </material>
                    </visual>
                    <self_collide>0</self_collide>
                    <enable_wind>0</enable_wind>
                    <kinematic>0</kinematic>
                </link>
                <static>1</static>
                <pose frame=''>0 0 0 0 -0 0</pose>
            </model>
            """

# Feature extraction

In [6]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import json
import cv2
import os

Import the ground-truth data

In [7]:
# Load the ground-truth table and index it by its `item` column
GROUND_TRUTH_DF = pd.read_csv('../data/ground_truth_data.csv').set_index('item')
GROUND_TRUTH_DF

Unnamed: 0_level_0,0_0_id,0_0_setup,0_0_d1,0_0_d2,0_0_position,0_0_x,0_0_level,0_0_y,0_0_ydef,0_0_z,...,2_2_position,2_2_x,2_2_level,2_2_y,2_2_ydef,2_2_z,2_2_zdef,2_2_camh1,2_2_camh2,2_2_camh3
item,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,Tb,2d,65.6,65.6,L,-500.0,1.0,2000.0,1887.0,1516.0,...,L,-200.0,2.0,1700.0,1590.0,,,1100.0,1600.0,1850.0
2,Tb,2d,65.6,65.6,M,0.0,1.0,2000.0,1887.0,1516.0,...,M,0.0,2.0,1700.0,1590.0,,,1100.0,1600.0,1850.0
3,Tb,2d,65.6,65.6,R,500.0,1.0,2000.0,1887.0,1516.0,...,R,200.0,2.0,1700.0,1590.0,,,1100.0,1600.0,1850.0
4,Tb,2d,65.35,65.81,L,-500.0,2.0,1700.0,1587.0,1516.0,...,L,-200.0,3.0,1400.0,1290.0,,,1100.0,1600.0,1850.0
5,Tb,2d,65.35,65.81,M,0.0,2.0,1700.0,1587.0,1516.0,...,M,0.0,3.0,1400.0,1289.0,,,1100.0,1600.0,1850.0
6,Tb,2d,65.35,65.81,R,500.0,2.0,1700.0,1587.0,1516.0,...,R,200.0,3.0,1400.0,1288.0,,,1100.0,1600.0,1850.0
7,Tb,2d,65.41,65.6,L,-500.0,3.0,1400.0,1287.0,1516.0,...,L,-200.0,4.0,1100.0,986.0,,,1100.0,1600.0,1850.0
8,Tb,2d,65.41,65.6,M,0.0,3.0,1400.0,1287.0,1516.0,...,M,0.0,4.0,1100.0,992.0,,,1100.0,1600.0,1850.0
9,Tb,2d,65.41,65.6,R,500.0,3.0,1400.0,1287.0,1516.0,...,R,200.0,4.0,1100.0,989.0,,,1100.0,1600.0,1850.0
10,Tb,2d,65.62,65.7,L,-500.0,4.0,1100.0,987.0,1516.0,...,,,,,,,,,,


In [9]:
GROUND_TRUTH_ATTRIBUTES = GROUND_TRUTH_DF.columns  # column labels of the ground-truth table

In [10]:
# Root folders of the acquisitions; the processing cell lists the entries of
# each folder and looks for colour, depth and annotation files inside them.
IMG2D_PATH = '../data/2D'
IMG3D_PATH = '../data/3D'

In [11]:
# Set Pandas display options to show all columns and rows while printing
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [34]:
# manhattan

def manhattan_distance(xyz_up_ref, xyz_up_estimated):
    '''
    Manhattan (L1) distance between two 3D points.

    Both points are first rounded to 2 decimal places, then the absolute
    differences of their first three components are summed.

    It takes input coordinates in meters
    return error in meters
    '''
    decimals = 2  # number of decimal places kept before comparing

    ref = np.round(xyz_up_ref, decimals)
    est = np.round(xyz_up_estimated, decimals)

    # absolute error on each of the three axes
    per_axis_error = [np.abs(est[axis] - ref[axis]) for axis in range(3)]

    return np.sum(per_axis_error)

In [35]:
# euclidean

def euclidean_distance(xyz_up_ref, xyz_up_estimated):
    '''
    Euclidean (L2) distance between two 3D points.

    Both points are first rounded to 2 decimal places, then the square root
    of the summed squared differences of their first three components is
    returned.

    It takes input coordinates in meters
    return error in meters
    '''
    decimals = 2  # number of decimal places kept before comparing

    ref = np.round(xyz_up_ref, decimals)
    est = np.round(xyz_up_estimated, decimals)

    # squared error on each of the three axes
    squared_errors = np.array([(est[axis] - ref[axis]) ** 2 for axis in range(3)])

    return np.sqrt(np.sum(squared_errors))


In [16]:
def get_rgba_color(matrix: np.ndarray, ObjTag):
    """Estimate the RGBA colour of an object from its image crop.

    Parameters
    ----------
    matrix : np.ndarray
        RGB crop of the object. Values are divided by 255, so an 8-bit image
        is presumably expected -- TODO confirm.
    ObjTag : str
        Object class. ``'Tennis_ball'`` bypasses the image and returns a
        fixed colour.

    Returns
    -------
    list or np.ndarray
        ``[1., 1., 0, 1]`` for tennis balls; otherwise the per-channel
        median of the central window of the crop.
    """
    # Tennis balls always get the same fixed colour, independent of the image
    if ObjTag == 'Tennis_ball':
        return [1., 1., 0, 1]

    # convert to rgba and rescale by 255
    rgba_mat = cv2.cvtColor(matrix, cv2.COLOR_RGB2RGBA).astype('float64')
    rgba_mat /= 255.

    h, w, _ = rgba_mat.shape
    x = w // 2
    y = h // 2

    # Half-size of the central window: one sixth of each side, but at least one
    # pixel. Without the lower bound a crop smaller than 6 px gives an empty
    # slice and an all-NaN colour.
    half_h = max(1, h // 6)
    half_w = max(1, w // 6)

    # max(0, ...) stops a negative start index from wrapping around on tiny crops
    window = rgba_mat[max(0, y - half_h): y + half_h,
                      max(0, x - half_w): x + half_w]

    return np.nanmedian(window, axis=(0, 1))


In [29]:

def digital_twin(root, world_name=None, ObjID=None, ObjTag=None, xyz_up=[1,1,1], size=[10,10,10], mass=10, rgba_color=[.5, 0.5, 0.5, 1.]):
    """Add one object to the SDF tree and write the world file.

    The object is registered twice: as a state entry under the first
    ``<state>`` element found in ``root``, and as a model definition under
    the ``<world>`` child of ``root``. The whole tree is then written to
    ``../worlds/digital_tree_{world_name}.world``, so the file is rewritten
    on every call.

    Parameters
    ----------
    root : lxml element
        SDF root element; modified in place.
    world_name : str
        Suffix of the output file name.
    ObjID : object
        Identifier used for the model and link names.
    ObjTag : str
        Object class, forwarded to ``update_tree_model``.
    xyz_up : sequence of 3 numbers
        Position written into the state pose.
    size : sequence of 3 numbers
        Object size, converted to scale factors by ``obj_scale``.
    mass : number
        Mass written into the model definition.
    rgba_color : sequence of 4 numbers
        Colour forwarded to ``update_tree_model``.
    """
    # Locate the two insertion points of the document
    state_element = root.find('.//state')
    world_element = root.find('world')

    # Scale factors that resize the mesh to the requested size
    scale_factors = obj_scale(size)

    # State entry: pose and scale of the object
    state_entry = etree.fromstring(
        create_xml_state_model(ObjID, xyz_up=xyz_up, scale=scale_factors)
    )
    state_element.append(state_entry)

    # Model definition: mesh, mass and colour of the object
    model_entry = etree.fromstring(update_tree_model(ObjID, ObjTag, mass, rgba_color))
    world_element.append(model_entry)

    # Serialise the updated document
    document = etree.ElementTree(root)
    output_path = f'../worlds/digital_tree_{world_name}.world'
    with open(output_path, 'wb') as world_file:
        document.write(world_file, pretty_print=True, encoding='utf-8', xml_declaration=True)

In [27]:
def gazebo_transfomration_matrix(xyz_up):
    '''
    Mirror a 3D point along the X axis for Gazebo.

    params:
        xyz_up = np.ndarray vector

    return:
        np.ndarray with the first component negated and the other two
        unchanged
    '''
    # homogeneous coordinates: [x, y, z, 1]
    homogeneous = np.append(xyz_up, 1)

    # 4x4 matrix that flips the sign of X only
    flip_x = np.diag([-1, 1, 1, 1])

    # apply the transformation and drop the homogeneous component
    return (homogeneous @ flip_x)[:3]

In [38]:
# Positioning evaluation: for every acquisition subfolder, estimate the position
# of each annotated object relative to the trunk, compare it with the ground
# truth, add the object to the digital-twin world and collect the errors in `df`.

DELTA_TRUNK_POSITION_FROM_SOIL = 125  #mm

# Axis permutation used with np.dot(vector, matrix): (x, y, z) -> (x, z, y)
XYZ_TO_UP = [[1, 0, 0], [0, 0, 1], [0, 1, 0]]

# export dataframe
df = pd.DataFrame(columns=['image', 'setup', 'ObjType', 'Position', 'Xgt', 'Ygt', 'Zgt', 'Xestimated', 'Yestimated', 'Zestimated', 'Manhattan', 'Euclidean'])
rows = 0

folders = [IMG2D_PATH, IMG3D_PATH]

for folder in folders:
    # sorted(): os.listdir order is arbitrary; sorting makes the run reproducible
    for subfolder in sorted(os.listdir(os.path.join(os.getcwd(), folder))):

        subfolder_path = os.path.join(os.getcwd(), folder, subfolder)
        if not os.path.isdir(subfolder_path):
            # stray files next to the acquisition folders are ignored
            continue

        world_name = f'{os.path.basename(folder)}_{subfolder}'
        print(f'{world_name= }')

        # Init the digital twin
        root = create_digital_tree_world(world_name=f'{world_name}')

        files = os.listdir(subfolder_path)

        # pre-set variable
        image_orientation = None

        # colour image: first file whose name contains 'color'
        col_file = [file for file in files if file.find('color') != -1][0]
        im_bgr = cv2.imread(os.path.join(folder, subfolder, col_file))
        if im_bgr is None:
            # cv2.imread returns None instead of raising on unreadable files
            raise FileNotFoundError(f'Cannot read colour image: {os.path.join(folder, subfolder, col_file)}')
        im_rgb = im_bgr[:, :, ::-1]  # RGB [8-bit]

        # depth image: first file whose name contains 'depth'
        depth_file = [file for file in files if file.find('depth') != -1][0]
        depth_file = os.path.join(folder, subfolder, depth_file)
        im_depth = cv2.imread(depth_file, cv2.IMREAD_ANYDEPTH)  # grayscale [mm]
        if im_depth is None:
            raise FileNotFoundError(f'Cannot read depth image: {depth_file}')

        # annotations: first file whose name contains 'annot'
        annot_file = [file for file in files if file.find('annot') != -1][0]
        annot_file = os.path.join(folder, subfolder, annot_file)
        with open(annot_file, 'r') as json_file:
            annot = json.load(json_file)

        # --- METADATA ---

        # from the image name get the trial and repetition code -- this is needed to access the ground truth data
        color_fn_split = str(col_file).split('_')

        trial_code = f'{color_fn_split[1]}_{color_fn_split[2]}'

        # SET CAMERA CHARACTERISTICS FOR PIXEL RESOLUTION
        W = im_rgb.shape[1]
        H = im_rgb.shape[0]
        if im_rgb.shape[0] < im_rgb.shape[1]:
            image_orientation = 'H'
            HFOV = 69
            VFOV = 42
        else:
            image_orientation = 'V'
            HFOV = 42
            VFOV = 69

        # --- PROCESSING ---

        # 1. get trunk location and distance from the camera
        trunk_pos = None
        trunk_data = {'origin': None, 'tl': [], 'br': []}
        for Obj in annot['objects']:
            if Obj['classTitle'] != 'Trunk':
                continue

            # bounding box corners: top-left and bottom-right, absolute pixels
            trunk_tl, trunk_br = Obj['points']['exterior']
            trunk_data['tl'] = trunk_tl
            trunk_data['br'] = trunk_br

            # NOTE(review): the origin is only defined for portrait images; for
            # landscape images it stays None (original behaviour).
            if image_orientation != 'H':
                trunk_data['origin'] = (int(trunk_data['tl'][0]), trunk_data['tl'][1])  # pixels
            trunk_pos = True
            break
        else:
            # The previous `while trunk_pos == None` loop never terminated here
            raise ValueError(f'No Trunk annotation found in {annot_file}')

        # get the trunk-camera distance
        trunk_cam_dist_mm = np.nanmedian(im_depth[trunk_data['tl'][1] : trunk_data['br'][1],
                                                  trunk_data['tl'][0] : trunk_data['br'][0]])

        pix_res_mm = np.sqrt((4 * trunk_cam_dist_mm**2 * np.tan(np.deg2rad(VFOV)/2) * np.tan(np.deg2rad(HFOV) / 2))/ (H * W))  # mm/pix

        print(f'{trunk_cam_dist_mm= } | {pix_res_mm= }')

        # 2. process each hanging object
        for Obj in annot['objects']:

            if Obj['classTitle'] == 'Trunk':
                continue

            # obj position
            ObjPosition = Obj['description']
            print(f'{ObjPosition= }')

            # object type
            ObjTag = Obj['classTitle']
            print(f'{ObjTag= }')

            # object absolute coordinates [x, y]
            tl, br = Obj['points']['exterior']

            print(f'\n{ObjTag= } - {ObjPosition}')

            # get object related ground-truth data as a series
            bbox_gtruth = GROUND_TRUTH_DF.loc[int(ObjPosition), :]

            # keep only the attributes of this trial-repetition
            attributes = [attr for attr in bbox_gtruth.index if attr.find(trial_code) != -1]
            bbox_gtruth = bbox_gtruth.loc[attributes].copy()

            # REFERENCE POSITION (XYZ)gt
            x_gt = bbox_gtruth.loc[f'{trial_code}_x'] / 1000  # proper x [m]
            # TODO: is 'def' the right attribute?
            y_gt = bbox_gtruth.loc[f'{trial_code}_ydef'] / 1000  # height [m]
            # NOTE(review): the original derived z_gt from `<trial>_zdef` and the
            # trunk-camera distance, then immediately overwrote it with 0. Only the
            # effective value is kept.
            z_gt = 0

            # CRS change
            xyz_gt = [x_gt, y_gt, z_gt]

            # XYZ_up through transformation matrix
            xyz_up_ref = np.dot(np.array(xyz_gt), XYZ_TO_UP)

            # POSITION ESTIMATION
            # trunk position to normalize the positions
            if trunk_data['origin'] is None:
                # previously failed with "'NoneType' object is not subscriptable"
                raise ValueError(f'Trunk origin is undefined for image orientation {image_orientation!r} ({col_file})')
            trunk_x = trunk_data['origin'][0]
            trunk_y = trunk_data['origin'][1]

            print('TRUNK: ', trunk_x, trunk_y)

            # bbox centre and size in pixels
            xc = int(tl[0] + (br[0] - tl[0]) / 2)
            yc = int(tl[1] + (br[1] - tl[1]) / 2)

            w = int((br[0] - tl[0]))
            h = int((br[1] - tl[1]))

            # bbox for color estimation
            bbox_im = im_rgb[yc - h // 2 : yc + h // 2,
                             xc - w // 2 : xc + w // 2].copy()

            # Make relative coordinates
            # the trunk is in the bottom of the image
            x_estimated = xc - trunk_x  # pixels
            y_estimated = trunk_y - yc  # pixels

            # get obj-camera distance at the center of the bbox
            obj_cam_dist = np.nanmedian(im_depth[yc - h // 4 : yc + h // 4,
                                                 xc - w // 4 : xc + w // 4])  # mm

            # NOTE: to size the fruit it is good to use a pixel resolution related to the
            # object-camera distance, but for positioning the pixel resolution must be
            # computed from the trunk-camera distance (done above). Otherwise errors of up
            # to 15 mm occur in the position of the fruits.

            # position estimation
            x_estimated = (x_estimated * pix_res_mm) / 1000  # m

            y_estimated = ((y_estimated * pix_res_mm) + DELTA_TRUNK_POSITION_FROM_SOIL) / 1000  # m

            # object-trunk distance
            z_estimated = (trunk_cam_dist_mm - obj_cam_dist) / 1000  # m

            # collection CRS
            xyz_estimated = [x_estimated, y_estimated, z_estimated]

            # XYZ_up through transformation matrix
            xyz_up_estimated = np.dot(np.array(xyz_estimated), XYZ_TO_UP)

            print(f'[UP ref] {xyz_up_ref} --- [UP estimated] {xyz_up_estimated}')

            # Manhattan
            manhattan_dist = manhattan_distance(xyz_up_estimated, xyz_up_ref)  # meters

            # Euclidean
            euclidean_dist = euclidean_distance(xyz_up_estimated, xyz_up_ref)  # meters

            # color
            rgba_color = get_rgba_color(bbox_im, ObjTag)
            print(f'{rgba_color= }')

            # update dataframe
            df.loc[rows] = ([col_file, os.path.basename(folder), ObjTag, ObjPosition, x_gt, y_gt, z_gt, x_estimated, y_estimated, z_estimated, manhattan_dist, euclidean_dist])
            rows += 1

            if ObjTag != 'Tennis_ball':
                ObjMass = (bbox_gtruth.loc[f'{trial_code}_w'] / 1000) / 9.81  # kg

                Obj_d1 = bbox_gtruth.loc[f'{trial_code}_d1'] / 1000  # m
                Obj_d2 = bbox_gtruth.loc[f'{trial_code}_d2'] / 1000  # m
                Obj_h = bbox_gtruth.loc[f'{trial_code}_h'] / 1000  # m
                ObjSize = (Obj_d1, Obj_d2, Obj_h)
            else:
                # NOTE(review): 0.0058 kg is 5.8 g -- confirm this is not meant to be 0.058
                ObjMass = 0.0058

                # the first diameter is used on all three axes
                Obj_d1 = bbox_gtruth.loc[f'{trial_code}_d1'] / 1000  # m
                Obj_d2 = Obj_d1
                Obj_h = Obj_d1
                ObjSize = (Obj_d1, Obj_d2, Obj_h)

            # transform the coords for the digital twin -- flip X axis
            xyz_up_estimated = gazebo_transfomration_matrix(xyz_up=xyz_up_estimated)

            digital_twin(root, world_name=world_name, ObjID=ObjPosition, ObjTag=ObjTag, xyz_up=xyz_up_estimated, size=ObjSize, mass=ObjMass, rgba_color=rgba_color)

# save file
df.to_csv('../data/IEEE_2024_positioning_evaluation.csv', index=False)

world_name= '2D_3_1_2_1'
trunk_cam_dist_mm= np.float64(1531.0) | pix_res_mm= np.float64(1.0921909213564789)
ObjPosition= '1'
ObjTag= 'Apple'

ObjTag= 'Apple' - 1
TRUNK:  547 1770
[UP ref] [-0.5    0.     1.892] --- [UP estimated] [-0.5176985   0.084       1.82554126]
rgba_color= array([0.96862745, 0.53333333, 0.41960784, 1.        ])
ObjPosition= '2'
ObjTag= 'Apple'

ObjTag= 'Apple' - 2
TRUNK:  547 1770
[UP ref] [0.    0.    1.892] --- [UP estimated] [0.         0.081      1.83100222]
rgba_color= array([0.79607843, 0.67058824, 0.31764706, 1.        ])
ObjPosition= '3'
ObjTag= 'Apple'

ObjTag= 'Apple' - 3
TRUNK:  547 1770
[UP ref] [0.5   0.    1.892] --- [UP estimated] [0.50786878 0.087      1.81025059]
rgba_color= array([0.87647059, 0.7627451 , 0.3254902 , 1.        ])
ObjPosition= '4'
ObjTag= 'Apple'

ObjTag= 'Apple' - 4
TRUNK:  547 1770
[UP ref] [-0.5    0.     1.592] --- [UP estimated] [-0.51660631  0.081       1.51535904]
rgba_color= array([0.87843137, 0.2745098 , 0.19215686, 1.   