In [None]:
!git clone https://github.com/google-research-datasets/Objectron

Cloning into 'Objectron'...
remote: Enumerating objects: 199, done.[K
remote: Counting objects: 100% (39/39), done.[K
remote: Compressing objects: 100% (10/10), done.[K
remote: Total 199 (delta 35), reused 29 (delta 29), pack-reused 160[K
Receiving objects: 100% (199/199), 40.03 MiB | 34.36 MiB/s, done.
Resolving deltas: 100% (90/90), done.


In [None]:
import numpy as np
import os
import requests
import struct
import sys
import subprocess
import cv2

from IPython.core.display import display,HTML
import matplotlib.pyplot as plt

# I'm running this Jupyter notebook locally. Manually import the objectron module.
module_path = os.path.abspath(os.path.join('Objectron/'))
if module_path not in sys.path:
    sys.path.append(module_path)
    
# The AR Metadata captured with each frame in the video
from objectron.schema import a_r_capture_metadata_pb2 as ar_metadata_protocol
# The annotations are stored in protocol buffer format. 
from objectron.schema import object_pb2 as object_protocol
from objectron.schema import annotation_data_pb2 as annotation_protocol
import objectron.dataset.box as Box

In [None]:
# Returns only the specified frames from the video
def grab_frames(video_file, frame_ids):
  """Grab an image frame from the video file."""
  frames = []
  capture = cv2.VideoCapture(video_file)
  height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
  width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
  capture.release()
  frame_size = width * height * 3

  for frame_id in frame_ids:
    frame_filter = r'select=\'eq(n\,{:d})\''.format(frame_id)
    command = [
        'ffmpeg', '-i', video_file, '-f', 'image2pipe', '-vf', frame_filter,
        '-pix_fmt', 'rgb24', '-vcodec', 'rawvideo', '-vsync', 'vfr', '-'
    ]
    pipe = subprocess.Popen(
        command, stdout=subprocess.PIPE, bufsize = 151 * frame_size)
    current_frame = np.frombuffer(
        pipe.stdout.read(frame_size), dtype='uint8').reshape(width, height, 3)
    pipe.stdout.flush()

    frames.append(current_frame)
  return frames

In [None]:
# Get the geometry data for all frames
def get_geometry_data(geometry_filename):
    sequence_geometry = []
    with open(geometry_filename, 'rb') as pb:
        proto_buf = pb.read()

        i = 0
        frame_number = 0

        while i < len(proto_buf):
            # Read the first four Bytes in little endian '<' integers 'I' format
            # indicating the length of the current message.
            msg_len = struct.unpack('<I', proto_buf[i:i + 4])[0]
            i += 4
            message_buf = proto_buf[i:i + msg_len]
            i += msg_len
            frame_data = ar_metadata_protocol.ARFrame()
            frame_data.ParseFromString(message_buf)


            transform = np.reshape(frame_data.camera.transform, (4, 4))
            projection = np.reshape(frame_data.camera.projection_matrix , (4, 4))
            extrinsics = np.reshape(frame_data.camera.view_matrix , (4, 4))
            intrinsics = np.reshape(frame_data.camera.intrinsics, (3, 3))
            position = transform[:3, -1]

            current_points = [np.array([v.x, v.y, v.z])
                              for v in frame_data.raw_feature_points.point]
            current_points = np.array(current_points)

            sequence_geometry.append((transform, projection, extrinsics, intrinsics, current_points))
    return sequence_geometry


In [None]:
# Get the annotation data for all frames
def get_frame_annotation(annotation_filename):
    """Grab an annotated frame from the sequence."""
    result = []
    instances = []
    with open(annotation_filename, 'rb') as pb:
        sequence = annotation_protocol.Sequence()
        sequence.ParseFromString(pb.read())

        object_id = 0
        object_rotations = []
        object_translations = []
        object_scale = []
        num_keypoints_per_object = []
        object_categories = []
        annotation_types = []
        
        # Object instances in the world coordinate system, These are stored per sequence, 
        # To get the per-frame version, grab the transformed keypoints from each frame_annotation
        for obj in sequence.objects:
            rotation = np.array(obj.rotation).reshape(3, 3)
            translation = np.array(obj.translation)
            scale = np.array(obj.scale)
            points3d = np.array([[kp.x, kp.y, kp.z] for kp in obj.keypoints])
            instances.append((rotation, translation, scale, points3d))
        
        # Grab teh annotation results per frame
        for data in sequence.frame_annotations:
            # Get the camera for the current frame. We will use the camera to bring
            # the object from the world coordinate to the current camera coordinate.
            transform = np.array(data.camera.transform).reshape(4, 4)
            view = np.array(data.camera.view_matrix).reshape(4, 4)
            intrinsics = np.array(data.camera.intrinsics).reshape(3, 3)
            projection = np.array(data.camera.projection_matrix).reshape(4, 4)

            keypoint_size_list = []
            object_keypoints_2d = []
            object_keypoints_3d = []
            for annotations in data.annotations:
                num_keypoints = len(annotations.keypoints)
                keypoint_size_list.append(num_keypoints)
                for keypoint_id in range(num_keypoints):
                    keypoint = annotations.keypoints[keypoint_id]
                    object_keypoints_2d.append((keypoint.point_2d.x, keypoint.point_2d.y, keypoint.point_2d.depth))
                    object_keypoints_3d.append((keypoint.point_3d.x, keypoint.point_3d.y, keypoint.point_3d.z))
                num_keypoints_per_object.append(num_keypoints)
                object_id += 1
            result.append((object_keypoints_2d, object_keypoints_3d, keypoint_size_list, view, projection))

    return result, instances

In [None]:
def get_x_y(points_2d, num_instances):
    x_min = []
    y_min = []
    x_max = []
    y_max = []

    for instance in range(num_instances):
        x_min.append(min(points_2d[instance][:, 0]))
        x_max.append(max(points_2d[instance][:, 0]))
        y_min.append(min(points_2d[instance][:, 1]))
        y_max.append(max(points_2d[instance][:, 1]))
    
    return x_min, y_min, x_max, y_max

In [None]:
import math

def plane_equation(p1, p2, p3):
    x1, y1, z1 = p1
    x2, y2, z2 = p2
    x3, y3, z3 = p3

    a1 = x2 - x1
    b1 = y2 - y1
    c1 = z2 - z1
    a2 = x3 - x1
    b2 = y3 - y1
    c2 = z3 - z1
    a = b1 * c2 - b2 * c1
    b = a2 * c1 - a1 * c2
    c = a1 * b2 - b1 * a2
    d = (-a * x1 - b * y1 - c * z1)

    return a, b, c, d

def point_to_plane_distance(p1, plane_p1, plane_p2, plane_p3):
    a, b, c, d = plane_equation(plane_p1, plane_p2, plane_p3)

    x1, y1, z1 = p1

    d = abs((a * x1 + b * y1 + c * z1 + d))
    e = (math.sqrt(a * a + b * b + c * c))

    return d/e

In [None]:
def convert_objectron_data_sunrgb3d(obj, index, max_no=750):
    public_url = "https://storage.googleapis.com/objectron"
    train_blob_path = public_url + "/v1/index/" + obj + "_annotations_train"
    train_video_ids = requests.get(train_blob_path).text
    train_video_ids = train_video_ids.split('\n')
    test_blob_path = public_url + "/v1/index/" + obj + "_annotations_test"
    test_video_ids = requests.get(test_blob_path).text
    test_video_ids = test_video_ids.split('\n')

    train_counter = 0
    test_counter = 0

    depth_points_no = 1

    image_path = '/content/drive/MyDrive/Objectron-to-SUNRGBD3D-format/sunrgbd/sunrgbd_trainval/image/'
    label_path = '/content/drive/MyDrive/Objectron-to-SUNRGBD3D-format/sunrgbd/sunrgbd_trainval/label_v1/'
    depth_path = '/content/drive/MyDrive/Objectron-to-SUNRGBD3D-format/sunrgbd/points/'
    calib_path = '/content/drive/MyDrive/Objectron-to-SUNRGBD3D-format/sunrgbd/sunrgbd_trainval/calib/'

    mean_x = 0
    mean_y = 0
    mean_z = 0
    counter = 0

    # # TRAIN DATASET: Download all videos for a dataset and process them one by one
    for id in train_video_ids:

        if train_counter > max_no:
            break

        video_filename = public_url + "/videos/" + id + "/video.MOV"
        metadata_filename = public_url + "/videos/" + id + "/geometry.pbdata"
        annotation_filename = public_url + "/annotations/" + id + ".pbdata"
        # video.content contains the video file.
        video = requests.get(video_filename)
        metadata = requests.get(metadata_filename)

        # Please refer to Parse Annotation tutorial to see how to parse the annotation files.
        annotation = requests.get(annotation_filename)

        # Save the files locally
        file = open("video.MOV", "wb")
        file.write(video.content)
        file.close()
        
        file = open("geometry.pbdata", "wb")
        file.write(metadata.content)
        file.close()
        
        file = open("annotation.pbdata", "wb")
        file.write(annotation.content)
        file.close()

        # Get the maximum number of frames from the video
        cap = cv2.VideoCapture("video.MOV")
        frames_no = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        print('[' + id + '] has: ' + str(frames_no) + ' frames')

        # Use ~1 frame per second
        frame_ids = []
        for frame in range(0, frames_no, 30):
            frame_ids.append(frame)

        # Update the counter
        # train_counter += len(frame_ids)

        # Get the frames from the video file
        frames = grab_frames("video.MOV", frame_ids)

        # Get the geometry data for all frames
        sequence_geometry = get_geometry_data('geometry.pbdata')

        # Get the annotation data for all frames
        annotation_data, instances = get_frame_annotation('annotation.pbdata')

        for i, frame in enumerate(frame_ids):

            print('Train counter: ' + str(train_counter))

            # Save the 3D depth points
            _, _, extrinsics, intrinsics, depth_points_3d = sequence_geometry[frame]
            # print('Depth points: ' + str(depth_points_3d.flatten().shape[0]))
            if depth_points_3d.flatten().shape[0] < depth_points_no:
                # print('Skip depth points')
                continue

            points_2d, points_3d, num_keypoints, frame_view_matrix, frame_projection_matrix = annotation_data[frame]

            # Number of detected instances
            num_instances = len(num_keypoints)

            if num_instances == 0:
                # print('num_instances is 0')
                # train_counter -= 1
                continue

            if len(np.array(points_3d)) % (num_instances * 3) != 0:
                # print('points_3d dim not divisible with num_instances points')
                # train_counter -= 1
                continue

            height, width, _ = frames[i].shape

            name_prefix = '0' * (6 - len(str(index))) + str(index)

            # print(name_prefix)

            # Write the index to train_data_idx
            with open("/content/train_data_idx.txt", "a") as myfile:
                myfile.write(str(index) + '\n')

            # Save the frame as an image
            cv2.imwrite(image_path + name_prefix + '.jpg', frames[i])

            # fig = plt.figure()
            # ax = fig.add_subplot(111, projection='3d')
            # ax.scatter(depth_points_3d[:, 0], depth_points_3d[:, 1], depth_points_3d[:, 2])

            # depth_len = int(depth_points_3d.flatten().shape[0] // 6)
            # print('shape: ' + str(depth_points_3d.flatten().shape[0]) + ' depth_len: ' + str(depth_len * 6))
            depth_points_3d.flatten()[:depth_points_no].astype(np.float32).tofile(depth_path + name_prefix + '.bin')

            # Save the calib file
            with open(calib_path + name_prefix + '.txt', "w") as calibFile:
                Rt = extrinsics[:3, :3].astype(np.float32)
                K = intrinsics.flatten().astype(np.float32)
                np.savetxt(calibFile, Rt, newline=' ')
                calibFile.write('\n')
                np.savetxt(calibFile, K, newline=' ')

            # # Save the annotation data
            f = open(label_path + name_prefix + '.txt', "w")

            # Compute the 2D bounding box
            points_2d = []
            # Now, let's compute the box's vertices in 3D, then project them back to 2D:
            for instance_id in range(num_instances):
                # The annotation contains the box's transformation and scale in world coordinate system
                # Here the instance_vertices_3d are the box vertices in the "BOX" coordinate, (i.e. it's a unit box)
                # and has to be transformed to the world coordinate.
                instance_rotation, instance_translation, instance_scale, instance_vertices_3d = instances[instance_id]

                box_transformation = np.eye(4)
                box_transformation[:3, :3] = np.reshape(instance_rotation, (3, 3))
                box_transformation[:3, -1] = instance_translation
                vertices_3d = instance_vertices_3d * instance_scale.T;
                # Homogenize the points
                vertices_3d_homg = np.concatenate((vertices_3d, np.ones_like(vertices_3d[:, :1])), axis=-1).T
                # Transform the homogenious 3D vertices with the box transformation
                box_vertices_3d_world = np.matmul(box_transformation, vertices_3d_homg) 

                # If we transform these vertices to the camera frame, we get the 3D keypoints in the annotation data
                # i.e. vertices_3d_cam == points_3d
                vertices_3d_cam = np.matmul(frame_view_matrix, box_vertices_3d_world)
                vertices_2d_proj = np.matmul(frame_projection_matrix, vertices_3d_cam)
                
                # Project the points
                points2d_ndc = vertices_2d_proj[:-1, :] / vertices_2d_proj[-1, :]
                points2d_ndc = points2d_ndc.T
                
                # Convert the 2D Projected points from the normalized device coordinates to pixel values
                x = points2d_ndc[:, 1]
                y = points2d_ndc[:, 0]
                points2d = np.copy(points2d_ndc)
                points2d[:, 0] = ((1 + x) * 0.5) * width
                points2d[:, 1] = ((1 + y) * 0.5) * height    
                points_2d.append(points2d.astype(int))
                # points2d are the projected 3D points on the image plane.


            # 2D bounding box
            x_min, y_min, x_max, y_max = get_x_y(points_2d, num_instances)

            # Change x_max and y_min to be compatible with the SUNRGBD3D format
            x_max = np.array(x_max) - np.array(x_min)
            y_max = np.array(y_max) - np.array(y_min)

            # Compute the 3D points in SUNRGBD3D format
            points_3d = np.array(points_3d)
            points_3d = points_3d.reshape(num_instances, -1, 3)
            # print('points_3d shape: ' + str(points_3d.shape))

            for instance_id in range(num_instances):
                if points_3d.shape[1] < 6:
                    continue

                # print('Here')

                centroid = points_3d[instance_id][0]

                p1 = points_3d[instance_id][1]
                p2 = points_3d[instance_id][2]
                p3 = points_3d[instance_id][3]
                p5 = points_3d[instance_id][5]

                # width
                w = point_to_plane_distance(centroid, p1, p2, p5)

                # length
                l = point_to_plane_distance(centroid, p1, p2, p3)

                # height
                h = point_to_plane_distance(centroid, p1, p3, p5)

                # We are not interested in the orientation part
                orientation = np.zeros((2))

                mean_x += w
                mean_y += l
                mean_z += h
                counter += 1

                # Write a line in the new annotation file
                f.write(obj + ' ')                            # data[0]

                f.write(str(int(x_min[instance_id])) + ' ')   # data[1]
                f.write(str(int(y_min[instance_id])) + ' ')   # data[2]
                f.write(str(int(x_max[instance_id])) + ' ')   # data[3]
                f.write(str(int(y_max[instance_id])) + ' ')   # data[4]

                f.write(str(float(centroid[0])) + ' ')        # data[5]
                f.write(str(float(centroid[1])) + ' ')        # data[6]
                f.write(str(float(centroid[2])) + ' ')        # data[7]

                f.write(str(float(w)) + ' ')                  # data[8]
                f.write(str(float(l)) + ' ')                  # data[9]
                f.write(str(float(h)) + ' ')                  # data[10]
                # f.write(str(float(h)) + ' ')                  # data[8]
                # f.write(str(float(l)) + ' ')                  # data[9]
                # f.write(str(float(w)) + ' ')                  # data[10]

                f.write(str(float(orientation[0])) + ' ')     # data[11]
                f.write(str(float(orientation[1])))           # data[12]

                f.write('\n')

            f.close()

            # Update index value
            index += 1
            train_counter += 1

    # TEST DATASET: Download all videos for a dataset and process them one by one
    for id in test_video_ids:

        if test_counter > max_no:
            break

        video_filename = public_url + "/videos/" + id + "/video.MOV"
        metadata_filename = public_url + "/videos/" + id + "/geometry.pbdata"
        annotation_filename = public_url + "/annotations/" + id + ".pbdata"
        # video.content contains the video file.
        video = requests.get(video_filename)
        metadata = requests.get(metadata_filename)

        # Please refer to Parse Annotation tutorial to see how to parse the annotation files.
        annotation = requests.get(annotation_filename)

        # Save the files locally
        file = open("video.MOV", "wb")
        file.write(video.content)
        file.close()
        
        file = open("geometry.pbdata", "wb")
        file.write(metadata.content)
        file.close()
        
        file = open("annotation.pbdata", "wb")
        file.write(annotation.content)
        file.close()

        # Get the maximum number of frames from the video
        cap = cv2.VideoCapture("video.MOV")
        frames_no = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        print('[' + id + '] has: ' + str(frames_no) + ' frames')

        # Use ~1 frame per second
        frame_ids = []
        for frame in range(0, frames_no, 30):
            frame_ids.append(frame)

        # Update the counter
        # test_counter += len(frame_ids)

        # Get the frames from the video file
        frames = grab_frames("video.MOV", frame_ids)

        # Get the geometry data for all frames
        sequence_geometry = get_geometry_data('geometry.pbdata')

        # Get the annotation data for all frames
        annotation_data, instances = get_frame_annotation('annotation.pbdata')

        for i, frame in enumerate(frame_ids):

            print('Test counter: ' + str(test_counter))

            # Save the 3D depth points
            _, _, extrinsics, intrinsics, depth_points_3d = sequence_geometry[frame]
            # print('Depth points: ' + str(depth_points_3d.flatten().shape[0]))
            if depth_points_3d.flatten().shape[0] < depth_points_no:
                # print('Skip depth points')
                continue

            points_2d, points_3d, num_keypoints, frame_view_matrix, frame_projection_matrix = annotation_data[frame]

            # Number of detected instances
            num_instances = len(num_keypoints)

            if num_instances == 0:
                # print('num_instances is 0')
                # train_counter -= 1
                continue

            if len(np.array(points_3d)) % (num_instances * 3):
                # print('points_3d dim not divisible with num_instances points')
                # train_counter -= 1
                continue

            height, width, _ = frames[i].shape

            name_prefix = '0' * (6 - len(str(index))) + str(index)

            # print(name_prefix)

            # Write the index to train_data_idx
            with open("/content/val_data_idx.txt", "a") as myfile:
                myfile.write(str(index) + '\n')

            # Save the frame as an image
            cv2.imwrite(image_path + name_prefix + '.jpg', frames[i])

            # fig = plt.figure()
            # ax = fig.add_subplot(111, projection='3d')
            # ax.scatter(depth_points_3d[:, 0], depth_points_3d[:, 1], depth_points_3d[:, 2])

            # depth_len = int(depth_points_3d.flatten().shape[0] // 6)
            # print('shape: ' + str(depth_points_3d.flatten().shape[0]) + ' depth_len: ' + str(depth_len * 6))
            # depth_points_3d.flatten()[:depth_len * 6].astype(np.float32).tofile(depth_path + name_prefix + '.bin')
            depth_points_3d.flatten()[:depth_points_no].astype(np.float32).tofile(depth_path + name_prefix + '.bin')

            # Save the calib file
            with open(calib_path + name_prefix + '.txt', "w") as calibFile:
                Rt = extrinsics[:3, :3].flatten().astype(np.float32)
                K = intrinsics.flatten().astype(np.float32)
                np.savetxt(calibFile, Rt, newline=' ')
                calibFile.write('\n')
                np.savetxt(calibFile, K, newline=' ')

            # Save the annotation data
            f = open(label_path + name_prefix + '.txt', "w")

            # Compute the 2D bounding box
            points_2d = []
            # Now, let's compute the box's vertices in 3D, then project them back to 2D:
            for instance_id in range(num_instances):
                # The annotation contains the box's transformation and scale in world coordinate system
                # Here the instance_vertices_3d are the box vertices in the "BOX" coordinate, (i.e. it's a unit box)
                # and has to be transformed to the world coordinate.
                instance_rotation, instance_translation, instance_scale, instance_vertices_3d = instances[instance_id]
                
                box_transformation = np.eye(4)
                box_transformation[:3, :3] = np.reshape(instance_rotation, (3, 3))
                box_transformation[:3, -1] = instance_translation
                vertices_3d = instance_vertices_3d * instance_scale.T;
                # Homogenize the points
                vertices_3d_homg = np.concatenate((vertices_3d, np.ones_like(vertices_3d[:, :1])), axis=-1).T
                # Transform the homogenious 3D vertices with the box transformation
                box_vertices_3d_world = np.matmul(box_transformation, vertices_3d_homg) 
                
                # If we transform these vertices to the camera frame, we get the 3D keypoints in the annotation data
                # i.e. vertices_3d_cam == points_3d
                vertices_3d_cam = np.matmul(frame_view_matrix, box_vertices_3d_world)
                vertices_2d_proj = np.matmul(frame_projection_matrix, vertices_3d_cam)
                
                # Project the points
                points2d_ndc = vertices_2d_proj[:-1, :] / vertices_2d_proj[-1, :]
                points2d_ndc = points2d_ndc.T
                
                # Convert the 2D Projected points from the normalized device coordinates to pixel values
                x = points2d_ndc[:, 1]
                y = points2d_ndc[:, 0]
                points2d = np.copy(points2d_ndc)
                points2d[:, 0] = ((1 + x) * 0.5) * width
                points2d[:, 1] = ((1 + y) * 0.5) * height    
                points_2d.append(points2d.astype(int))
                # points2d are the projected 3D points on the image plane.


            # 2D bounding box
            x_min, y_min, x_max, y_max = get_x_y(points_2d, num_instances)

            # Change x_max and y_min to be compatible with the SUNRGBD3D format
            x_max = np.array(x_max) - np.array(x_min)
            y_max = np.array(y_max) - np.array(y_min)

            # Compute the 3D points in SUNRGBD3D format
            points_3d = np.array(points_3d)
            points_3d = points_3d.reshape(num_instances, -1, 3)
            # print('points_3d shape: ' + str(points_3d.shape))

            for instance_id in range(num_instances):
                if points_3d.shape[1] < 6:
                    continue

                # print('Here')
                centroid = points_3d[instance_id][0]

                p1 = points_3d[instance_id][1]
                p2 = points_3d[instance_id][2]
                p3 = points_3d[instance_id][3]
                p5 = points_3d[instance_id][5]

                # width
                w = point_to_plane_distance(centroid, p1, p2, p5)

                # length
                l = point_to_plane_distance(centroid, p1, p2, p3)

                # height
                h = point_to_plane_distance(centroid, p1, p3, p5)

                # We are not interested in the orientation part
                orientation = np.ones((2))


                # Write a line in the new annotation file
                f.write(obj + ' ')                            # data[0]

                f.write(str(int(x_min[instance_id])) + ' ')   # data[1]
                f.write(str(int(y_min[instance_id])) + ' ')   # data[2]
                f.write(str(int(x_max[instance_id])) + ' ')   # data[3]
                f.write(str(int(y_max[instance_id])) + ' ')   # data[4]

                f.write(str(float(centroid[0])) + ' ')        # data[5]
                f.write(str(float(centroid[1])) + ' ')        # data[6]
                f.write(str(float(centroid[2])) + ' ')        # data[7]

                f.write(str(float(w)) + ' ')                  # data[8]
                f.write(str(float(l)) + ' ')                  # data[9]
                f.write(str(float(h)) + ' ')                  # data[10]
                # f.write(str(float(h)) + ' ')                  # data[8]
                # f.write(str(float(l)) + ' ')                  # data[9]
                # f.write(str(float(w)) + ' ')                  # data[10]

                f.write(str(float(orientation[0])) + ' ')     # data[11]
                f.write(str(float(orientation[1])))           # data[12]

                f.write('\n')

            f.close()

            # Update index value
            index += 1
            test_counter += 1


    if counter != 0:
        print(obj + ': (' + str(mean_x / float(counter)) + ',' + str(mean_y / float(counter)) + ',' + str(mean_z / float(counter)) + ')')
    else:
        print(obj + ': (0, 0, 0)')

    return index
!ls

drive  Objectron  sample_data


In [None]:
!rm /content/train_data_idx.txt
!rm /content/val_data_idx.txt

rm: cannot remove '/content/train_data_idx.txt': No such file or directory
rm: cannot remove '/content/val_data_idx.txt': No such file or directory


In [None]:
index = convert_objectron_data_sunrgb3d('shoe', 0, max_no=10)

In [None]:
max_no = 300
index = 0
index = convert_objectron_data_sunrgb3d('book', index, max_no=max_no)
print(index)
index = convert_objectron_data_sunrgb3d('bottle', index, max_no=max_no)
print(index)
index = convert_objectron_data_sunrgb3d('camera', index, max_no=max_no)
print(index)
index = convert_objectron_data_sunrgb3d('cereal_box', index, max_no=max_no)
print(index)
index = convert_objectron_data_sunrgb3d('cup', index, max_no=max_no)
print(index)
index = convert_objectron_data_sunrgb3d('laptop', index, max_no=max_no)
print(index)
index = convert_objectron_data_sunrgb3d('shoe', index, max_no=max_no)
print(index)


[book/batch-47/25] has: 277 frames
Train counter: 0
Train counter: 1
Train counter: 2
Train counter: 3
Train counter: 4
Train counter: 5
Train counter: 6
Train counter: 7
Train counter: 8
Train counter: 9
[book/batch-25/19] has: 272 frames
Train counter: 10
Train counter: 11
Train counter: 12
Train counter: 13
Train counter: 14
Train counter: 15
Train counter: 16
Train counter: 17
Train counter: 18
Train counter: 19
[book/batch-48/2] has: 309 frames
Train counter: 20
Train counter: 21
Train counter: 22
Train counter: 23
Train counter: 24
Train counter: 25
Train counter: 26
Train counter: 27
Train counter: 28
Train counter: 29
Train counter: 30
[book/batch-49/9] has: 254 frames
Train counter: 31
Train counter: 32
Train counter: 33
Train counter: 34
Train counter: 35
Train counter: 36
Train counter: 37
Train counter: 38
Train counter: 39
[book/batch-15/0] has: 257 frames
Train counter: 40
Train counter: 41
Train counter: 42
Train counter: 43
Train counter: 44
Train counter: 45
Train coun

In [None]:
!cp /content/train_data_idx.txt /content/drive/MyDrive/mmdet3d-Objectron/data/sunrgbd/sunrgbd_trainval/train_data_idx.txt
!cp /content/val_data_idx.txt /content/drive/MyDrive/mmdet3d-Objectron/data/sunrgbd/sunrgbd_trainval/val_data_idx.txt

In [None]:
test_points.bin


TEST

In [1]:
!pip3 install openmim
!mim install mmcv-full
!mim install mmdet
!mim install mmsegmentation

# Copyright (c) OpenMMLab. All rights reserved.
from concurrent import futures as futures
from os import path as osp

import mmcv
import numpy as np
from scipy import io as sio

def random_sampling(points, num_points, replace=None, return_choices=False):
    """Random sampling.

    Sampling point cloud to a certain number of points.

    Args:
        points (ndarray): Point cloud.
        num_points (int): The number of samples.
        replace (bool): Whether the sample is with or without replacement.
        return_choices (bool): Whether to return choices.

    Returns:
        points (ndarray): Point cloud after sampling.
    """

    if replace is None:
        replace = (points.shape[0] < num_points)
    choices = np.random.choice(points.shape[0], num_points, replace=replace)
    if return_choices:
        return points[choices], choices
    else:
        return points[choices]

class SUNRGBDInstance(object):

    def __init__(self, line):
        data = line.split(' ')
        data[1:] = [float(x) for x in data[1:]]
        self.classname = data[0]
        self.xmin = data[1]
        self.ymin = data[2]
        self.xmax = data[1] + data[3]
        self.ymax = data[2] + data[4]
        self.box2d = np.array([self.xmin, self.ymin, self.xmax, self.ymax])
        self.centroid = np.array([data[5], data[6], data[7]])
        self.width = data[8]
        self.length = data[9]
        self.height = data[10]
        # data[9] is x_size (length), data[8] is y_size (width), data[10] is
        # z_size (height) in our depth coordinate system,
        # l corresponds to the size along the x axis
        self.size = np.array([data[9], data[8], data[10]]) * 2
        self.orientation = np.zeros((3, ))
        self.orientation[0] = 1
        self.orientation[1] = 1
        self.heading_angle = np.arctan2(self.orientation[1],
                                        self.orientation[0])
        self.box3d = np.concatenate(
            [self.centroid, self.size, self.heading_angle[None]])


class SUNRGBDData(object):
    """SUNRGBD data.

    Generate scannet infos for sunrgbd_converter.

    Args:
        root_path (str): Root path of the raw data.
        split (str, optional): Set split type of the data. Default: 'train'.
        use_v1 (bool, optional): Whether to use v1. Default: False.
    """

    def __init__(self, root_path, split='train', use_v1=False):
        self.root_dir = root_path
        self.split = split
        self.split_dir = osp.join(root_path, 'sunrgbd_trainval')
        self.classes = ['book', 'bottle', 'camera', 'cereal_box', 'cup', 'laptop', 'shoe']
        self.cat2label = {cat: self.classes.index(cat) for cat in self.classes}
        self.label2cat = {
            label: self.classes[label]
            for label in range(len(self.classes))
        }
        assert split in ['train', 'val', 'test']
        split_file = osp.join(self.split_dir, f'{split}_data_idx.txt')
        mmcv.check_file_exist(split_file)
        self.sample_id_list = map(int, mmcv.list_from_file(split_file))
        self.image_dir = osp.join(self.split_dir, 'image')
        self.calib_dir = osp.join(self.split_dir, 'calib')
        self.depth_dir = osp.join(self.split_dir, 'depth')
        if use_v1:
            self.label_dir = osp.join(self.split_dir, 'label_v1')
        else:
            self.label_dir = osp.join(self.split_dir, 'label')

    def __len__(self):
        return len(self.sample_id_list)

    def get_image(self, idx):
        img_filename = osp.join(self.image_dir, f'{idx:06d}.jpg')
        return mmcv.imread(img_filename)

    def get_image_shape(self, idx):
        image = self.get_image(idx)
        return np.array(image.shape[:2], dtype=np.int32)

    def get_calibration(self, idx):
        calib_filepath = osp.join(self.calib_dir, f'{idx:06d}.txt')
        lines = [line.rstrip() for line in open(calib_filepath)]
        Rt = np.array([float(x) for x in lines[0].split(' ')])
        Rt = np.reshape(Rt, (3, 3), order='F').astype(np.float32)
        K = np.array([float(x) for x in lines[1].split(' ')])
        K = np.reshape(K, (3, 3), order='F').astype(np.float32)
        return K, Rt

    def get_label_objects(self, idx):
        label_filename = osp.join(self.label_dir, f'{idx:06d}.txt')
        lines = [line.rstrip() for line in open(label_filename)]
        objects = [SUNRGBDInstance(line) for line in lines]
        return objects

    def get_infos(self, num_workers=4, has_label=True, sample_id_list=None):
        """Get data infos.

        This method gets information from the raw data.

        Args:
            num_workers (int, optional): Number of threads to be used.
                Default: 4.
            has_label (bool, optional): Whether the data has label.
                Default: True.
            sample_id_list (list[int], optional): Index list of the sample.
                Default: None.

        Returns:
            infos (list[dict]): Information of the raw data.
        """

        def process_single_scene(sample_idx):
            print(f'{self.split} sample_idx: {sample_idx}')

            info = dict()
            pc_info = {'num_features': 3, 'lidar_idx': sample_idx}
            info['point_cloud'] = pc_info

            info['pts_path'] = osp.join('points', f'{sample_idx:06d}.bin')
            img_path = osp.join('image', f'{sample_idx:06d}.jpg')
            image_info = {
                'image_idx': sample_idx,
                'image_shape': self.get_image_shape(sample_idx),
                'image_path': img_path
            }
            info['image'] = image_info

            K, Rt = self.get_calibration(sample_idx)
            calib_info = {'K': K, 'Rt': Rt}
            info['calib'] = calib_info

            if has_label:
                obj_list = self.get_label_objects(sample_idx)
                annotations = {}
                annotations['gt_num'] = len([
                    obj.classname for obj in obj_list
                    if obj.classname in self.cat2label.keys()
                ])
                if annotations['gt_num'] != 0:
                    annotations['name'] = np.array([
                        obj.classname for obj in obj_list
                        if obj.classname in self.cat2label.keys()
                    ])
                    annotations['bbox'] = np.concatenate([
                        obj.box2d.reshape(1, 4) for obj in obj_list
                        if obj.classname in self.cat2label.keys()
                    ],
                                                         axis=0)
                    annotations['location'] = np.concatenate([
                        obj.centroid.reshape(1, 3) for obj in obj_list
                        if obj.classname in self.cat2label.keys()
                    ],
                                                             axis=0)
                    annotations['dimensions'] = 2 * np.array([
                        [obj.length, obj.width, obj.height] for obj in obj_list
                        if obj.classname in self.cat2label.keys()
                    ])  # lwh (depth) format
                    annotations['rotation_y'] = np.array([
                        obj.heading_angle for obj in obj_list
                        if obj.classname in self.cat2label.keys()
                    ])
                    annotations['index'] = np.arange(
                        len(obj_list), dtype=np.int32)
                    annotations['class'] = np.array([
                        self.cat2label[obj.classname] for obj in obj_list
                        if obj.classname in self.cat2label.keys()
                    ])
                    annotations['gt_boxes_upright_depth'] = np.stack(
                        [
                            obj.box3d for obj in obj_list
                            if obj.classname in self.cat2label.keys()
                        ],
                        axis=0)  # (K,8)
                info['annos'] = annotations
            return info

        sample_id_list = sample_id_list if \
            sample_id_list is not None else self.sample_id_list
        with futures.ThreadPoolExecutor(num_workers) as executor:
            infos = executor.map(process_single_scene, sample_id_list)
        return list(infos)

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting openmim
  Downloading openmim-0.2.0-py2.py3-none-any.whl (49 kB)
[K     |████████████████████████████████| 49 kB 4.0 MB/s 
Collecting rich
  Downloading rich-12.4.4-py3-none-any.whl (232 kB)
[K     |████████████████████████████████| 232 kB 18.8 MB/s 
Collecting model-index
  Downloading model_index-0.1.11-py3-none-any.whl (34 kB)
Collecting colorama
  Downloading colorama-0.4.5-py2.py3-none-any.whl (16 kB)
Collecting ordered-set
  Downloading ordered_set-4.1.0-py3-none-any.whl (7.6 kB)
Collecting commonmark<0.10.0,>=0.9.0
  Downloading commonmark-0.9.1-py2.py3-none-any.whl (51 kB)
[K     |████████████████████████████████| 51 kB 6.6 MB/s 
Installing collected packages: ordered-set, commonmark, rich, model-index, colorama, openmim
Successfully installed colorama-0.4.5 commonmark-0.9.1 model-index-0.1.11 openmim-0.2.0 ordered-set-4.1.0 rich-12.4.4
Looking in indexes: https://py

In [5]:
import os
pkl_prefix='sunrgbd'

train_filename = os.path.join('./', f'{pkl_prefix}_infos_train.pkl')
val_filename = os.path.join('./', f'{pkl_prefix}_infos_val.pkl')

# train_dataset = SUNRGBDData(root_path='/content/drive/MyDrive/Objectron-to-SUNRGBD3D-format/sunrgbd', split='train', use_v1=True)
val_dataset = SUNRGBDData(root_path='/content/', split='val', use_v1=True)

In [7]:
# infos_train = train_dataset.get_infos(num_workers=1, has_label=True)
# mmcv.dump(infos_train, train_filename, 'pkl')
# print(f'{pkl_prefix} info train file is saved to {train_filename}')

infos_val = val_dataset.get_infos(num_workers=1, has_label=True)
mmcv.dump(infos_val, val_filename, 'pkl')
print(f'{pkl_prefix} info val file is saved to {val_filename}')

sunrgbd info val file is saved to ./sunrgbd_infos_val.pkl


In [None]:
!cp /content/sunrgbd_infos_train.pkl /content/drive/MyDrive/mmdet3d-Objectron/data/sunrgbd/sunrgbd_infos_train.pkl 
!cp /content/sunrgbd_infos_val.pkl /content/drive/MyDrive/mmdet3d-Objectron/data/sunrgbd/sunrgbd_infos_val.pkl