In [18]:
import numpy as np
import cv2
import skimage.io as skio
import matplotlib.pyplot as plt
import os
import viser
import time

#### 0.1: Calibration

In [19]:
# 3D coordinates of tags in the aruco grid.
tag_3d_coords = {
    4: np.array([
        [0.0, 0.06, 0.0],
        [0.06, 0.06, 0.0],
        [0.06, 0.0, 0.0],
        [0.0, 0.0, 0.0]
    ], dtype=np.float32),
    5: np.array([
        [0.09, 0.06, 0.0],
        [0.15, 0.06, 0.0],
        [0.15, 0.0, 0.0],
        [0.09, 0.0, 0.0]
    ], dtype=np.float32),
    2: np.array([
        [0.0, 0.13572, 0.0],
        [0.06, 0.13572, 0.0],
        [0.06, 0.07572, 0.0],
        [0.0, 0.07572, 0.0]
    ], dtype=np.float32),
    3: np.array([
        [0.09, 0.13572, 0.0],
        [0.15, 0.13572, 0.0],
        [0.15, 0.07572, 0.0],
        [0.09, 0.07572, 0.0]
    ], dtype=np.float32),
    0: np.array([
        [0.0, 0.21144, 0.0],
        [0.06, 0.21144, 0.0],
        [0.06, 0.15144, 0.0],
        [0.0, 0.15144, 0.0]
    ], dtype=np.float32),
    1: np.array([
        [0.09, 0.21144, 0.0],
        [0.15, 0.21144, 0.0],
        [0.15, 0.15144, 0.0],
        [0.09, 0.15144, 0.0]
    ], dtype=np.float32),
}

def get_3d_points_one_im(ids):
    curr_points = []
    for id in ids:
        curr_points.append(tag_3d_coords[id[0]])
    return np.vstack(tuple(curr_points))

In [20]:
def get_images(root, downscale_factor=0.5):
    # Read in the images
    ims = []
    for im_name in os.listdir(root):
        if im_name == '.DS_Store':
            continue
        curr_image = cv2.imread(f"{root}/{im_name}")
        curr_image = cv2.resize(curr_image, None, fx=downscale_factor, fy=downscale_factor)
        ims.append(curr_image)
    return ims

In [21]:
def get_calibration_points(images):
    aruco_dict = cv2.aruco.getPredefinedDictionary(cv2.aruco.DICT_4X4_50)
    aruco_params = cv2.aruco.DetectorParameters()
    detector = cv2.aruco.ArucoDetector(aruco_dict, aruco_params)
    object_points, image_points = [], []
    #curr_idx = 0
    for image in images:
        corners, ids, _ = detector.detectMarkers(image)
        #print(curr_idx)
        if len(corners) != 6:
            continue
        if np.sum(ids > 5) != 0 or np.sum(ids < 0) != 0:
            continue
        curr_image_points = np.vstack(corners).reshape((-1, 2))
        image_points.append(curr_image_points)
        curr_object_points = get_3d_points_one_im(ids)
        object_points.append(curr_object_points)
        #curr_idx += 1
    return object_points, image_points

In [22]:
calibrate_images = get_images("data/0.1/set2")

In [23]:
object_points, image_points = get_calibration_points(calibrate_images)

In [24]:
_, intrinsics_matrix, distortions_vector, _, _ = cv2.calibrateCamera(object_points, image_points, calibrate_images[0].shape[:2][::-1], None, None)

#### 0.3: Perspective and Projection

In [25]:
object_points_central = np.array([
        [0.0, 0.06, 0.0],
        [0.06, 0.06, 0.0],
        [0.06, 0.0, 0.0],
        [0.0, 0.0, 0.0]
    ], dtype=np.float32)

def get_images_and_calibrate(root, K, downscale_factor=0.5):
    # Setup
    aruco_dict = cv2.aruco.getPredefinedDictionary(cv2.aruco.DICT_4X4_50)
    aruco_params = cv2.aruco.DetectorParameters()
    detector = cv2.aruco.ArucoDetector(aruco_dict, aruco_params)

    # Read in the images
    all_ims = []
    image_names = []
    for image_name in os.listdir(root):
        if image_name == '.DS_Store':
            continue
        image = cv2.imread(f'{root}/{image_name}')
        image = cv2.resize(image, None, fx=downscale_factor, fy=downscale_factor)
        all_ims.append(image)
        image_names.append(image_name)

    # find the transform associated with each image.
    good_images = []
    good_image_names = []
    extrinsics = []

    for i in range(len(all_ims)):
        image = all_ims[i]
        image_name = image_names[i]
        corners, ids, _ = detector.detectMarkers(image)

        if ids is None or len(ids) != 1:
            #print(i)
            continue
        if ids[0][0] != 0:
            continue

        image_points = corners[0][0]

        _, rvec, tvec = cv2.solvePnP(object_points_central, image_points, K, None)
        R, _ = cv2.Rodrigues(rvec)
        extrinsics_matrix = np.hstack((R, tvec))
        extrinsics_matrix = np.vstack((extrinsics_matrix, np.array([0.0, 0.0, 0.0, 1.0])))

        good_images.append(image)
        good_image_names.append(image_name)
        extrinsics.append(extrinsics_matrix)
    
    return good_images, good_image_names, extrinsics

In [26]:
def get_c2w_from_w2c(extrinsics):
    res = []
    for matrix in extrinsics:
        res.append(np.linalg.inv(matrix))
    return res

In [27]:
good_images, good_image_names, extrinsics = get_images_and_calibrate("data/0.2/set4", intrinsics_matrix)

In [28]:
c2ws = get_c2w_from_w2c(extrinsics)

Visualize

In [None]:
server = viser.ViserServer(share=True)
H, W = good_images[0].shape[:2]
for i in range(len(good_images)):
    image = good_images[i]
    c2w = c2ws[i]
    server.scene.add_camera_frustum(
        f"/cameras/{i}", # give it a name
        fov=2 * np.arctan2(H / 2, intrinsics_matrix[1, 1]), # field of view
        aspect=W / H, # aspect ratio
        scale=0.02, # scale of the camera frustum change if too small/big
        wxyz=viser.transforms.SO3.from_matrix(c2w[:3, :3]).wxyz, # orientation in quaternion format
        position=c2w[:3, 3], # position of the camera
        image=image[:,:,::-1] # image to visualize
    )

while True:
    time.sleep(0.1)

KeyboardInterrupt: 

In [13]:
server.stop()

#### 0.4: Undistorting images and creating a dataset

In [29]:
# Example: Handling black boundaries from undistortion
h, w = good_images[0].shape[:2]
# alpha=1 keeps all pixels (more black borders), alpha=0 crops maximally
new_camera_matrix, roi = cv2.getOptimalNewCameraMatrix(
    intrinsics_matrix, distortions_vector, (w, h), alpha=0
)

undistorted_images = []
for i in range(len(good_images)):
    undistorted_image = cv2.undistort(good_images[i], intrinsics_matrix, distortions_vector, None, new_camera_matrix)
    x, y, w_roi, h_roi = roi
    undistorted_image = undistorted_image[y:y+h_roi, x:x+w_roi]
    undistorted_images.append(undistorted_image)

# Update the principal point to account for the crop offset
new_camera_matrix[0, 2] -= x  # cx
new_camera_matrix[1, 2] -= y  # cy

In [30]:
new_camera_matrix

array([[2.08641135e+03, 0.00000000e+00, 1.06381815e+03],
       [0.00000000e+00, 2.08357622e+03, 1.42860556e+03],
       [0.00000000e+00, 0.00000000e+00, 1.00000000e+00]])

In [14]:
undistorted_images_arr = np.array(undistorted_images)
c2ws_arr = np.array(c2ws)

In [15]:
num_data = len(undistorted_images_arr)
num_train = int(num_data * 0.9)
shuffle_indices = np.random.choice(num_data, num_data, replace=False)

undistorted_images_arr = undistorted_images_arr[shuffle_indices]
c2ws_arr = c2ws_arr[shuffle_indices]

images_train = undistorted_images_arr[:num_train]
images_val = undistorted_images_arr[num_train:]
c2ws_train = c2ws_arr[:num_train]
c2ws_val = c2ws_arr[num_train:]

In [17]:
np.savez(
    'data/0.4/persimmon_data2.npz',
    images_train=images_train,    # (N_train, H, W, 3)
    c2ws_train=c2ws_train,        # (N_train, 4, 4)
    images_val=images_val,        # (N_val, H, W, 3)
    c2ws_val=c2ws_val,            # (N_val, 4, 4)
    c2ws_test=None,               # Will generate later
    focal=new_camera_matrix[0,0]                   # float
)

In [102]:
undistorted_images_arr.shape

(53, 2856, 2142, 3)

In [104]:
extrinsics_arr.shape

(53, 4, 4)