# In [31]:
import numpy as np
import cv2 as cv

# The given video and calibration data
input_file = '../Desktop/chessboard.mp4'
# 3x3 camera matrix handed to solvePnP/projectPoints as the intrinsics.
K = np.array([[1.97515271e+03, 0.00000000e+00, 5.49311781e+02],
              [0.00000000e+00, 1.97406482e+03, 9.45237271e+02],
              [0.00000000e+00, 0.00000000e+00, 1.00000000e+00]])
# Five lens-distortion coefficients, passed to OpenCV together with K.
dist_coeff = np.array([0.0104491, -1.07101685, -0.00143694, -0.01396964, 1.3123999])
board_pattern = (9, 6)  # Corner grid size (columns, rows) given to findChessboardCorners
board_cellsize = 0.025  # Edge length of one board cell (presumably metres -- TODO confirm)
# Detection flags are bit masks, so combine them with bitwise OR.
board_criteria = cv.CALIB_CB_ADAPTIVE_THRESH | cv.CALIB_CB_NORMALIZE_IMAGE | cv.CALIB_CB_FAST_CHECK

# Open a video
video = cv.VideoCapture(input_file)
if not video.isOpened():
    # An explicit raise (instead of `assert`) still fires under `python -O`.
    raise IOError('Cannot read the given input, ' + input_file)

# Prepare 3D points on a chessboard (row-major, z = 0 plane, scaled by the cell size)
obj_points = board_cellsize * np.array([[c, r, 0] for r in range(board_pattern[1]) for c in range(board_pattern[0])])


def _make_box(footprint, height=1):
    """Return the ``(lower, upper)`` outlines of a box standing on the board.

    Args:
        footprint: Sequence of ``(x, y)`` vertices in board-cell units.
        height: Box height in board-cell units; the upper outline is placed
            at ``z = -height`` (the same sign the original literals used).

    Returns:
        Two ``(N, 3)`` float arrays scaled by ``board_cellsize``: the outline
        on the board plane (``z = 0``) and the outline of the top face.
    """
    lower = board_cellsize * np.array([[x, y, 0] for x, y in footprint])
    upper = board_cellsize * np.array([[x, y, -height] for x, y in footprint])
    return lower, upper


# Define initial 3D box shapes (each footprint is listed once so the lower and
# upper outlines can never get out of sync)
box_lower_1, box_upper_1 = _make_box([(4, 2), (7, 2), (7, 4), (6, 4), (6, 3), (4, 3)])

box_lower_2, box_upper_2 = _make_box([(1, 2), (2, 3), (2, 5), (1, 3)])

# NOTE: the first vertex is repeated at the end, exactly as in the original data.
box_lower_3, box_upper_3 = _make_box([(2, 5), (3, 4), (2, 4), (2, 5)])

# Define previous 3D box shapes
prev_box_lower = np.copy(box_lower_1)
prev_box_upper = np.copy(box_upper_1)

# Create a named window
cv.namedWindow('Pose Estimation (Chessboard)', cv.WINDOW_NORMAL)
cv.resizeWindow('Pose Estimation (Chessboard)', 1080, 960)

# Run pose estimation
#
# Keys (polled once per frame, whether or not the board was detected):
#   q   -> switch to the next box shape      (1 -> 2 -> 3 -> 1)
#   e   -> switch to the previous box shape  (1 -> 3 -> 2 -> 1)
#   ESC -> quit
window_name = 'Pose Estimation (Chessboard)'
box_shapes = (
    (box_lower_1, box_upper_1),
    (box_lower_2, box_upper_2),
    (box_lower_3, box_upper_3),
)
box_index = 1  # Current box index (1-based)
# Start with the first shape so the very first detected frame can be drawn;
# previously box_lower/box_upper were undefined until 'q' or 'e' was pressed.
box_lower, box_upper = (np.copy(face) for face in box_shapes[box_index - 1])

try:
    while True:
        # Read an image from the video
        valid, img = video.read()
        if not valid:
            break

        # Estimate the camera pose
        complete, img_points = cv.findChessboardCorners(img, board_pattern, board_criteria)
        if complete:
            solved, rvec, tvec = cv.solvePnP(obj_points, img_points, K, dist_coeff)
            if solved:
                # Draw the box on the image: lower outline, upper outline,
                # then the vertical edges joining matching vertices.
                line_lower, _ = cv.projectPoints(box_lower, rvec, tvec, K, dist_coeff)
                line_upper, _ = cv.projectPoints(box_upper, rvec, tvec, K, dist_coeff)
                cv.polylines(img, [np.int32(line_lower)], True, (255, 0, 0), 2)
                cv.polylines(img, [np.int32(line_upper)], True, (0, 0, 255), 2)
                lower_px = np.int32(line_lower).reshape(-1, 2).tolist()
                upper_px = np.int32(line_upper).reshape(-1, 2).tolist()
                for b, t in zip(lower_px, upper_px):
                    # Plain tuples of Python ints are accepted as points by
                    # every OpenCV Python binding version.
                    cv.line(img, tuple(b), tuple(t), (0, 255, 0), 2)

                # Print the camera position (-R^T t, i.e. in board coordinates)
                R, _ = cv.Rodrigues(rvec)
                p = (-R.T @ tvec).flatten()
                info = f'XYZ: [{p[0]:.3f} {p[1]:.3f} {p[2]:.3f}]'
                cv.putText(img, info, (10, 25), cv.FONT_HERSHEY_DUPLEX, 0.6, (0, 255, 0))

        # Show the image and process the key event. A single waitKey per frame
        # ensures no key press is consumed by one check and missed by another.
        cv.imshow(window_name, img)
        key = cv.waitKey(1) & 0xFF
        if key == 27:  # ESC
            break
        if key == ord('q'):
            box_index = box_index % len(box_shapes) + 1
        elif key == ord('e'):
            box_index = (box_index - 2) % len(box_shapes) + 1
        else:
            continue
        box_lower, box_upper = (np.copy(face) for face in box_shapes[box_index - 1])
finally:
    # Always free the capture and close the windows, even after an error.
    video.release()
    cv.destroyAllWindows()