## Implementation of Zhang's camera calibration method

### Initial estimation of intrinsic and extrinsic parameters

In [1]:
import cv2
import os
import glob
import matplotlib.pyplot as plt
import numpy as np
from numpy.linalg import svd
import cv2
import numpy as np
from scipy.optimize import least_squares

In [2]:
def get_img_paths(images_path: str) -> list[str]:
    """Helper function to get image path from a folder."""
    patterns = ['*.jpg', '*.jpeg', '*.png']
    images_list = []
    for pattern in patterns:
        images_list.extend(glob.glob(os.path.join(images_path, pattern)))
    return images_list

In [3]:
def get_world_coordinates(pattern_size: tuple[int, int], square_size: int) -> np.ndarray:
    """Function to get chessboard corners' coordinates in the real world."""
    n_corners = pattern_size[0] * pattern_size[1]
    world_X, world_Y = np.meshgrid(range(pattern_size[0]), range(pattern_size[1]))
    world_X = world_X.reshape(n_corners, 1)
    world_Y = world_Y.reshape(n_corners, 1)

    # Corners coordinates in the real world
    return np.array(np.hstack((world_X, world_Y, np.zeros((world_X.shape))), dtype=np.float32)) * square_size


def get_v(h: np.ndarray, i: int, j: int) -> np.ndarray:
    """
    Helper function to get v[i][j]. 
    
    h, i, j are all zero-indexed
    """
    v_ij = np.array([
        h[0, i] * h[0, j],
        h[0, i] * h[1, j] + h[1, i] * h[0, j],
        h[2, i] * h[0, j] + h[0, i] * h[2, j],
        h[1, i] * h[1, j],
        
        h[2, i] * h[1, j] + h[1, i] * h[2, j],
        h[2, i] * h[2, j]
    ])
    return v_ij

def get_B(b: np.ndarray) -> np.ndarray:
    """Helper function to get symmetric B matrix from 6 b's."""
    b_11 = b[0]
    b_12 = b[1]
    b_13 = b[2]
    b_22 = b[3]
    b_23 = b[4]
    b_33 = b[5]

    return np.array([
        [b_11, b_12, b_13],
        [b_12, b_22, b_23],
        [b_13, b_23, b_33]
    ], dtype=np.float32)

def compute_H(img_path: str, 
              pattern_size: tuple[int, int], 
              world_coordinates: np.ndarray, 
              show_images: bool = False) -> tuple[list[np.ndarray], list[np.ndarray]]:
    """
    Function to compute H matrix for every image. 
    
    Returns list of H matrices and list of image points.
    """
    img_list = get_img_paths(img_path)
    
    term_criterion = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 30, 0.001)
    H_matrices, img_points = [], []

    for image_name in img_list:
        img_rgb = cv2.imread(image_name)
        img_grey = cv2.cvtColor(img_rgb, cv2.COLOR_BGR2GRAY)

        ok, corners = cv2.findChessboardCorners(img_grey, pattern_size, None)
        
        if not ok: continue
        
        refined_corners = cv2.cornerSubPix(img_grey, corners, pattern_size, (-1, -1), term_criterion)
        refined_corners = refined_corners.reshape(-1, 2)
        img_points.append(refined_corners)

        H, _ = cv2.findHomography(world_coordinates, refined_corners, cv2.RANSAC, 5.0)
        if H is None: continue

        H = H.astype(np.float32)
        if abs(H[2, 2]) > 0:
            H = H / H[2, 2]
        H_matrices.append(H)
        
        # Show picture of chessboard corners
        if show_images:
            cv2.drawChessboardCorners(img_rgb, pattern_size, refined_corners, True)
            cv2.imshow('img', img_rgb)
            cv2.waitKey(400)

    cv2.destroyAllWindows()

    return H_matrices, img_points

def find_K(H_matrices: list[np.ndarray]) -> np.ndarray:
    """Function to find matrix K from H matrices."""
    V = []

    for h in H_matrices:
        v_12 = get_v(h, 0, 1)
        v_11 = get_v(h, 0, 0)
        v_22 = get_v(h, 1, 1)

        V.append(v_12.T)
        V.append(v_11.T - v_22.T)
    
    V = np.asarray(V, dtype=np.float32)
    U_, sigma, V_ = svd(V)
    b = V_[-1, :] # vector corresponding the the lowest eigenvalue
    
    B = get_B(b)
    # B = 0.5 * (B + B.T)

    # Ensure overall sign so diagonal is positive (b is up to scale)
    if B[0, 0] < 0: B = -B

    # Try Cholesky on B (we want R upper s.t. R.T @ R = B -> K_inv = R -> K = inv(R))
    L = np.linalg.cholesky(B)  # L @ L.T = B
    K = np.linalg.inv(L.T)
    K = K / K[2, 2]
    if K[0, 0] < 0: K = -K

    return K


def find_E(H_matrices: list[np.ndarray], K: np.ndarray) -> list[np.ndarray]:
    """Function to find extrinsic matrix from H matrices and K matrix."""
    K_inv = np.linalg.inv(K)

    rotations = []
    translations = []
    extrinsics = []

    for H in H_matrices:
        H = H.astype(np.float32)
        # ensure homography normalized
        if abs(H[2, 2]) > 0:
            H = H / H[2, 2]

        h1 = H[:, 0]
        h2 = H[:, 1]
        h3 = H[:, 2]

        kinv_h1 = K_inv @ h1
        kinv_h2 = K_inv @ h2
        kinv_h3 = K_inv @ h3

        # scale: average norm of first two columns for robustness
        norm1 = np.linalg.norm(kinv_h1)
        norm2 = np.linalg.norm(kinv_h2)
        if norm1 == 0 or norm2 == 0:
            raise ValueError("Degenerate homography: zero column after applying K^{-1}")

        lam = 1.0 / ((norm1 + norm2) / 2.0)

        r1 = lam * kinv_h1
        r2 = lam * kinv_h2
        r3 = np.cross(r1, r2)
        t = lam * kinv_h3

        # orthonormalize R using SVD to correct noise: R = U @ Vt
        R_approx = np.column_stack((r1, r2, r3))
        U, _, Vt = svd(R_approx)
        R = U @ Vt

        # enforce right-handed frame
        if np.linalg.det(R) < 0:
            R = -1*R
            t = -t

        ext = np.eye(4, dtype=np.float32)
        ext[:3, :3] = R
        ext[:3, 3] = t
        rotations.append(R)
        translations.append(t)
        extrinsics.append(ext)

    return extrinsics

In [4]:
img_path = '../data/imgs/leftcamera'
pattern_size = (11, 7)
square_size = 30
show_images = True
show_progress = 2 # for the refinement step


n_corners = pattern_size[0] * pattern_size[1]

# Corners coordinates in the real world
world_coordinates = get_world_coordinates(pattern_size, square_size)
H_matrices, img_points = compute_H(img_path, pattern_size, world_coordinates, show_images)

K = find_K(H_matrices) # intrinsics
E = find_E(H_matrices, K) # extrinsics

print("Initial K:\n", K)
print("Initial E (for the first image):\n", E[0])

Initial K:
 [[741.11206     -0.91883403 523.6645    ]
 [  0.         752.5023     287.38272   ]
 [  0.           0.           1.        ]]
Initial E (for the first image):
 [[ 9.9968952e-01 -2.1439662e-02  1.2695490e-02 -2.7431265e+02]
 [ 2.0943712e-02  9.9905872e-01  3.7987325e-02 -1.2199510e+02]
 [-1.3497976e-02 -3.7709642e-02  9.9919760e-01  5.5969104e+02]
 [ 0.0000000e+00  0.0000000e+00  0.0000000e+00  1.0000000e+00]]


### Refinement of extrinsic and intrinsic parameters

In [5]:
def _pack_params(K: np.ndarray, dist: np.ndarray, rvecs: list[np.ndarray], tvecs: list[np.ndarray], optimize_skew: bool):
    """
    Helper function to pack intrinsics, distortion and extrinsics into a 1D parameter vector.
    If optimize_skew is False, skew is fixed to 0 and not included.
    """
    if optimize_skew:
        intr = np.array([K[0,0], K[1,1], K[0,1], K[0,2], K[1,2]], dtype=np.float32)  # fx, fy, s, cx, cy
    else:
        intr = np.array([K[0,0], K[1,1], K[0,2], K[1,2]], dtype=np.float32)  # fx, fy, cx, cy (s implicitly 0)

    dist = np.asarray(dist, dtype=np.float32).reshape(-1)  # k1,k2,p1,p2,k3

    ex = []
    for rvec, tvec in zip(rvecs, tvecs):
        ex.append(rvec.reshape(3,))
        ex.append(tvec.reshape(3,))
    ex = np.hstack(ex) if ex else np.array([], dtype=np.float32)

    return np.hstack((intr, dist, ex))

def _unpack_params(x: np.ndarray, n_views: int, optimize_skew: bool):
    """
    Helper function to unpack parameter vector into K, dist, rvecs, tvecs
    """
    idx = 0
    if optimize_skew:
        fx, fy, s, cx, cy = x[idx:idx+5]; idx += 5
    else:
        fx, fy, cx, cy = x[idx:idx+4]; s = 0.0; idx += 4

    K = np.array([[fx, s, cx],
                  [0.0, fy, cy],
                  [0.0, 0.0, 1.0]], dtype=np.float32)

    dist = x[idx:idx+5]; idx += 5

    rvecs = []
    tvecs = []
    for i in range(n_views):
        rvec = x[idx:idx+3]; idx += 3
        tvec = x[idx:idx+3]; idx += 3
        rvecs.append(rvec)
        tvecs.append(tvec)

    return K, dist, rvecs, tvecs

def _reprojection_residuals(x: np.ndarray,
                            n_views: int,
                            world_coords: np.ndarray,
                            img_points_list: list[np.ndarray],
                            optimize_skew: bool):
    """
    Helper function to compute reprojection residuals for least_squares.
    Returns 1D residual vector [u_err0, v_err0, u_err1, v_err1, ...].
    """
    K, dist, rvecs, tvecs = _unpack_params(x, n_views, optimize_skew)
    k1, k2, k3, p1, p2 = dist
    residuals = []

    # world_coords assumed (M,2) with Z=0
    for view_idx in range(n_views):
        img_pts = img_points_list[view_idx]
        rvec = rvecs[view_idx].reshape(3, 1)
        tvec = tvecs[view_idx].reshape(3, )

        R, _ = cv2.Rodrigues(rvec)
        for i, (u_obs, v_obs) in enumerate(img_pts):
            Xw, Yw, _ = world_coords[i]
            Pw = np.array([Xw, Yw, 0.0], dtype=np.float32)
            Pc = R @ Pw + tvec
            Xc, Yc, Zc = Pc
            if abs(Zc) < 1e-12:
                # skip to avoid NaNs (although should'nt happen)
                residuals.extend([0.0, 0.0])
                continue
            x = Xc / Zc
            y = Yc / Zc
            r2 = x*x + y*y
            r4 = r2*r2
            r6 = r4*r2
            radial = 1.0 + k1*r2 + k2*r4 + k3*r6
            x_dist = x*radial + 2*p1*x*y + p2*(r2 + 2*x*x)
            y_dist = y*radial + p1*(r2 + 2*y*y) + 2*p2*x*y

            u_proj = K[0,0]*x_dist + K[0,1]*y_dist + K[0,2]
            v_proj = K[1,1]*y_dist + K[1,2]

            residuals.append(u_obs - u_proj)
            residuals.append(v_obs - v_proj)

    return np.asarray(residuals, dtype=np.float32)

In [6]:
def refine_camera_parameters(img_points_list: list[np.ndarray],
                      world_coords: np.ndarray,
                      K_init: np.ndarray,
                      extrinsics_init: list[np.ndarray],
                      dist_init: np.ndarray = np.zeros(5, dtype=np.float32),
                      optimize_skew: bool = False,
                      verbose: int = 2,
                      max_nfev: int = 200) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    """
    Full nonlinear bundle adjustment optimizing intrinsics fx,fy,cx,cy (and s if optimize_skew=True),
    distortion (k1,k2,p1,p2,k3) and per-view extrinsics (rvec,tvec) using least squares.

    Returns:
    K_opt (3x3), dist_opt (5,), extrinsics_opt (list of 4x4 matrices), rvecs_opt, tvecs_opt
    """
    n_views = len(img_points_list)
    if n_views == 0:
        raise ValueError("No views provided for bundle adjustment")

    rvecs = []
    tvecs = []
    for ext in extrinsics_init:
        R = ext[:3, :3]
        t = ext[:3, 3].reshape(3,)
        rvec, _ = cv2.Rodrigues(R)
        rvecs.append(rvec.reshape(3,))
        tvecs.append(t.reshape(3,))


    x0 = _pack_params(K_init, dist_init, rvecs, tvecs, optimize_skew)

    # run least_squares
    fun = lambda x: _reprojection_residuals(x, n_views, world_coords, img_points_list, optimize_skew)
    res = least_squares(fun, x0, method='lm' if n_views * world_coords.shape[0] * 2 < 2000 else 'trf',
                        verbose=verbose, max_nfev=max_nfev)
    
    K_opt, dist_opt, rvecs_opt, tvecs_opt = _unpack_params(res.x, n_views, optimize_skew)

    # build extrinsics 4x4 list
    extrinsics_opt = []
    for rvec, tvec in zip(rvecs_opt, tvecs_opt):
        R_opt, _ = cv2.Rodrigues(rvec.reshape(3,1))
        E = np.eye(4, dtype=np.float32)
        E[:3,:3] = R_opt
        E[:3,3] = tvec.reshape(3,)
        extrinsics_opt.append(E)

    extrinsics_opt = np.asarray(extrinsics_opt, dtype=np.float32)
    rvecs_opt = np.asarray(rvecs_opt, dtype=np.float32)
    tvecs_opt = np.asarray(tvecs_opt, dtype=np.float32)

    return K_opt, dist_opt, extrinsics_opt, rvecs_opt, tvecs_opt

In [7]:
# Refined values
K_opt, dist_opt, E_opt, rvecs, tvecs = refine_camera_parameters(img_points, world_coordinates, K, E, verbose=show_progress)

# (k1, k2, k3, p1, p2) instead of (k1, k2, p1, p2, k3)
dist_opt = np.array([dist_opt[0], dist_opt[1], dist_opt[3], dist_opt[4], dist_opt[2]])

print("Refined K:\n", K_opt)
print("Refined E (for the first image):\n", E_opt[0])
print("Estimated distortion (k1, k2, k3, p1, p2):", dist_opt)

   Iteration     Total nfev        Cost      Cost reduction    Step norm     Optimality   
       0              1         6.7433e+01                                    7.02e+03    
       1              2         2.8324e+01      3.91e+01       8.90e+01       1.54e+04    
       2              3         2.6762e+01      1.56e+00       1.23e+01       3.02e+02    
       3              4         2.6759e+01      3.12e-03       1.52e+00       9.76e+00    
       4              5         2.6759e+01      9.35e-05       3.78e-01       8.82e+00    
       5              6         2.6759e+01      1.91e-06       5.07e-02       9.47e+00    
       6             10         2.6759e+01      1.91e-06       2.14e-04       1.16e+01    
       7             13         2.6759e+01      0.00e+00       0.00e+00       1.16e+01    
`xtol` termination condition is satisfied.
Function evaluations 13, initial cost 6.7433e+01, final cost 2.6759e+01, first-order optimality 1.16e+01.
Refined K:
 [[718.80774   0.    

### Calculate reprojection error

In [8]:
mean_error = 0
img_list = get_img_paths(img_path)
    
for i in range(len(img_list)):
    imgpoints2, _ = cv2.projectPoints(world_coordinates, rvecs[i], tvecs[i], K_opt, dist_opt)
    error = cv2.norm(img_points[i].reshape((n_corners,1,2)), imgpoints2, cv2.NORM_L2) / len(imgpoints2)
    mean_error += error

print("Mean reprojection error:\n", mean_error/len(img_list))

Mean reprojection error:
 0.02103548549742627


### Undistorting the first image in the dataset

In [10]:
img_rgb = cv2.imread('../data/imgs/leftcamera/Im_L_1.png')
img_undist = cv2.undistort(img_rgb, K_opt, dist_opt, None)
cv2.imshow('undistorted (window will close in 5 seconds)', img_undist)
cv2.waitKey(5000)
cv2.destroyAllWindows()
cv2.imwrite('output/0.png', img_undist)

print('\n\nUndistorted image is in output/0.png')



Undistorted image is in output/0.png


### Real camera parameters

In [11]:
npz = np.load("../data/out/parameters.npz")
print(f"keys: {npz.files}")
for k in npz.files:
    a = npz[k]
    print(f"\n--- {k} ---\nshape: {a.shape}\ndtype: {a.dtype}\nvalues:\n{a}")

keys: ['Transformation', 'Essential', 'Fundamental', 'MeanError', 'SquareSize', 'BoardSize', 'Objpoints', 'L_Intrinsic', 'L_Distortion', 'L_DistortionROI', 'L_DistortionIntrinsic', 'L_RotVektor', 'L_RotMatrix', 'L_Extrinsics', 'L_TransVektor', 'L_Errors', 'L_MeanError', 'R_Intrinsic', 'R_Distortion', 'R_DistortionROI', 'R_DistortionIntrinsic', 'R_RotVektor', 'R_RotMatrix', 'R_Extrinsics', 'R_TransVektor', 'R_Errors', 'R_MeanError', 'L_Imgpoints', 'R_Imgpoints']

--- Transformation ---
shape: (4, 4)
dtype: float64
values:
[[ 9.99371810e-01  7.63663948e-03 -3.46073196e-02  1.89736261e+02]
 [-8.56808784e-03  9.99602837e-01 -2.68468918e-02 -4.52649875e+00]
 [ 3.43885548e-02  2.71265454e-02  9.99040328e-01  4.79116498e+00]
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00  1.00000000e+00]]

--- Essential ---
shape: (3, 3)
dtype: float64
values:
[[-1.14608628e-01 -4.91205039e+00 -4.39352691e+00]
 [-1.73660059e+00 -5.11030089e+00 -1.89719986e+02]
 [ 2.89797830e+00  1.89695472e+02 -5.25047885e