In [1]:
import plotly
import plotly.graph_objects as go
import numpy as np

Our object will be a wireframe cube.

In [2]:
def get_cube(R: np.ndarray = None, t: np.ndarray = None) -> tuple[list, list]:
    """Build the vertices and edges of a wireframe unit cube.

    The base cube has side length 1 and is centered at the origin. When ``R``
    is given, each vertex ``v`` is replaced by ``R @ v``; when ``t`` is given,
    it is then added component-wise to each (possibly rotated) vertex.

    Args:
        R: Optional matrix applied to every vertex (expected to be 3x3).
        t: Optional offset; its first three components are added to x, y, z.

    Returns:
        ``(vertices, edges)``: a list of eight ``(x, y, z)`` tuples and a list
        of twelve ``(i, j)`` index pairs into ``vertices``.
    """
    h = 0.5
    # Ring of (x, y) corners, reused for the z = -0.5 and z = +0.5 faces
    ring = [(-h, -h), (h, -h), (h, h), (-h, h)]
    vertices = [(x, y, z) for z in (-h, h) for (x, y) in ring]

    # Connectivity (this doesn't change with transformations of the cube)
    front_face = [(i, (i + 1) % 4) for i in range(4)]
    back_face = [(4 + i, 4 + (i + 1) % 4) for i in range(4)]
    connecting = [(i, i + 4) for i in range(4)]
    edges = front_face + back_face + connecting

    # Rotation first ...
    if R is not None:
        rotated = [R @ np.array(corner) for corner in vertices]
        vertices = [(float(p[0]), float(p[1]), float(p[2])) for p in rotated]

    # ... then translation
    if t is not None:
        vertices = [
            (t[0] + corner[0], t[1] + corner[1], t[2] + corner[2])
            for corner in vertices
        ]

    return vertices, edges

def plot_cube(vertices: list, edges: list, fig: go.Figure):
    """Add a wireframe cube to ``fig``: vertex markers plus one line per edge.

    Args:
        vertices: Sequence of ``(x, y, z)`` points.
        edges: Sequence of ``(i, j)`` index pairs into ``vertices``.
        fig: Figure that receives the traces; it is modified in place.
    """
    # Split the vertex list into per-axis coordinate lists
    xs, ys, zs = ([point[axis] for point in vertices] for axis in range(3))

    corner_markers = go.Scatter3d(
        x=xs,
        y=ys,
        z=zs,
        mode='markers',
        marker=dict(color='red', size=5),
        showlegend=False,
    )
    fig.add_trace(corner_markers)

    # Each edge becomes its own two-point line trace
    for start_idx, end_idx in edges:
        start, end = vertices[start_idx], vertices[end_idx]
        segment = go.Scatter3d(
            x=[start[0], end[0]],
            y=[start[1], end[1]],
            z=[start[2], end[2]],
            mode='lines',
            line=dict(color='black'),
            showlegend=False,
        )
        fig.add_trace(segment)

def _get_image_plane():
    vertices = [
        (-1, -1, 1),
        (1, -1, 1),
        (1, 1, 1),
        (-1, 1, 1)
    ]
    edges = [(0,1), (1,2), (2,3), (3,0)]
    return vertices, edges

def draw_image_plane(fig: go.Figure):
    """Draw the outline returned by ``_get_image_plane`` into ``fig``.

    One thin black line trace is added per outline edge; ``fig`` is modified
    in place and nothing is returned.
    """
    corners, outline = _get_image_plane()
    for start_idx, end_idx in outline:
        start, end = corners[start_idx], corners[end_idx]
        border = go.Scatter3d(
            x=[start[0], end[0]],
            y=[start[1], end[1]],
            z=[start[2], end[2]],
            mode='lines',
            line=dict(color='black', width=0.5),
            showlegend=False,
        )
        fig.add_trace(border)



In [37]:
# Rotation by 45 degrees about the y-axis
theta = np.deg2rad(45)
cos_t, sin_t = np.cos(theta), np.sin(theta)
R_y = np.array([
    [cos_t, 0, sin_t],
    [0, 1, 0],
    [-sin_t, 0, cos_t],
])

# Rotation by 25 degrees about the x-axis
theta = np.deg2rad(25)
cos_t, sin_t = np.cos(theta), np.sin(theta)
R_x = np.array([
    [1, 0, 0],
    [0, cos_t, -sin_t],
    [0, sin_t, cos_t],
])

# Rotate the cube (x-rotation acts first, then y-rotation), then shift it by +3 in z
cube_rotation = R_y @ R_x
vertices, edges = get_cube(R=cube_rotation, t=[0, 0, 3])

In [38]:
fig = go.Figure()

# Scene contents: the cube, the image-plane outline, and the camera center
plot_cube(vertices=vertices, edges=edges, fig=fig)
draw_image_plane(fig)
camera_center = go.Scatter3d(
    x=[0], y=[0], z=[0],
    mode='markers',
    marker=dict(size=5, color='black'),
    showlegend=False,
)
fig.add_trace(camera_center)

# Viewpoint of the plot itself (not the modeled camera)
scene_camera = dict(
    eye=dict(x=1, y=-1, z=-1),
    center=dict(x=0, y=0, z=0),
    up=dict(x=0, y=-1, z=0),
)
scene = dict(
    aspectmode='manual',
    aspectratio=dict(x=1, y=1, z=1),  # equal scaling for all axes
    xaxis=dict(range=[-5, 5]),
    yaxis=dict(range=[-5, 5]),
    zaxis=dict(range=[-2, 8]),
    camera=scene_camera,
)
fig.update_layout(scene=scene)

fig.show()

In [39]:
# Trivial extrinsics: identity rotation and zero translation
R, t = np.eye(3), np.zeros(3)

def do_projection_onto_norm_img_plane(
        points: list,
        extrinsics: tuple[np.ndarray, np.ndarray]
    ) -> np.ndarray:
    """Perform the rigid body transformation and projection steps.

    Every world point ``x_w`` is mapped to camera coordinates as
    ``x_c = R @ x_w + t`` and then divided by its own depth ``x_c[2]``, which
    places it on the normalized image plane (third component equal to 1).
    Homogeneous coordinates are not used for the rigid-body step.

    Args:
        points: Sequence of 3D points ``(x, y, z)`` in world coordinates.
            May be empty; a single point is also accepted.
        extrinsics: ``(R, t)`` pair holding the 3x3 matrix and the length-3
            offset of the world-to-camera transformation.

    Returns:
        Float array of shape ``(N, 3)`` with one projected point per row;
        shape ``(0, 3)`` when ``points`` is empty.

    Note:
        Points whose camera-frame depth is zero are not rejected: the
        division then yields ``inf``/``nan`` entries (numpy emits a
        RuntimeWarning).
    """
    R, t = extrinsics
    R = np.asarray(R, dtype=float)
    t = np.asarray(t, dtype=float)

    # Object points in world coordinates, one per row
    x_w = np.asarray(points, dtype=float)
    if x_w.size == 0:
        # np.stack([]) used to raise here; an empty input now yields an empty result
        return np.empty((0, 3))
    x_w = np.atleast_2d(x_w)

    # Map to camera coordinates: row i equals R @ x_w[i] + t
    x_c = x_w @ R.T + t

    # Project onto the normalized image plane by dividing each row by its depth
    return x_c / x_c[:, 2:3]

In [41]:
# Extrinsics are trivial in this case
R, t = np.eye(3), np.zeros(3)

# Project the cube vertices onto the normalized image plane
nimp = do_projection_onto_norm_img_plane(vertices, extrinsics=(R, t))

fig = go.Figure()

# Projected vertices
projected_points = go.Scatter(
    x=list(nimp[:, 0]),
    y=list(nimp[:, 1]),
    mode='markers',
    marker=dict(color='red'),
    showlegend=False,
)
fig.add_trace(projected_points)

# Projected edges, one two-point line trace each
for start_idx, end_idx in edges:
    start, end = nimp[start_idx, :], nimp[end_idx, :]
    projected_edge = go.Scatter(
        x=[start[0], end[0]],
        y=[start[1], end[1]],
        mode='lines',
        line=dict(color='black', width=0.5),
        showlegend=False,
    )
    fig.add_trace(projected_edge)

# Flip the y-axis so that y increases downward
fig.update_yaxes(autorange='reversed')

fig.show()

In [42]:
fig = go.Figure()

# 3D scene: the object points, the camera center at the origin, the image plane
plot_cube(vertices=vertices, edges=edges, fig=fig)
camera_center = go.Scatter3d(
    x=[0], y=[0], z=[0],
    mode='markers',
    marker=dict(size=8, color='black'),
    showlegend=False,
)
fig.add_trace(camera_center)
draw_image_plane(fig)

# Projected vertices, drawn in the image plane
projected_points = go.Scatter3d(
    x=list(nimp[:, 0]),
    y=list(nimp[:, 1]),
    z=list(nimp[:, 2]),
    mode='markers',
    marker=dict(color='red', size=1),
    showlegend=False,
)
fig.add_trace(projected_points)

# Projected edges, drawn at z = 1
for start_idx, end_idx in edges:
    start, end = nimp[start_idx, :], nimp[end_idx, :]
    projected_edge = go.Scatter3d(
        x=[start[0], end[0]],
        y=[start[1], end[1]],
        z=[1, 1],
        mode='lines',
        line=dict(color='black', width=0.5),
        showlegend=False,
    )
    fig.add_trace(projected_edge)

# Viewpoint of the plot itself (not the modeled camera)
scene_camera = dict(
    eye=dict(x=0.25, y=-0.25, z=-0.55),
    center=dict(x=0, y=0, z=0),
    up=dict(x=0, y=-1, z=0),
)
scene = dict(
    aspectmode='manual',
    aspectratio=dict(x=1, y=1, z=1),  # equal scaling for all axes
    xaxis=dict(range=[-5, 5]),
    yaxis=dict(range=[-5, 5]),
    zaxis=dict(range=[-2, 8]),
    camera=scene_camera,
)
fig.update_layout(scene=scene)


def draw_coordinate_frame(fig: go.Figure, length: float = 1.0):
    """Draw x/y/z axis arrows starting at the origin into ``fig``.

    For each axis, a colored line (x red, y green, z blue) runs from the
    origin to ``length`` along that axis, followed by a single-color cone
    placed at the line's end whose direction vector equals that endpoint.

    Args:
        fig: Figure that receives the traces; it is modified in place.
        length: Scale factor applied to each unit axis direction.
    """
    origin = np.array([0, 0, 0])
    frame = (
        ('x', [1, 0, 0], 'red'),
        ('y', [0, 1, 0], 'green'),
        ('z', [0, 0, 1], 'blue'),
    )

    for label, direction, color in frame:
        tip = length * np.array(direction)
        trace_name = f"{label}-axis"  # for hover

        shaft = go.Scatter3d(
            x=[origin[0], tip[0]],
            y=[origin[1], tip[1]],
            z=[origin[2], tip[2]],
            mode='lines',
            line=dict(width=4, color=color),
            showlegend=False,
            name=trace_name,
        )
        fig.add_trace(shaft)

        # A two-stop colorscale with identical stops gives a uniformly colored cone
        head = go.Cone(
            x=[tip[0]], y=[tip[1]], z=[tip[2]],
            u=[tip[0]], v=[tip[1]], w=[tip[2]],
            colorscale=[[0, color], [1, color]],
            showscale=False,
            showlegend=False,
            name=trace_name,
            sizemode='absolute',
            sizeref=0.1,
        )
        fig.add_trace(head)

# Overlay the coordinate axes at half-unit length, then render the figure
draw_coordinate_frame(fig, length=0.5)
fig.show()

In [43]:
fig = go.Figure()

# 3D scene: the object points, the camera center at the origin, the image plane
plot_cube(vertices=vertices, edges=edges, fig=fig)
camera_center = go.Scatter3d(
    x=[0], y=[0], z=[0],
    mode='markers',
    marker=dict(size=5, color='black'),
    showlegend=False,
)
fig.add_trace(camera_center)
draw_image_plane(fig)

# Projected vertices, drawn in the image plane
projected_points = go.Scatter3d(
    x=list(nimp[:, 0]),
    y=list(nimp[:, 1]),
    z=list(nimp[:, 2]),
    mode='markers',
    marker=dict(color='red', size=1),
    showlegend=False,
)
fig.add_trace(projected_points)

# Projected edges, drawn at z = 1
for start_idx, end_idx in edges:
    start, end = nimp[start_idx, :], nimp[end_idx, :]
    projected_edge = go.Scatter3d(
        x=[start[0], end[0]],
        y=[start[1], end[1]],
        z=[1, 1],
        mode='lines',
        line=dict(color='black', width=0.5),
        showlegend=False,
    )
    fig.add_trace(projected_edge)

# Viewpoint of the plot itself: eye on the negative z side, looking toward +z
scene_camera = dict(
    eye=dict(x=0, y=0, z=-0.5),
    center=dict(x=0, y=0, z=1),
    up=dict(x=0, y=-1, z=0),
)
scene = dict(
    aspectmode='manual',
    aspectratio=dict(x=1, y=1, z=1),  # equal scaling for all axes
    xaxis=dict(range=[-5, 5]),
    yaxis=dict(range=[-5, 5]),
    zaxis=dict(range=[-2, 8]),
    camera=scene_camera,
)
fig.update_layout(scene=scene)

draw_coordinate_frame(fig, length=0.5)

fig.show()

### Quick Summary

Where are we at now?

We have projected points from a 3D object onto the 2D normalized image plane. This image plane is a canonical, scale-independent coordinate system.

In our camera model, the next step is to transform these normalized coordinates into pixel coordinates on the actual imaging sensor, by applying the intrinsic matrix $K$.

This matrix $K$ incorporates the focal length in pixel units (for scaling); the principal point offset; and any skew.

Think: $K$ connects the geometry of the projected points to real pixel measurements.

Next: play around with different forms of $K$ and see what happens to the pixel coordinates.

## The Intrinsic Camera Matrix $K$

At this point, we have taken our 3D object points --- labeled $X$ --- and projected them onto the normalized image plane, at coordinates $x=\left(x_n, y_n, 1\right) \in \mathbb{R}^3$.

This is not the end of the camera model, though. The camera outputs values at specific pixel locations, and the camera's intrinsic matrix $K$ defines how points in the normalized image plane are converted into pixel coordinates. This can involve stretching in the $x$ and $y$ directions, and shifting.

This transformation is linear:
$$ \tilde{x}_p = K \tilde{x} $$
where $\tilde{x}_p$ is the pixel location at which the normalized image plane point $\tilde{x}$ appears (both are homogeneous vectors here).

The matrix $K$ is often taken to have the following simple form:
$$ \left[ \begin{array}{c} u \\ v\\ 1\end{array} \right] = \left[ \begin{array}{ccc} f_x & 0 & c_x \\ 0 & f_y & c_y \\ 0 & 0 & 1 \end{array} \right]\left[ \begin{array}{c} x \\ y \\ 1 \end{array} \right] $$
so that $$ \begin{aligned} u & = f_x x + c_x \\ v & = f_y y + c_y\end{aligned} $$ are the pixel space coordinates.

Interpretation:
- $f_x$ and $f_y$ convert distance on the (normalized) image plane to units of pixels.
- $c_x$ and $c_y$ translate the origin of the (normalized) image plane to the principal point, which is typically near the image center.

Note that there are two different origins that we discuss here when moving to the pixel coordinate system.
1. The top-left corner of the image has coordinates $\left(0,0\right)$.
2. The principal point/optical center is a point --- in pixel coordinates --- where the optical axis intersects the sensor. It is typically located at the center of the image, near $\left(\frac{W}{2}, \frac{H}{2}\right)$. It may be slightly off due to manufacturing tolerances.

So what $c_x$ and $c_y$ do is shift the optical axis (the normalized image plane's origin) into pixel coordinates. This is just taking $\left(0,0\right)$ in the normalized image plane and mapping it to the center of the image.

In [46]:
# Dimensions of my webcam images, for this demo
HEIGHT, WIDTH = (720, 1280)
c_x, c_y = WIDTH/2, HEIGHT/2

# Two intrinsic matrices sharing a principal point:
# K1 has equal focal lengths, K2 has different ones in x and y
K1 = np.array([
    [500, 0, c_x],
    [0, 500, c_y],
    [0, 0, 1],
])
K2 = np.array([
    [300, 0, c_x],
    [0, 600, c_y],
    [0, 0, 1],
])

# Map the normalized image points (one per row) to pixel coordinates
pix_coords1 = (K1 @ nimp.T).T
pix_coords2 = (K2 @ nimp.T).T

# Build and show one figure per intrinsic matrix, in the order K1, K2
pixel_figs = []
for pix in (pix_coords1, pix_coords2):
    pixel_fig = go.Figure()
    pixel_fig.add_trace(
        go.Scatter(
            x=list(pix[:, 0]),
            y=list(pix[:, 1]),
            mode='markers',
            marker=dict(color='red'),
            showlegend=False,
        )
    )
    for start_idx, end_idx in edges:
        start, end = pix[start_idx, :], pix[end_idx, :]
        pixel_fig.add_trace(
            go.Scatter(
                x=[start[0], end[0]],
                y=[start[1], end[1]],
                mode='lines',
                line=dict(color='black', width=0.5),
                showlegend=False,
            )
        )
    pixel_fig.update_layout(width=WIDTH/2, height=HEIGHT/2)
    pixel_fig.update_xaxes(range=[0, WIDTH])
    # Reversed range: y increases downward, so (0, 0) sits at the top-left
    pixel_fig.update_yaxes(range=[HEIGHT, 0])
    pixel_fig.show()
    pixel_figs.append(pixel_fig)

fig1, fig2 = pixel_figs

### Future: Distortion

Real lenses often introduce some distortion to images.

A distortion model can be "plugged in" to our camera model after we get our normalized image coordinates, and before applying the intrinsic matrix $K$.

In [74]:
def distort_nimps(x, y, k1, k2, p1, p2, k3=0):
    """Apply radial and tangential lens distortion to normalized image points.

    With ``r2 = x**2 + y**2`` the model is::

        radial = 1 + k1*r2 + k2*r2**2 + k3*r2**3
        x_dist = x*radial + 2*p1*x*y + p2*(r2 + 2*x**2)
        y_dist = y*radial + p1*(r2 + 2*y**2) + 2*p2*x*y

    Args:
        x: x-coordinates on the normalized image plane (array or array-like).
        y: y-coordinates on the normalized image plane, same shape as ``x``.
        k1: Radial coefficient multiplying ``r2``.
        k2: Radial coefficient multiplying ``r2**2``.
        p1: First tangential coefficient.
        p2: Second tangential coefficient.
        k3: Radial coefficient multiplying ``r2**3`` (defaults to 0).

    Returns:
        For 1-D inputs of length N, an array of shape ``(N, 3)`` whose rows
        are ``(x_dist, y_dist, 1)``.
    """
    x = np.asarray(x)
    y = np.asarray(y)

    r2 = x**2 + y**2
    radial_dist = 1 + k1 * r2 + k2 * r2**2 + k3 * r2**3
    # The p2 term is a sum (r2 + 2*x**2), mirroring the p1 term of y_dist;
    # it was previously written as a product, which gave wrong x offsets.
    x_dist = x * radial_dist + 2*p1*x*y + p2 * (r2 + 2*x**2)
    y_dist = y * radial_dist + p1 * (r2 + 2*y**2) + 2*p2*x*y
    return np.stack([x_dist, y_dist, np.ones_like(x_dist)]).T

In [75]:
# Dimensions of my webcam images, for this demo
HEIGHT, WIDTH = (720, 1280)
c_x, c_y = WIDTH/2, HEIGHT/2

K = np.array([
    [500, 0, c_x],
    [0, 500, c_y],
    [0, 0, 1],
])
pix_coords = (K @ nimp.T).T

# Radial distortion only: both tangential coefficients are zero
nimp_dist = distort_nimps(nimp[:,0], nimp[:,1], k1=3.8, k2=1.8, p1=0, p2=0)
pix_coords_dist = (K @ nimp_dist.T).T

fig1 = go.Figure()

# (pixel coordinates, marker color, edge width) for each overlay:
# the faint undistorted projection first, then the distorted one on top
overlays = [
    (pix_coords, 'rgba(255, 0, 0, 0.3)', 0.2),
    (pix_coords_dist, 'red', 0.5),
]
for coords, marker_color, edge_width in overlays:
    fig1.add_trace(
        go.Scatter(
            x=list(coords[:, 0]),
            y=list(coords[:, 1]),
            mode='markers',
            marker=dict(color=marker_color),
            showlegend=False,
        )
    )
    for start_idx, end_idx in edges:
        start, end = coords[start_idx, :], coords[end_idx, :]
        fig1.add_trace(
            go.Scatter(
                x=[start[0], end[0]],
                y=[start[1], end[1]],
                mode='lines',
                line=dict(color='black', width=edge_width),
                showlegend=False,
            )
        )

fig1.update_layout(width=WIDTH/2, height=HEIGHT/2)
fig1.update_xaxes(range=[0, WIDTH])
# Reversed range: y increases downward, so (0, 0) sits at the top-left
fig1.update_yaxes(range=[HEIGHT, 0])

fig1.show()

In [79]:
# Dimensions of my webcam images, for this demo
HEIGHT, WIDTH = (720, 1280)
c_x, c_y = WIDTH/2, HEIGHT/2

K = np.array([
    [500, 0, c_x],
    [0, 500, c_y],
    [0, 0, 1],
])
pix_coords = (K @ nimp.T).T

# Tangential distortion only: both radial coefficients are zero
nimp_dist = distort_nimps(nimp[:,0], nimp[:,1], k1=0, k2=0, p1=0.2, p2=.4)
pix_coords_dist = (K @ nimp_dist.T).T

fig1 = go.Figure()

# (pixel coordinates, marker color, edge width) for each overlay:
# the faint undistorted projection first, then the distorted one on top
overlays = [
    (pix_coords, 'rgba(255, 0, 0, 0.3)', 0.2),
    (pix_coords_dist, 'red', 0.5),
]
for coords, marker_color, edge_width in overlays:
    fig1.add_trace(
        go.Scatter(
            x=list(coords[:, 0]),
            y=list(coords[:, 1]),
            mode='markers',
            marker=dict(color=marker_color),
            showlegend=False,
        )
    )
    for start_idx, end_idx in edges:
        start, end = coords[start_idx, :], coords[end_idx, :]
        fig1.add_trace(
            go.Scatter(
                x=[start[0], end[0]],
                y=[start[1], end[1]],
                mode='lines',
                line=dict(color='black', width=edge_width),
                showlegend=False,
            )
        )

fig1.update_layout(width=WIDTH/2, height=HEIGHT/2)
fig1.update_xaxes(range=[0, WIDTH])
# Reversed range: y increases downward, so (0, 0) sits at the top-left
fig1.update_yaxes(range=[HEIGHT, 0])

fig1.show()