# Image Formation
In this notebook we are going to cover the theory behind the pinhole camera model, the perspective projection equation and the distortion parameters.

In [10]:
%matplotlib widget

from ipywidgets import interact, FloatSlider, Checkbox
import ipywidgets as widgets

import numpy as np

import matplotlib.pyplot as plt
import matplotlib as mpl

from matplotlib.patches import Ellipse, Polygon, Arc
import matplotlib.lines as lines
import matplotlib.patches as mpatches
from matplotlib.collections import PolyCollection

## Thin Lens Equation

In [2]:
class LensSchema:
    """
    Base class for the 2D lens/camera sketches used in this notebook.

    It creates a matplotlib figure with one axes (horizontal axis labelled
    'z', vertical axis labelled 'x'), draws a translucent ellipse at the
    origin and owns a single line artist (`self.house`) that is used to draw
    a house-shaped outline.
    """

    def __init__(self, title):
        """
        Create the figure, the axes and the static artists.

        Parameters
        ----------
        title : str
            Title shown above the axes.
        """
        # Value ranges; the interactive cells use them as slider limits.
        self.max_A = 10
        self.max_Z = 50
        self.max_f = 5.0
        self.min_A = 5
        self.min_Z = 10
        self.min_f = 1.0

        # Thin lens equation e = Z*f/(Z-f) evaluated at (min_Z, max_f);
        # used as the extent of the plot on the negative z side.
        self.neg_x_lim = self.min_Z*self.max_f/(self.min_Z-self.max_f)

        self.fig = plt.figure(figsize=(9.5,3.5))
        # NOTE(review): called before any axes exist, so it presumably has no
        # visible effect here - kept in place to leave the rendered layout unchanged.
        self.fig.tight_layout()
        # Hide the toolbar, header and footer of the figure canvas.
        self.fig.canvas.toolbar_visible = False
        self.fig.canvas.header_visible = False
        self.fig.canvas.footer_visible = False

        self.ax = self.fig.gca()
        self.ax.set_title(title)
        self.ax.set_xlim([-self.neg_x_lim, self.max_Z])
        self.ax.set_ylim([-self.max_A, self.max_A])
        self.ax.set_aspect('equal')
        # Coordinate system is a bit weird since z should point to the right: (z,x)
        self.ax.set_xlabel('z')
        self.ax.set_ylabel('x')

        self.ax.grid(True)

        # Translucent ellipse at the origin, spanning the full axes height.
        self.ax.add_artist(Ellipse((0, 0), 1, 2*self.max_A, color='b', alpha=0.2))

        # Starts empty; filled by draw_house().
        self.house, = self.ax.plot([], [], color='r')

    def draw_house(self, Z, A):
        """
        Draw the house outline as one continuous polyline.

        Parameters
        ----------
        Z : float
            Horizontal position of the centre of the house (and of its tip).
        A : float
            Height of the tip. The walls are placed at Z -/+ A/3 and reach
            up to A*2/3.
        """
        left = Z-A/3
        right = Z+A/3
        eave = A*2/3
        self.house.set_data([left, right, left, right, Z, left, left, right, right],
                            [0, 0, eave, eave, A, eave, 0, eave, 0])

    def clear_house(self, index=None):
        """
        Remove the house outline from the plot.

        Parameters
        ----------
        index : optional
            Ignored. The class only owns a single house artist (`self.house`);
            the parameter is kept so that calls passing an index still work.
        """
        # The previous implementation accessed `self.houses[index]`, an
        # attribute that is never created, and therefore always raised.
        self.house.set_data([], [])

class ThinLensSchema(LensSchema):
    """
    This class is used throughout this notebook for visualizing formulas for the thin lens.

    On top of the base sketch it owns artists for the two characteristic rays,
    the focal point, the aperture, the bundle of rays passing through the
    aperture, the sensor and the outlines used in the derivations.
    """

    def __init__(self, title="Thin Lens Equation"):
        """
        Create all artists (initially empty) on top of the base sketch.

        Parameters
        ----------
        title : str
            Title shown above the axes.
        """
        super().__init__(title)

        # NOTE: the attribute names are swapped with respect to what
        # draw_characteristic_rays() puts into them: `..._center` receives the
        # ray that enters parallel to the axis and leaves through the focal
        # point, `..._focus` receives the ray through the lens centre. The
        # names are kept because they are part of the public interface.
        self.characteristic_ray_center, = self.ax.plot([], [], color='g')
        self.characteristic_ray_focus, = self.ax.plot([], [], color='g')

        self.focal_length = self.max_f
        self.sensor_distance = self.focal_length

        self.focal_point, = self.ax.plot([], [], 'ko')
        self.aperture_top, = self.ax.plot([], [], color='k', linewidth=3)
        self.aperture_bot, = self.ax.plot([], [], color='k', linewidth=3)
        self.ray_bundle = self.ax.add_collection(PolyCollection([], closed=True, alpha=0.2, color='y'))
        self.sensor, = self.ax.plot([], [], color='k', linewidth=3)

        self.thin_lens_deriv_1, = self.ax.plot([], [], color='y', linestyle='dashed', linewidth=2)
        self.thin_lens_deriv_2, = self.ax.plot([], [], color='m', linestyle='dotted', linewidth=2)
        self.blur_circle_deriv, = self.ax.plot([], [], color='m', linestyle='dotted', linewidth=2)

    def _image_point(self, Z, A):
        """
        Return (e_true, a_true) for an object point at distance Z and height A.

        e_true is the distance given by the thin lens equation for the current
        focal length, a_true is the vertical position at that distance
        (negative for positive A, i.e. flipped).
        """
        f = self.focal_length
        e_true = Z*f/(Z-f) # true focused distance = horizontal position of focus point
        a_true = -A/Z*e_true # vertical position of focus point
        return e_true, a_true

    def set_sensor_distance(self, e):
        """Place the sensor at z = -e and remember e as the sensor distance."""
        # The sensor is always drawn from x = -5 to x = 5.
        self.sensor.set_data([-e, -e], [-5, 5])
        self.sensor_distance = e

    def set_focal_length(self, f):
        """Draw the focal point at z = -f and remember f as the focal length."""
        self.focal_point.set_data([-f],[0])
        self.focal_length = f

    def focus(self, distance):
        """Move the sensor so that objects at `distance` are in focus."""
        Z = distance
        f = self.focal_length
        self.set_sensor_distance(Z*f/(Z-f)) # Thin lens equation

    def draw_characteristic_rays(self, Z, A):
        """
        Draw the two characteristic rays that start at the object point (Z, A).

        Both rays are drawn up to the left border of the plot (z = -neg_x_lim).
        """
        f = self.focal_length
        # Parallel to the axis up to the lens, then through the focal point (-f, 0).
        self.characteristic_ray_center.set_data([Z, 0, -self.neg_x_lim], [A, A, A-self.neg_x_lim*A/f])
        # Straight line through the lens centre (0, 0).
        self.characteristic_ray_focus.set_data([Z, -self.neg_x_lim], [A, A-(Z+self.neg_x_lim)*A/Z])

    def clear_characteristic_rays(self):
        """Remove both characteristic rays from the plot."""
        self.characteristic_ray_center.set_data([], [])
        self.characteristic_ray_focus.set_data([], [])

    def draw_ray_bundle(self, Z, A, L):
        """
        Draw the aperture of diameter L and the bundle of rays that leave the
        object point (Z, A), pass through the aperture and reach the sensor.
        """
        self.aperture_top.set_data([0, 0], [L/2, self.max_A])
        self.aperture_bot.set_data([0, 0], [-L/2, -self.max_A])
        e_true, a_true = self._image_point(Z, A)
        e = self.sensor_distance

        # Where the rays through the upper/lower aperture edge hit the sensor.
        upper_hit = L/2-e*(L/2-a_true)/e_true
        lower_hit = -L/2-e*(-L/2-a_true)/e_true
        self.ray_bundle.set_verts(np.array([[[Z, A], [0, L/2], [-e, upper_hit],
                                             [-e, lower_hit],[0, -L/2], [Z, A]]]))

    def draw_blur_circle_derivation(self, Z, A, L):
        """
        Outline the similar triangles used to derive the blur circle radius for
        the object point (Z, A) and aperture diameter L.
        """
        e_true, a_true = self._image_point(Z, A)
        e = self.sensor_distance

        # Sensor hits of the ray through the upper aperture edge and of the
        # ray through the lens centre.
        upper_hit = L/2-e*(L/2-a_true)/e_true
        center_hit = -e*A/Z

        if e_true < e:
            # Double triangle
            self.blur_circle_deriv.set_data([0, -e, -e, 0, 0],
                                            [L/2, upper_hit, center_hit, 0, L/2])
        else:
            # Single triangle
            self.blur_circle_deriv.set_data([-e, -e, -e_true, 0, 0, -e],
                                            [center_hit, upper_hit, a_true, 0, L/2, upper_hit])

    def draw_thin_lens_equation_derivation(self, Z, A):
        """
        Focus on distance Z, draw the characteristic rays of the point (Z, A)
        and outline the two pairs of similar triangles used in the derivation
        (yellow dashed and magenta dotted).
        """
        self.focus(Z)
        self.draw_characteristic_rays(Z, A)
        e = self.sensor_distance
        self.thin_lens_deriv_1.set_data([Z, Z, -e, -e, Z], [A, 0, 0, -e*A/Z, A])
        self.thin_lens_deriv_2.set_data([0, 0, -e, -e, 0], [A, 0, 0, -e*A/Z, A])

### Thin Lens
A thin lens is characterized by two properties:
 * Rays that enter parallel to the axis on one side are deflected so that they pass through the focus point on the other side (and vice versa)
 * Rays that pass through the center of the lens are not deflected

An object point is in focus if these two rays intersect in the image plane, i.e. at the sensor.

In [3]:
thin_lens_schema_1 = ThinLensSchema("Thin Lens Rays")


def plot_thin_lens_schema_1(A, Z, f, e):
    """Redraw the thin lens sketch for the current slider values."""
    schema = thin_lens_schema_1
    schema.set_focal_length(f)
    schema.set_sensor_distance(e)
    schema.draw_house(Z, A)
    schema.draw_characteristic_rays(Z, A)


# Description styling; the later widget cells reuse this name.
style = {'description_width': '50%'}
interact(
    plot_thin_lens_schema_1,
    A=FloatSlider(5,
                  min=thin_lens_schema_1.min_A, max=thin_lens_schema_1.max_A,
                  description='Object height, A:',
                  layout={'width': '50%'}, style=style),
    Z=FloatSlider(10,
                  min=thin_lens_schema_1.min_Z, max=thin_lens_schema_1.max_Z,
                  description='Object distance, Z:',
                  layout={'width': '50%'}, style=style),
    f=FloatSlider(5,
                  min=thin_lens_schema_1.min_f, max=thin_lens_schema_1.max_f,
                  description='Focal length, f:',
                  layout={'width': '50%'}, style=style),
    e=FloatSlider(5,
                  min=2*thin_lens_schema_1.min_f, max=2*thin_lens_schema_1.max_f,
                  description='Sensor to optical center, e:',
                  layout={'width': '50%'}, style=style),
);

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

interactive(children=(FloatSlider(value=5.0, description='Object height, A:', layout=Layout(width='50%'), max=…

### Derivation of the Thin Lens Equation
The thin lens equation relates f, Z, and e so that points at distance Z are in focus. We denote the object height by A and the position of the projection by a. By using similar triangles (see figure below) we get:
 * Yellow dashed triangles: $ \frac{a}{A}=\frac{e}{Z}$
 * Magenta dotted triangles: $ \frac{a}{A}=\frac{e-f}{f}=\frac{e}{f}-1$ 
 
Combining both equations yields the thin lens equation
$$ \frac{1}{f}=\frac{1}{Z}+\frac{1}{e} $$

In [4]:
# Nominal scene; the aperture and blur circle cells below reuse these values.
nominal_Z = 15
nominal_f = 5
nominal_A = 7.5

thin_lens_derivation_schema = ThinLensSchema("Thin Lens Equation Derivation")
thin_lens_derivation_schema.draw_house(nominal_Z, nominal_A)
# The focal length has to be set before the derivation is drawn, since
# drawing it focuses the sensor using the current focal length.
thin_lens_derivation_schema.set_focal_length(nominal_f)
thin_lens_derivation_schema.draw_thin_lens_equation_derivation(nominal_Z, nominal_A)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …


## Aperture & Blur Circle
The aperture controls how much light can enter the camera and also controls the depth of field, i.e. the range around the focused distance that is sharp. In the following figure the tip of the house is in focus (as above) but we are interested in the upper left corner of the house. The green lines are the rays through the lens center and the focus point. We see that they intersect in front of the sensor, which means that the corner is not in focus. All other rays will also pass through this (virtual) intersection. The yellow region is the collection of all rays that pass through the aperture. One can see how changing the aperture influences the size of the projection of the corner (i.e. the blur circle).

In [5]:
aperture_schema = ThinLensSchema("Effect of Aperture")


def plot_aperture_schema(L, show_rays):
    """Redraw the aperture sketch for aperture diameter L; optionally show the rays."""
    # Upper left corner of the house: on the wall at Z - A/3, at two thirds of the height.
    corner_Z = nominal_Z-nominal_A/3
    corner_A = 2/3*nominal_A

    aperture_schema.set_focal_length(nominal_f)
    aperture_schema.focus(nominal_Z)
    aperture_schema.draw_house(nominal_Z, nominal_A)
    if not show_rays:
        aperture_schema.clear_characteristic_rays()
    else:
        aperture_schema.draw_characteristic_rays(corner_Z, corner_A)
    aperture_schema.draw_ray_bundle(corner_Z, corner_A, L)


interact(
    plot_aperture_schema,
    L=FloatSlider(5, min=0, max=20,
                  description='Aperture diameter, L:',
                  layout={'width': '50%'}, style=style),
    show_rays=Checkbox(value=True, description='Show rays', style=style),
);

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

interactive(children=(FloatSlider(value=5.0, description='Aperture diameter, L:', layout=Layout(width='50%'), …

### Blur Circle Formula
We denote the horizontal distance between the image plane and the intersection point as $\delta$ and the radius of the blur circle by $R$. Here $e$ denotes the distance between the lens and the intersection point, i.e. the distance at which this point would be in focus. By using similar triangles (see below) again we obtain: $ \frac{0.5 L}{e}=\frac{R}{\delta}$. The radius of the blur circle is therefore
$$ R = \frac{L\delta}{2e} $$

In [6]:
# Object point used in this figure (upper left corner of the house) and the aperture diameter.
blur_demo_Z = nominal_Z-nominal_A/3
blur_demo_A = 2/3*nominal_A
blur_demo_L = 15

blur_circle_derivation_schema = ThinLensSchema("Blur Circle Derivation")
blur_circle_derivation_schema.set_focal_length(nominal_f)
blur_circle_derivation_schema.set_sensor_distance(6)
blur_circle_derivation_schema.draw_house(nominal_Z, nominal_A)
blur_circle_derivation_schema.draw_characteristic_rays(blur_demo_Z, blur_demo_A)
blur_circle_derivation_schema.draw_ray_bundle(blur_demo_Z, blur_demo_A, blur_demo_L)
blur_circle_derivation_schema.draw_blur_circle_derivation(blur_demo_Z, blur_demo_A, blur_demo_L)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

## From the Thin Lens Equation to the Pinhole Camera Model
The object distance Z is much larger than the focal length in most applications ($Z\approx 1m, f\approx 1cm$). The thin lens equation becomes: $\frac{1}{e}=\frac{1}{f}-\frac{1}{Z}\approx\frac{1}{f}$. Thus
$$e\approx f$$ 
Furthermore, we ignore all effects related to blur and depth of field and assume that all points are in focus. The projection of an object point is therefore uniquely defined by the sensor at distance f and the ray passing through the center of the lens. This is equivalent to having a camera with an infinitely small aperture, a pinhole camera.

In [7]:
class PinholeSchema(LensSchema):
    """
    Sketch of a pinhole camera: optical center at the origin, sensor behind it
    (z = -f), image plane in front of it (z = f), one projection ray and the
    field of view.
    """

    def __init__(self, title="Pinhole Camera"):
        """
        Create all artists on top of the base sketch. Apart from the optical
        center marker they start out empty.

        Parameters
        ----------
        title : str
            Title shown above the axes.
        """
        super().__init__(title)

        plot = self.ax.plot

        # Point markers.
        self.optical_center, = plot([0], [0], 'bo')
        self.focus_point, = plot([], [], 'ko')
        self.principal_point, = plot([], [], 'ko')

        # Image plane (in front of the optical center) and sensor (behind it).
        self.image_plane, = plot([], [], color='gray', linewidth=2)
        self.sensor, = plot([], [], color='k', linewidth=3)

        self.sensor_size = 10

        self.focal_length = self.max_f

        # Projection ray and field-of-view artists.
        self.ray, = plot([], [], color='g')
        self.fov_ray_1, = plot([], [], color='r')
        self.fov_ray_2, = plot([], [], color='r')
        self.fov_angle = self.ax.add_patch(Arc((0,0), 0, 0))

    def set_focal_length(self, f):
        """Store the focal length; nothing is redrawn."""
        self.focal_length = f

    def set_sensor_size(self, size):
        """Store the sensor size; nothing is redrawn."""
        self.sensor_size = size

    def draw_sensor(self):
        """Draw the sensor and its center marker at z = -f."""
        z_sensor = -self.focal_length
        half_size = self.sensor_size/2
        self.focus_point.set_data([z_sensor], [0])
        self.sensor.set_data([z_sensor, z_sensor], [half_size, -half_size])

    def clear_sensor(self):
        """Remove the sensor and its center marker from the plot."""
        for artist in (self.focus_point, self.sensor):
            artist.set_data([], [])

    def draw_image_plane(self):
        """Draw the image plane and the principal point at z = f."""
        z_plane = self.focal_length
        half_size = self.sensor_size/2
        self.principal_point.set_data([z_plane], [0])
        self.image_plane.set_data([z_plane, z_plane], [half_size, -half_size])

    def clear_image_plane(self):
        """Remove the image plane and the principal point from the plot."""
        for artist in (self.principal_point, self.image_plane):
            artist.set_data([], [])

    def draw_ray(self, Z_c, X_c, to=""):
        """
        Draw the ray that starts at the point (Z_c, X_c) and runs towards the
        optical center.

        Parameters
        ----------
        Z_c, X_c : float
            Coordinates of the starting point.
        to : str
            "image plane": the ray ends at the optical center (0, 0).
            "sensor": the ray ends on the sensor at z = -f.
            Anything else: the ray ends at the left border of the plot.
        """
        if to == "image plane":
            end_z, end_x = 0, 0
        elif to == "sensor":
            end_z = -self.focal_length
            end_x = -self.focal_length*X_c/Z_c
        else:
            end_z = -self.neg_x_lim
            end_x = -self.neg_x_lim*X_c/Z_c
        self.ray.set_data([Z_c, end_z], [X_c, end_x])

    def clear_ray(self):
        """Remove the ray from the plot."""
        self.ray.set_data([], [])

    def draw_field_of_view(self):
        """
        Draw the two rays bounding the field of view and the arc marking the
        opening angle theta = 2*arctan(sensor_size / (2*f)).
        """
        theta = 2*np.arctan(self.sensor_size/(2*self.focal_length))

        # Bounding rays from the optical center to the right border (z = max_Z).
        x_at_border = np.tan(theta/2)*self.max_Z
        self.fov_ray_1.set_data([0, self.max_Z], [0, x_at_border])
        self.fov_ray_2.set_data([0, self.max_Z], [0, -x_at_border])

        # Arc with width and height equal to the focal length; angles in degrees.
        self.fov_angle.width = self.focal_length
        self.fov_angle.height = self.focal_length
        half_angle_deg = 180/np.pi * theta / 2
        self.fov_angle.theta1 = -half_angle_deg
        self.fov_angle.theta2 = half_angle_deg

It is often more convenient to draw the image plane in front of the sensor so that the coordinates are not flipped. The projection of a point $(X_C, Z_C)$ in camera coordinates to the image plane is given by the following equation (using similar triangles again):
$$ x = f\frac{X_C}{Z_C} $$ 

In [8]:
pinhole_schema_1 = PinholeSchema()


def plot_pinhole_schema1(Z, A, f, sensor, image_plane):
    """Redraw the pinhole sketch; `sensor` / `image_plane` toggle the respective element."""
    schema = pinhole_schema_1
    schema.set_focal_length(f)
    schema.draw_house(Z, A)
    schema.draw_ray(Z, A)

    if sensor:
        schema.draw_sensor()
    else:
        schema.clear_sensor()

    if image_plane:
        schema.draw_image_plane()
    else:
        schema.clear_image_plane()


interact(
    plot_pinhole_schema1,
    A=FloatSlider(5,
                  min=pinhole_schema_1.min_A, max=pinhole_schema_1.max_A,
                  description='Object height, X_C:',
                  layout={'width': '50%'}, style=style),
    Z=FloatSlider(10,
                  min=pinhole_schema_1.min_Z, max=pinhole_schema_1.max_Z,
                  description='Object distance, Z_C:',
                  layout={'width': '50%'}, style=style),
    f=FloatSlider(5,
                  min=pinhole_schema_1.min_f, max=pinhole_schema_1.max_f,
                  description='Focal length, f:',
                  layout={'width': '50%'}, style=style),
    sensor=Checkbox(value=False, description='Show sensor', style=style),
    image_plane=Checkbox(value=True, description='Show image plane', style=style),
);

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

interactive(children=(FloatSlider(value=10.0, description='Object distance, Z_C:', layout=Layout(width='50%'),…

## Field of View
The field of view $\theta$ of a camera is related to the sensor size $W$ and focal length $f$ by:
$$ \tan{\frac{\theta}{2}}=\frac{W}{2f} $$
In photography people often speak of *35mm equivalent focal length* when talking about lenses. The reason for this is that e.g. a 50mm lens gives a smaller field of view (aka. more zoom) when mounted to a camera with a smaller sensor. The reference size is usually a full-frame sensor with size 36mm × 24mm, which is equivalent to 35mm film (for the vintage people out there). Another common sensor size is APS-C (23.7mm × 15.6mm for Nikon). A well known rule of thumb is the crop factor, which means that a 50mm lens on an APS-C sensor is equivalent to a 50mm × 1.5 = 75mm lens on a full-frame sensor. This factor follows directly from the field of view equation for constant $\theta$: $\frac{W}{2f}=\frac{W'}{2f'} \Rightarrow f'=f\cdot\frac{W'}{W}=f\cdot\frac{36}{23.7}\approx f\cdot 1.5$

In [9]:
pinhole_fov_schema = PinholeSchema()


def plot_fov_schema(f, W):
    """Redraw the image plane and the field of view for focal length f and sensor size W."""
    schema = pinhole_fov_schema
    schema.set_focal_length(f)
    schema.set_sensor_size(W)
    schema.draw_image_plane()
    schema.draw_field_of_view()


interact(
    plot_fov_schema,
    f=FloatSlider(5, min=1, max=20,
                  description='Focal length, f:',
                  layout={'width': '50%'}, style=style),
    W=FloatSlider(5, min=1.0, max=15.0,
                  description='Sensor size, W:',
                  layout={'width': '50%'}, style=style),
);

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

interactive(children=(FloatSlider(value=5.0, description='Focal length, f:', layout=Layout(width='50%'), max=2…

## Camera Matrix & Perspective Projection Equation
The following coordinate systems are the standard setup for expressing the perspective projection equation:
 * World coordinate system $(X_w, Y_w, Z_w)$
 * Camera coordinate system $(X_c, Y_c, Z_c)$
 * Image plane coordinates $(x,y)$
 * Pixel coordinates $(u,v)$
 * principal point $O$
 * optical center $C$

<img src="CoordinateSystems.svg" width="30%" align="center"/>

A point $P_c = (X_c, Y_c, Z_c)$ in camera coordinates is projected to the image plane using the formula of the pinhole camera in both coordinates.
$$ x=f \frac{X_c}{Z_c}, \quad y=f \frac{Y_c}{Z_c} $$
The principal point has pixel coordinates $(u_0, v_0)$ and is usually in the middle of the sensor. We call $k_u, k_v$ the pixel conversion factors which denote the inverse of the pixel size in directions $u, v$. The focal lengths expressed in pixels are called $\alpha_u, \alpha_v$.
$$ u=u_0+k_ux=u_0+k_uf\frac{X_c}{Z_c}=u_0+\alpha_u\frac{X_c}{Z_c} $$
$$ v=v_0+k_vy=v_0+k_vf\frac{Y_c}{Z_c}=v_0+\alpha_v\frac{Y_c}{Z_c} $$
This can be further rewritten in matrix form using homogeneous coordinates. The resulting matrix $K$ is called **Calibration Matrix** or **Intrinsic Parameter Matrix**.
$$ \lambda \left( \begin{array}{r} u \\ v \\ 1 \end{array}\right) = 
\left( \begin{array}{ccc}\alpha_u & 0 & u_0\\ 0 & \alpha_v & v_0 \\ 0 & 0 & 1\end{array}\right) 
\left( \begin{array}{c} X_c \\ Y_c \\ Z_c \end{array}\right) = K \left( \begin{array}{c} X_c \\ Y_c \\ Z_c \end{array}\right) $$

A point in world coordinates has to be first transformed to the camera frame before the above equation can be applied. $R_{cw}, T_{cw}$ are called **extrinsic parameters**.
$$ \left( \begin{array}{c} X_c \\ Y_c \\ Z_c \end{array}\right) = R_{cw} \left( \begin{array}{c} X_w \\ Y_w \\ Z_w \end{array}\right) + T_{cw} = [R_{cw} | T_{cw}] \left( \begin{array}{c} X_w \\ Y_w \\ Z_w \\ 1\end{array}\right) $$

Combining both equations yields the **Perspective Projection Equation**. The matrix $M$ is called **Projection Matrix**.
$$ \lambda \left( \begin{array}{r} u \\ v \\ 1 \end{array}\right) = K [R_{cw} | T_{cw}] \left( \begin{array}{c} X_w \\ Y_w \\ Z_w \\ 1\end{array}\right) = M \left( \begin{array}{c} X_w \\ Y_w \\ Z_w \\ 1\end{array}\right) $$

### Normalized Image Coordinates
Normalized image coordinates are useful to associate rays to pixels and thus relate a 2D pixel to a 3D vector.
$$ \left( \begin{array}{r} \bar{u} \\ \bar{v}\\ 1 \end{array}\right) = K^{-1} \left( \begin{array}{c} u \\ v \\ 1 \end{array}\right) = \left( \begin{array}{c} \frac{u-u_0}{\alpha_u} \\ \frac{v-v_0}{\alpha_v} \\ 1 \end{array}\right)$$
If the distance $Z_c$ associated to a pixel is known, one can calculate the corresponding point as
$$ Z_c \left( \begin{array}{r} \bar{u} \\ \bar{v}\\ 1 \end{array}\right) = \left( \begin{array}{c} X_c \\ Y_c \\ Z_c \end{array}\right). $$