In [None]:
import numpy as np
import matplotlib.pyplot as plt
import cv2
np.set_printoptions(suppress= True)

In [None]:
img1_path = "Images/img1_1.45m.png"
img2_path = "Images/img2_1.55m.png"


def _load_rgb(path):
    """Read an image with OpenCV and return it converted from BGR to RGB.

    Raises:
        OSError: if OpenCV cannot read the file. cv2.imread reports failure by
            returning None rather than raising, which would otherwise surface
            later as an unrelated-looking cvtColor error.
    """
    bgr = cv2.imread(path)
    if bgr is None:
        raise OSError(f"Could not read image (missing or unreadable): {path}")
    return cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)


img1 = _load_rgb(img1_path)
img2 = _load_rgb(img2_path)

# Per-pixel absolute difference between the two images.
diff = cv2.absdiff(img1, img2)

figure, axis = plt.subplots(1, 3, figsize=(15, 5))

# One panel per image; diff is drawn exactly once, on the third panel.
axis[0].imshow(img1)
axis[0].set_title(img1_path)
axis[1].imshow(img2)
axis[1].set_title(img2_path)
axis[2].imshow(diff)
axis[2].set_title("absdiff(img1, img2)")

plt.show()


## Coordinates of all Objects

### Cube

Size
X: 1m Y: 1m Z: 1m

Position 
X: 0.5m Y: 0.5m Z: 0.5m

Rotation
X: 0 Y: 0 Z: 0

### Camera1

FOV
75

Position 
X: 1.45m Y: 1.5m Z: 2m

Rotation
X: -25 Y: 25 Z: 0

### Camera2

FOV
75

Position 
X: 1.55m Y: 1.5m Z: 2m

Rotation
X: -25 Y: 25 Z: 0


# Calibrating Cameras

Let our point be Xw = (1, 1, 1) in the World Coordinate Frame (WCF).

(Coordinate Transformation)

To find Xc = (xc, yc, zc) in the Camera Coordinate Frame

(Perspective Projection)

To find Xi = (xi, yi) in Image Coordinates 

Let our point on the 3D object be Xw = (0.5, 0.5, 1).

Its pixel coordinates are (u, v) = (461, 416).

In [24]:
def basis_from_deg(camera_degrees):
    """Build a 3x3 rotation matrix from three Euler angles given in degrees.

    Args:
        camera_degrees: sequence of three angles (about X, Y, Z) in degrees.

    Returns:
        3x3 numpy array equal to Rz @ Ry @ Rx. Applied to a column vector,
        the X rotation acts first, then Y, then Z.
    """
    ang_x, ang_y, ang_z = np.deg2rad(camera_degrees)

    cos_x, sin_x = np.cos(ang_x), np.sin(ang_x)
    cos_y, sin_y = np.cos(ang_y), np.sin(ang_y)
    cos_z, sin_z = np.cos(ang_z), np.sin(ang_z)

    rot_x = np.array([
        [1, 0, 0],
        [0, cos_x, -sin_x],
        [0, sin_x, cos_x],
    ])
    rot_y = np.array([
        [cos_y, 0, sin_y],
        [0, 1, 0],
        [-sin_y, 0, cos_y],
    ])
    rot_z = np.array([
        [cos_z, -sin_z, 0],
        [sin_z, cos_z, 0],
        [0, 0, 1],
    ])

    # Keep the same association as the nested matmul: Rz @ (Ry @ Rx).
    return rot_z @ (rot_y @ rot_x)


print(basis_from_deg([0, 0, -20]))

[[ 0.93969262  0.34202014  0.        ]
 [-0.34202014  0.93969262  0.        ]
 [ 0.          0.          1.        ]]


In [26]:
def unproject_position(intrinsic_matrix, camera_rotation, camera_translation, point_position):
    """Project a 3D world-space point to 2D pixel coordinates.

    Note: despite the name, this is a forward projection (world -> pixel),
    not an unprojection.

    Args:
        intrinsic_matrix: 3x3 intrinsics; fx and fy are read from [0][0] and
            [1][1], the principal point (ox, oy) from [0][2] and [1][2].
        camera_rotation: three Euler angles in degrees, passed to
            basis_from_deg to obtain the camera basis.
        camera_translation: camera position (x, y, z) in world coordinates.
        point_position: the point (x, y, z) in world coordinates.

    Returns:
        numpy array [u, v] of pixel coordinates. A point whose camera-space
        z is 0 produces inf/nan because of the perspective divide.
    """
    intrinsics = np.asarray(intrinsic_matrix, dtype=float)
    fx, fy = intrinsics[0, 0], intrinsics[1, 1]
    ox, oy = intrinsics[0, 2], intrinsics[1, 2]

    basis = basis_from_deg(camera_rotation)
    offset = np.asarray(point_position, dtype=float) - np.asarray(camera_translation, dtype=float)

    # World -> camera: inv(basis) @ (point - camera_position). basis is a
    # product of pure rotations, hence orthonormal, so its inverse is its
    # transpose; this avoids a general (and less accurate) matrix inversion.
    x_cam, y_cam, z_cam = basis.T @ offset

    # Perspective divide by w = -z: points with negative camera-space z have
    # positive depth. Scaling by f/o gives normalized coordinates where the
    # image spans [-1, 1].
    ndc_x = (fx / ox) * x_cam / -z_cam
    ndc_y = (fy / oy) * y_cam / -z_cam

    # Map [-1, 1] to pixels; the y axis is flipped (ndc_y = +1 maps to v = 0).
    return np.array([(ndc_x + 1) * ox, (1 - ndc_y) * oy])

In [34]:
# Intrinsics: focal lengths on the diagonal, principal point in the last column.
int_matrix = np.array([
    [421.32885985, 0, 576.23080988],
    [0, 421.18511044, 323.80923872],
    [0, 0, 1],
])

# Example camera pose (Euler angles in degrees, position) and a world point.
camera_rotation = [-3, -50, 153]
camera_translation = [3, 2, 4]
point = [1, 1, 1]

projected_pixel = unproject_position(int_matrix, camera_rotation, camera_translation, point)
print(projected_pixel)


[362.49645924  35.27510267]
