# 1. Point projection and coordinate transformation

### Import Librarys

In [157]:
import cv2
import numpy as np
import os

### Import images

In [158]:
notebook_dir = os.getcwd()
image_left_path = os.path.join(notebook_dir, 'Additional_files', 'Stereo_images', 'ConstructionSiteLeft', 'image0110_c0.pgm')
image_right_path = os.path.join(notebook_dir, 'Additional_files', 'Stereo_images', 'ConstructionSiteRight', 'image0110_c1.pgm')

image_left = cv2.imread(image_left_path, cv2.IMREAD_GRAYSCALE)
image_right = cv2.imread(image_right_path, cv2.IMREAD_GRAYSCALE)

image_left_display = cv2.normalize(image_left, None, 0, 255, cv2.NORM_MINMAX)
image_right_display = cv2.normalize(image_right, None, 0, 255, cv2.NORM_MINMAX)

### Get Pixels by mouse click

In [159]:
def get_pixel_by_images(num_pixels=1):
    pixel_left = None
    pixel_right = None
    pixel_left2 = None
    pixel_right2 = None

    def mouse_callback(event, x, y, flags, param):
        nonlocal pixel_left, pixel_right, pixel_left2, pixel_right2
        if event == cv2.EVENT_LBUTTONDOWN:
            img, name = param
            print(f"{name}: Klick bei ({x}, {y}) ")
            if name == 'Left Image':
                if pixel_left is None:
                    pixel_left = [x, y]
                elif pixel_left2 is None:
                    pixel_left2 = [x, y]
            elif name == 'Right Image':
                if pixel_right is None:
                    pixel_right = [x, y]
                elif pixel_right2 is None:
                    pixel_right2 = [x, y]

    cv2.namedWindow('Left Image')
    cv2.setMouseCallback('Left Image', mouse_callback, param=(image_left, 'Left Image'))
    cv2.namedWindow('Right Image')
    cv2.setMouseCallback('Right Image', mouse_callback, param=(image_right, 'Right Image'))

    cv2.imshow('Left Image', image_left_display)
    cv2.imshow('Right Image', image_right_display)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

    if num_pixels == 2:
        return pixel_left, pixel_right, pixel_left2, pixel_right2
    else:
        return pixel_left, pixel_right

In [160]:
USE_MOUSE = False  

if USE_MOUSE:
    pixel_left, pixel_right = get_pixel_by_images()
else:
    pixel_left = [250, 250]
    pixel_right = [238, 250]

print("Pixel left image:", pixel_left)
print("Pixel right image:", pixel_right)

Pixel left image: [250, 250]
Pixel right image: [238, 250]


## Solution

### Calculate disparity

In [161]:
def calc_disparity(pixel_left, pixel_right):
    return pixel_left[0] - pixel_right[0]

In [162]:
disparity = calc_disparity(pixel_left, pixel_right)

#disparity = 3
print("Disparity:", disparity)

Disparity: 12


### Read Camera parameters

In [163]:
camera_txt_path = os.path.join(notebook_dir, 'Additional_files', 'Camera.txt')
with open(camera_txt_path, 'r') as f:
    camera_txt_content = f.read()

internal_params = {}
external_params = {}

section = None
for line in camera_txt_content.strip().split('\n'):
    line = line.strip()
    if line == '[INTERNAL]':
        section = 'internal'
    elif line == '[EXTERNAL]':
        section = 'external'
    elif '=' in line and section:
        key, value = line.split('=', 1)
        value_clean = value.split('#', 1)[0].strip()
        try:
            value_num = float(value_clean)
        except ValueError:
            value_num = value_clean  
        if section == 'internal':
            internal_params[key.strip()] = value_num
        elif section == 'external':
            external_params[key.strip()] = value_num

print("Internal params:", internal_params)
print("External params:", external_params)

Internal params: {'F': 820.428, 'SX': 1.0, 'SY': 1.000283, 'X0': 305.278, 'Y0': 239.826}
External params: {'B': 0.308084, 'LATPOS': -0.07, 'HEIGHT': 1.26, 'DISTANCE': 2.0, 'TILT': 0.06, 'YAW': 0.01, 'ROLL': 0.0}


### Camera Matrix

In [164]:
K = np.array([
    [internal_params['F'] / internal_params['SX'], 0, internal_params['X0']],
    [0, internal_params['F'] / internal_params['SY'], internal_params['Y0']],
    [0, 0, 1]
])
K_inv = np.linalg.inv(K)
print("Camera matrix K: \n", K)

Camera matrix K: 
 [[820.428        0.         305.278     ]
 [  0.         820.19588456 239.826     ]
 [  0.           0.           1.        ]]


### Rotation matrix calculation

In [165]:
roll = external_params['ROLL']    
pitch = external_params['TILT']   
yaw = external_params['YAW']      

Rx = np.array([
    [1, 0, 0],
    [0, np.cos(roll), -np.sin(roll)],
    [0, np.sin(roll), np.cos(roll)]
])

Ry = np.array([
    [np.cos(pitch), 0, np.sin(pitch)],
    [0, 1, 0],
    [-np.sin(pitch), 0, np.cos(pitch)]
])

Rz = np.array([
    [np.cos(yaw), -np.sin(yaw), 0],
    [np.sin(yaw), np.cos(yaw), 0],
    [0, 0, 1]
])

R_wc = Rz @ Ry @ Rx
print("Rotation matrix R_wc:\n", R_wc)
R_cw = np.linalg.inv(R_wc)
print("Rotation matrix R_cw_din:\n", R_cw)

Rotation matrix R_wc:
 [[ 9.98150630e-01 -9.99983333e-03  5.99610083e-02]
 [ 9.98183903e-03  9.99950000e-01  5.99630071e-04]
 [-5.99640065e-02  0.00000000e+00  9.98200540e-01]]
Rotation matrix R_cw_din:
 [[ 9.98150630e-01  9.98183903e-03 -5.99640065e-02]
 [-9.99983333e-03  9.99950000e-01  0.00000000e+00]
 [ 5.99610083e-02  5.99630071e-04  9.98200540e-01]]


### Translation vector

In [166]:

t_wc_din = np.array([
    [external_params['DISTANCE']],   
    [external_params['LATPOS']+external_params['B']/2],     # Weiß nicht, ob da noch die Base line dazu muss
    [external_params['HEIGHT']]      
])

print("Translation vector t_wc_din:\n", t_wc_din)
t_cw_din = -t_wc_din

print("Translation vector t_cw_din: \n", t_cw_din)

Translation vector t_wc_din:
 [[2.      ]
 [0.084042]
 [1.26    ]]
Translation vector t_cw_din: 
 [[-2.      ]
 [-0.084042]
 [-1.26    ]]


### Coordinate Calculation Camera


In [167]:
def pixel_to_coordinate(pixel, internal_params, external_params, disparity):
    z = (internal_params['F'] * external_params['B']) / disparity
    x = ((pixel[0] - internal_params['X0']) * internal_params['SX'] * z) / internal_params['F']
    y = ((pixel[1] - internal_params['Y0']) * internal_params['SY'] * z) / internal_params['F']
    
    return np.array([x, y, z])

In [168]:
p_c = pixel_to_coordinate(pixel_left, internal_params, external_params, disparity)
print("Point P_c: \n", p_c)

Point P_c: 
 [-1.41918895  0.26127781 21.063395  ]


### Shift from Camera to World

In [169]:
def camera_to_din(p_c):
    return np.array([[p_c[2]], [-p_c[0]], [-p_c[1]]]) # Beim letzten - bin ich mir nicht ganz sicher

In [170]:
p_c_din = camera_to_din(p_c) 
print("Point P_c_din: \n", p_c_din)

Point P_c_din: 
 [[21.063395  ]
 [ 1.41918895]
 [-0.26127781]]


### Apply Rotation and Translation

In [171]:
def camera_to_world(p_c, R_cw, t_cw_din):
    p_w = R_cw @ p_c + t_cw_din
    return p_w

In [172]:
p_w_din = camera_to_world(p_c_din, R_cw, t_cw_din)
print("Point P_w_din: \n" , p_w_din)

Point P_w_din: 
 [[19.05427437]
 [ 1.12444555]
 [-0.25697426]]


# Validation

## Validation with object of known size

511-21 Verschwenkungstafel ohne Gegenverkehr

1600 x 1250 mm (HxB)

In [173]:
def pixel_to_world(pixel_left, pixel_right, internal_params, external_params):
    disparity = calc_disparity(pixel_left, pixel_right)
    p_c = pixel_to_coordinate(pixel_left, internal_params, external_params, disparity)
    p_c_din = camera_to_din(p_c)
    p_w_din = camera_to_world(p_c_din, R_cw, t_cw_din)
    return p_w_din

In [174]:
USE_MOUSE = False  # Set to True to use mouse for pixel selection

if USE_MOUSE:
    get_pixel_by_images(2)
else:
    pixel_left1 = [250, 180]
    pixel_right1 = [238, 180]
    pixel_left2 = [208, 233]
    pixel_right2 = [196, 233]

p1_val_w_din = pixel_to_world(pixel_left1, pixel_right1, internal_params, external_params)
p2_val_w_din = pixel_to_world(pixel_left2, pixel_right2, internal_params, external_params)
print("Point down right: \n", p_w_din)
print("Point up right: \n", p1_val_w_din)
print("Point down left:\n", p2_val_w_din)

# Abstand zwischen p_w_din und p_val_w_din
distance1 = np.linalg.norm(p_w_din - p1_val_w_din)
print("Abstand zwischen p_w_din und p_val_w_din:", f"{distance1:.3f}" )
print("Soll Abstand: 1.600")

# Abstand zwischen p_val_w_din und p_w_din
distance2 = np.linalg.norm(p_w_din - p2_val_w_din)
print("Abstand zwischen p_val_w_din und p_w_din:", f"{distance2:.3f}")
print("Soll Abstand: 1.250")


Point down right: 
 [[19.05427437]
 [ 1.12444555]
 [-0.25697426]]
Point up right: 
 [[18.94647916]
 [ 1.12444555]
 [ 1.53745618]]
Point down left:
 [[19.03885889]
 [ 2.20268563]
 [ 0.17946257]]
Abstand zwischen p_w_din und p_val_w_din: 1.798
Soll Abstand: 1.600
Abstand zwischen p_val_w_din und p_w_din: 1.163
Soll Abstand: 1.250


## Backward calculation


In [175]:
p_w_din = np.append(p_w_din, 1)

In [None]:
Rt_wc = np.hstack((R_wc, t_wc_din))

p_w = Rt_wc @ p_w_din.T

p_c = np.array([-p_w[1], -p_w[2], p_w[0]])
print("3D-Punkt in Kamerakoordinaten:", p_c)

3D-Punkt in Kamerakoordinaten: [-1.39847394  0.13908247 21.14642527]


In [177]:
p_c = np.dot(K, p_c)

pixel_c = np.array([int(p_c[0] / p_c[2]), int(p_c[1] / p_c[2])])

print("Pixel in Bildkoordinaten:", pixel_c)
print("Ursprünglicher Punkt:", pixel_left)

Pixel in Bildkoordinaten: [251 245]
Ursprünglicher Punkt: [250, 250]
