In [1]:
import torch
from pathlib import Path
import numpy as np
import scipy
import pandas as pd
import cv2
import matplotlib.pyplot as plt

In [3]:
root_dir = Path.cwd().parent
data_dir = root_dir / 'data'
model_dir = root_dir / 'models'

model = torch.hub.load('ultralytics/yolov5', 'custom', path=model_dir / 'best.pt', force_reload=True)

model.eval();

img_left_path = data_dir / "raw/final_project_2023_rect/seq_03/image_02/data/0000000005.png"  # or file, Path, PIL, OpenCV, numpy, list
img_right_path = data_dir / "raw/final_project_2023_rect/seq_03/image_03/data/0000000005.png"  # or file, Path, PIL, OpenCV, numpy, list

img_left = cv2.imread(str(img_left_path))
img_right = cv2.imread(str(img_right_path))

# Inference
results = model([img_left, img_right])

# Results
r = results.pandas().xyxy

results_left_df = r[0]
results_right_df = r[1]

Downloading: "https://github.com/ultralytics/yolov5/zipball/master" to C:\Users\jakab/.cache\torch\hub\master.zip
YOLOv5  2023-5-3 Python-3.11.2 torch-2.0.0+cpu CPU

Fusing layers... 


[31m[1mrequirements:[0m C:\Users\jakab\.cache\torch\hub\requirements.txt not found, check failed.


YOLOv5s summary: 157 layers, 7018216 parameters, 0 gradients
Adding AutoShape... 


In [8]:
def match_objects(
    results_left_df: pd.DataFrame,
    results_right_df: pd.DataFrame,
    img_left_path: Path,
    img_right_path: Path,
) -> pd.DataFrame:
    """Match objects from left and right images.

    Args:
        results_left_df (pd.DataFrame): Results from left image.
        results_right_df (pd.DataFrame): Results from right image.
        calib_file (str): Path to calibration file.

    Returns:
        pd.DataFrame: Matched objects.
    """

    # read images and convert to gray
    img_left = cv2.imread(str(img_left_path))
    img_left_gray = cv2.cvtColor(img_left, cv2.COLOR_RGB2GRAY)
    img_right = cv2.imread(str(img_right_path))
    img_right_gray = cv2.cvtColor(img_right, cv2.COLOR_BGR2GRAY)
    
    nb_matches = 20
    sift = cv2.SIFT_create()
    bf = cv2.BFMatcher()
    im_height = img_left.shape[0]
    match_matrix = np.zeros((len(results_left_df.index), len(results_right_df.index)))
    for i1, bbox1 in results_left_df.iterrows():
        for i2, bbox2 in results_right_df.iterrows():
            cy1 = bbox1["ymin"] + bbox1["ymax"] / 2
            cy2 = bbox2["ymin"] + bbox2["ymax"] / 2
            if abs(cy1 - cy2) / im_height <= 0.1 and bbox1[5] == bbox2[5]:
                bbox1_im = img_left_gray[
                    int(bbox1["ymin"]) : int(bbox1["ymax"]),
                    int(bbox1["xmin"]) : int(bbox1["xmax"]),
                ]
                bbox2_im = img_right_gray[
                    int(bbox2["ymin"]) : int(bbox2["ymax"]),
                    int(bbox2["xmin"]) : int(bbox2["xmax"]),
                ]
                _, des1 = sift.detectAndCompute(bbox1_im, None)
                _, des2 = sift.detectAndCompute(bbox2_im, None)
                matches = bf.match(des1, des2)
                matches = sorted(matches, key=lambda x: x.distance)[:nb_matches]
                for m in matches:
                    match_matrix[i1, i2] += m.distance
            else:
                match_matrix[i1, i2] = 1e12
    row_ind, col_ind = scipy.optimize.linear_sum_assignment(match_matrix)

    return row_ind, col_ind

In [9]:
row_ind, col_ind = match_objects(
    r[0],
    r[1],
    img_left_path,
    img_right_path,
)

In [10]:
nb_matches = 100
def triangulate(im_left, im_right, bbox_matches, bboxes_left, bboxes_right, mtx_left, mtx_right, left_cam_disp_x, right_cam_disp_x):
    if (len(im_left.shape) == 3): im_left = cv2.cvtColor(im_left, cv2.COLOR_RGB2GRAY)
    if (len(im_right.shape) == 3): im_right = cv2.cvtColor(im_right, cv2.COLOR_RGB2GRAY)

    R_left = np.eye(3)
    t_left = np.array([[left_cam_disp_x], [0], [0]])
    #P_left = mtx_left @ np.hstack((R_left, t_left))
    P_left = np.array([[1230,0,687.06,-73.8],
                       [0,1074.5,254.14,0],
                       [0,0,1,0]])
    R_right = np.eye(3)
    t_right = np.array([[right_cam_disp_x], [0], [0]])
    #P_right = mtx_right @ np.hstack((R_right, t_right))
    P_right = np.array([[978.76,0,707.1,469.8048],
                        [0,974.55,260.06,0],
                        [0,0,1,0]])
    
    sift = cv2.SIFT_create()
    bf = cv2.BFMatcher()
    points_3d = []
    for [lb,rb] in bbox_matches:
        lx_min = int(bboxes_left[lb,0])
        ly_min = int(bboxes_left[lb,1])
        lx_max = int(bboxes_left[lb,2])
        ly_max = int(bboxes_left[lb,3])
        rx_min = int(bboxes_right[rb,0])
        ry_min = int(bboxes_right[rb,1])
        rx_max = int(bboxes_right[rb,2])
        ry_max = int(bboxes_right[rb,3])
        bbox_left = im_left[ly_min:ly_max,lx_min:lx_max]
        bbox_right = im_right[ry_min:ry_max,rx_min:rx_max]

        kpl, desl = sift.detectAndCompute(bbox_left, None)
        kpl = np.array([k.pt for k in kpl])
        kpr, desr = sift.detectAndCompute(bbox_right, None)
        matches = bf.match(desl, desr)
        kpr = np.array([k.pt for k in kpr])
        matches = sorted(matches, key = lambda x:x.distance)[:nb_matches]
        matches = np.array([(m.queryIdx, m.trainIdx) for m in matches])

        points_left = kpl[matches[:,0],:] + [lx_min, ly_min]
        points_right = kpr[matches[:,1],:] + [rx_min, ry_min]

        Q = cv2.triangulatePoints(P_left, P_right, points_left.T, points_right.T)
        Q = Q[:-1]/Q[-1]    

        Q = np.sum(Q, axis=1) / nb_matches
        points_3d.append(Q)

    return np.asarray(points_3d)

In [11]:
im_left = cv2.imread(str(img_left_path))
im_right = cv2.imread(str(img_right_path))
matches = np.array([row_ind, col_ind]).T
bboxes_left = results_left_df.to_numpy()
bboxes_right = results_right_df.to_numpy()
mtx_left = np.array([[1.23004607e+03, 0, 6.87057482e+02],
                     [0, 1.07447570e+03, 2.54142938e+02],
                     [0, 0, 1]])
mtx_right = np.array([[978.7638968, 0, 707.10302473],
                      [0, 974.5535797, 260.05961915],
                      [0, 0, 1]])
left_cam_disp_x = -0.06
right_cam_disp_x = 0.48

triangulate(im_left, im_right, matches, bboxes_left, bboxes_right, mtx_left, mtx_right, left_cam_disp_x, right_cam_disp_x)

array([[     1.1423,   -0.030114,     -2.9072],
       [     22.716,     -3.8501,      83.991],
       [    0.19031,    0.067033,    -0.99974],
       [    -2.4772,     0.29665,     -14.929]])