In [1]:
import torch
from pathlib import Path
import numpy as np
import scipy
import pandas as pd
import cv2
import matplotlib.pyplot as plt

In [2]:
root_dir = Path.cwd().parent
data_dir = root_dir / 'data'
model_dir = root_dir / 'models'

model = torch.hub.load('ultralytics/yolov5', 'custom', path=model_dir / 'best.pt', force_reload=True)

model.eval();

img_left_path = data_dir / "raw/final_project_2023_rect/seq_03/image_02/data/0000000005.png"  # or file, Path, PIL, OpenCV, numpy, list
img_right_path = data_dir / "raw/final_project_2023_rect/seq_03/image_03/data/0000000005.png"  # or file, Path, PIL, OpenCV, numpy, list

img_left = cv2.imread(str(img_left_path))
img_right = cv2.imread(str(img_right_path))

# Inference
results = model([img_left, img_right])

# Results
r = results.pandas().xyxy

results_left_df = r[0]
results_right_df = r[1]

Downloading: "https://github.com/ultralytics/yolov5/zipball/master" to C:\Users\jakab/.cache\torch\hub\master.zip
YOLOv5  2023-5-3 Python-3.11.2 torch-2.0.0+cpu CPU

Fusing layers... 


[31m[1mrequirements:[0m C:\Users\jakab\.cache\torch\hub\requirements.txt not found, check failed.


YOLOv5s summary: 157 layers, 7018216 parameters, 0 gradients
Adding AutoShape... 


In [3]:
def match_objects(
    results_left_df: pd.DataFrame,
    results_right_df: pd.DataFrame,
    img_left_path: Path,
    img_right_path: Path,
) -> pd.DataFrame:
    """Match objects from left and right images.

    Args:
        results_left_df (pd.DataFrame): Results from left image.
        results_right_df (pd.DataFrame): Results from right image.
        calib_file (str): Path to calibration file.

    Returns:
        pd.DataFrame: Matched objects.
    """

    # read images and convert to gray
    img_left = cv2.imread(str(img_left_path))
    img_left_gray = cv2.cvtColor(img_left, cv2.COLOR_RGB2GRAY)
    img_right = cv2.imread(str(img_right_path))
    img_right_gray = cv2.cvtColor(img_right, cv2.COLOR_BGR2GRAY)
    
    nb_matches = 20
    sift = cv2.SIFT_create()
    bf = cv2.BFMatcher()
    im_height = img_left.shape[0]
    match_matrix = np.zeros((len(results_left_df.index), len(results_right_df.index)))
    for i1, bbox1 in results_left_df.iterrows():
        for i2, bbox2 in results_right_df.iterrows():
            cy1 = bbox1["ymin"] + bbox1["ymax"] / 2
            cy2 = bbox2["ymin"] + bbox2["ymax"] / 2
            if abs(cy1 - cy2) / im_height <= 0.1 and bbox1[5] == bbox2[5]:
                bbox1_im = img_left_gray[
                    int(bbox1["ymin"]) : int(bbox1["ymax"]),
                    int(bbox1["xmin"]) : int(bbox1["xmax"]),
                ]
                bbox2_im = img_right_gray[
                    int(bbox2["ymin"]) : int(bbox2["ymax"]),
                    int(bbox2["xmin"]) : int(bbox2["xmax"]),
                ]
                _, des1 = sift.detectAndCompute(bbox1_im, None)
                _, des2 = sift.detectAndCompute(bbox2_im, None)
                matches = bf.match(des1, des2)
                matches = sorted(matches, key=lambda x: x.distance)[:nb_matches]
                for m in matches:
                    match_matrix[i1, i2] += m.distance
            else:
                match_matrix[i1, i2] = 1e12
    row_ind, col_ind = scipy.optimize.linear_sum_assignment(match_matrix)

    return row_ind, col_ind

In [18]:
row_ind, col_ind = match_objects(
    r[0],
    r[1],
    img_left_path,
    img_right_path,
)
bboxes_left = results_left_df.to_numpy()[row_ind]
bboxes_right = results_right_df.to_numpy()[col_ind]

[0 1 2 3] [1 3 2 0]


In [5]:
im_left = cv2.imread(str(img_left_path))
im_left = cv2.cvtColor(im_left, cv2.COLOR_BGR2GRAY)
im_right = cv2.imread(str(img_right_path))
im_right = cv2.cvtColor(im_right, cv2.COLOR_BGR2GRAY)

min_disp = 3
num_disp = 5 * 16
block_size = 31

stereo = cv2.StereoBM_create(numDisparities = num_disp, blockSize = block_size)
stereo.setMinDisparity(min_disp)
stereo.setDisp12MaxDiff(200)
stereo.setUniquenessRatio(1)
stereo.setSpeckleRange(3)
stereo.setSpeckleWindowSize(3)

disp = stereo.compute(im_left, im_right).astype(np.float32) / 16.0



In [23]:
bboxes_left = results_left_df.to_numpy()[row_ind]
bboxes_right = results_right_df.to_numpy()[col_ind]

print(bboxes_left)
print(bboxes_right)

[[0.0 201.5142059326172 297.4330749511719 370.0 0.9782201051712036 0 'Car']
 [1000.9168090820312 151.6439666748047 1076.9569091796875 295.646484375 0.9333688020706177 1 'Pedestrian']
 [446.2646789550781 172.99713134765625 483.6410217285156 198.6333770751953 0.927031397819519 0 'Car']
 [867.9674072265625 167.97006225585938 917.901123046875 276.0589599609375 0.8357637524604797 0 'Car']]
[[0.0 195.32542419433594 233.25775146484375 367.4632568359375 0.9502115249633789 0 'Car']
 [958.6746826171875 145.0343780517578 1032.8818359375 297.6231384277344 0.9045212268829346 1 'Pedestrian']
 [438.0471496582031 173.0842742919922 475.4096374511719 199.05380249023438 0.9435446858406067 0 'Car']
 [788.3722534179688 153.4276885986328 893.1302490234375 277.1434631347656 0.9533413648605347 2 'Cyclist']]
