In [35]:
from pathlib import Path

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy
import scipy.optimize
import torch

In [36]:
# Project layout: the notebook lives one level below the repository root.
root_dir = Path.cwd().parent
data_dir = root_dir / 'data'
model_dir = root_dir / 'models'

# Load the custom YOLOv5 weights through torch.hub.
# NOTE(review): force_reload=True makes torch.hub fetch the repository again on
# every run (see the "Downloading" line in the cell output) — consider dropping
# it once the hub cache is known to be good.
model = torch.hub.load('ultralytics/yolov5', 'custom', path=model_dir / 'best.pt', force_reload=True)

model.eval();

# Left / right frames of the same time step of the rectified stereo sequence.
img_left_path = data_dir / "raw/final_project_2023_rect/seq_03/image_02/data/0000000150.png"
img_right_path = data_dir / "raw/final_project_2023_rect/seq_03/image_03/data/0000000150.png"

img_left = cv2.imread(str(img_left_path))
img_right = cv2.imread(str(img_right_path))

# cv2.imread signals failure by returning None instead of raising, so fail
# loudly here rather than deep inside the model call.
for _image, _image_path in ((img_left, img_left_path), (img_right, img_right_path)):
    if _image is None:
        raise FileNotFoundError(f"Could not read image: {_image_path}")

# Inference
# OpenCV loads images as BGR, whereas the YOLOv5 hub model expects numpy inputs
# in RGB order; convert so the detector does not see channel-swapped colours.
results = model([
    cv2.cvtColor(img_left, cv2.COLOR_BGR2RGB),
    cv2.cvtColor(img_right, cv2.COLOR_BGR2RGB),
])

# Results
# One DataFrame per input image, in the order the images were passed in.
r = results.pandas().xyxy

results_left_df = r[0]
results_right_df = r[1]

Downloading: "https://github.com/ultralytics/yolov5/zipball/master" to C:\Users\jakab/.cache\torch\hub\master.zip
YOLOv5  2023-5-4 Python-3.11.2 torch-2.0.0+cpu CPU

Fusing layers... 
YOLOv5s summary: 157 layers, 7018216 parameters, 0 gradients
Adding AutoShape... 


[31m[1mrequirements:[0m C:\Users\jakab\.cache\torch\hub\requirements.txt not found, check failed.


In [37]:
def _read_gray(image_path):
    """Load an image from disk and return it as a single-channel grayscale array."""
    image = cv2.imread(str(image_path))
    if image is None:
        # cv2.imread returns None (no exception) when the file cannot be read.
        raise FileNotFoundError(f"Could not read image: {image_path}")
    # cv2.imread yields BGR, so BGR2GRAY is the matching conversion.
    return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)


def _box_descriptors(sift, gray, bbox):
    """Return the SIFT descriptors of the bbox crop, or None if there are none."""
    height, width = gray.shape[:2]
    # Clamp to the image so a negative coordinate cannot wrap around when slicing.
    x0 = max(int(bbox["xmin"]), 0)
    y0 = max(int(bbox["ymin"]), 0)
    x1 = min(int(bbox["xmax"]), width)
    y1 = min(int(bbox["ymax"]), height)
    if x1 <= x0 or y1 <= y0:
        return None
    _, descriptors = sift.detectAndCompute(gray[y0:y1, x0:x1], None)
    return descriptors


def match_objects(
    results_left_df: pd.DataFrame,
    results_right_df: pd.DataFrame,
    img_left_path: Path,
    img_right_path: Path,
) -> tuple[np.ndarray, np.ndarray]:
    """Match detections of the left image to detections of the right image.

    A left/right pair is a candidate when the vertical centres of the two boxes
    differ by at most 10% of the image height and the values in column 5 of
    both rows are equal (presumably the class id of the detector output —
    confirm against the detector's column layout). Candidates are scored by
    the mean distance of their best SIFT descriptor matches, and the final
    one-to-one pairing minimises the total score.

    Args:
        results_left_df (pd.DataFrame): Detections of the left image; must
            provide "xmin", "ymin", "xmax" and "ymax" columns.
        results_right_df (pd.DataFrame): Detections of the right image, same
            layout as ``results_left_df``.
        img_left_path (Path): Path to the left image.
        img_right_path (Path): Path to the right image.

    Returns:
        tuple[np.ndarray, np.ndarray]: ``(row_ind, col_ind)`` positional row
        indices into the left and right frames; ``row_ind[k]`` is matched with
        ``col_ind[k]``. Detections without a valid partner are left out.

    Raises:
        FileNotFoundError: If one of the images cannot be read.
    """
    n_left = len(results_left_df.index)
    n_right = len(results_right_df.index)
    if n_left == 0 or n_right == 0:
        # Nothing to pair up, so there is no need to touch the images at all.
        return np.empty(0, dtype=np.intp), np.empty(0, dtype=np.intp)

    img_left_gray = _read_gray(img_left_path)
    img_right_gray = _read_gray(img_right_path)

    nb_matches = 20          # number of best descriptor matches scored per pair
    max_center_offset = 0.1  # allowed centre-row difference, fraction of image height
    unmatched_cost = 1e12    # cost marking a pair that must not be matched
    sift = cv2.SIFT_create()
    bf = cv2.BFMatcher()
    im_height = img_left_gray.shape[0]

    left_rows = [row for _, row in results_left_df.iterrows()]
    right_rows = [row for _, row in results_right_df.iterrows()]

    # Descriptors depend on a single box only: compute them once per box rather
    # than once per (left, right) pair.
    left_des = [_box_descriptors(sift, img_left_gray, row) for row in left_rows]
    right_des = [_box_descriptors(sift, img_right_gray, row) for row in right_rows]

    # Positional (not label-based) indexing keeps the matrix valid even when the
    # frames do not carry a default 0..n-1 index.
    match_matrix = np.full((n_left, n_right), unmatched_cost)
    for i1, bbox1 in enumerate(left_rows):
        cy1 = (bbox1["ymin"] + bbox1["ymax"]) / 2
        for i2, bbox2 in enumerate(right_rows):
            cy2 = (bbox2["ymin"] + bbox2["ymax"]) / 2
            if abs(cy1 - cy2) / im_height > max_center_offset:
                continue
            if bbox1.iloc[5] != bbox2.iloc[5]:
                continue
            des1, des2 = left_des[i1], right_des[i2]
            if des1 is None or des2 is None:
                # No keypoints in one of the crops: nothing to compare.
                continue
            matches = bf.match(des1, des2)
            if not matches:
                continue
            best = sorted(matches, key=lambda m: m.distance)[:nb_matches]
            # Use the mean rather than the sum so that a box with only a few
            # keypoints is not favoured merely for producing fewer matches.
            match_matrix[i1, i2] = sum(m.distance for m in best) / len(best)

    row_ind, col_ind = scipy.optimize.linear_sum_assignment(match_matrix)

    # The solver always pairs min(n_left, n_right) boxes, including pairs that
    # were ruled out above; drop those instead of reporting them as matches.
    valid = match_matrix[row_ind, col_ind] < unmatched_cost
    return row_ind[valid], col_ind[valid]

In [38]:
# Pair the detections of the two views. `results_left_df` / `results_right_df`
# are the same frames as r[0] / r[1].
row_ind, col_ind = match_objects(results_left_df, results_right_df, img_left_path, img_right_path)

# Reorder both detection tables so that row k of `bboxes_left` and row k of
# `bboxes_right` belong to the same matched pair.
bboxes_left = results_left_df.iloc[row_ind].to_numpy()
bboxes_right = results_right_df.iloc[col_ind].to_numpy()

In [48]:
# Grayscale versions of the stereo pair for block matching.
im_left = cv2.imread(str(img_left_path))
im_left = cv2.cvtColor(im_left, cv2.COLOR_BGR2GRAY)
im_right = cv2.imread(str(img_right_path))
im_right = cv2.cvtColor(im_right, cv2.COLOR_BGR2GRAY)

min_disp = 3
num_disp = 5 * 16
block_size = 31

stereo = cv2.StereoBM_create(numDisparities = num_disp, blockSize = block_size)
stereo.setMinDisparity(min_disp)
stereo.setDisp12MaxDiff(200)
stereo.setUniquenessRatio(1)
stereo.setSpeckleRange(3)
stereo.setSpeckleWindowSize(3)

# The matcher returns fixed-point disparities scaled by 16; convert to pixels.
disp = stereo.compute(im_left, im_right).astype(np.float32) / 16.0

bbox_ratio = 0.3  # fraction of the box (around its centre) that is sampled
n_points = 20     # number of largest disparities averaged per box

# One value per matched left box: the inverse of the mean of its largest valid
# disparities. This is a relative measure (no focal length / baseline is
# applied), and NaN when the sampled window holds no valid disparity.
bboxes_dist = []
for bbox_left in bboxes_left:
    x_min1, y_min1, x_max1, y_max1 = (int(value) for value in bbox_left[:4])

    # Shrink the box around its centre to avoid sampling the background.
    x_center = (x_min1 + x_max1) / 2
    y_center = (y_min1 + y_max1) / 2
    x_half = (x_max1 - x_min1) / 2 * bbox_ratio
    y_half = (y_max1 - y_min1) / 2 * bbox_ratio
    x_min_recalc = max(int(x_center - x_half), 0)
    x_max_recalc = max(int(x_center + x_half), 0)
    y_min_recalc = max(int(y_center - y_half), 0)
    y_max_recalc = max(int(y_center + y_half), 0)

    bbox_disp = disp[y_min_recalc:y_max_recalc, x_min_recalc:x_max_recalc].ravel()

    # Pixels for which the block matcher found no correspondence are reported
    # below the minimum disparity; keep them out of the average.
    valid_disp = bbox_disp[bbox_disp >= min_disp]
    if valid_disp.size == 0:
        bboxes_dist.append(float("nan"))
        continue

    largest_disp = np.sort(valid_disp)[-n_points:]
    bboxes_dist.append(float(1 / largest_disp.mean()))

In [49]:
# Fresh copies of the colour images to draw on.
img_left = cv2.imread(str(img_left_path))
img_right = cv2.imread(str(img_right_path))

# Seeded generator so that the colours are the same on every run.
color_rng = np.random.default_rng(0)


def _draw_labelled_box(image, bbox, label, color):
    """Draw the box given by bbox[:4] (xmin, ymin, xmax, ymax) and its label on image."""
    x_min, y_min, x_max, y_max = (int(value) for value in bbox[:4])
    cv2.rectangle(image, (x_min, y_min), (x_max, y_max), color, 2)
    # Keep the label inside the image for boxes that touch the top border.
    text_y = max(y_min - 10, 25)
    cv2.putText(image, label, (x_min, text_y), cv2.FONT_HERSHEY_SIMPLEX, 1, color, 2, cv2.LINE_AA)


# Matched boxes share one colour and one label across the two views.
for bbox_left, bbox_right, bbox_dist in zip(bboxes_left, bboxes_right, bboxes_dist):
    pair_color = tuple(int(channel) for channel in color_rng.integers(0, 256, size=3))
    pair_label = str(round(bbox_dist, 4))
    _draw_labelled_box(img_left, bbox_left, pair_label, pair_color)
    _draw_labelled_box(img_right, bbox_right, pair_label, pair_color)

# Left image on top, right image below.
merged = np.concatenate((img_left, img_right), axis=0)

# Save next to the project instead of a user-specific absolute path.
output_path = root_dir / 'outputs' / f'{img_left_path.stem}_matched.png'
output_path.parent.mkdir(parents=True, exist_ok=True)
if not cv2.imwrite(str(output_path), merged):
    # cv2.imwrite reports failure through its return value only.
    raise OSError(f"Could not write image: {output_path}")

# Show inline; an OpenCV window would block the kernel until a key is pressed.
fig, ax = plt.subplots(figsize=(12, 8))
ax.imshow(cv2.cvtColor(merged, cv2.COLOR_BGR2RGB))
ax.set_title('Matched detections (top: left image, bottom: right image)')
ax.axis('off')
plt.show()