In [2]:
import cv2
import numpy as np
from tensorflow.keras.models import load_model

# Load your distance prediction model
dist_model = load_model('distance_prediction_fine_tuned.h5')

# Load YOLOv3
net = cv2.dnn.readNet("yolov3.weights", "yolov3.cfg") 
layer_names = net.getLayerNames()
output_layers = [layer_names[i - 1] for i in net.getUnconnectedOutLayers()]

# Load names of classes and get random colors
classes = open('coco.names').read().strip().split('\n')
np.random.seed(42)
colors = np.random.randint(0, 255, size=(len(classes), 3), dtype='uint8')

# Load image
img = cv2.imread("dog.jpg") 
height, width, channels = img.shape

# Detect objects
blob = cv2.dnn.blobFromImage(img, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
net.setInput(blob)
outs = net.forward(output_layers)

# Show information on the screen
class_ids = []
confidences = []
boxes = []
for out in outs:
    for detection in out:
        scores = detection[5:]
        class_id = np.argmax(scores)
        confidence = scores[class_id]
        if confidence > 0.5: 
            center_x = int(detection[0] * width)
            center_y = int(detection[1] * height)
            w = int(detection[2] * width)
            h = int(detection[3] * height)
            x = int(center_x - w / 2)
            y = int(center_y - h / 2)
            boxes.append([x, y, w, h])
            confidences.append(float(confidence))
            class_ids.append(class_id)

indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)

for i in range(len(boxes)):
    if i in indexes:
        x, y, w, h = boxes[i]
        label = str(classes[class_ids[i]])
        print(label)
        
        # Extract larger region of interest centered around the object
        roi_x = max(0, center_x - 448 // 2)
        roi_y = max(0, center_y - 336 // 2)
        roi_x_end = min(width, roi_x + 448)
        roi_y_end = min(height, roi_y + 336)
        
        roi = img[roi_y:roi_y_end, roi_x:roi_x_end]
        
        # Resize ROI to model's expected input size if necessary
        if roi.shape[0] != 448 or roi.shape[1] != 336:
            roi = cv2.resize(roi, (336, 448)) 

        roi = np.expand_dims(roi, axis=0)
        roi = roi / 255.0  # normalize pixel values if your model requires it
        
        # Predict the distance
        distance = dist_model.predict(roi)
        print('Predicted distance:', distance)



truck
Predicted distance: [[1.9616903]]
bicycle
Predicted distance: [[1.9616903]]
dog
Predicted distance: [[1.9616903]]


In [9]:
font = cv2.FONT_HERSHEY_PLAIN
for i in range(len(boxes)):
    if i in indexes:
        x, y, w, h = boxes[i]
        label = str(classes[class_ids[i]])
        color = tuple(map(int, colors[class_ids[i]]))  # convert np.array to tuple
        cv2.rectangle(img, (x, y), (x + w, y + h), color, 2)
        cv2.putText(img, label, (x, y - 5), font, 1, color, 1)

cv2.imwrite('output.jpg', img)  # save the image


True