In [1]:
import numpy as np
import cv2 
import matplotlib.pyplot as plt

# Default size for every matplotlib figure created in this notebook.
plt.rcParams.update({"figure.figsize": [12, 8]})

In [2]:
# Load the pre-trained model.
# The config, weights and labels files are all read from one directory;
# change MODEL_DIR to point at a different copy of the model.
MODEL_DIR = "S:/Python-Anaconda/Anaconda/pkgs/darknet-yolov4-0.0.1-hd8ed1ab_0/models/darknet/yolov4"
yolo_model = cv2.dnn.readNetFromDarknet(MODEL_DIR + "/yolov4.config",
                                        MODEL_DIR + "/yolov4.weights")

# Names of the network's unconnected (output) layers, passed to forward() later.
# Asking OpenCV for the names directly avoids indexing getLayerNames() with the
# result of getUnconnectedOutLayers(), whose array shape is not the same in
# every OpenCV release.
model_layers = yolo_model.getLayerNames()
output_layers = list(yolo_model.getUnconnectedOutLayersNames())

# define class labels (the file is split on newlines, one label per line)
class_labels_path = MODEL_DIR + "/yolov4.labels"
with open(class_labels_path) as labels_file:
    class_labels = labels_file.read().strip().split("\n")

# declare repeating bounding box colors for each class:
# five base colors, repeated until there is at least one row per class label
base_colors = np.array([[255, 0, 0], [0, 255, 0], [0, 0, 255], [255, 155, 0], [255, 0, 255]])
color_repeats = max(1, -(-len(class_labels) // len(base_colors)))  # ceiling division
class_colors = np.tile(base_colors, (color_repeats, 1))


In [3]:
def object_detection_analysis(frame, obj_detections_in_layers, confidence_threshold):
    """Draw a labelled box on a copy of ``frame`` for every confident detection.

    Each detection row is read as::

        [0] box center x   [1] box center y   [2] box width   [3] box height
        [4] not used here (presumably the objectness score -- confirm)
        [5:] one score per class

    The four box values are multiplied by the image width/height, i.e. they
    are treated as fractions of the image size. No non-maximum suppression is
    applied, so overlapping boxes for the same object are all drawn.

    Reads the module-level ``class_labels`` and ``class_colors`` and prints one
    line per drawn detection.

    Args:
        frame: image array; ``shape[0]`` is used as height, ``shape[1]`` as width.
        obj_detections_in_layers: iterable of per-layer detection arrays.
        confidence_threshold: detections whose best class score is not strictly
            greater than this value are skipped.

    Returns:
        A copy of ``frame`` with rectangles and label text drawn on it.
    """
    img_height, img_width = frame.shape[:2]
    box_scale = np.array([img_width, img_height, img_width, img_height])

    result = frame.copy()

    # loop over each output layer
    for object_detections_in_single_layer in obj_detections_in_layers:
        # loop over the detections in each layer
        for object_detection in object_detections_in_single_layer:
            # the winning class is the one with the highest score
            prediction_scores = object_detection[5:]
            predicted_class_id = int(np.argmax(prediction_scores))
            prediction_confidence = prediction_scores[predicted_class_id]

            # keep only detections strictly above the threshold
            if not prediction_confidence > confidence_threshold:
                continue

            predicted_class_label = class_labels[predicted_class_id]

            # scale the box to pixel units and truncate to integers
            bounding_box = object_detection[0:4] * box_scale
            (box_center_x_pt, box_center_y_pt, box_width, box_height) = bounding_box.astype("int")

            # Unclamped top-left corner. The far corner is derived from this
            # value, not from the clamped one, so a box that starts outside the
            # image is cut at the border instead of being shifted inwards.
            left = int(box_center_x_pt - (box_width / 2))
            top = int(box_center_y_pt - (box_height / 2))
            start_x_pt = max(0, left)
            start_y_pt = max(0, top)
            end_x_pt = min(img_width, left + int(box_width))
            end_y_pt = min(img_height, top + int(box_height))

            # fixed color per class; the modulo keeps the lookup valid even if
            # there are more classes than rows in class_colors
            box_color = [int(c) for c in class_colors[predicted_class_id % len(class_colors)]]

            # print the prediction in console
            predicted_class_label = "{}: {:.2f}%".format(predicted_class_label, prediction_confidence * 100)
            print("predicted object {}".format(predicted_class_label))

            # draw the rectangle and text in the image
            cv2.rectangle(result, (start_x_pt, start_y_pt), (end_x_pt, end_y_pt), box_color, 1)
            cv2.putText(result, predicted_class_label, (start_x_pt, start_y_pt-5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, box_color, 1)
    return result

In [4]:
# Open the laptop camera
cap = cv2.VideoCapture(0)

# Settings that stay the same for every frame
scalefactor = 1.0/255.0
new_size = (416, 416)
confidence_threshold = 0.2

# try/finally guarantees the camera is released and the window closed even if
# the loop is stopped by an exception or a kernel interrupt.
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # Preprocess the frame to create a blob object
        blob = cv2.dnn.blobFromImage(frame, scalefactor, new_size, swapRB=True, crop=False)

        # Input the preprocessed blob into the model
        yolo_model.setInput(blob)
        obj_detections_in_layers = yolo_model.forward(output_layers)

        # Object detection analysis and visualization
        result_raw = object_detection_analysis(frame, obj_detections_in_layers, confidence_threshold)

        # Display the resulting frame with bounding boxes and class labels
        cv2.imshow('Object Detection',result_raw)

        # Press 'q' key to exit the loop and stop the camera stream
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the camera and close all OpenCV windows
    cap.release()
    cv2.destroyAllWindows()

predicted object backpack: 40.70%
predicted object backpack: 42.35%
predicted object backpack: 21.93%
predicted object backpack: 26.02%
predicted object backpack: 45.37%
predicted object backpack: 45.88%
predicted object backpack: 25.82%
predicted object backpack: 29.04%
predicted object chair: 31.62%
predicted object chair: 75.25%
predicted object chair: 80.20%
predicted object backpack: 20.22%
predicted object backpack: 20.96%
predicted object chair: 21.30%
predicted object chair: 30.26%
predicted object person: 62.19%
predicted object person: 35.38%
predicted object person: 31.62%
predicted object backpack: 70.21%
predicted object backpack: 69.36%
predicted object backpack: 44.72%
predicted object backpack: 44.77%
predicted object chair: 37.25%
predicted object chair: 37.83%
predicted object chair: 24.63%
predicted object chair: 77.81%
predicted object chair: 79.23%
predicted object chair: 51.91%
predicted object backpack: 31.09%
predicted object backpack: 31.91%
predicted object ch

predicted object backpack: 52.69%
predicted object backpack: 54.88%
predicted object backpack: 21.61%
predicted object backpack: 22.61%
predicted object backpack: 22.99%
predicted object backpack: 51.52%
predicted object backpack: 61.58%
predicted object backpack: 25.51%
predicted object backpack: 34.25%
predicted object backpack: 40.74%
predicted object backpack: 50.17%
predicted object backpack: 57.59%
predicted object backpack: 57.11%
predicted object backpack: 77.00%
predicted object backpack: 77.87%
predicted object backpack: 75.01%
predicted object backpack: 70.37%
predicted object backpack: 70.47%
predicted object backpack: 65.96%
predicted object bed: 25.35%
predicted object bed: 23.83%
predicted object bed: 23.81%
predicted object chair: 50.94%
predicted object chair: 55.71%
predicted object chair: 31.74%
predicted object chair: 35.46%
predicted object person: 60.77%
predicted object person: 58.32%
predicted object person: 28.82%
predicted object person: 26.45%
predicted objec

predicted object backpack: 76.80%
predicted object backpack: 78.32%
predicted object backpack: 46.98%
predicted object backpack: 48.17%
predicted object backpack: 27.95%
predicted object backpack: 52.20%
predicted object backpack: 62.54%
predicted object backpack: 25.60%
predicted object backpack: 34.20%
predicted object backpack: 37.72%
predicted object backpack: 46.69%
predicted object person: 64.66%
predicted object person: 88.37%
predicted object person: 47.19%
predicted object backpack: 81.39%
predicted object backpack: 80.47%
predicted object backpack: 76.54%
predicted object backpack: 77.41%
predicted object backpack: 75.35%
predicted object backpack: 65.67%
predicted object backpack: 65.70%
predicted object backpack: 61.07%
predicted object bed: 22.76%
predicted object bed: 21.55%
predicted object bed: 20.82%
predicted object chair: 24.70%
predicted object chair: 34.25%
predicted object chair: 25.94%
predicted object chair: 26.46%
predicted object chair: 28.80%
predicted object

In [5]:
# Module-level accumulators that object_detection_attributes() appends to:
# class ids, [x, y, width, height] boxes and confidences, index-aligned.
# Three separate list objects (not one shared list).
class_ids_list, boxes_list, confidences_list = [], [], []

In [6]:
def object_detection_attributes(frame, obj_detections_in_layers, confidence_threshold,
                                class_ids=None, boxes=None, confidences=None):
    """Collect class id, box and confidence for every confident detection.

    Each detection row is read as ``[center_x, center_y, width, height, <index 4,
    unused>, class scores...]``; the four box values are multiplied by the image
    width/height, i.e. treated as fractions of the image size.

    Results are appended to three index-aligned lists. When an output list is
    not supplied, the module-level ``class_ids_list`` / ``boxes_list`` /
    ``confidences_list`` is used, which is what existing callers rely on.

    Args:
        frame: image array; ``shape[0]`` is used as height, ``shape[1]`` as width.
        obj_detections_in_layers: iterable of per-layer detection arrays.
        confidence_threshold: detections whose best class score is not strictly
            greater than this value are skipped.
        class_ids: optional list receiving the winning class index (``int``).
        boxes: optional list receiving ``[x, y, width, height]`` in pixels,
            clipped to the image.
        confidences: optional list receiving the winning score (``float``).

    Returns:
        The tuple ``(class_ids, boxes, confidences)`` of the lists appended to.
    """
    # fall back to the notebook-level accumulators for existing callers
    if class_ids is None:
        class_ids = class_ids_list
    if boxes is None:
        boxes = boxes_list
    if confidences is None:
        confidences = confidences_list

    img_height, img_width = frame.shape[:2]
    box_scale = np.array([img_width, img_height, img_width, img_height])

    # loop over each output layer
    for object_detections_in_single_layer in obj_detections_in_layers:
        # loop over the detections in each layer
        for object_detection in object_detections_in_single_layer:
            # the winning class is the one with the highest score
            prediction_scores = object_detection[5:]
            predicted_class_id = int(np.argmax(prediction_scores))
            prediction_confidence = prediction_scores[predicted_class_id]

            # keep only detections strictly above the threshold
            if not prediction_confidence > confidence_threshold:
                continue

            # scale the box to pixel units and truncate to integers
            bounding_box = object_detection[0:4] * box_scale
            (box_center_x_pt, box_center_y_pt, box_width, box_height) = bounding_box.astype("int")

            # Unclamped top-left corner; the far corner is derived from it so
            # that clipping at the image border shrinks the box instead of
            # shifting it inwards at full size.
            left = int(box_center_x_pt - (box_width / 2))
            top = int(box_center_y_pt - (box_height / 2))
            start_x_pt = max(0, left)
            start_y_pt = max(0, top)
            end_x_pt = min(img_width, left + int(box_width))
            end_y_pt = min(img_height, top + int(box_height))

            class_ids.append(predicted_class_id)
            confidences.append(float(prediction_confidence))
            boxes.append([start_x_pt,
                          start_y_pt,
                          max(0, end_x_pt - start_x_pt),
                          max(0, end_y_pt - start_y_pt)])

    return class_ids, boxes, confidences

In [7]:
# Open the laptop camera
cap = cv2.VideoCapture(0)

# Settings that stay the same for every frame
scalefactor = 1.0/255.0
new_size = (416, 416)
score_threshold = 0.5
nms_threshold = 0.4

# try/finally guarantees the camera is released and the window closed even if
# the loop is stopped by an exception or a kernel interrupt.
try:
    while True:
        # Start every frame with empty accumulators so detections left over
        # from a previous frame (or an interrupted earlier run of this cell)
        # never reach NMS. object_detection_attributes() appends to these
        # module-level lists.
        class_ids_list = []
        boxes_list = []
        confidences_list = []

        # Read a frame from the camera
        ret, frame = cap.read()
        # Uncomment to mirror the preview:
        # frame = cv2.flip(frame, 1)
        # Check if the frame was read successfully
        if not ret:
            break

        # Preprocess the frame to create a blob object
        blob = cv2.dnn.blobFromImage(frame, scalefactor, new_size, swapRB=True, crop=False)

        # Input the preprocessed blob into the model
        yolo_model.setInput(blob)
        obj_detections_in_layers = yolo_model.forward(output_layers)
        object_detection_attributes(frame, obj_detections_in_layers, score_threshold)

        # Non-maximum suppression: keep one box per group of overlapping boxes
        winner_ids = cv2.dnn.NMSBoxes(boxes_list, confidences_list, score_threshold, nms_threshold)

        # loop through the final set of detections; flattening copes with the
        # result being an empty tuple, a flat array or an N x 1 array
        for winner_id in np.array(winner_ids).flatten():
            max_class_id = int(winner_id)
            box = boxes_list[max_class_id]
            start_x_pt = box[0]
            start_y_pt = box[1]
            box_width = box[2]
            box_height = box[3]

            # get the predicted class id and label
            predicted_class_id = class_ids_list[max_class_id]
            predicted_class_label = class_labels[predicted_class_id]
            prediction_confidence = confidences_list[max_class_id]

            # obtain the bounding box end coordinates
            end_x_pt = start_x_pt + box_width
            end_y_pt = start_y_pt + box_height

            # fixed color per class; the modulo keeps the lookup valid even if
            # there are more classes than rows in class_colors
            box_color = [int(c) for c in class_colors[predicted_class_id % len(class_colors)]]

            # print the prediction in console
            predicted_class_label = "{}: {:.2f}%".format(predicted_class_label, prediction_confidence * 100)
            print("predicted object {}".format(predicted_class_label))

            # draw rectangle and text in the image
            cv2.rectangle(frame, (start_x_pt, start_y_pt), (end_x_pt, end_y_pt), box_color, 2)
            cv2.putText(frame, predicted_class_label, (start_x_pt, start_y_pt-5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, box_color, 2)

        # Display the resulting frame with bounding boxes and class labels
        cv2.imshow('Object Detection',frame)

        # Press 'q' key to exit the loop and stop the camera stream
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the camera and close all OpenCV windows
    cap.release()
    cv2.destroyAllWindows()


predicted object person: 95.69%
predicted object backpack: 84.47%
predicted object backpack: 76.32%
predicted object chair: 52.86%
predicted object person: 94.77%
predicted object backpack: 86.49%
predicted object backpack: 78.03%
predicted object person: 95.45%
predicted object backpack: 82.55%
predicted object backpack: 78.37%
predicted object person: 96.01%
predicted object backpack: 82.24%
predicted object backpack: 75.16%
predicted object chair: 55.47%
predicted object person: 95.77%
predicted object backpack: 84.60%
predicted object backpack: 75.26%
predicted object person: 95.98%
predicted object backpack: 83.60%
predicted object backpack: 76.97%
predicted object person: 96.31%
predicted object backpack: 85.35%
predicted object backpack: 76.58%
predicted object chair: 53.67%
predicted object person: 95.72%
predicted object backpack: 84.20%
predicted object backpack: 77.81%
predicted object chair: 52.43%
predicted object person: 94.51%
predicted object backpack: 83.07%
predicted 