# Table of Contents
<a id="toc"></a>
- [1. Import Necessary Libraries](#1)
- [2. Load yolov3 weights and configuration](#2)
- [3. Extract the Object names from the coco file](#3)
- [4. load and preprocess target image](#4)
- [5. Detect Objects in the Image](#5)
- [6. Draw Boundary Boxes, Write labels and probability of the image's objects](#6)
- [7. Image Result](#7)
- [8. Detect Objects in a video](#8)
- [9. Detect Objects webcam](#9)



<a id='1'></a>
# Import Libraries

In [1]:
import numpy as np
import cv2

<a id='2'></a>
# Load yolov3 weights and configuration

In [2]:
# Build the detection network from the YOLOv3 weights file and its
# configuration file; both are expected in the working directory.
weights_path = 'yolov3.weights'
config_path = 'yolov3.cfg'
net = cv2.dnn.readNet(weights_path, config_path)

Download the YOLOv3 weights file from: https://pjreddie.com/media/files/yolov3.weights

<a id='3'></a>
# Extract the Object names from the coco file

In [3]:
# Load the class labels, one per line. The position of a label in the list is
# what later maps a predicted class id to its name.
classes = []
with open('coco.names.txt', 'r') as names_file:
    raw_names = names_file.read()
    classes = raw_names.splitlines()
classes  # notebook display of the loaded labels

['person',
 'bicycle',
 'car',
 'motorbike',
 'aeroplane',
 'bus',
 'train',
 'truck',
 'boat',
 'traffic light',
 'fire hydrant',
 'stop sign',
 'parking meter',
 'bench',
 'bird',
 'cat',
 'dog',
 'horse',
 'sheep',
 'cow',
 'elephant',
 'bear',
 'zebra',
 'giraffe',
 'backpack',
 'umbrella',
 'handbag',
 'tie',
 'suitcase',
 'frisbee',
 'skis',
 'snowboard',
 'sports ball',
 'kite',
 'baseball bat',
 'baseball glove',
 'skateboard',
 'surfboard',
 'tennis racket',
 'bottle',
 'wine glass',
 'cup',
 'fork',
 'knife',
 'spoon',
 'bowl',
 'banana',
 'apple',
 'sandwich',
 'orange',
 'broccoli',
 'carrot',
 'hot dog',
 'pizza',
 'donut',
 'cake',
 'chair',
 'sofa',
 'pottedplant',
 'bed',
 'diningtable',
 'toilet',
 'tvmonitor',
 'laptop',
 'mouse',
 'remote',
 'keyboard',
 'cell phone',
 'microwave',
 'oven',
 'toaster',
 'sink',
 'refrigerator',
 'book',
 'clock',
 'vase',
 'scissors',
 'teddy bear',
 'hair drier',
 'toothbrush']

<a id='4'></a>
# load, preprocess and test target image

In [4]:
# Load the target image from disk.
# cv2.imread does not raise when the file is missing or cannot be decoded; it
# returns None instead, which would otherwise surface later as an opaque
# AttributeError on img.shape. Fail early with a clear message.
img = cv2.imread("image.jpg")
if img is None:
    raise FileNotFoundError("Could not read image file: image.jpg")
img.shape  # notebook display of the array shape

(533, 800, 3)

In [5]:
# Unpack the image dimensions; width and height are reused later to scale the
# network's box predictions back to pixel coordinates.
(height, width, channels) = img.shape
shape_summary = f"Height of the image: {height}, width: {width}, channels: {channels}"
print(shape_summary)

Height of the image: 533, width: 800, channels: 3


In [6]:
# Convert the image into a blob, the input format the network accepts:
# pixel values are multiplied by 1/255, the target size is 416x416, no mean
# is subtracted, and the first and last colour channels are swapped.
pixel_scale = 1/255
network_input_size = (416, 416)
mean_to_subtract = (0, 0, 0)
blob = cv2.dnn.blobFromImage(
    img, pixel_scale, network_input_size, mean_to_subtract, swapRB=True, crop=False
)

In [7]:
blob  # notebook display: inspect the preprocessed array that will be fed to the network

array([[[[0.6509804 , 0.67058825, 0.6745098 , ..., 0.91372555,
          0.94117653, 0.9215687 ],
         [0.6509804 , 0.67058825, 0.6745098 , ..., 0.92549026,
          0.9333334 , 0.9058824 ],
         [0.654902  , 0.6745098 , 0.6784314 , ..., 0.8941177 ,
          0.8862746 , 0.86666673],
         ...,
         [0.78823537, 0.78823537, 0.79215693, ..., 0.81568635,
          0.81568635, 0.81568635],
         [0.78823537, 0.78823537, 0.79215693, ..., 0.81568635,
          0.81568635, 0.81568635],
         [0.78823537, 0.78823537, 0.79215693, ..., 0.81568635,
          0.81568635, 0.81568635]],

        [[0.64705884, 0.6666667 , 0.67058825, ..., 0.9058824 ,
          0.9333334 , 0.91372555],
         [0.64705884, 0.67058825, 0.6745098 , ..., 0.9176471 ,
          0.9215687 , 0.8941177 ],
         [0.6509804 , 0.67058825, 0.6745098 , ..., 0.87843144,
          0.86274517, 0.83921576],
         ...,
         [0.7725491 , 0.7725491 , 0.77647066, ..., 0.7803922 ,
          0.7803922 , 0.7

<a id='5'></a>
# Detect Objects in the image

In [8]:
# Forward the blob through the network and keep every candidate whose best
# class score is above 0.5, storing its box in pixel coordinates.
net.setInput(blob)
output_layers_names = net.getUnconnectedOutLayersNames()
layersOutput = net.forward(output_layers_names)

boundary_boxes, probabilities, predicted_classes = [], [], []
for layer_result in layersOutput:
    for candidate in layer_result:
        # Entries from index 5 onward are treated as the per-class scores.
        class_scores = candidate[5:]
        class_id = np.argmax(class_scores)
        probability = class_scores[class_id]
        if not probability > 0.5:
            # Best class score is too low to count as a detected object.
            continue

        # The first four entries describe the box centre and size relative
        # to the image, so scale them back to pixels.
        center_x = int(candidate[0] * width)
        center_y = int(candidate[1] * height)
        box_w = int(candidate[2] * width)
        box_h = int(candidate[3] * height)

        # The box is stored by its upper-left corner rather than its centre.
        left = int(center_x - box_w / 2)
        top = int(center_y - box_h / 2)

        boundary_boxes.append([left, top, box_w, box_h])
        probabilities.append(float(probability))
        predicted_classes.append(class_id)

In [9]:
len(boundary_boxes)  # notebook display: number of candidate boxes kept before non-maximum suppression

21

**Handle multiple boundary boxes for the same object by applying non-maximum suppression**


In [10]:
# Non-maximum suppression: from overlapping boxes keep only the strongest
# one, and return the positions of the kept boxes in boundary_boxes.
score_threshold = 0.5
nms_threshold = 0.4
indexes = cv2.dnn.NMSBoxes(boundary_boxes, probabilities, score_threshold, nms_threshold)

In [11]:
indexes.flatten()  # notebook display: kept box positions as a flat 1-D array

array([11,  3,  9,  1, 20, 17, 15, 16], dtype=int32)

In [12]:
len(indexes.flatten())  # notebook display: number of boxes kept after non-maximum suppression

8

In [13]:
# Drawing resources: the label font, and one random 3-component colour per
# candidate box so a box can look up its colour by its own position.
font = cv2.FONT_HERSHEY_PLAIN
box_count = len(boundary_boxes)
colors = np.random.uniform(low=0, high=255, size=(box_count, 3))

<a id="6"></a>
# Draw Boundary Boxes, Write labels and probability of the image's objects

In [14]:
# Draw every box that survived non-maximum suppression, together with its
# class label and score, then show the annotated image until a key is pressed.
# When nothing is kept, the NMS result may be an empty container without a
# .flatten() method, so only iterate when there is at least one index
# (the same guard the video and webcam cells use).
if len(indexes) > 0:
    for i in indexes.flatten():
        x, y, w, h = boundary_boxes[i]
        label = str(classes[predicted_classes[i]])
        probability = str(round(probabilities[i], 2))
        color = colors[i]
        cv2.rectangle(img, (x, y), (x + w, y + h), color, 2)
        # Place the caption just inside the top-left corner of the box.
        cv2.putText(img, label + " " + probability, (x, y + 20), font, 2, (0, 255, 0), 2)
cv2.imshow('Image', img)
cv2.waitKey(0)  # block until any key is pressed
cv2.destroyAllWindows()

<a id='7'></a>
<h1> Image Result: </h1>
<img src="image result.png">

<a id='8'></a>
# Detect Objects in a Video

In [15]:
# Run object detection on each frame of the video file and display the
# annotated frames until the video ends or the user presses 'q'.
vid = cv2.VideoCapture('test.mp4')
# These values do not change from frame to frame, so look them up once.
output_layers_names = net.getUnconnectedOutLayersNames() # get the output layers names
font = cv2.FONT_HERSHEY_PLAIN
try:
    while True:
        ret, img = vid.read()
        # read() reports failure (for example when the video is exhausted)
        # through its flag and yields no frame; stop cleanly instead of
        # crashing on img.shape.
        if not ret or img is None:
            break
        height, width, channels = img.shape
        blob = cv2.dnn.blobFromImage(img, 1/255, (416, 416), (0,0,0), swapRB=True, crop=False)
        net.setInput(blob) # set the input from the blob into the network
        layersOutput = net.forward(output_layers_names) # run the network and collect the results of its output layers
        boundary_boxes = []
        probabilities = []
        predicted_classes = []
        for output in layersOutput: # walk over the result of each output layer
            for detection in output: # walk over each candidate detection
                scores = detection[5:] # entries from index 5 onward are treated as the per-class scores
                class_id = np.argmax(scores) # position of the highest class score
                probability = scores[class_id] # the highest class score itself
                # only keep candidates whose best score is high enough to count as a detected object
                if probability > 0.5:
                    center_x = int(detection[0]*width) # scale back to pixel coordinates
                    center_y = int(detection[1]*height)
                    w = int(detection[2]*width)
                    h = int(detection[3]*height)
                    # the prediction gives the centre of the box;
                    # convert it to the upper-left corner position
                    x = int(center_x - w/2)
                    y = int(center_y - h/2)
                    boundary_boxes.append([x, y, w, h])
                    probabilities.append(float(probability))
                    predicted_classes.append(class_id)
        # non-maximum suppression: drop overlapping boxes for the same object
        indexes = cv2.dnn.NMSBoxes(boundary_boxes, probabilities, 0.5, 0.4)
        colors = np.random.uniform(0, 255, size=(len(boundary_boxes), 3))
        if len(indexes) > 0:
            for i in indexes.flatten():
                x, y, w, h = boundary_boxes[i]
                label = str(classes[predicted_classes[i]])
                probability = str(round(probabilities[i], 2))
                color = colors[i]
                cv2.rectangle(img, (x, y), (x+w, y+h), color, 2)
                cv2.putText(img, label + " " + probability, (x, y+20), font, 2, (255,255,255), 2)
        cv2.imshow('Image', img)
        key = cv2.waitKey(1)
        if key == ord('q'):
            break
finally:
    # Release the capture and close the window even if a frame fails to process.
    vid.release()
    cv2.destroyAllWindows()
    
    

<a id='9'></a>
# Detect Object Webcam

In [16]:
# Run object detection on frames grabbed from capture device 0 and display
# the annotated frames until the user presses 'q' or a frame cannot be read.
vid = cv2.VideoCapture(0)
# These values do not change from frame to frame, so look them up once.
output_layers_names = net.getUnconnectedOutLayersNames() # get the output layers names
font = cv2.FONT_HERSHEY_PLAIN
try:
    while True:
        ret, img = vid.read()
        # read() reports failure (for example when the device is unavailable
        # or disconnected) through its flag and yields no frame; stop cleanly
        # instead of crashing on img.shape.
        if not ret or img is None:
            break
        height, width, channels = img.shape
        blob = cv2.dnn.blobFromImage(img, 1/255, (416, 416), (0,0,0), swapRB=True, crop=False)
        net.setInput(blob) # set the input from the blob into the network
        layersOutput = net.forward(output_layers_names) # run the network and collect the results of its output layers
        boundary_boxes = []
        probabilities = []
        predicted_classes = []
        for output in layersOutput: # walk over the result of each output layer
            for detection in output: # walk over each candidate detection
                scores = detection[5:] # entries from index 5 onward are treated as the per-class scores
                class_id = np.argmax(scores) # position of the highest class score
                probability = scores[class_id] # the highest class score itself
                # only keep candidates whose best score is high enough to count as a detected object
                if probability > 0.5:
                    center_x = int(detection[0]*width) # scale back to pixel coordinates
                    center_y = int(detection[1]*height)
                    w = int(detection[2]*width)
                    h = int(detection[3]*height)
                    # the prediction gives the centre of the box;
                    # convert it to the upper-left corner position
                    x = int(center_x - w/2)
                    y = int(center_y - h/2)
                    boundary_boxes.append([x, y, w, h])
                    probabilities.append(float(probability))
                    predicted_classes.append(class_id)
        # non-maximum suppression: drop overlapping boxes for the same object
        indexes = cv2.dnn.NMSBoxes(boundary_boxes, probabilities, 0.5, 0.4)
        colors = np.random.uniform(0, 255, size=(len(boundary_boxes), 3))
        if len(indexes) > 0:
            for i in indexes.flatten():
                x, y, w, h = boundary_boxes[i]
                label = str(classes[predicted_classes[i]])
                probability = str(round(probabilities[i], 2))
                color = colors[i]
                cv2.rectangle(img, (x, y), (x+w, y+h), color, 2)
                cv2.putText(img, label + " " + probability, (x, y+20), font, 2, (255,255,255), 2)
        cv2.imshow('Image', img)
        key = cv2.waitKey(100)
        if key == ord('q'):
            break
finally:
    # Release the device and close the window even if a frame fails to process.
    vid.release()
    cv2.destroyAllWindows()
    

In [17]:
#for b in blob:
    #for n, img_blob in enumerate(b):
        #cv2.imshow(str(n), img_blob)
