# import libraries

In [7]:
import numpy as np 
from PIL import Image

import cv2 
import os

# import yolov3

In [8]:
net = cv2.dnn.readNet("yolov3.weights","yolov3.cfg")

# names of classes

In [3]:
classes=[]
with open("names.txt", "r") as txt_file:
    classes =txt_file.read().splitlines()

In [4]:
classes

['person',
 'bicycle',
 'car',
 'motorbike',
 'aeroplane',
 'bus',
 'train',
 'truck',
 'boat',
 'traffic light',
 'fire hydrant',
 'stop sign',
 'parking meter',
 'bench',
 'bird',
 'cat',
 'dog',
 'horse',
 'sheep',
 'cow',
 'elephant',
 'bear',
 'zebra',
 'giraffe',
 'backpack',
 'umbrella',
 'handbag',
 'tie',
 'suitcase',
 'frisbee',
 'skis',
 'snowboard',
 'sports ball',
 'kite',
 'baseball bat',
 'baseball glove',
 'skateboard',
 'surfboard',
 'tennis racket',
 'bottle',
 'wine glass',
 'cup',
 'fork',
 'knife',
 'spoon',
 'bowl',
 'banana',
 'apple',
 'sandwich',
 'orange',
 'broccoli',
 'carrot',
 'hot dog',
 'pizza',
 'donut',
 'cake',
 'chair',
 'sofa',
 'pottedplant',
 'bed',
 'diningtable',
 'toilet',
 'tvmonitor',
 'laptop',
 'mouse',
 'remote',
 'keyboard',
 'cell phone',
 'microwave',
 'oven',
 'toaster',
 'sink',
 'refrigerator',
 'book',
 'clock',
 'vase',
 'scissors',
 'teddy bear',
 'hair drier',
 'toothbrush']

# lading target image

In [11]:
img = cv2.imread(r"download.JPG")
img.shape

(275, 183, 3)

In [21]:
height, width, channels = img.shape

In [22]:
#the format which their deep learning models accepts as its inputs
blob = cv2.dnn.blobFromImage(img, 1/255, (416, 416), (0,0,0), swapRB=True, crop=False)

# Detect obj in the image

In [23]:
# set the input from the blob into the network
net.setInput(blob)
# get the output layers names
output_layers_names = net.getUnconnectedOutLayersNames() 
## passing output layers names to forward network
layersOutput = net.forward(output_layers_names) 
boundary_boxes = []
probabilities = []
predicted_classes = []
for output in layersOutput: # extract all the information from the layers output
    for detection in output: # extract the information from each of the outputs
        scores = detection[5:] # store all the acting classes predictions 
        class_id = np.argmax(scores) # store the locations that contains the higher scores
        probability = scores[class_id] # extract the higher scores,
        # bec. we want to make sure that thier their predictions has a confidence that is high enough to consider that the object has been detected
        if probability > 0.5:
            center_x = int(detection[0]*width) # scale it back
            center_y = int(detection[1]*height)
            w = int(detection[2]*width)
            h = int(detection[3]*height)
            # bec. yolo predicts the results with the center of the bounding boxes
            # extract the upper left cornor position
            x = int(center_x- w/2)
            y = int(center_y- h/2)
            boundary_boxes.append([x,y,w,h])
            probabilities.append((float(probability)))
            predicted_classes.append(class_id)

# NMS handling

In [24]:
indexes = cv2.dnn.NMSBoxes(boundary_boxes, probabilities, 0.5, 0.4)

In [25]:
font = cv2.FONT_HERSHEY_PLAIN
colors = np.random.uniform(0, 255, size=(len(boundary_boxes), 3))


# Draw Boundary Boxes

In [26]:
for i in indexes.flatten():
    x,y,w,h = boundary_boxes[i]
    label = str(classes[predicted_classes[i]])
    probability = str(round(probabilities[i], 2))
    color = colors[i]
    cv2.rectangle(img, (x,y), (x+w, y+h), color, 2)
    cv2.putText(img, label + " " + probability, (x, y+20), font, 2, (0,255,0), 2)
cv2.imshow('Image', img)
cv2.waitKey(0)
cv2.destroyAllWindows()

# Detect Vedio

In [12]:
vid = cv2.VideoCapture('Pexels Videos 1721303.mp4')
while True:
    _, img = vid.read()
    
    height, width, channels = img.shape
    blob = cv2.dnn.blobFromImage(img, 1/255, (416, 416), (0,0,0), swapRB=True, crop=False)
    net.setInput(blob) # set the input from the blob into the network
    output_layers_names = net.getUnconnectedOutLayersNames() # get the output layers names
    layersOutput = net.forward(output_layers_names) # passing output layers names to forward network function we will get the output from this funciton
    boundary_boxes = []
    probabilities = []
    predicted_classes = []
    for output in layersOutput: # extract all the information from the layers output
        for detection in output: # extract the information from each of the outputs
            scores = detection[5:] # store all the acting classes predictions 
            class_id = np.argmax(scores) # store the locations that contains the higher scores
            probability = scores[class_id] # extract the higher scores,
            # bec. we want to make sure that thier their predictions has a confidence that is high enough to consider that the object has been detected
            if probability > 0.5:
                center_x = int(detection[0]*width) # scale it back
                center_y = int(detection[1]*height)
                w = int(detection[2]*width)
                h = int(detection[3]*height)
                # bec. yolo predicts the results with the center of the bounding boxes
                # extract the upper left cornor position
                x = int(center_x- w/2)
                y = int(center_y- h/2)
                boundary_boxes.append([x,y,w,h])
                probabilities.append((float(probability)))
                predicted_classes.append(class_id)
    indexes = cv2.dnn.NMSBoxes(boundary_boxes, probabilities, 0.5, 0.4)
    font = cv2.FONT_HERSHEY_PLAIN
    colors = np.random.uniform(0, 255, size=(len(boundary_boxes), 3))
    if len(indexes)>0:
        for i in indexes.flatten():
            x,y,w,h = boundary_boxes[i]
            label = str(classes[predicted_classes[i]])
            probability = str(round(probabilities[i], 2))
            color = colors[i]
            cv2.rectangle(img, (x,y), (x+w, y+h), color, 2)
            cv2.putText(img, label + " " + probability, (x, y+20), font, 2, (255,255,255), 2)
    cv2.imshow('Image', img)
    key = cv2.waitKey(1)
    if key == ord('q'):
        break
vid.release()
cv2.destroyAllWindows()

# Detect in WEBCAMERA

In [14]:
vid = cv2.VideoCapture(0)
while True:
    _, img = vid.read()
    height, width, channels = img.shape
    blob = cv2.dnn.blobFromImage(img, 1/255, (416, 416), (0,0,0), swapRB=True, crop=False)
    net.setInput(blob) # set the input from the blob into the network
    output_layers_names = net.getUnconnectedOutLayersNames() # get the output layers names
    layersOutput = net.forward(output_layers_names) # passing output layers names to forward network function we will get the output from this funciton
    boundary_boxes = []
    probabilities = []
    predicted_classes = []
    for output in layersOutput: # extract all the information from the layers output
        for detection in output: # extract the information from each of the outputs
            scores = detection[5:] # store all the acting classes predictions 
            class_id = np.argmax(scores) # store the locations that contains the higher scores
            probability = scores[class_id] # extract the higher scores,
            # bec. we want to make sure that thier their predictions has a confidence that is high enough to consider that the object has been detected
            if probability > 0.5:
                center_x = int(detection[0]*width) # scale it back
                center_y = int(detection[1]*height)
                w = int(detection[2]*width)
                h = int(detection[3]*height)
                # bec. yolo predicts the results with the center of the bounding boxes
                # extract the upper left cornor position
                x = int(center_x- w/2)
                y = int(center_y- h/2)
                boundary_boxes.append([x,y,w,h])
                probabilities.append((float(probability)))
                predicted_classes.append(class_id)
    indexes = cv2.dnn.NMSBoxes(boundary_boxes, probabilities, 0.5, 0.4)
    font = cv2.FONT_HERSHEY_PLAIN
    colors = np.random.uniform(0, 255, size=(len(boundary_boxes), 3))
    if len(indexes)>0:
        for i in indexes.flatten():
            x,y,w,h = boundary_boxes[i]
            label = str(classes[predicted_classes[i]])
            probability = str(round(probabilities[i], 2))
            color = colors[i]
            cv2.rectangle(img, (x,y), (x+w, y+h), color, 2)
            cv2.putText(img, label + " " + probability, (x, y+20), font, 2, (255,255,255), 2)
    cv2.imshow('Image', img)
    key = cv2.waitKey(100)
    if key == ord('q'):
        break
vid.release()
cv2.destroyAllWindows()

AttributeError: 'NoneType' object has no attribute 'shape'