In [1]:
#https://github.com/meenavyas/Misc/blob/master/ObjectDetectionUsingYolo/ObjectDetectionUsingYolo.ipynb
import cv2
import numpy as np

In [None]:


# Input media (image or video) on which detection is run
IMAGE = "./2.mp4"

# YOLO network configuration file
# download https://github.com/arunponnusamy/object-detection-opencv/blob/master/yolov3.cfg
CONFIG = "./yolov3.cfg"

# Text file listing the class names, one per line
# download https://github.com/arunponnusamy/object-detection-opencv/blob/master/yolov3.txt
CLASSES = "./yolov3.txt"

# Pre-trained YOLO weights
# wget https://pjreddie.com/media/files/yolov3.weights
WEIGHTS = "./yolov3.weights"

In [3]:
# Sanity check: report, one result per line, whether each required file is present
# (order: class names, network config, weights, input media)
import os
print(*(os.path.exists(path) for path in (CLASSES, CONFIG, WEIGHTS, IMAGE)), sep='\n')

True
True
True
True


In [4]:

# Load the class labels: one label per line of the CLASSES file, surrounding
# whitespace removed
with open(CLASSES, 'r') as names_file:
    classes = [entry.strip() for entry in names_file]

# Detection settings
scale = 0.00392          # pixel scale factor handed to blobFromImage (about 1/255)
conf_threshold = 0.5     # minimum score passed to non-max suppression
nms_threshold = 0.4      # overlap threshold passed to non-max suppression

# One random colour (3 values in [0, 255)) per class, used when drawing boxes
COLORS = np.random.uniform(0, 255, size=(len(classes), 3))

In [5]:
def get_output_layers(net):
    """Return the names of the network's unconnected (output) layers.

    Parameters
    ----------
    net : object exposing ``getLayerNames()`` and ``getUnconnectedOutLayers()``
        (e.g. the network returned by ``cv2.dnn.readNet``).

    Returns
    -------
    list
        Layer names, in the order reported by ``getUnconnectedOutLayers()``.
    """
    layer_names = net.getLayerNames()
    # Depending on the OpenCV version the ids come back either as an (N, 1)
    # array or as a flat (N,) array; flattening makes both shapes work instead
    # of failing on ``i[0]`` for the flat form.
    out_ids = np.asarray(net.getUnconnectedOutLayers()).reshape(-1)
    # The reported ids are 1-based, the name list is 0-based.
    return [layer_names[int(layer_id) - 1] for layer_id in out_ids]

def draw_bounding_box(img, class_id, confidence, x, y, x_plus_w, y_plus_h):
    """Draw one detection on ``img``: a rectangle plus a "<class>:<score>" caption.

    ``(x, y)`` and ``(x_plus_w, y_plus_h)`` are opposite corners of the box.
    The class name comes from ``classes`` and the colour from ``COLORS``, both
    indexed by ``class_id``. The score is rounded to two decimals and the
    caption is anchored 10 pixels up and to the left of ``(x, y)``.
    """
    caption = '{}:{}'.format(classes[class_id], round(confidence, 2))
    box_color = COLORS[class_id]
    top_left = (x, y)
    bottom_right = (x_plus_w, y_plus_h)
    caption_origin = (x - 10, y - 10)
    cv2.rectangle(img, top_left, bottom_right, box_color, 2)
    cv2.putText(img, caption, caption_origin, cv2.FONT_HERSHEY_SIMPLEX, 0.5, box_color, 2)

In [6]:
#https://docs.opencv.org/3.4/d6/d0f/group__dnn.html

def processImage(image, index):
    """Run YOLO detection on one frame, draw the results and save the frame.

    Parameters
    ----------
    image : frame as returned by ``cv2.VideoCapture.read()`` / ``cv2.imread``.
        The bounding boxes are drawn directly onto this array.
    index : value appended to ``"image"`` to build the output file name
        (``out/image<index>.jpg``).

    Raises
    ------
    ValueError
        If ``image`` is ``None`` (e.g. the frame could not be read).
    """
    import os  # local import so this cell does not depend on another cell's import

    if image is None:
        raise ValueError("processImage received no image (frame could not be read)")

    Height, Width = image.shape[:2]

    # Reuse the already-loaded network instead of re-reading the weights file
    # from disk for every single frame.
    net = _get_cached_net()

    # Build the input blob: scaled pixel values, resized to 416x416, no mean
    # subtraction, red/blue channels swapped, no cropping.
    blob = cv2.dnn.blobFromImage(image, scale, (416, 416), (0, 0, 0), swapRB=True, crop=False)
    net.setInput(blob)

    # Run inference and gather the predictions of every output layer.
    outs = net.forward(get_output_layers(net))

    class_ids = []
    confidences = []
    boxes = []

    # Each detection row starts with [center_x, center_y, width, height]
    # (relative to the frame size, hence the multiplications below); the class
    # scores start at index 5 (index 4, the objectness value in the YOLO output
    # layout, is not used here).
    for out in outs:
        for detection in out:
            scores = detection[5:]          # scores over all classes
            class_id = np.argmax(scores)    # class with the highest score
            confidence = scores[class_id]   # score of that class
            # Use the shared threshold rather than a second hard-coded 0.5 so
            # this filter and the NMS call below cannot drift apart.
            if confidence > conf_threshold:
                center_x = int(detection[0] * Width)
                center_y = int(detection[1] * Height)
                w = int(detection[2] * Width)
                h = int(detection[3] * Height)
                # convert the box centre to its top-left corner
                x = center_x - w / 2
                y = center_y - h / 2
                class_ids.append(int(class_id))
                confidences.append(float(confidence))
                boxes.append([x, y, w, h])

    # Non-max suppression: keep only the best box among overlapping ones.
    indices = cv2.dnn.NMSBoxes(boxes, confidences, conf_threshold, nms_threshold)

    # Depending on the OpenCV version the kept indices are returned nested
    # ((N, 1)), flat ((N,)) or as an empty tuple; flattening handles all three.
    for kept in np.asarray(indices).reshape(-1):
        kept = int(kept)
        x, y, w, h = boxes[kept]
        draw_bounding_box(image, class_ids[kept], confidences[kept],
                          round(x), round(y), round(x + w), round(y + h))

    out_image_name = "image" + str(index)
    # cv2.imwrite does not create missing directories, so make sure the target
    # directory exists before saving the annotated frame.
    os.makedirs("out", exist_ok=True)
    cv2.imwrite("out/" + out_image_name + ".jpg", image)


# Networks already loaded from disk, keyed by (weights path, config path).
_NET_CACHE = {}


def _get_cached_net():
    """Return the network for the current WEIGHTS/CONFIG, loading it on first use."""
    key = (WEIGHTS, CONFIG)
    if key not in _NET_CACHE:
        _NET_CACHE[key] = cv2.dnn.readNet(WEIGHTS, CONFIG)
    return _NET_CACHE[key]

In [7]:
!ls
# Smoke test: run the detector on the first frame of the video only.
index = 0
cap = cv2.VideoCapture(IMAGE)
try:
    ret, frame = cap.read()
finally:
    # always give the video handle back, even if reading raised
    cap.release()
if not ret:
    # read() signals failure through its first return value; stop with a
    # clear message instead of passing an unusable frame on
    raise RuntimeError("could not read a frame from " + IMAGE)
processImage(frame,index)
cv2.destroyAllWindows()

1.mp4  out	 yolov3.cfg    yolov3.txt
2.mp4  outvideo  yolov3.ipynb  yolov3.weights


In [None]:
# open the video file
cap = cv2.VideoCapture(IMAGE)

index = 0
try:
    while cap.isOpened():
        ret, frame = cap.read()
        # read() reports failure (e.g. no more frames) through `ret`; without
        # this check the loop would hand an unusable frame to processImage
        if not ret:
            break
        processImage(frame,index)
        index = index + 1
finally:
    # release resources, including the capture handle itself
    cap.release()
    cv2.destroyAllWindows()

In [3]:
import cv2
import numpy as np
import os

def frames_to_video(inputpath, outputpath, fps):
    """Assemble the numbered frame images found in ``inputpath`` into a video.

    Frame files are expected to be named like ``image<N>.jpg``: the number is
    read from the characters between the first five and the last four of the
    file name, and frames are written in increasing ``N``. Files whose name
    does not yield a number, and files that cannot be decoded as images, are
    skipped.

    Frames are written one at a time instead of being collected in memory
    first. The video size is taken from the first readable frame; any later
    frame of a different size is resized to match.

    Parameters
    ----------
    inputpath : directory containing the frame images (with or without a
        trailing separator).
    outputpath : path of the video file to create.
    fps : frame rate passed to ``cv2.VideoWriter``.

    Raises
    ------
    ValueError
        If ``inputpath`` contains no usable frame.
    """
    def _frame_number(filename):
        # number embedded in e.g. "image12.jpg" -> 12; None when not numeric
        try:
            return int(filename[5:-4])
        except ValueError:
            return None

    # collect (number, file name) for every regular file that looks like a frame
    numbered = []
    for filename in os.listdir(inputpath):
        if not os.path.isfile(os.path.join(inputpath, filename)):
            continue
        number = _frame_number(filename)
        if number is not None:
            numbered.append((number, filename))
    if not numbered:
        raise ValueError("no frame images found in " + repr(inputpath))

    # put the frames in increasing order
    numbered.sort(key=lambda pair: pair[0])

    fourcc = cv2.VideoWriter_fourcc('D', 'I', 'V', 'X')
    out = None
    size = None
    try:
        for _, filename in numbered:
            img = cv2.imread(os.path.join(inputpath, filename))
            if img is None:
                # imread returns None for files it cannot decode
                continue
            frame_size = (img.shape[1], img.shape[0])  # (width, height)
            if out is None:
                # open the writer lazily, once the frame size is known
                size = frame_size
                out = cv2.VideoWriter(outputpath, fourcc, fps, size)
            elif frame_size != size:
                img = cv2.resize(img, size)
            out.write(img)  # append the frame to the output stream
    finally:
        if out is not None:
            out.release()

    if out is None:
        raise ValueError("no readable frame images found in " + repr(inputpath))


# Rebuild a video from the annotated frames saved in out/
inputpath = 'out/'
# Name the result after the source file only: IMAGE may carry a directory part
# (here "./"), which must not leak into the output location.
outpath = os.path.join('outvideo', os.path.basename(IMAGE))
# NOTE(review): hard-coded frame rate; confirm it matches the source video
fps = 29
# make sure the destination directory exists before the writer is opened
os.makedirs(os.path.dirname(outpath), exist_ok=True)
frames_to_video(inputpath,outpath,fps)

In [4]:
# Preview one saved frame inline to check the drawn boxes visually
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
# out/image0.jpg is the file name processImage produces for index 0
img = mpimg.imread('out/image0.jpg')
plt.imshow(img)

<matplotlib.image.AxesImage at 0x7fa0192bc2e8>