In [2]:
import cv2
import numpy as np
#from google.colab.patches import cv2_imshow

## **Image Classification using OpenCV DNN Module**
### Classify the animal in an image using the class labels listed in the text file classification_classes_ILSVRC2012.txt

In [5]:
# Read the ILSVRC2012 label file; every line describes one class.
with open(r'C:\Users\zenny\source\repos\DeeplearningwithOpenCV_DNNmodule\input\classification_classes_ILSVRC2012.txt', 'r') as labels_file:
    image_net_names = labels_file.read().split('\n')
# A line may hold several comma-separated synonyms; keep only the first one.
class_names = [entry.partition(',')[0] for entry in image_net_names]

In [6]:
# Build the DenseNet-121 classifier from its Caffe weights (.caffemodel)
# and its network definition (.prototxt).
model = cv2.dnn.readNet(
    model=r'C:\Users\zenny\source\repos\DeeplearningwithOpenCV_DNNmodule\input\DenseNet_121.caffemodel',
    config=r'C:\Users\zenny\source\repos\DeeplearningwithOpenCV_DNNmodule\input\DenseNet_121.prototxt',
    framework='Caffe',
)

In [7]:
# Read the input picture from disk.
image = cv2.imread(r'C:\Users\zenny\source\repos\DeeplearningwithOpenCV_DNNmodule\input\tiger1.jpg')
# cv2.imread does not raise on a missing/unreadable file, it returns None;
# fail here with a clear message instead of deep inside blobFromImage.
if image is None:
    raise FileNotFoundError("Could not read input image 'tiger1.jpg'")
# Create the network input blob: resize to 224x224, subtract the per-channel
# mean (104, 117, 123) and scale the result by 0.01.
blob = cv2.dnn.blobFromImage(image=image, scalefactor=0.01, size=(224,224), mean=(104,117,123))

In [8]:
# Feed the preprocessed blob to the network as its input.
model.setInput(blob)
# Run a forward pass of the image blob through the model; `outputs` holds
# the raw (pre-softmax) scores used by the following cells.
outputs = model.forward()


In [9]:
# Take the scores of the first (only) image in the batch and lay them out
# as a (1000, 1) column, one row per class.
final_outputs = outputs[0].reshape(1000, 1)
# Index of the highest-scoring class (argmax works on the flattened array).
label_id = final_outputs.argmax()

In [10]:
# Convert the raw output scores to softmax probabilities.
# Subtracting the maximum score first leaves the softmax result mathematically
# unchanged but keeps np.exp from overflowing on large scores; the
# exponentials are also computed only once.
exp_scores = np.exp(final_outputs - np.max(final_outputs))
probs = exp_scores / np.sum(exp_scores)
# Highest class probability, expressed in percent.
final_prob = np.max(probs) * 100.
final_prob

87.32171654701233

In [13]:
# Map the winning class index to its human-readable label.
out_name = class_names[label_id]
# Label text: class name followed by the confidence in percent.
out_text = f"{out_name}, {final_prob:.3f}"
# Draw the label in green near the top-left corner of the image.
cv2.putText(image, out_text, (25, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
cv2.imshow('image',image)
# Block until a key is pressed, then close the preview window so it does not
# stay open (and unresponsive) after the cell finishes.
cv2.waitKey(0)
cv2.destroyAllWindows()
# Save the annotated image; kept as the last expression so the cell still
# displays imwrite's success flag.
cv2.imwrite('result_image.jpg', image)


True

# **Object Detection in image using OpenCV DNN**

In [35]:
# Read the COCO label file, one class name per line.
with open(r'C:\Users\zenny\source\repos\DeeplearningwithOpenCV_DNNmodule\input\object_detection_classes_coco.txt', 'r') as coco_file:
    class_names = coco_file.read().split('\n')
# One random colour (three channel values in [0, 255)) per class name,
# used when drawing that class's boxes and labels.
COLORS = np.random.uniform(low=0, high=255, size=(len(class_names), 3))


In [36]:
# Build the SSD MobileNet v2 detector from its frozen TensorFlow graph (.pb)
# and the matching text graph description (.pbtxt).
model = cv2.dnn.readNet(
    model=r'C:\Users\zenny\source\repos\DeeplearningwithOpenCV_DNNmodule\input\frozen_inference_graph.pb',
    config=r'C:\Users\zenny\source\repos\DeeplearningwithOpenCV_DNNmodule\input\ssd_mobilenet_v2_coco_2018_03_29.pbtxt.txt',
    framework='TensorFlow',
)

In [37]:
# read the image from disk
image = cv2.imread(r'C:\Users\zenny\source\repos\DeeplearningwithOpenCV_DNNmodule\input\image_2.jpg')
# cv2.imread returns None (it does not raise) when the file cannot be read;
# stop here with a clear message instead of failing on `image.shape`.
if image is None:
    raise FileNotFoundError("Could not read input image 'image_2.jpg'")
# original size, needed later to scale the normalized box coordinates back to pixels
image_height, image_width, _ = image.shape
# create the 300x300 input blob: subtract the per-channel mean and swap the
# first and last colour channels (swapRB=True)
blob = cv2.dnn.blobFromImage(image=image, size=(300, 300), mean=(104, 117, 123), swapRB=True)
# set the blob as the model input
model.setInput(blob)
# forward pass through the model to carry out the detection
output = model.forward()

In [38]:
# Inspect the detection matrix: one row per candidate box, one column per field.
print(output[0, 0].shape)

(100, 7)


The network returns an array of detection results (named `detection_out`) with shape `[1, 1, 100, 7]`, i.e. the general format `[1, 1, N, 7]`, where N is the number of detected bounding boxes. Each detection is described by 7 values in the format `[image_id, label, conf, x_min, y_min, x_max, y_max]`, where:

*   **image_id** - ID of the image in the batch



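*   **label** - predicted class ID. This description is adapted from a PASCAL VOC model (class ids 1..20, with the mapping to class names provided by the `<omz_dir>/data/dataset_classes/voc_20cl_bkgr.txt` file); in this notebook the model is SSD MobileNet v2 trained on COCO, so the 1-based class ID is mapped to a name from `object_detection_classes_coco.txt`.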
*   **conf** - confidence for the predicted class

*   **(x_min, y_min)** - coordinates of the top left bounding box corner (coordinates are in normalized format, in range [0, 1])

*   **(x_max, y_max)** - coordinates of the bottom right bounding box corner (coordinates are in normalized format, in range [0, 1])

In [44]:
# Draw every sufficiently confident detection on the image.
#
# Each detection row has 7 values:
#   index 0: image id within the batch
#   index 1: class label (1-based)
#   index 2: confidence score
#   index 3, 4: x_min, y_min of the box (top-left corner)
#   index 5, 6: x_max, y_max of the box (bottom-right corner)
# The four box values are normalized, so they are multiplied by the image
# width/height to obtain pixel coordinates.
CONFIDENCE_THRESHOLD = .4

for detection in output[0, 0, :, :]:
    # extract the confidence of the detection
    confidence = detection[2]
    # draw bounding boxes only if the detection confidence is above
    # the threshold, else skip
    if confidence > CONFIDENCE_THRESHOLD:
        # class ids are 1-based; use the same 0-based index for both the
        # name and the colour so the last class cannot overrun COLORS
        class_index = int(detection[1]) - 1
        class_name = class_names[class_index]
        color = COLORS[class_index]

        # top-left corner of the box, in pixels
        box_x_min = detection[3] * image_width
        box_y_min = detection[4] * image_height

        # bottom-right corner of the box, in pixels (not a width/height)
        box_x_max = detection[5] * image_width
        box_y_max = detection[6] * image_height

        # draw a rectangle around each detected object
        cv2.rectangle(image, (int(box_x_min), int(box_y_min)), (int(box_x_max), int(box_y_max)), color, thickness=2)
        # put the class name just above the top-left corner of the box
        cv2.putText(image, class_name, (int(box_x_min), int(box_y_min - 5)), cv2.FONT_HERSHEY_SIMPLEX, 1, color, 2)

# show the annotated image, save it, then wait for a key press before
# closing the preview window
cv2.imshow('image',image)
cv2.imwrite('image_result.jpg', image)
cv2.waitKey(0)
cv2.destroyAllWindows()


# **Object Detection in video using OpenCV DNN**

In [49]:
import time


In [50]:
# load the COCO class names, one per line
with open(r'C:\Users\zenny\source\repos\DeeplearningwithOpenCV_DNNmodule\input\object_detection_classes_coco.txt','r') as f:
    class_names = f.read().split('\n')
# get a different color array for each of the classes
COLORS = np.random.uniform(0, 255, size=(len(class_names), 3))
# load the SSD MobileNet v2 detector (frozen TensorFlow graph + text graph description)
model = cv2.dnn.readNet(model=r'C:\Users\zenny\source\repos\DeeplearningwithOpenCV_DNNmodule\input\frozen_inference_graph.pb',
                        config=r'C:\Users\zenny\source\repos\DeeplearningwithOpenCV_DNNmodule\input\ssd_mobilenet_v2_coco_2018_03_29.pbtxt.txt',framework='TensorFlow')
# open the input video
cap = cv2.VideoCapture(r'C:\Users\zenny\source\repos\DeeplearningwithOpenCV_DNNmodule\input\video_1.mp4')
# VideoCapture does not raise when the file cannot be opened; without this
# check the frame size below would be read from an unopened capture.
if not cap.isOpened():
    raise IOError("Could not open input video 'video_1.mp4'")
# frame size of the source video (named property ids instead of the magic numbers 3 and 4)
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
# create the `VideoWriter()` object: mp4v codec, 30 frames per second, same frame size as the input
out = cv2.VideoWriter('video_result.mp4', cv2.VideoWriter_fourcc(*'mp4v'), 30, (frame_width, frame_height))


In [51]:
# Detect objects in each frame of the video, draw them, show the frame and
# append it to the output video. Press 'q' in the preview window to stop early.
#
# Each detection row is [image_id, class_id, confidence, x_min, y_min, x_max, y_max],
# with the four box values normalized (hence the multiplication by the
# frame width/height below).
try:
    while cap.isOpened():
        ret, frame = cap.read()
        # stop when no frame could be read (e.g. end of the video)
        if not ret:
            break

        image = frame
        image_height, image_width, _ = image.shape
        # create blob from image
        blob = cv2.dnn.blobFromImage(image=image, size=(300, 300), mean=(104, 117, 123), swapRB=True)

        # time only the forward pass to calculate the detection FPS
        start = time.perf_counter()
        model.setInput(blob)
        output = model.forward()
        elapsed = time.perf_counter() - start
        # guard against a zero interval so the division cannot raise
        fps = 1 / elapsed if elapsed > 0 else float('inf')

        # loop over each of the detections
        for detection in output[0, 0, :, :]:
            # extract the confidence of the detection
            confidence = detection[2]
            # draw bounding boxes only if the detection confidence is above
            # the threshold, else skip
            if confidence > .4:
                # class ids are 1-based; use the same 0-based index for both
                # the name and the colour so the last class cannot overrun COLORS
                class_index = int(detection[1]) - 1
                class_name = class_names[class_index]
                color = COLORS[class_index]
                # top-left corner of the box, in pixels
                box_x_min = detection[3] * image_width
                box_y_min = detection[4] * image_height
                # bottom-right corner of the box, in pixels (not a width/height)
                box_x_max = detection[5] * image_width
                box_y_max = detection[6] * image_height
                # draw a rectangle around each detected object
                cv2.rectangle(image, (int(box_x_min), int(box_y_min)), (int(box_x_max), int(box_y_max)), color, thickness=2)
                # put the class name text on the detected object
                cv2.putText(image, class_name, (int(box_x_min), int(box_y_min - 5)), cv2.FONT_HERSHEY_SIMPLEX, 1, color, 2)

        # put the FPS text on top of the frame: exactly once per frame,
        # whether or not anything was detected
        cv2.putText(image, f"{fps:.2f} FPS", (20, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

        cv2.imshow('image',image)
        out.write(image)
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
finally:
    # always release the capture AND the writer (releasing the writer is
    # what finalizes 'video_result.mp4'), even if an error interrupts the loop
    cap.release()
    out.release()
    cv2.destroyAllWindows()

[[[195 149 109]
  [195 149 109]
  [195 149 109]
  ...
  [ 38  24  36]
  [ 91  77  89]
  [160 146 158]]

 [[195 149 109]
  [195 149 109]
  [195 149 109]
  ...
  [ 38  24  36]
  [ 91  77  89]
  [160 146 158]]

 [[195 149 109]
  [195 149 109]
  [195 149 109]
  ...
  [ 38  24  36]
  [ 91  77  89]
  [160 146 158]]

 ...

 [[ 37  29  24]
  [ 39  31  26]
  [ 42  34  29]
  ...
  [ 91 161 175]
  [ 91 161 175]
  [ 91 161 175]]

 [[ 39  31  26]
  [ 41  33  28]
  [ 44  36  31]
  ...
  [114 138 148]
  [114 138 148]
  [114 138 148]]

 [[ 41  33  28]
  [ 44  36  31]
  [ 45  37  32]
  ...
  [112 136 146]
  [112 136 146]
  [112 136 146]]]
[[[195 149 109]
  [195 149 109]
  [195 149 109]
  ...
  [ 38  24  36]
  [ 91  77  89]
  [160 146 158]]

 [[195 149 109]
  [195 149 109]
  [195 149 109]
  ...
  [ 38  24  36]
  [ 91  77  89]
  [160 146 158]]

 [[195 149 109]
  [195 149 109]
  [195 149 109]
  ...
  [ 38  24  36]
  [ 91  77  89]
  [160 146 158]]

 ...

 [[ 34  26  21]
  [ 35  27  22]
  [ 35  27  22]
  ..

[[[201 153 118]
  [201 153 118]
  [201 153 118]
  ...
  [ 60  48  60]
  [ 55  41  53]
  [ 46  32  44]]

 [[201 153 118]
  [201 153 118]
  [201 153 118]
  ...
  [ 60  48  60]
  [ 55  41  53]
  [ 46  32  44]]

 [[201 153 118]
  [201 153 118]
  [201 153 118]
  ...
  [ 60  48  60]
  [ 55  41  53]
  [ 46  32  44]]

 ...

 [[ 14   9  10]
  [ 14   9  10]
  [ 14   9  10]
  ...
  [107 143 158]
  [107 143 158]
  [107 143 158]]

 [[ 14   9  10]
  [ 14   9  10]
  [ 14   9  10]
  ...
  [129 141 151]
  [129 141 151]
  [129 141 151]]

 [[ 14   9  10]
  [ 14   9  10]
  [ 14   9  10]
  ...
  [138 150 160]
  [138 150 160]
  [138 150 160]]]
[[[199 151 116]
  [199 151 116]
  [199 151 116]
  ...
  [ 59  47  59]
  [ 59  45  57]
  [ 47  33  45]]

 [[199 151 116]
  [199 151 116]
  [199 151 116]
  ...
  [ 59  47  59]
  [ 59  45  57]
  [ 47  33  45]]

 [[199 151 116]
  [199 151 116]
  [199 151 116]
  ...
  [ 58  46  58]
  [ 59  45  57]
  [ 47  33  45]]

 ...

 [[ 14   9  10]
  [ 14   9  10]
  [ 14   9  10]
  ..

[[[190 147 111]
  [190 147 111]
  [190 147 111]
  ...
  [ 52  48  51]
  [ 57  48  52]
  [ 58  49  53]]

 [[190 147 111]
  [190 147 111]
  [190 147 111]
  ...
  [ 52  48  51]
  [ 56  47  51]
  [ 58  49  53]]

 [[190 147 111]
  [190 147 111]
  [190 147 111]
  ...
  [ 52  48  51]
  [ 56  47  51]
  [ 57  48  52]]

 ...

 [[  9   9   9]
  [ 10  10  10]
  [ 10  10  10]
  ...
  [118 117 119]
  [ 97  97  97]
  [ 87  87  87]]

 [[  9   9   9]
  [ 10  10  10]
  [ 10  10  10]
  ...
  [126 126 126]
  [110 110 110]
  [100 100 100]]

 [[  9   9   9]
  [ 10  10  10]
  [ 10  10  10]
  ...
  [149 149 149]
  [139 139 139]
  [129 129 129]]]
[[[190 147 111]
  [190 147 111]
  [190 147 111]
  ...
  [ 52  48  51]
  [ 57  48  52]
  [ 58  49  53]]

 [[190 147 111]
  [190 147 111]
  [190 147 111]
  ...
  [ 52  48  51]
  [ 56  47  51]
  [ 58  49  53]]

 [[190 147 111]
  [190 147 111]
  [190 147 111]
  ...
  [ 52  48  51]
  [ 56  47  51]
  [ 57  48  52]]

 ...

 [[  9   9   9]
  [  9   9   9]
  [  9   9   9]
  ..

[[[177 134  98]
  [177 134  98]
  [177 134  98]
  ...
  [ 54  50  53]
  [ 50  41  45]
  [ 51  42  46]]

 [[177 134  98]
  [177 134  98]
  [177 134  98]
  ...
  [ 54  50  53]
  [ 50  41  45]
  [ 52  43  47]]

 [[177 134  98]
  [177 134  98]
  [177 134  98]
  ...
  [ 54  50  53]
  [ 50  41  45]
  [ 52  43  47]]

 ...

 [[  8   8   8]
  [  8   8   8]
  [  8   5   6]
  ...
  [ 20  15  16]
  [ 21  16  17]
  [ 21  16  17]]

 [[  8   8   8]
  [  8   8   8]
  [  6   6   6]
  ...
  [ 21  16  17]
  [ 21  16  17]
  [ 21  16  17]]

 [[  8   8   8]
  [  8   8   8]
  [  6   6   6]
  ...
  [ 21  16  17]
  [ 21  16  17]
  [ 21  16  17]]]
[[[177 134  98]
  [177 134  98]
  [177 134  98]
  ...
  [ 57  53  56]
  [ 51  42  46]
  [ 51  42  46]]

 [[177 134  98]
  [177 134  98]
  [177 134  98]
  ...
  [ 57  53  56]
  [ 51  42  46]
  [ 51  42  46]]

 [[177 134  98]
  [177 134  98]
  [177 134  98]
  ...
  [ 57  53  56]
  [ 51  42  46]
  [ 51  42  46]]

 ...

 [[  8   8   8]
  [  8   8   8]
  [  8   8   8]
  ..

[[[160 118  80]
  [160 118  80]
  [160 118  80]
  ...
  [ 37  36  38]
  [ 57  51  54]
  [ 57  51  54]]

 [[160 118  80]
  [160 118  80]
  [160 118  80]
  ...
  [ 37  36  38]
  [ 57  51  54]
  [ 57  51  54]]

 [[158 118  80]
  [158 118  80]
  [158 118  80]
  ...
  [ 34  36  38]
  [ 55  51  54]
  [ 55  51  54]]

 ...

 [[ 48  53  53]
  [ 66  71  71]
  [104 109 109]
  ...
  [ 71 125 146]
  [ 71 125 146]
  [ 71 125 146]]

 [[ 35  40  40]
  [ 57  62  62]
  [105 110 110]
  ...
  [125 117 130]
  [125 117 130]
  [125 117 130]]

 [[ 18  23  23]
  [ 50  55  55]
  [105 110 110]
  ...
  [123 115 128]
  [123 115 128]
  [123 115 128]]]
[[[160 118  80]
  [160 118  80]
  [160 118  80]
  ...
  [ 38  37  39]
  [ 56  50  53]
  [ 55  49  52]]

 [[160 118  80]
  [160 118  80]
  [160 118  80]
  ...
  [ 38  37  39]
  [ 56  50  53]
  [ 55  49  52]]

 [[158 118  80]
  [158 118  80]
  [158 118  80]
  ...
  [ 35  37  39]
  [ 54  50  53]
  [ 53  49  52]]

 ...

 [[ 43  35  37]
  [ 33  25  27]
  [ 37  29  31]
  ..