### OpenCV DNN을 이용하여 SSD 기반 Object Detection 수행
* Tensorflow 에서 Pretrained 된 모델 파일을 OpenCV에서 로드하여 이미지와 영상에 대한 Object Detection 수행.
* SSD+Inception과 SSD+MobileNet v3 를 모두 테스트
* CPU기반 환경에서 SSD의 Inference 속도 주시. 

#### dnn에서 readNetFromTensorflow()로 tensorflow inference 모델을 로딩

In [3]:
import cv2

cv_net = cv2.dnn.readNetFromTensorflow('./pretrained/ssd_inception_v2_coco_2017_11_17/frozen_inference_graph.pb',
                                      './pretrained/ssd_config_01.pbtxt')

#### coco 데이터 세트의 클래스id별 클래스명 지정. 

In [4]:
labels_to_names = {1:'person',2:'bicycle',3:'car',4:'motorcycle',5:'airplane',6:'bus',7:'train',8:'truck',9:'boat',10:'traffic light',
                    11:'fire hydrant',12:'street sign',13:'stop sign',14:'parking meter',15:'bench',16:'bird',17:'cat',18:'dog',19:'horse',20:'sheep',
                    21:'cow',22:'elephant',23:'bear',24:'zebra',25:'giraffe',26:'hat',27:'backpack',28:'umbrella',29:'shoe',30:'eye glasses',
                    31:'handbag',32:'tie',33:'suitcase',34:'frisbee',35:'skis',36:'snowboard',37:'sports ball',38:'kite',39:'baseball bat',40:'baseball glove',
                    41:'skateboard',42:'surfboard',43:'tennis racket',44:'bottle',45:'plate',46:'wine glass',47:'cup',48:'fork',49:'knife',50:'spoon',
                    51:'bowl',52:'banana',53:'apple',54:'sandwich',55:'orange',56:'broccoli',57:'carrot',58:'hot dog',59:'pizza',60:'donut',
                    61:'cake',62:'chair',63:'couch',64:'potted plant',65:'bed',66:'mirror',67:'dining table',68:'window',69:'desk',70:'toilet',
                    71:'door',72:'tv',73:'laptop',74:'mouse',75:'remote',76:'keyboard',77:'cell phone',78:'microwave',79:'oven',80:'toaster',
                    81:'sink',82:'refrigerator',83:'blender',84:'book',85:'clock',86:'vase',87:'scissors',88:'teddy bear',89:'hair drier',90:'toothbrush',
                    91:'hair brush'}


#### 단일 이미지의 object detection을 함수로 생성

In [8]:
import time
from PIL import Image
import imageio
import numpy as np
def get_detected_img(cv_net, img_array,cnt, query_img,video, score_threshold , is_print=True ):
    query_img =cv2.imread(query_img)
    hsv = cv2.cvtColor(query_img, cv2.COLOR_BGR2HSV)
    hist = cv2.calcHist([hsv], [0,1], None, [180,256], [0,180,0, 256])
    imgs=[]
    hists =[]
    query = hist
    rows = img_array.shape[0]
    cols = img_array.shape[1]
    #print(cnt)
    draw_img = img_array.copy()
    methods = {'CORREL' :cv2.HISTCMP_CORREL, 'CHISQR':cv2.HISTCMP_CHISQR,'INTERSECT':cv2.HISTCMP_INTERSECT,'BHATTACHARYYA':cv2.HISTCMP_BHATTACHARYYA}
    cv_net.setInput(cv2.dnn.blobFromImage(img_array, size=(300, 300), swapRB=True, crop=False))
    img_dict ={}
    start = time.time()
    cv_out = cv_net.forward()
    
    green_color=(0, 255, 0)
    red_color=(0, 0, 255)
    count =0
    # detected 된 object들을 iteration 하면서 정보 추출
    for detection in cv_out[0,0,:,:]:
        score = float(detection[2])
        class_id = int(detection[1])
        # detected된 object들의 score가 0.4 이상만 추출
        if score > score_threshold:
            # detected된 object들은 image 크기가 (300, 300)으로 scale된 기준으로 예측되었으므로 다시 원본 이미지 비율로 계산
            left = detection[3] * cols
            top = detection[4] * rows
            right = detection[5] * cols
            bottom = detection[6] * rows
            # labels_to_names 딕셔너리로 class_id값을 클래스명으로 변경. opencv에서는 class_id + 1로 매핑해야함.
            caption = "{}: {:.4f}".format(labels_to_names[class_id], score)
            if class_id ==1:
                
                print(str(left)+ ' '+ str(right)+ ' '+ str(top)+' ', str(bottom))
                #cv2.rectangle()은 인자로 들어온 draw_img에 사각형을 그림. 위치 인자는 반드시 정수형.
                #cv2.rectangle(draw_img, (int(left), int(top)), (int(right), int(bottom)), color=green_color, thickness=2)
                print(img_array.shape)
                
                crop_img = img_array[int(top):int(bottom),int(left):int(right)]
                name = ''
                if '준경' in video:
                    name = '2000'
                elif '주영' in video:
                    name = '2001'
                elif '지석' in video:
                    name = '2002'
                elif '성희' in video:
                    name = '2003'            
                imgs.append(crop_img)
                try:
                    hsv = cv2.cvtColor(crop_img, cv2.COLOR_BGR2HSV)
                except:
                    print('Error')
                else:        
                    hist = cv2.calcHist([hsv], [0,1], None, [180,256], [0,180,0, 256])
                    cv2.normalize(hist, hist, 0, 1, cv2.NORM_MINMAX)
                    hists.append(hist)
                    img_dict[count] = [int(left),int(top),int(right), int(bottom)]
                    cv2.imwrite('./output/'+name+'_'+str(cnt)+'.jpg' , crop_img)
                    #imageio.imwrite('./output/'+str(cnt)+'_'+str(count)+'.jpg' , crop_img)
                    #pil_img = Image.fromarray(crop_img)
                    #pil_img.save('./output/'+name+str(cnt)+'.jpg')
                    count+=1
                    #cv2.putText(draw_img, caption, (int(left), int(top - 5)), cv2.FONT_HERSHEY_SIMPLEX, 0.7, red_color, 2)
                
    """
    values = []
    if count >0:
        for i, (hist, img) in enumerate(zip(hists, imgs)):
            #---④ 각 메서드에 따라 img1과 각 이미지의 히스토그램 비교
            ret = cv2.compareHist(query, hist, 1)
            ret = ret/np.sum(query)
            values.append(ret)       
        if ret > 0.6:
            index = values.index(max(values))  
            left = img_dict[index][0]
            top = img_dict[index][1]
            right = img_dict[index][2]
            bottom = img_dict[index][3]      
            cv2.rectangle(draw_img, (left, right), (right, bottom), color=green_color, thickness=2)       
            #cv2.putText(draw_img, str(ret), (left, top - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.7, red_color, 2)              
            print('check')
    if is_print:
        print('Detection 수행시간:',round(time.time() - start, 2),"초")
    """
    return draw_img

#### VideoCapture와 VideoWriter 설정하고 Video Detection용 전용 함수 생성
* VideoCapture를 이용하여 Video를 frame별로 capture 할 수 있도록 설정
* VideoCapture의 속성을 이용하여 Video Frame의 크기 및 FPS 설정. 
* VideoWriter를 위한 인코딩 코덱 설정 및 영상 write를 위한 설정
총 Frame 별로 iteration 하면서 Object Detection 수행. 개별 frame별로 단일 이미지 Object Detection과 유사 

In [17]:
def do_detected_video(cv_net, input_path, output_path,  score_threshold, is_print, query_img):
    
    cap = cv2.VideoCapture(input_path)

    codec = cv2.VideoWriter_fourcc(*'XVID')

    vid_size = (round(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),round(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    vid_fps = cap.get(cv2.CAP_PROP_FPS)

    vid_writer = cv2.VideoWriter(output_path, codec, vid_fps, vid_size) 

    frame_cnt = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    print('총 Frame 갯수:', frame_cnt, )
    cnt =440
    green_color=(0, 255, 0)
    red_color=(0, 0, 255)
    while True:
        hasFrame, img_frame = cap.read()
        if not hasFrame:
            print('더 이상 처리할 frame이 없습니다.')
            break
        
        returned_frame = get_detected_img(cv_net, img_frame, cnt, query_img, input_path , score_threshold=score_threshold, is_print=True)
        cnt+=1
        vid_writer.write(returned_frame)
    # end of while loop

    vid_writer.release()
    cap.release()

In [18]:
do_detected_video(cv_net, '../videos/준경2.mp4', './data/준경2.mp4', 0.8, False, './output/지석.png')

OpenCV: FFMPEG: tag 0x44495658/'XVID' is not supported with codec id 12 and format 'mp4 / MP4 (MPEG-4 Part 14)'
OpenCV: FFMPEG: fallback to use tag 0x7634706d/'mp4v'


총 Frame 갯수: 639
222.71311104297638 736.4311695098877 733.7228965759277  1912.9573059082031
(1920, 1080, 3)
170.2919590473175 723.109302520752 716.751880645752  1907.1723175048828
(1920, 1080, 3)
193.2460731267929 719.6809244155884 712.2924041748047  1914.5931243896484
(1920, 1080, 3)
199.9940711259842 706.048264503479 694.2058753967285  1919.2586517333984
(1920, 1080, 3)
190.8558064699173 699.2278790473938 664.0497207641602  1917.755241394043
(1920, 1080, 3)
163.56555819511414 672.452244758606 673.3303070068359  1918.2154083251953
(1920, 1080, 3)
161.93074107170105 668.2170152664185 650.2456855773926  1915.2516174316406
(1920, 1080, 3)
170.8078122138977 673.901731967926 636.2193489074707  1913.2038116455078
(1920, 1080, 3)
158.56206893920898 671.4919281005859 638.825569152832  1908.4992599487305
(1920, 1080, 3)
181.15122020244598 667.5660109519958 632.9010200500488  1915.3564453125
(1920, 1080, 3)
169.93641078472137 673.8925266265869 623.1038475036621  1911.4617919921875
(1920, 1080, 3

KeyboardInterrupt: 