# ソーシャルディスタンスを検知するアプリを作ろう
インテル® OpenVINO™ ツールキットの事前学習済みモデルを使ってソーシャルディスタンスを検知するアプリケーションを作成します。事前学習済みモデルを利用することで簡単にAI機能を含んだアプリケーションを開発できることをご体験ください。

## 第1章 人物検出

### STEP0. 事前学習済みモデルのダウンロード

In [None]:
# Download the pre-trained person-detection-retail-0013 model by running the
# Model Downloader script found under the directory named by $INTEL_OPENVINO_DIR.
!python3 $INTEL_OPENVINO_DIR/deployment_tools/tools/model_downloader/downloader.py --name person-detection-retail-0013

### STEP1. モジュールのインポート

In [None]:
import sys
import time
import os
import io

import cv2
import numpy as np
from scipy.spatial import distance

import logging as log
from PIL import Image
import PIL

from munkres import Munkres

from openvino.inference_engine import IENetwork, IECore

import IPython.display
from IPython.display import clear_output

### STEP2. 必要なクラスを定義

In [None]:
# Data holder that keeps the information associated with one detected person.
class data_base_h:
    """Record describing a single detected person."""

    def __init__(self, position, feature=None, id=-1):
        """Store the detection and stamp it with the current monotonic time.

        Args:
            position: Bounding box of the person; stored as given.
            feature: Feature vector of the person. When omitted, each
                instance gets its own new empty list.
            id: Identifier of the person; -1 means "not assigned yet".
        """
        self.pos = position
        # A default of ``[]`` in the signature would be one list shared by
        # every instance, so the empty list is created per instance instead.
        self.feature = [] if feature is None else feature
        self.time = time.monotonic()
        self.id = id

# Runs the person-detection deep-learning model through OpenVINO.
class PersonDetector:
    """Detect people in an image with an OpenVINO Inference Engine network."""

    # Minimum confidence for a detection to be reported as a person.
    THRESHOLD = 0.3

    def __init__(self, iecore, model_path):
        """Read the network and load it onto the CPU device.

        Args:
            iecore: Inference Engine core object used to read and load the
                network.
            model_path: Model path without extension; ".xml" and ".bin" are
                appended to locate the topology and the weights.
        """
        self.net_h = iecore.read_network(model=model_path + ".xml", weights=model_path + ".bin")
        self.input_name_h = next(iter(self.net_h.inputs))
        self.input_shape_h = self.net_h.inputs[self.input_name_h].shape
        self.out_name_h = next(iter(self.net_h.outputs))
        self.out_shape_h = self.net_h.outputs[self.out_name_h].shape
        self.exec_net_h = iecore.load_network(self.net_h, 'CPU')

    def inference_and_get_person_bbox(self, image):
        """Run detection on one frame and return the people found.

        Args:
            image: Frame as a height x width x channels array.

        Returns:
            A list of ``data_base_h`` objects whose ``pos`` is
            ``[xmin, ymin, xmax, ymax]`` in pixels of ``image``. Boxes are
            clamped to the frame, and boxes with no area are dropped.
        """
        height, width = image.shape[:2]

        # Indices 3 and 2 of the input shape are used as width and height,
        # and the frame is reordered from HWC to CHW before inference.
        in_frame = cv2.resize(image, (self.input_shape_h[3], self.input_shape_h[2]))
        in_frame = in_frame.transpose((2, 0, 1))
        in_frame = in_frame.reshape(self.input_shape_h)
        res_h = self.exec_net_h.infer(inputs={self.input_name_h: in_frame})
        person_bbox_list = res_h[self.out_name_h][0][0]

        detected_person_list = []
        for person_bbox in person_bbox_list:
            probability = person_bbox[2]
            if not probability > PersonDetector.THRESHOLD:
                continue

            # Entries 3..6 are scaled by the frame size to get pixel
            # coordinates. Values outside the frame are clamped to its
            # border; taking abs() would mirror a negative coordinate back
            # into the frame and could produce an inverted (empty) box.
            xmin = min(max(int(person_bbox[3] * width), 0), width)
            ymin = min(max(int(person_bbox[4] * height), 0), height)
            xmax = min(max(int(person_bbox[5] * width), 0), width)
            ymax = min(max(int(person_bbox[6] * height), 0), height)

            # A box without area yields an empty crop that cannot be
            # processed any further.
            if xmax <= xmin or ymax <= ymin:
                continue

            detected_person_list.append(data_base_h([xmin, ymin, xmax, ymax]))
        return detected_person_list

### STEP3. 各種ユーティリティー関数を定義

In [None]:
# Applies a mosaic (pixelation) effect to the region of a detected person.
def mosaic_area(img, xmin, ymin, xmax, ymax, ratio=0.05):
    """Return a copy of ``img`` whose given rectangle has been pixelated.

    Args:
        img: Source image array; it is not modified.
        xmin, ymin, xmax, ymax: Rectangle to pixelate, as slice bounds.
        ratio: Shrink factor forwarded to ``mosaic``.
    """
    result = img.copy()
    region = (slice(ymin, ymax), slice(xmin, xmax))
    result[region] = mosaic(result[region], ratio)
    return result

def mosaic(img, ratio=0.05):
    """Pixelate ``img`` by shrinking it and enlarging it back.

    Both resizes use nearest-neighbour interpolation so the enlarged image
    is made of flat blocks.

    Args:
        img: Image array to pixelate.
        ratio: Shrink factor applied to both axes.

    Returns:
        A pixelated image with the same height and width as ``img``; an
        image with zero height or width is returned unchanged.
    """
    height, width = img.shape[:2]
    if height == 0 or width == 0:
        # cv2.resize rejects empty images and there is nothing to pixelate.
        return img

    # Keep at least one pixel per axis: scaling a small region by ``ratio``
    # would otherwise round to a zero-sized target, which cv2.resize rejects.
    small_size = (max(1, int(round(width * ratio))), max(1, int(round(height * ratio))))
    small = cv2.resize(img, small_size, interpolation=cv2.INTER_NEAREST)
    return cv2.resize(small, (width, height), interpolation=cv2.INTER_NEAREST)


# Builds the image (frame) shown on screen, annotated with the inference results.
def create_output_image(image, detected_person_list, mosaic=False):
    """Draw a box around every detected person and return a 600x360 frame.

    Args:
        image: Frame to annotate. Boxes and text are drawn on this array
            unless ``mosaic`` is set, in which case ``mosaic_area`` returns
            a copy that is drawn on instead.
        detected_person_list: Objects whose ``pos`` is
            ``(xmin, ymin, xmax, ymax)``.
        mosaic: When true, pixelate each person's region before drawing.

    Returns:
        The annotated frame resized to 600x360.
    """
    box_color = (0, 0, 0)
    for detected_person in detected_person_list:
        xmin, ymin, xmax, ymax = detected_person.pos
        if mosaic:
            image = mosaic_area(image, xmin, ymin, xmax, ymax)
        cv2.rectangle(image, (xmin, ymin), (xmax, ymax), box_color, 5)

    font = cv2.FONT_HERSHEY_COMPLEX
    cv2.putText(image, 'PersonDetection', (50, 150), font, 4, (0, 0, 255), 2, cv2.LINE_AA)
    return cv2.resize(image, dsize=(600, 360))

### STEP4. メイン関数を定義

In [None]:
def main(video_file):
    """Detect people in every frame of a video and display the frames inline.

    Args:
        video_file: Video source handed to ``cv2.VideoCapture``.

    The function returns when the source cannot be opened, when no further
    frame can be read, or when ``cv2.waitKey`` reports the ESC key.
    """
    # Instantiate the person-detection model
    iecore = IECore()
    detector = PersonDetector(iecore, "intel/person-detection-retail-0013/FP32/person-detection-retail-0013")

    cap = cv2.VideoCapture(video_file)
    try:
        if not cap.isOpened():
            log.error("Could not open video source: %s", video_file)
            return

        # Key code 27 is ESC.
        # NOTE(review): no OpenCV window is opened in this flow (imshow is
        # not called), so the key press may never be delivered - confirm.
        while cv2.waitKey(1) != 27:
            # Read one frame from the video data
            ok, image = cap.read()
            if not ok:
                break

            # Detect the bounding box of every person in the captured frame
            detected_person_list = detector.inference_and_get_person_bbox(image)

            # Render the annotated frame as a JPEG in the notebook output,
            # replacing the previously displayed frame.
            image = create_output_image(image, detected_person_list)
            clear_output(wait=True)
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            f = io.BytesIO()
            PIL.Image.fromarray(image).save(f, 'jpeg')
            IPython.display.display(IPython.display.Image(data=f.getvalue()))
    finally:
        # Release the capture on every exit path, including end of video.
        cap.release()
        cv2.destroyAllWindows()

### STEP5. 実行

In [None]:
# Run the person-detection main() defined above on the video file 'people.264'.
main('people.264')

---

## 第2章 人物認証

### STEP0. 事前学習済みモデルのダウンロード

In [None]:
# Download the pre-trained person-reidentification-retail-0287 model by running
# the Model Downloader script found under the directory named by $INTEL_OPENVINO_DIR.
!python3 $INTEL_OPENVINO_DIR/deployment_tools/tools/model_downloader/downloader.py --name person-reidentification-retail-0287

### STEP1. 人物認証処理を実行するためのクラスを定義

In [None]:
class PersonIdentifier:
    """Give detected people persistent IDs by matching feature vectors.

    Known people are kept in ``feature_db``. Each new detection is matched
    against that database by cosine distance; unmatched detections receive a
    new ID and are added to the database.
    """

    def __init__(self, iecore, model_path):
        """Read the re-identification network and load it onto the CPU device.

        Args:
            iecore: Inference Engine core object used to read and load the
                network.
            model_path: Model path without extension; ".xml" and ".bin" are
                appended to locate the topology and the weights.
        """
        self.net_p = iecore.read_network(model=model_path + ".xml", weights=model_path + ".bin")
        self.input_name_p = next(iter(self.net_p.inputs))
        self.input_shape_p = self.net_p.inputs[self.input_name_p].shape
        self.out_name_p = next(iter(self.net_p.outputs))
        self.out_shape_p = self.net_p.outputs[self.out_name_p].shape
        self.exec_net_p = iecore.load_network(self.net_p, 'CPU')

        # Person identification management
        self.id_num = 0                 # next ID to hand out
        self.dist_threshold = 1.0       # cosine distance below which two features match
        # Compared against differences of time.monotonic(), i.e. seconds.
        # NOTE(review): 10000 s is close to three hours - confirm the unit intended.
        self.timeout_threshold = 10000
        self.feature_db = []            # known people

    def inference_and_get_feature(self, obj_img):
        """Return the feature vector of one cropped person image.

        Args:
            obj_img: Non-empty height x width x channels crop of a person.

        Returns:
            The network output flattened to a one-dimensional array.
        """
        # Take the input size from the loaded network instead of hard-coding
        # it; indices 3 and 2 of the input shape are used as width and height.
        obj_img = cv2.resize(obj_img, (self.input_shape_p[3], self.input_shape_p[2]))
        obj_img = obj_img.transpose((2, 0, 1))
        obj_img = np.expand_dims(obj_img, axis=0)

        res_reid = self.exec_net_p.infer(inputs={self.input_name_p: obj_img})
        return np.array(res_reid[self.out_name_p]).reshape(-1)

    def register_into_database(self, detected_person_list):
        """Assign an ID to every detection and refresh the database.

        Detections matched to a known person inherit that person's ID and
        refresh the person's timestamp. Remaining detections get a new ID
        and are appended to ``feature_db``. Finally, entries not seen for
        more than ``timeout_threshold`` are dropped.

        Args:
            detected_person_list: Objects with ``feature``, ``id`` and
                ``time`` attributes; ``id`` is updated in place.
        """
        # The assignment solver cannot handle an empty cost matrix, and
        # there is nothing to match when either side is empty.
        if detected_person_list and self.feature_db:
            dist_matrix = [
                [distance.cosine(obj_db.feature, obj_cam.feature) for obj_db in self.feature_db]
                for obj_cam in detected_person_list
            ]
            for idx_obj, idx_db in Munkres().compute(dist_matrix):
                detected_person = detected_person_list[idx_obj]
                if detected_person.id != -1:
                    continue
                if dist_matrix[idx_obj][idx_db] < self.dist_threshold:
                    known_person = self.feature_db[idx_db]
                    known_person.time = time.monotonic()
                    detected_person.id = known_person.id

        # Detections that are still unidentified become new known people.
        for detected_person in detected_person_list:
            if detected_person.id == -1:
                detected_person.id = self.id_num
                self.feature_db.append(detected_person)
                self.id_num += 1

        # Rebuild the list rather than popping while iterating, which would
        # skip the entry that follows each removed one.
        now = time.monotonic()
        self.feature_db[:] = [
            db for db in self.feature_db if now - db.time <= self.timeout_threshold
        ]

### STEP2. 画面表示用の関数を少し修正

In [None]:
def create_output_image_with_id(image, detected_person_list, mosaic=False):
    """Draw a box and the ID of every detected person; return a 600x360 frame.

    Args:
        image: Frame to annotate.
        detected_person_list: Objects exposing ``id`` and
            ``pos`` = ``(xmin, ymin, xmax, ymax)``.
        mosaic: When true, pixelate each person's region before drawing.

    Returns:
        The annotated frame resized to 600x360.
    """
    black = (0, 0, 0)
    label_font = cv2.FONT_HERSHEY_COMPLEX

    for person in detected_person_list:
        person_id = person.id
        left, top, right, bottom = person.pos
        if mosaic:
            image = mosaic_area(image, left, top, right, bottom)
        cv2.rectangle(image, (left, top), (right, bottom), black, 5)
        # The ID is written just above the top-left corner of the box.
        cv2.putText(image, str(person_id), (left, top - 7), label_font, 1.0, black, 1)

    cv2.putText(image, 'PersonDetection', (50, 150), label_font, 4, (0, 0, 255), 2, cv2.LINE_AA)
    resized = cv2.resize(image, dsize=(600, 360))
    return resized

### STEP3. メイン関数を定義

In [None]:
def main(video_file):
    """Detect and identify people in a video and display the frames inline.

    Args:
        video_file: Video source handed to ``cv2.VideoCapture``.

    The function returns when the source cannot be opened, when no further
    frame can be read, or when ``cv2.waitKey`` reports the ESC key.
    """
    # Instantiate two AI models
    iecore = IECore()
    detector = PersonDetector(iecore, "intel/person-detection-retail-0013/FP32/person-detection-retail-0013")
    identifier = PersonIdentifier(iecore, "intel/person-reidentification-retail-0287/FP32/person-reidentification-retail-0287")

    cap = cv2.VideoCapture(video_file)
    try:
        if not cap.isOpened():
            log.error("Could not open video source: %s", video_file)
            return

        # Key code 27 is ESC.
        # NOTE(review): no OpenCV window is opened in this flow (imshow is
        # not called), so the key press may never be delivered - confirm.
        while cv2.waitKey(1) != 27:
            ok, image = cap.read()
            if not ok:
                break

            # Detect people, then compute a feature vector for each crop.
            detected_person_list = []
            for detected_person in detector.inference_and_get_person_bbox(image):
                xmin, ymin, xmax, ymax = detected_person.pos
                person_image = image[ymin:ymax, xmin:xmax]
                if person_image.size == 0:
                    # An empty crop cannot be resized for the network.
                    continue
                detected_person.feature = identifier.inference_and_get_feature(person_image)
                detected_person_list.append(detected_person)

            # Register vectors into the database; a frame without usable
            # detections has nothing to match or register.
            if detected_person_list:
                identifier.register_into_database(detected_person_list)

            # Render the annotated frame as a JPEG in the notebook output,
            # replacing the previously displayed frame.
            image = create_output_image_with_id(image, detected_person_list)
            clear_output(wait=True)
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            f = io.BytesIO()
            PIL.Image.fromarray(image).save(f, 'jpeg')
            IPython.display.display(IPython.display.Image(data=f.getvalue()))
    finally:
        # Release the capture on every exit path, including end of video.
        cap.release()
        cv2.destroyAllWindows()

### STEP4. 実行

In [None]:
# Run the detection + identification main() defined above on the video file 'people.264'.
main('people.264')

---

## 第3章 ソーシャルディスタンス検知

### STEP1. ソーシャルディスタンス検知処理用のクラスを定義

In [None]:
class SocialDistanceViolationJudge:
    """Flag people who stand closer together than an on-screen threshold.

    ``SOCIAL_PARAMETER`` is that threshold and can be customised. It is not
    a real-world distance: it is compared with the Euclidean distance
    between the centres of the bounding boxes, in image coordinates.

    ``violate_t`` accumulates the IDs of people who were in violation at two
    consecutive periodic checks, ``CHECK_INTERVAL`` seconds apart.
    """
    SOCIAL_PARAMETER = 50.0
    # Seconds between two periodic checks for persistent violators.
    CHECK_INTERVAL = 30.0

    def __init__(self):
        self.violate_t = list()
        # Violators recorded at the previous periodic check (or first frame).
        # Kept on the instance so it survives between calls.
        self._previous_violators = set()
        # Monotonic time of the last periodic check; -inf means "never".
        self._last_check_time = float("-inf")

    def judge_violation_of_social_distance(self, object_H, index, start_time):
        """Evaluate one frame.

        Args:
            object_H: Detected people; each needs ``pos`` =
                ``(xmin, ymin, xmax, ymax)`` and ``id``. May be empty.
            index: Frame counter; ``1`` marks the first frame, whose
                violators become the baseline for the first periodic check.
            start_time: ``time.monotonic()`` value from which the caller
                measures the first check interval. After a check has run,
                the interval restarts from that check, so the caller may
                keep passing the same value.

        Returns:
            ``(violate, violate_t)``: the IDs in violation in this frame,
            and the accumulated IDs of persistent violators.
        """
        violate = set()
        # cdist needs a two-dimensional input, so an empty frame is skipped.
        if len(object_H) > 0:
            centroids = np.array(
                [
                    [(obj.pos[2] - obj.pos[0]) / 2 + obj.pos[0],
                     (obj.pos[3] - obj.pos[1]) / 2 + obj.pos[1]]
                    for obj in object_H
                ]
            )
            D_1 = distance.cdist(centroids, centroids, metric="euclidean")
            count = D_1.shape[0]
            for i in range(count):
                for j in range(i + 1, count):
                    # A distance of exactly zero is ignored.
                    if D_1[i, j] != 0.0 and D_1[i, j] < self.SOCIAL_PARAMETER:
                        violate.add(object_H[i].id)
                        violate.add(object_H[j].id)

        if index == 1:
            self._previous_violators = set(violate)

        now = time.monotonic()
        # The caller cannot observe when a check ran, so the interval is
        # measured from whichever is later: its start_time or our last check.
        period_start = max(start_time, self._last_check_time)
        if now - period_start > self.CHECK_INTERVAL:
            self._last_check_time = now
            persistent = self._previous_violators & violate
            # NOTE(review): persistent violators are only recorded when
            # their count is even - presumably because violations come in
            # pairs; confirm this is intended for groups of three.
            if len(persistent) % 2 == 0:
                self.violate_t = list(set(self.violate_t) | persistent)
            self._previous_violators = set(violate)

        return list(violate), self.violate_t

### STEP2. 画像出力用のユーティリティ関数も少し修正

In [None]:
# Create the output image (frame) with violation highlighting
def create_output_image_with_violation(image, detected_person_list, violate, violate_t, mosaic=False):
    """Draw colour-coded boxes for every person and return a 600x360 frame.

    A person's box uses (0, 0, 255) when the ID is in ``violate_t``,
    otherwise (0, 255, 0) when the ID is in ``violate``, otherwise
    (0, 0, 0). The size of ``violate_t`` is written on the frame.

    Args:
        image: Frame to annotate.
        detected_person_list: Objects exposing ``id`` and
            ``pos`` = ``(xmin, ymin, xmax, ymax)``.
        violate: IDs in violation in this frame.
        violate_t: IDs of persistent violators.
        mosaic: When true, pixelate each person's region before drawing.

    Returns:
        The annotated frame resized to 600x360.
    """
    for person in detected_person_list:
        person_id = person.id
        left, top, right, bottom = person.pos
        if mosaic:
            image = mosaic_area(image, left, top, right, bottom)

        currently_close = person_id in violate
        persistent = person_id in violate_t
        # The persistent colour takes precedence over the per-frame colour.
        if persistent:
            box_color = (0, 0, 255)
        elif currently_close:
            box_color = (0, 255, 0)
        else:
            box_color = (0, 0, 0)
        cv2.rectangle(image, (left, top), (right, bottom), box_color, 5)

    counter_text = str(len(violate_t))
    cv2.putText(image, counter_text, (50, 150), cv2.FONT_HERSHEY_COMPLEX, 4, (0, 0, 255), 2, cv2.LINE_AA)
    resized = cv2.resize(image, dsize=(600, 360))
    return resized

### STEP3. メイン関数を定義

In [None]:
def main(video_file):
    """Detect, identify and distance-check people in a video, frame by frame.

    Args:
        video_file: Video source handed to ``cv2.VideoCapture``.

    The function returns when the source cannot be opened, when no further
    frame can be read, or when ``cv2.waitKey`` reports the ESC key.
    """
    # Instantiate two AI models
    iecore = IECore()
    detector = PersonDetector(iecore, "intel/person-detection-retail-0013/FP32/person-detection-retail-0013")
    identifier = PersonIdentifier(iecore, "intel/person-reidentification-retail-0287/FP32/person-reidentification-retail-0287")

    violation_judger = SocialDistanceViolationJudge()

    cap = cv2.VideoCapture(video_file)
    try:
        if not cap.isOpened():
            log.error("Could not open video source: %s", video_file)
            return

        start_time = time.monotonic()
        index = 0  # frame counter handed to the violation judge

        # Key code 27 is ESC.
        # NOTE(review): no OpenCV window is opened in this flow (imshow is
        # not called), so the key press may never be delivered - confirm.
        while cv2.waitKey(1) != 27:
            index += 1

            ok, image = cap.read()
            if not ok:
                break

            # Detect people, then compute a feature vector for each crop.
            detected_person_list = []
            for detected_person in detector.inference_and_get_person_bbox(image):
                xmin, ymin, xmax, ymax = detected_person.pos
                person_image = image[ymin:ymax, xmin:xmax]
                if person_image.size == 0:
                    # An empty crop cannot be resized for the network.
                    continue
                detected_person.feature = identifier.inference_and_get_feature(person_image)
                detected_person_list.append(detected_person)

            # With nobody in the frame there is nothing to register and no
            # pair to measure; keep showing the violators recorded so far.
            violate, violate_t = [], violation_judger.violate_t
            if detected_person_list:
                # Register vectors into the database
                identifier.register_into_database(detected_person_list)
                # Judge violations of social distance
                violate, violate_t = violation_judger.judge_violation_of_social_distance(
                    detected_person_list, index, start_time
                )

            # Render the annotated frame as a JPEG in the notebook output,
            # replacing the previously displayed frame.
            image = create_output_image_with_violation(image, detected_person_list, violate, violate_t)
            clear_output(wait=True)
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            f = io.BytesIO()
            PIL.Image.fromarray(image).save(f, 'jpeg')
            IPython.display.display(IPython.display.Image(data=f.getvalue()))
    finally:
        # Release the capture on every exit path, including end of video.
        cap.release()
        cv2.destroyAllWindows()

### STEP4. 実行

In [None]:
# Run the social-distance main() defined above on the video file 'people.264'.
main('people.264')

---

## おしまい！