In [1]:
import tensorflow as tf
import sys
import cv2
import numpy as np
from glob import glob
import pandas as pd
import os
import math
import scipy as sp
from PIL import Image
from models import Yolov4

# Tensorflow
from tensorflow.keras import models, layers, Model
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.layers import Dense, Dropout, Flatten, GlobalAveragePooling2D
from tensorflow.keras.layers import Flatten, Dense, Dropout, ZeroPadding2D

from tensorflow.keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing import image

from tensorflow.keras.callbacks import ModelCheckpoint, TensorBoard, ReduceLROnPlateau
from tensorflow.keras import optimizers
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications import MobileNetV2, imagenet_utils
from sklearn.metrics import confusion_matrix

import scikitplot as skplt
from sklearn.metrics import roc_auc_score
import matplotlib.pyplot as plt


In [2]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# project modules (e.g. `models`) are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [3]:
# Build the YOLOv4 detector from the local weight file and the COCO
# class-name list; the resulting object is used below via `predict_img`.
yolo_model = Yolov4(
    weight_path='./yolov4.weights',
    class_name_path='./class_names/coco_classes.txt',
)

nms iou: 0.413 score: 0.3
all weights read
load from ./yolov4.weights


  super(Adam, self).__init__(name, **kwargs)


In [4]:
# ImageNet-initialised MobileNetV2 backbone without its classification top,
# taking 224x224 3-channel inputs.
MobileNetV2Model = tf.keras.applications.MobileNetV2(
    weights="imagenet",
    include_top=False,
    input_shape=(224, 224, 3),
    input_tensor=None,
    pooling=None,
)

# Classification head: global average pooling -> flatten -> 256-unit ReLU layer.
# The layers are instantiated in the same order as before so that their
# auto-generated names are unchanged.
x = MobileNetV2Model.output
for head_layer in (GlobalAveragePooling2D(), Flatten(), Dense(256, activation="relu")):
    x = head_layer(x)

# Three-way softmax output.
predictions = Dense(3, activation='softmax', name="output_node")(x)

# Connect the backbone input to the new output.
# NOTE(review): in the cells visible here only `soft_model` is used for
# inference; confirm whether `model` is still needed.
model = Model(inputs=MobileNetV2Model.input, outputs=predictions)

# Trained classifier checkpoint used for inference.
soft_model = load_model('../MobileNetV2-107-0.2315-0.9500.hdf5')
# Alternative checkpoint, currently disabled:
# sig_model = load_model('../MobileNetV2-089-0.3565-0.9250.hdf5')

In [5]:
# Image preprocessing
def prepare_image(img):
    """Convert an image array into a one-item batch for the MobileNetV2 classifier.

    The image is resized to 224x224 with OpenCV, given a leading batch axis and
    passed through Keras' MobileNetV2 ``preprocess_input``.

    Args:
        img: Image array accepted by ``cv2.resize``.

    Returns:
        The result of ``preprocess_input`` applied to the resized image with a
        leading axis of length 1.
    """
    target_size = (224, 224)
    # Add the batch axis directly on the resized image.
    batch = cv2.resize(img, target_size)[np.newaxis, ...]
    return tf.keras.applications.mobilenet_v2.preprocess_input(batch)

# Fetch the classification result
def get_results(pred):
    """Translate a class-score vector into (class index, top score, label).

    Args:
        pred: Array of per-class scores supporting ``argmax(axis=-1)`` and ``max()``.

    Returns:
        Tuple ``(class_num, score, class_txt)`` where ``class_txt`` is
        ``"leash"`` for index 0, ``"muzzle"`` for index 1 and ``"nothing"``
        for any other index.
    """
    class_num = pred.argmax(axis=-1)
    score = pred.max()

    # Check the named classes in order; anything else falls through to "nothing".
    for known_index, known_label in ((0, "leash"), (1, "muzzle")):
        if class_num == known_index:
            class_txt = known_label
            break
    else:
        class_txt = "nothing"

    return class_num, score, class_txt

# Draw a labelled bounding box on the picture
def draw_bbox(img, score, class_txt, df, idx):
    """Draw one detection's box and a "<label> <score>" caption onto ``img``.

    Drawing is done with OpenCV directly on ``img``. The box colour is drawn
    at random on every call.

    Args:
        img: Image array to annotate.
        score: Classification score, rendered with two decimals in the caption.
        class_txt: Class label shown in the caption.
        df: Detections table; the row at position ``idx`` must expose
            ``x1``, ``y1``, ``x2`` and ``y2``.
        idx: Positional row index into ``df``.

    Returns:
        The same ``img`` object that was passed in, so that callers writing
        ``img = draw_bbox(...)`` keep a usable image instead of ``None``.
    """
    # Read the row once instead of re-indexing the frame for every coordinate.
    row = df.iloc[idx]
    x1, y1, x2, y2 = int(row.x1), int(row.y1), int(row.x2), int(row.y2)

    # Scale stroke and font sizes relative to a 416-pixel reference side.
    scale = max(img.shape[0:2]) / 416
    # Clamp to 1 so small images never request a zero-width outline
    # (same clamp style as `thickness` below).
    line_width = max(int(2 * scale), 1)

    color = list(np.random.random(size=3) * 255)
    cv2.rectangle(img, (x1, y1), (x2, y2), color, line_width)

    text = f'{class_txt} {score:.2f}'
    font = cv2.FONT_HERSHEY_DUPLEX
    font_scale = max(0.3 * scale, 0.3)
    thickness = max(int(1 * scale), 1)
    (text_width, text_height) = cv2.getTextSize(text, font, fontScale=font_scale, thickness=thickness)[0]

    # Filled background behind the caption, sitting on the top edge of the box.
    cv2.rectangle(img, (x1 - line_width // 2, y1 - text_height), (x1 + text_width, y1), color, cv2.FILLED)
    cv2.putText(img, text, (x1, y1), font, font_scale, (255, 255, 255), thickness, cv2.LINE_AA)

    return img

# Fetch the dog location values
def get_location(model, frame, score_threshold=0.7):
    """Run the detector on ``frame`` and keep only confident detections.

    Args:
        model: Detector exposing ``predict_img(frame)`` that returns a table
            with a ``score`` column.
        frame: Image passed through to ``model.predict_img``.
        score_threshold: Detections with ``score`` strictly greater than this
            value are kept. Defaults to 0.7, the previously hard-coded value.

    Returns:
        The rows of the detector output whose ``score`` exceeds
        ``score_threshold``. The original index labels are kept.
    """
    detections = model.predict_img(frame)
    return detections[detections.score > score_threshold]

# Run classification
def start_classficiation(model, img, dog_img, df, idx=0):
    """Classify one cropped detection and annotate the full image with the result.

    Args:
        model: Classifier exposing ``predict(batch)``; the first element of
            its output is used as the class-score vector.
        img: Full image to annotate.
        dog_img: Crop of ``img`` that is fed to the classifier.
        df: Detections table forwarded to ``draw_bbox``.
        idx: Positional row in ``df`` describing this crop's box. Defaults to 0.

    Returns:
        The annotated ``img``.
    """
    input_img = prepare_image(dog_img)
    pred = model.predict(input_img)[0]
    class_num, score, class_txt = get_results(pred)

    # draw_bbox annotates `img` directly. Its return value is deliberately not
    # assigned back: rebinding `img` to it turned the result into None when the
    # helper returned nothing.
    draw_bbox(img, score, class_txt, df, idx)

    return img

# Place a result image into the subplot layout
def show_imgs(img_list, result_img, i):
    """Show ``result_img`` in row ``i + 1`` of a single-column subplot grid.

    Args:
        img_list: Collection of images; only its length is used, to size the
            figure and the subplot grid.
        result_img: BGR image to display; converted to RGB for matplotlib.
        i: Zero-based slot of this image in the grid.

    NOTE(review): a new figure is opened on every call, so successive calls do
    not share one figure, and the grid has ``len(img_list) + 1`` rows. Confirm
    whether that is intended.
    """
    n_images = len(img_list)
    rgb_img = cv2.cvtColor(result_img, cv2.COLOR_BGR2RGB)

    plt.figure(figsize=(10, 10 * n_images))
    plt.subplot(n_images + 1, 1, i + 1)
    plt.imshow(rgb_img)
    plt.axis('off')

In [6]:
# Run detection + classification on every frame of the input video, preview the
# annotated frames in a window and save them to output.avi. Press Esc to stop.
cap = cv2.VideoCapture('../video2.mkv')

if not cap.isOpened():
    print("Video open failed!")
    sys.exit()

fps = cap.get(cv2.CAP_PROP_FPS)
print('FPS: ', fps)

# Guard against a reported FPS of 0, which would otherwise raise
# ZeroDivisionError. `delay` is not used by the loop below (it polls with
# waitKey(1)); it is kept because it is a notebook-level name.
delay = round(1000 / fps) if fps > 0 else 1
color = list(np.random.random(size=3) * 255)  # one box colour for the whole video

frame_width = round(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = round(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

fourcc = cv2.VideoWriter_fourcc(*'DIVX')  # *'DIVX' == 'D', 'I', 'V', 'X'
# The output rate stays fixed at 30: the recorded run printed "FPS: 1000.0"
# for this input, so the source-reported value is not used for the writer.
out = cv2.VideoWriter('output.avi', fourcc, 30, (frame_width, frame_height))
if not out.isOpened():
    print("Video writer open failed! Frames will be shown but not saved.")

font = cv2.FONT_HERSHEY_DUPLEX

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        df = yolo_model.predict_img(frame)

        if len(df) > 0:
            # Crop from an untouched copy so boxes/captions drawn for earlier
            # detections never end up inside a later detection's classifier input.
            clean_frame = frame.copy()
            frame_h, frame_w = frame.shape[0:2]

            # Drawing sizes depend only on the frame, so compute them once per frame.
            scale = max(frame.shape[0:2]) / 416
            line_width = max(int(2 * scale), 1)
            font_scale = max(0.3 * scale, 0.3)
            thickness = max(int(1 * scale), 1)

            for i in range(len(df)):
                row = df.iloc[i]
                x1, y1, x2, y2 = int(row.x1), int(row.y1), int(row.x2), int(row.y2)

                # Clamp the crop to the frame: negative coordinates would wrap
                # around when slicing, and an empty crop makes cv2.resize fail.
                crop_x1, crop_y1 = max(x1, 0), max(y1, 0)
                crop_x2, crop_y2 = min(x2, frame_w), min(y2, frame_h)
                if crop_x2 <= crop_x1 or crop_y2 <= crop_y1:
                    continue

                dog_img = clean_frame[crop_y1:crop_y2, crop_x1:crop_x2, :]

                # Same preprocessing as the helper defined above
                # (resize to 224x224, add batch axis, MobileNetV2 preprocess_input).
                pred = soft_model.predict(prepare_image(dog_img))[0]
                class_num, score, class_txt = get_results(pred)

                cv2.rectangle(frame, (x1, y1), (x2, y2), color, line_width)

                text = f'{class_txt} {score:.2f}'
                (text_width, text_height) = cv2.getTextSize(text, font, fontScale=font_scale, thickness=thickness)[0]
                cv2.rectangle(frame, (x1 - line_width // 2, y1 - text_height), (x1 + text_width, y1), color, cv2.FILLED)
                cv2.putText(frame, text, (x1, y1), font, font_scale, (255, 255, 255), thickness, cv2.LINE_AA)

        cv2.imshow('frame', frame)
        out.write(frame)

        # Esc key stops processing early.
        if cv2.waitKey(1) == 27:
            break
finally:
    # Always release the capture and the writer (even on errors or a kernel
    # interrupt); without out.release() the output file may not be finalized.
    cap.release()
    out.release()
    cv2.destroyAllWindows()

FPS:  1000.0
img shape:  (720, 1280, 3)
# of bboxes: 4
img shape:  (720, 1280, 3)
# of bboxes: 4
img shape:  (720, 1280, 3)
# of bboxes: 5
img shape:  (720, 1280, 3)
# of bboxes: 2
img shape:  (720, 1280, 3)
# of bboxes: 4
img shape:  (720, 1280, 3)
# of bboxes: 2
img shape:  (720, 1280, 3)
# of bboxes: 2
img shape:  (720, 1280, 3)
# of bboxes: 2
img shape:  (720, 1280, 3)
# of bboxes: 3
img shape:  (720, 1280, 3)
# of bboxes: 3
img shape:  (720, 1280, 3)
# of bboxes: 1
img shape:  (720, 1280, 3)
# of bboxes: 3
img shape:  (720, 1280, 3)
# of bboxes: 4
img shape:  (720, 1280, 3)
# of bboxes: 2
img shape:  (720, 1280, 3)
# of bboxes: 3
img shape:  (720, 1280, 3)
# of bboxes: 4
img shape:  (720, 1280, 3)
# of bboxes: 2
img shape:  (720, 1280, 3)
# of bboxes: 4
img shape:  (720, 1280, 3)
# of bboxes: 3
img shape:  (720, 1280, 3)
# of bboxes: 2
img shape:  (720, 1280, 3)
# of bboxes: 2
img shape:  (720, 1280, 3)
# of bboxes: 3
img shape:  (720, 1280, 3)
# of bboxes: 4
img shape:  (720, 128