In [None]:
import tensorflow as tf
import keras
from keras.callbacks import Callback, ModelCheckpoint
from keras.engine.network import Network
from keras.layers import Activation, BatchNormalization, Conv2D, Dense, Dropout, Flatten, Input, MaxPooling2D
from keras.layers.pooling import GlobalAveragePooling2D
from keras.models import Model

from keras.applications.resnet50 import ResNet50
from keras_vggface.vggface import VGGFace

import cv2
import math
import numpy as np
import os
import sys

from IPython.display import clear_output
from joblib import Parallel, delayed

sys.path.append('../face_detection/')
from utils import label_map_util
from utils import visualization_utils_color as vis_util
from tensorflow_face_detector import TensoflowFaceDector

In [None]:
# Side length, in pixels, of the square 3-channel images fed to the recognition model.
INPUT_SIZE = 224
# Number of classes produced by the final softmax layer (one per entry in `names`).
NUM_CLASS = 21

In [None]:
# Class index -> member name. The position of each name in the tuple is the
# class index used to look up a label for a prediction.
names = dict(enumerate((
    "井口眞緒",
    "潮紗理菜",
    "柿崎芽実",
    "影山優佳",
    "加藤史帆",
    "齊藤京子",
    "佐々木久美",
    "佐々木美玲",
    "高瀬愛奈",
    "高本彩花",
    "東村芽依",
    "金村美玖",
    "河田陽菜",
    "小坂菜緒",
    "富田鈴花",
    "丹生明里",
    "濱岸ひより",
    "松田好花",
    "宮田愛萌",
    "渡邉美穂",
    "上村ひなの",
)))

In [None]:
# Data parsing
def read_image(images):
    """Turn raw image data into a float32 tensor shaped for the model.

    The input is cast to ``tf.float32``, divided by 255 and reshaped to
    ``[INPUT_SIZE, INPUT_SIZE, 3]``.

    Args:
        images: Image data accepted by ``tf.cast``.
            NOTE(review): presumably 8-bit pixel values (0-255) - confirm.

    Returns:
        The tensor produced by ``tf.reshape``.
    """
    # Scale the pixel data into the 0-1 range.
    scaled = tf.cast(images, tf.float32) / 255
    return tf.reshape(scaled, [INPUT_SIZE, INPUT_SIZE, 3])

In [None]:
def detect_n_faces(imageFile):
    """Detect the faces in an image file and return their pixel boxes.

    The image is read with OpenCV, passed through the module-level face
    detector ``tDetector``, and the normalized boxes returned by the
    visualization helper are converted to integer pixel coordinates.

    Args:
        imageFile: Path of the image to read with ``cv2.imread``.

    Returns:
        A tuple ``(originBoxes, cropBoxes)`` of two equal-length lists. Every
        entry is a ``(left, right, top, bottom)`` tuple of integer pixel
        coordinates. ``originBoxes`` holds the boxes as detected;
        ``cropBoxes`` holds the same boxes adjusted to squares.

    Raises:
        IOError: If the image cannot be read.
    """
    image = cv2.imread(imageFile)
    if image is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise IOError("Could not read image file: {}".format(imageFile))
    imageHeight, imageWidth = image.shape[:2]

    # Work on a copy.
    # NOTE(review): presumably the visualization helper draws onto the array
    # it receives - confirm against utils.visualization_utils_color.
    faceDetectImage = image.copy()

    (boxes, scores, classes, num_detections) = tDetector.run(faceDetectImage)

    # Each returned box is unpacked below as normalized (ymin, xmin, ymax, xmax).
    faceBoxes = vis_util.visualize_boxes_and_labels_on_image_array(
        faceDetectImage,
        np.squeeze(boxes),
        np.squeeze(classes).astype(np.int32),
        np.squeeze(scores),
        category_index,
        use_normalized_coordinates=True,
        line_thickness=4)

    originBoxes = []
    cropBoxes = []

    for box in faceBoxes:
        ymin, xmin, ymax, xmax = box
        (left, right, top, bottom) = (int(xmin * imageWidth), int(xmax * imageWidth), int(ymin * imageHeight), int(ymax * imageHeight))
        # Region reported by the TensorFlow face detector.
        originBoxes.append((left, right, top, bottom))
        # Square region derived from it (what the caller crops).
        cropBoxes.append(_square_box(left, right, top, bottom, imageWidth, imageHeight))

    return originBoxes, cropBoxes


def _square_box(left, right, top, bottom, imageWidth, imageHeight):
    """Adjust a pixel box to a square, matching the longer side when possible.

    The shorter side is widened symmetrically to the length of the longer
    side. If widening would leave the image, the longer side is shrunk
    symmetrically to the length of the shorter side instead.

    Returns:
        The adjusted ``(left, right, top, bottom)`` tuple.
    """
    cropWidth = right - left
    cropHeight = bottom - top

    if cropHeight > cropWidth:
        diff = (cropHeight - cropWidth) / 2
        if int(left - diff) < 0 or int(right + diff) > imageWidth:
            # No room to widen: trim the height instead.
            top = int(top + diff)
            bottom = int(bottom - diff)
        else:
            left = int(left - diff)
            right = int(right + diff)
    else:
        diff = (cropWidth - cropHeight) / 2
        if int(top - diff) < 0 or int(bottom + diff) > imageHeight:
            # No room to heighten: trim the width instead.
            left = int(left + diff)
            right = int(right - diff)
        else:
            top = int(top - diff)
            bottom = int(bottom + diff)

    return left, right, top, bottom

In [None]:
def cropImage(imageFile):
    """Detect faces in an image and prepare the crops for the recognition model.

    Args:
        imageFile: Path of the image to process.

    Returns:
        A tuple ``(boxedImage, cropImages, imgs)``:

        * ``boxedImage`` - copy of the source image with a rectangle drawn
          around every crop box.
        * ``cropImages`` - list of face crops sliced from the source image.
        * ``imgs`` - float array of shape
          ``(len(cropImages), INPUT_SIZE, INPUT_SIZE, 3)`` holding the crops
          resized, with the channel order reversed (OpenCV loads BGR) and
          divided by 255.

        When no face is detected the same tuple shape is returned, with an
        empty list and an array of shape ``(0, INPUT_SIZE, INPUT_SIZE, 3)``,
        so callers can always unpack three values.

    Raises:
        IOError: If the image cannot be read.
    """
    sourceImage = cv2.imread(imageFile)
    if sourceImage is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise IOError("Could not read image file: {}".format(imageFile))
    boxedImage = sourceImage.copy()

    _, cropBoxes = detect_n_faces(imageFile)

    cropImages = []
    for (left, right, top, bottom) in cropBoxes:
        cropImages.append(sourceImage[top:bottom, left:right])
        cv2.rectangle(boxedImage, (left, top), (right, bottom), (255, 0, 0), thickness=3)

    # Preallocate the batch once instead of growing it with np.append,
    # which copies the whole array on every iteration.
    imgs = np.empty((len(cropImages), INPUT_SIZE, INPUT_SIZE, 3))
    for i, img in enumerate(cropImages):
        resized = cv2.resize(img, dsize=(INPUT_SIZE, INPUT_SIZE), interpolation=cv2.INTER_LANCZOS4)
        imgs[i] = resized[:, :, ::-1]  # reverse the channel order

    # Normalize (the predictions are wrong without this).
    imgs = imgs / 255

    return boxedImage, cropImages, imgs

In [None]:
# http://kazuhito00.hatenablog.com/entry/2018/06/20/025715
from PIL import ImageFont, ImageDraw
from PIL import Image as PILImage

class CvPutJaText:
    """Draws text onto OpenCV images through Pillow, using a TrueType font."""

    def __init__(self):
        """Nothing to initialize; ``puttext`` is a classmethod."""

    @classmethod
    def puttext(cls, cv_image, text, point, font_path, font_size, color=(0,0,0)):
        """Return ``cv_image`` with ``text`` drawn on it.

        Args:
            cv_image: BGR image array as used by OpenCV.
            text: String to draw.
            point: Position passed to Pillow's ``ImageDraw.text``.
            font_path: Path of the TrueType font file.
            font_size: Font size passed to ``ImageFont.truetype``.
            color: Fill color. It is applied to the RGB Pillow image, so the
                components are read in RGB order.

        Returns:
            The rendered image converted back to a BGR array.
        """
        typeface = ImageFont.truetype(font_path, font_size)

        # OpenCV (BGR) -> Pillow (RGB)
        canvas = PILImage.fromarray(cv2.cvtColor(cv_image, cv2.COLOR_BGR2RGB))
        ImageDraw.Draw(canvas).text(point, text, fill=color, font=typeface)

        # Pillow (RGB) -> OpenCV (BGR)
        return cv2.cvtColor(np.asarray(canvas), cv2.COLOR_RGB2BGR)

In [None]:
from IPython.display import display, Image

def display_cv_image(image, format='.jpg'):
    """Show an OpenCV image inline in the notebook.

    The image is encoded in memory with ``cv2.imencode`` and handed to
    IPython's ``Image`` display object.

    Args:
        image: Image array accepted by ``cv2.imencode``.
        format: File extension that selects the encoder (default ``'.jpg'``).

    Raises:
        ValueError: If OpenCV reports that the image could not be encoded.
    """
    # imencode returns (success flag, encoded buffer); do not ignore the flag.
    success, buffer = cv2.imencode(format, image)
    if not success:
        raise ValueError("Failed to encode image as {!r}".format(format))
    display(Image(data=buffer.tobytes()))

In [None]:
# Path to the frozen detection graph: the model file handed to the face detector below.
PATH_TO_CKPT = '../face_detection/model/frozen_inference_graph_face.pb'
# Path to the label map file used to build `category_index` (labels attached to each box).
PATH_TO_LABELS = '../face_detection/protos/face_label_map.pbtxt'
# Passed as max_num_classes when converting the label map. This is the detector's
# class count and is unrelated to NUM_CLASS (the number of people to recognize).
NUM_CLASSES = 2

# Label map -> categories -> category index; `category_index` is read by detect_n_faces.
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
category_index = label_map_util.create_category_index(categories)

tDetector = TensoflowFaceDector(PATH_TO_CKPT)  # load the model

In [None]:
# Model input: square 3-channel images of side INPUT_SIZE.
inputShape = (INPUT_SIZE, INPUT_SIZE, 3)
inputTensor = Input(shape=inputShape)

# Build the model: VGGFace ResNet50 (without its top) followed by global average
# pooling and a NUM_CLASS-way softmax classifier.
ResNet50Model = VGGFace(model='resnet50', weights='vggface', include_top=False, input_tensor=inputTensor, input_shape=inputShape)
flat = GlobalAveragePooling2D()(ResNet50Model.output)
# NOTE(review): the dropout rate is 0; presumably the layer is kept so the layer
# graph matches the one the saved weights were produced with - confirm.
dropout = Dropout(0)(flat)
output = Dense(NUM_CLASS, activation='softmax')(dropout)

# Wrap the whole graph in a Network container ...
commonNetwork = Network(inputTensor, output)

# ... and expose that container as the single layer of the final Model.
output = commonNetwork(inputTensor)
model = Model(inputs=inputTensor, outputs=output)

In [None]:
# vggface ResNet50
# Name of the first layer marked trainable; every layer before it is frozen.
# NOTE(review): this is an auto-generated layer name, so it presumably only
# matches on a fresh kernel where the model has been built exactly once - confirm.
trainStartLayerName = 'activation_40'

# Walk the wrapped network object directly instead of looking it up through
# model.get_layer("network_1"): that auto-generated name changes every time the
# model-building cell is re-run, which made this cell fail on re-execution.
setTrainable = False
for layer in commonNetwork.layers:
    if layer.name == trainStartLayerName:
        setTrainable = True
    layer.trainable = setTrainable

# NOTE(review): the trainable flags are set before loading, presumably so the
# model matches the configuration the weights were saved with - confirm.
weightPath = 'recognition_model/weights.hdf5'
model.load_weights(weightPath)

In [None]:
# Detect and crop the faces in the sample image.
# boxedImage: source image with the crop boxes drawn; cropedImages: the face crops;
# imgs: the crops as a normalized batch for the model.
imageFIle = "images/music_station.jpg"
boxedImage, cropedImages, imgs = cropImage(imageFIle)

# Classify the crops; estimationResult[i] holds the class scores of the i-th crop.
estimationResult = model.predict(x=imgs, steps=1)

In [None]:
# Layout of the result sheet: crops are placed on a grid, COLUMNS per row.
COLUMNS = 5
startY = 12          # top margin of the first row
startX = 36          # left margin of the first column
DISP_SIZE = 128      # side length of each displayed crop
heightDivide = 210   # vertical pitch of the grid
widthDivide = 200    # horizontal pitch of the grid
fontSize = 15
K = 3                # number of top predictions listed under each crop

# Create a pure white canvas in one vectorized call (no per-pixel Python loop).
height = int(heightDivide * np.ceil(len(cropedImages) / COLUMNS))
width = widthDivide * COLUMNS
resultImage = np.full((height, width, 3), 255, np.uint8)

# Draw the cropped faces and their classification results.
fontPath = './font/ipaexg.ttf'

for i in range(len(cropedImages)):
    q, mod = divmod(i, COLUMNS)
    resizeImage = cv2.resize(cropedImages[i], dsize=(DISP_SIZE, DISP_SIZE), interpolation=cv2.INTER_LANCZOS4)
    imgX = startX + widthDivide * mod
    imgY = startY + heightDivide * q
    resultImage[imgY : imgY + DISP_SIZE, imgX : imgX + DISP_SIZE] = resizeImage

    # Indices of the top-K classes, highest score first. A full argsort is
    # cheap for this few classes and, unlike argpartition, does not raise
    # when there are K or fewer classes.
    scores = estimationResult[i]
    maxKIndices = np.argsort(-scores)[:K]

    for index, m in enumerate(maxKIndices):
        # Named `label` so the builtin `str` is not rebound at notebook scope.
        label = names[m] + ": " + '{:.4f}'.format(scores[m])
        resultImage = CvPutJaText.puttext(resultImage, label,
                                        (imgX, imgY + DISP_SIZE + 10 + fontSize * index), fontPath, fontSize, (0, 0, 0))


display_cv_image(resultImage)

In [None]:
# Show the source image with the face crop regions outlined.
display_cv_image(boxedImage)