## 얼굴 사진으로 국적 분류
### England Germany Spain Argentina France
#### using vgg13

In [27]:
'''
Using VGG13 model

# Reference:
- [Very Deep Convolutional Networks for Large-Scale Image Recognition](https://arxiv.org/abs/1409.1556)
'''

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import pandas as pd
import os
import cv2

from sklearn.model_selection import train_test_split

from keras.models import Model
from keras.layers import Flatten, Dense, Input
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import GlobalAveragePooling2D, GlobalMaxPooling2D
from keras.preprocessing import image
from keras.utils import get_file
from keras.utils.layer_utils import convert_all_kernels_in_model
from keras import backend as K
from keras import optimizers
from keras.applications.vgg16 import decode_predictions, preprocess_input

In [28]:
def read_data(fin):
    """Load face images and their nationality labels listed in a CSV file.

    The CSV at `fin` is read with pandas. For every row, column 0 is used as
    the image id (converted with `int`) and column 3 as the nationality
    label. Images are searched for in the directory that contains the CSV,
    under ``valid_pictures/<nation>/``: first ``<id>.png``, then the numbered
    variants ``<id>_1.png`` ... ``<id>_9.png``.

    Files that do not exist, or that OpenCV cannot decode, are skipped so
    that the image and label lists always stay aligned.

    # Arguments
        fin: path to the CSV file.

    # Returns
        A tuple `(images, labels)` of numpy arrays built from the loaded
        images and the nationality label repeated for each loaded image.
    """
    data = pd.read_csv(fin)

    # Resolve the picture directory once. os.path.dirname keeps a bare
    # filename relative to the current directory instead of turning it
    # into the absolute path '/valid_pictures'.
    picture_root = os.path.join(os.path.dirname(fin), 'valid_pictures')

    data_li = []
    target_li = []
    for row in data.itertuples(index=False):
        image_id = int(row[0])
        target_nation = row[3]
        nation_dir = os.path.join(picture_root, target_nation)

        for variant in range(10):
            # Variant 0 is the base picture; 1..9 carry a numeric suffix.
            if variant == 0:
                file_name = '{}.png'.format(image_id)
            else:
                file_name = '{}_{}.png'.format(image_id, variant)
            image_path = os.path.join(nation_dir, file_name)

            if not os.path.isfile(image_path):
                continue

            image_data = cv2.imread(image_path)
            if image_data is None:
                # Unreadable/corrupt file: skip it rather than storing None.
                continue

            data_li.append(image_data)
            target_li.append(target_nation)

    return (np.array(data_li), np.array(target_li))

In [29]:
def create_train_test_data(image_data, label_li):
    """Print basic dataset statistics and split the data into train/test sets.

    # Arguments
        image_data: 4-D array; its shape is unpacked as
            (samples, height, width, channels).
        label_li: labels aligned with the first axis of `image_data`.

    # Returns
        A tuple `(X_train, X_test, y_train, y_test)` produced by
        `train_test_split` with a 25% test share and a fixed random seed.
    """
    # Unpacking four values also rejects arrays that are not 4-D.
    sample_count, height, width, _ = image_data.shape
    for dimension in (height, width):
        print(dimension)

    # Number of classes = number of countries to classify.
    class_count = 5

    print("total dataset size:")
    print("n_samples: %d" % sample_count)
    # The value reported as "n_features" is the size of axis 1 (image height).
    print("n_features: %d" % image_data.shape[1])
    print("n_classes: %d" % class_count)

    return tuple(
        train_test_split(image_data, label_li, test_size=0.25, random_state=42)
    )

In [30]:
def VGG13(include_top=True, input_tensor=None, input_shape=None, pooling=None):
    '''
    Build a VGG13-style convolutional network.

    The network consists of five blocks, each made of two 3x3 ReLU
    convolutions with `same` padding followed by 2x2 max pooling. The blocks
    use 64, 128, 256, 512 and 512 filters respectively. No weights are
    loaded and no softmax layer is added; callers attach their own
    classifier to the returned model's output.

    # Arguments
        include_top: whether to append the fully-connected head
            (Flatten followed by Dense layers of 4096, 2048 and 1024
            units, all ReLU) after the last convolutional block.
        input_tensor: optional tensor to use as image input for the model.
            A Keras tensor (i.e. output of `layers.Input()`) is used
            directly; any other tensor is wrapped in an `Input` layer.
        input_shape: optional shape tuple used when an `Input` layer has
            to be created. When omitted it defaults to `(3, None, None)`
            if the backend dimension ordering is `'th'` and to
            `(None, None, 3)` otherwise.
            NOTE(review): with `include_top=True` the Flatten/Dense head
            presumably needs concrete spatial dimensions - confirm.
        pooling: optional pooling mode applied when `include_top`
            is `False`.
            - `None` means that the output of the model will be
                the output of the last convolutional block.
            - `avg` means that global average pooling
                will be applied to the output of the
                last convolutional block.
            - `max` means that global max pooling will
                be applied.
            Any other value leaves the output unpooled.

    # Returns
        A Keras model instance.

    '''

    # Query the backend ordering once; it is also echoed for diagnostics.
    dim_ordering = K.image_dim_ordering()
    print(dim_ordering)

    # Only fall back to a default shape when the caller did not supply one,
    # so an explicit `input_shape` is no longer silently discarded.
    if input_shape is None:
        if dim_ordering == 'th':
            input_shape = (3, None, None)
        else:
            input_shape = (None, None, 3)

    if input_tensor is None:
        img_input = Input(shape=input_shape)
    elif not K.is_keras_tensor(input_tensor):
        img_input = Input(tensor=input_tensor, shape=input_shape)
    else:
        img_input = input_tensor

    # Convolutional blocks 1-5: two convolutions plus one pooling layer each.
    # Layer names follow the 'block<N>_conv<M>' / 'block<N>_pool' scheme.
    x = img_input
    for block_idx, filters in enumerate((64, 128, 256, 512, 512), start=1):
        for conv_idx in (1, 2):
            x = Conv2D(filters, (3, 3), activation='relu', padding='same',
                       name='block{}_conv{}'.format(block_idx, conv_idx))(x)
        x = MaxPooling2D((2, 2), strides=(2, 2),
                         name='block{}_pool'.format(block_idx))(x)

    if include_top:
        # Fully-connected head (no final softmax; added by the caller).
        x = Flatten(name='flatten')(x)
        x = Dense(4096, activation='relu', name='fc1')(x)
        x = Dense(2048, activation='relu', name='fc2')(x)
        x = Dense(1024, activation='relu', name='fc3')(x)
    elif pooling == 'avg':
        x = GlobalAveragePooling2D()(x)
    elif pooling == 'max':
        x = GlobalMaxPooling2D()(x)

    # Create model
    model = Model(img_input, x)

    return model

In [32]:
if __name__ == "__main__":
    # Train and evaluate VGG13 with its fully-connected head on 48x48 faces.
    image_data, label = read_data("../Data/5country.csv")

    # One-hot encode the nationality labels.
    label = pd.get_dummies(label)

    # Split into training and test sets.
    x_train, x_test, y_train, y_test = create_train_test_data(image_data, label)

    # Training configuration for the VGG model.
    epochs = 15
    img_width, img_height = 48, 48
    batch_size = 200
    # Image arrays are laid out as (height, width, channels).
    model = VGG13(include_top=True, input_tensor=Input(shape=(img_height, img_width, 3)))

    # Attach the 5-way softmax classifier to the base model's output.
    predictions = Dense(5, activation="softmax", name="softmax_prediction")(model.output)

    # `inputs`/`outputs` are the Keras 2 keyword names; `input`/`output`
    # are legacy aliases.
    model_final = Model(inputs=model.input, outputs=predictions)
    model_final.compile(loss="categorical_crossentropy", optimizer=optimizers.Adam(lr=0.0001), metrics=["accuracy"])
    # Summarize the model that is actually trained, including the softmax layer.
    model_final.summary()
    model_final.fit(x_train, y_train, batch_size=batch_size, epochs=epochs)

    test_loss, test_acc = model_final.evaluate(x_test, y_test)

    print('테스트 정확도:', test_acc)

48
48
total dataset size:
n_samples: 48050
n_features: 48
n_classes: 5
tf




_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_7 (InputLayer)         (None, 48, 48, 3)         0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 48, 48, 64)        1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 48, 48, 64)        36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 24, 24, 64)        0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 24, 24, 128)       73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 24, 24, 128)       147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 12, 12, 128)       0         
__________

In [33]:
if __name__ == "__main__":
    # Train and evaluate headless VGG13 with global average pooling
    # on 48x48 faces.
    image_data, label = read_data("../Data/5country.csv")

    # One-hot encode the nationality labels.
    label = pd.get_dummies(label)

    # Split into training and test sets.
    x_train, x_test, y_train, y_test = create_train_test_data(image_data, label)

    # Training configuration for the VGG model.
    epochs = 15
    img_width, img_height = 48, 48
    batch_size = 200
    # Image arrays are laid out as (height, width, channels).
    model = VGG13(include_top=False, input_tensor=Input(shape=(img_height, img_width, 3)), pooling="avg")

    # Attach the 5-way softmax classifier to the pooled convolutional features.
    predictions = Dense(5, activation="softmax", name="softmax_prediction")(model.output)

    # `inputs`/`outputs` are the Keras 2 keyword names; `input`/`output`
    # are legacy aliases.
    model_final = Model(inputs=model.input, outputs=predictions)
    model_final.compile(loss="categorical_crossentropy", optimizer=optimizers.Adam(lr=0.0001), metrics=["accuracy"])
    # Summarize the model that is actually trained, including the softmax layer.
    model_final.summary()
    model_final.fit(x_train, y_train, batch_size=batch_size, epochs=epochs)

    test_loss, test_acc = model_final.evaluate(x_test, y_test)

    print('테스트 정확도:', test_acc)

48
48
total dataset size:
n_samples: 48050
n_features: 48
n_classes: 5
tf




_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_8 (InputLayer)         (None, 48, 48, 3)         0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 48, 48, 64)        1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 48, 48, 64)        36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 24, 24, 64)        0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 24, 24, 128)       73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 24, 24, 128)       147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 12, 12, 128)       0         
__________

KeyboardInterrupt: 