In [1]:
import pandas as pd
import os
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import cv2
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GlobalAveragePooling2D , BatchNormalization , Dropout , Dense
from tensorflow.keras.callbacks import TensorBoard , ModelCheckpoint , LearningRateScheduler

In [2]:
# Environment switch read by the path-rewrite cell in the preprocessing section:
# when it equals False, the 'f:/' prefix of each path is replaced with the
# local dataset directory.
HOME_TRAIN = False

In [3]:
# Number of samples per batch in both the training and validation pipelines.
BATCH_SIZE = 32
# Drop probability passed to both Dropout layers of the classifier head.
DROP_OUT_RATE = 0.2

In [4]:
# Load the merged K-Face metadata table (image path, face bounding box,
# age group, gender and the integer-encoded gender column).
dataset_info = pd.read_csv("meta_data_face_coor_K-Face.csv")
dataset_info

Unnamed: 0,file_path_x,left,right,top,bottom,연령대,성별,new_gender
0,C:/Users/Moon/Desktop/Age_Gender_Prediction/Da...,289,491,155,402,30대,남,1
1,C:/Users/Moon/Desktop/Age_Gender_Prediction/Da...,377,581,175,392,30대,남,1
2,C:/Users/Moon/Desktop/Age_Gender_Prediction/Da...,313,502,181,401,30대,남,1
3,C:/Users/Moon/Desktop/Age_Gender_Prediction/Da...,394,602,142,407,30대,남,1
4,C:/Users/Moon/Desktop/Age_Gender_Prediction/Da...,373,589,142,403,30대,남,1
...,...,...,...,...,...,...,...,...
365667,C:/Users/Moon/Desktop/Age_Gender_Prediction/Da...,323,635,101,470,20대,여,0
365668,C:/Users/Moon/Desktop/Age_Gender_Prediction/Da...,305,600,101,466,20대,여,0
365669,C:/Users/Moon/Desktop/Age_Gender_Prediction/Da...,282,577,101,467,20대,여,0
365670,C:/Users/Moon/Desktop/Age_Gender_Prediction/Da...,260,557,100,472,20대,여,0


In [5]:
# Model inputs: the image path plus the face bounding-box columns.
data_file_path = dataset_info.loc[:, ['file_path_x', 'left', 'right', 'top', 'bottom']]
# Targets: the integer-encoded gender column as a plain Python list.
gender = dataset_info['new_gender'].tolist()

In [6]:
# 75/25 train/validation split, stratified on the gender label so both
# partitions keep the same class ratio; the fixed seed makes it repeatable.
(file_path_train, file_path_val,
 y_train, y_val) = train_test_split(
    data_file_path,
    gender,
    stratify=gender,
    random_state=777,
    test_size=0.25,
)

In [7]:
# Sanity check: inputs and labels must have matching lengths in each partition.
print(*map(len, (file_path_train, y_train, file_path_val, y_val)))

274254 274254 91418 91418


In [8]:
# Unpack the training frame into one plain list per bounding-box column.
train_left, train_right, train_top, train_bottom = (
    file_path_train[column].tolist()
    for column in ('left', 'right', 'top', 'bottom')
)
# Rebinds the name from the DataFrame to the list of image paths, so this
# cell cannot be re-run without re-running the split cell first.
file_path_train = file_path_train['file_path_x'].tolist()

In [9]:
# Unpack the validation frame into one plain list per bounding-box column.
val_left, val_right, val_top, val_bottom = (
    file_path_val[column].tolist()
    for column in ('left', 'right', 'top', 'bottom')
)
# Rebinds the name from the DataFrame to the list of image paths, so this
# cell cannot be re-run without re-running the split cell first.
file_path_val = file_path_val['file_path_x'].tolist()

In [10]:
def load_image( image_path , left , right , top , bottom , label ):
    """Read one JPEG, crop it to the face box and prepare it for ResNet50.

    Args:
        image_path: path of the JPEG file to read.
        left, right, top, bottom: pixel coordinates of the face bounding box.
        label: target value, passed through unchanged.

    Returns:
        A tuple ``(img, label)`` where ``img`` is the cropped face resized to
        224x224 and passed through the ResNet50 ``preprocess_input``.
    """
    img = tf.io.read_file(image_path)
    img = tf.image.decode_jpeg(img, channels=3)
    # crop_to_bounding_box takes (offset_height, offset_width, height, width),
    # so the box corners are converted to an origin plus a size.
    img = tf.image.crop_to_bounding_box(img, top, left, bottom - top, right - left)

    img = tf.image.resize(img, (224, 224))
    img = tf.keras.applications.resnet50.preprocess_input(img)

    return img , label

In [11]:
# Each dataset element is (path, left, right, top, bottom, label), which is
# the argument order load_image expects.
train_dataset = tf.data.Dataset.from_tensor_slices((
    file_path_train,
    train_left, train_right, train_top, train_bottom,
    y_train,
))

val_dataset = tf.data.Dataset.from_tensor_slices((
    file_path_val,
    val_left, val_right, val_top, val_bottom,
    y_val,
))

In [12]:
# Training pipeline: shuffle the lightweight (path, box, label) records
# across the whole training set, then decode/crop in parallel, batch, and
# prefetch.
# NOTE: this cell rebinds train_dataset/val_dataset to the mapped pipelines,
# so re-running it requires re-running the from_tensor_slices cell first.
train_dataset = (
    train_dataset
    .shuffle(buffer_size=len(file_path_train))
    .map(load_image, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    .batch(BATCH_SIZE)
    .prefetch(tf.data.experimental.AUTOTUNE)
)

# Validation pipeline: no shuffle. Element order does not affect the
# validation loss or accuracy, and a full-size shuffle buffer only adds
# start-up time and memory before every validation pass.
val_dataset = (
    val_dataset
    .map(load_image, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    .batch(BATCH_SIZE)
    .prefetch(tf.data.experimental.AUTOTUNE)
)

In [13]:
# Backbone network: ResNet50 without its classification top and with no
# pretrained weights loaded (weights=None), for 224x224 RGB inputs.
ResNet50 = tf.keras.applications.resnet.ResNet50(
    include_top=False,
    input_shape=(224, 224, 3),
    weights=None,
)

In [14]:
# Binary classifier: ResNet50 backbone followed by a small dense head.
# Layers are listed in the order they are applied.
model = Sequential([
    ResNet50,
    GlobalAveragePooling2D(),
    Dropout(DROP_OUT_RATE),
    BatchNormalization(),
    Dense(128, activation='relu'),
    Dropout(DROP_OUT_RATE),
    BatchNormalization(),
    Dense(1, activation='sigmoid'),  # single sigmoid output for the 0/1 label
])

In [15]:
# Learning rate used at epoch 0 of the exponential-decay schedule.
initial_learning_rate = 0.01

def lr_exp_decay(epoch, lr):
    """Exponentially decayed learning rate for the given epoch.

    Computes ``initial_learning_rate * exp(-k * epoch)`` with ``k = 0.1``.

    Args:
        epoch: zero-based epoch index.
        lr: current learning rate; accepted for the scheduler callback
            signature but not used, because the schedule is absolute.

    Returns:
        The learning rate for ``epoch`` as a Python float.
    """
    k = 0.1
    # np.exp replaces np.math.exp: the np.math alias is deprecated and was
    # removed in NumPy 2.0. float() keeps the return type a plain float.
    return float(initial_learning_rate * np.exp(-k * epoch))

# Callback that applies lr_exp_decay at the start of every epoch and logs
# the chosen rate (verbose=1).
lr_scheduler = LearningRateScheduler(schedule=lr_exp_decay, verbose=1)

In [16]:
# Output locations, relative to the working directory.
log_dir = os.path.join('Logs')
CHECKPOINT_PATH = os.path.join('CheckPoints_K-Face_Gender')

# TensorBoard event logging.
tb_callback = TensorBoard(log_dir=log_dir)

# Save the model only when validation accuracy improves on the best so far.
cp = ModelCheckpoint(
    filepath=CHECKPOINT_PATH,
    save_best_only=True,
    monitor='val_accuracy',
    verbose=1,
)

In [17]:
# The LearningRateScheduler callback overwrites the optimizer's learning rate
# at the start of every epoch, so the optimizer is created with the
# schedule's own starting value rather than an unrelated constant that would
# never take effect.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=initial_learning_rate),
    loss='binary_crossentropy',
    metrics=['accuracy']
)

In [18]:
# Train for up to 20 epochs, validating after each one. The callbacks handle
# the learning-rate schedule, best-model checkpointing and TensorBoard logs.
hist = model.fit(
    x=train_dataset,
    epochs=20,
    verbose=1,
    validation_data=val_dataset,
    callbacks=[lr_scheduler, cp, tb_callback],
)



Epoch 1/20

Epoch 00001: LearningRateScheduler reducing learning rate to 0.01.

Epoch 00001: val_accuracy improved from -inf to 0.94230, saving model to CheckPoints_K-Face_Gender




INFO:tensorflow:Assets written to: CheckPoints_K-Face_Gender\assets
Epoch 2/20

Epoch 00002: LearningRateScheduler reducing learning rate to 0.009048374180359595.

Epoch 00002: val_accuracy improved from 0.94230 to 0.96006, saving model to CheckPoints_K-Face_Gender




INFO:tensorflow:Assets written to: CheckPoints_K-Face_Gender\assets
Epoch 3/20

Epoch 00003: LearningRateScheduler reducing learning rate to 0.008187307530779819.

Epoch 00003: val_accuracy improved from 0.96006 to 0.96133, saving model to CheckPoints_K-Face_Gender




INFO:tensorflow:Assets written to: CheckPoints_K-Face_Gender\assets
Epoch 4/20

Epoch 00004: LearningRateScheduler reducing learning rate to 0.007408182206817179.
 576/8571 [=>............................] - ETA: 19:58 - loss: 0.0208 - accuracy: 0.9924

KeyboardInterrupt: 

# 사용한 Code — preprocessing code used to build meta_data_face_coor_K-Face.csv

In [None]:
def gender_map( gender ):
    """Encode a Korean gender label as an integer.

    Returns 1 for '남' (male), 0 for '여' (female) and None for any other
    value.
    """
    if gender == '남':
        return 1
    return 0 if gender == '여' else None

In [None]:
# Add the integer-encoded gender column derived from the Korean '성별' labels,
# then preview the first rows to check the result.
dataset_info['new_gender'] = dataset_info['성별'].map( gender_map )
dataset_info.head()

In [None]:
# When not training at home, point every path at the local copy of the
# dataset instead of the 'f:/' drive.
# NOTE(review): this assumes data_file_path is an iterable of path strings.
# If it is the DataFrame built in the training section, iterating it yields
# column labels rather than paths - confirm which object this cell expects.
if HOME_TRAIN == False:
    data_file_path = list(map(
        lambda path: path.replace('f:/', 'C:/Users/Moon/Desktop/Age_Gender_Prediction/Dataset/'),
        data_file_path,
    ))

In [None]:
def get_ID_2(file_path):
    """Return ``file_path`` with its first 19 characters removed."""
    prefix_length = 19
    return file_path[prefix_length:]

In [None]:
# Derive the 'ID' column from each path by dropping its fixed-length prefix;
# 'ID' is the key used for the merge further down. Display it to check.
dataset_info['ID'] = dataset_info['file_path'].apply(get_ID_2)
dataset_info['ID']

In [None]:
# Print column dtypes and non-null counts of the metadata table.
dataset_info.info()

In [None]:
# Load the second table; it is joined to dataset_info on 'ID' below.
# presumably holds the face bounding-box coordinates per image - confirm.
face_coor_info = pd.read_csv("coor_211129.csv")

In [None]:
# Preview the first rows of the loaded table.
face_coor_info.head()

In [None]:
# Print column dtypes and non-null counts of the loaded table.
face_coor_info.info()

In [None]:
def get_ID(file_path):
    """Return ``file_path`` with its first 68 characters removed."""
    prefix_length = 68
    return file_path[prefix_length:]

In [None]:
# Show the 'file_path' value at index label 0.
# presumably used to work out the prefix length that get_ID strips - confirm.
face_coor_info['file_path'][0]

In [None]:
# Derive the 'ID' join key from each path by dropping its fixed-length
# prefix, then display the column to check the result.
face_coor_info['ID'] = face_coor_info['file_path'].apply(get_ID)
face_coor_info['ID']

In [None]:
# Left join on 'ID': every row of face_coor_info is kept and metadata is
# attached where an ID matches. Both inputs carry a 'file_path' column, so
# the result holds 'file_path_x' (left) and 'file_path_y' (right).
meta_data_face_coor = face_coor_info.merge(dataset_info, on='ID', how='left')

In [None]:
# Preview the merged table.
meta_data_face_coor.head()

In [None]:
# Print column dtypes and non-null counts of the merged table.
meta_data_face_coor.info()

In [None]:
# Drop the right-hand copy of the path column produced by the merge.
# Re-running this cell raises KeyError because the column is already gone.
meta_data_face_coor = meta_data_face_coor.drop(columns=['file_path_y'])

In [None]:
# Preview the table again after dropping the column.
meta_data_face_coor.head()

In [None]:
# Count missing values per column; rows without a match in the left join
# show up here as NaN in the metadata columns.
meta_data_face_coor.isna().sum()

In [None]:
# Number of rows per value of the '연령대' (age group) column.
meta_data_face_coor['연령대'].value_counts()

In [None]:
# Number of rows per encoded gender value, to check the class balance.
meta_data_face_coor['new_gender'].value_counts()

In [None]:
# Persist the merged table; the training section reads this file back.
# index=False keeps the DataFrame index out of the CSV.
meta_data_face_coor.to_csv("meta_data_face_coor_K-Face.csv",index=False)