In [1]:
import cv2
import glob
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import random
import shutil
import tensorflow as tf

from keras.layers.advanced_activations import LeakyReLU, PReLU
from math import cos, sin, pi
from PIL import Image
from tqdm import tqdm
from tensorflow.keras import Sequential, Model
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.layers import Activation, Convolution2D, MaxPooling2D, BatchNormalization, Flatten, Dense, Dropout, Conv2D, ZeroPadding2D, GlobalAveragePooling2D
from tensorflow.keras.models import load_model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.applications import Xception

In [2]:
AUTOTUNE = tf.data.experimental.AUTOTUNE

In [3]:
# 경로 이동
os.chdir('data/1. open')

In [4]:
os.listdir()

['train.csv',
 'train_imgs.zip',
 'train_df.csv',
 '210325_aug_Xception_04_10.h5',
 'train_imgs',
 '210325_aug_Xception_01_4.h5',
 '210326_aug_Xception_05_1.h5',
 '2_augmentation(8_epochs).csv',
 'test_imgs',
 '3_transferlearning2.csv',
 '210325_aug_Xception_01_3.h5',
 'sample_submission.csv',
 '210325_aug_Xception_03_1.h5',
 'submission_210326_aug_Xception_valacc63%.csv',
 '210325_aug_Xception_03_2.h5',
 '3_transferlearning(7epochs).csv',
 '3_transferlearning1.csv',
 '210325_aug_Xception_02_2.h5',
 '3_transferlearning.csv',
 'saved_model.pb',
 '210325_aug_Xception_01_7.h5',
 '210325_aug_Xception_02_1.h5',
 'val',
 '210325_aug_Xception_01_1.h5',
 'baseline_submission.csv',
 'valid.csv',
 'training.csv',
 'submission_210325_aug_Xception_valacc55%.csv',
 'assets',
 'train',
 'variables',
 '210325_aug_Xception_04_8.h5',
 'test_imgs.zip',
 '210325_aug_Xception_01_2.h5']

In [5]:
# train 데이터 중 10%를 검증 데이터로 사용

# csv 파일 불러오기
data = pd.read_csv('train_df.csv')
submission = pd.read_csv('sample_submission.csv')

# 경로 설정
data_paths = sorted(glob.glob('./train_imgs/*.jpg'))
test_paths = sorted(glob.glob('./test_imgs/*.jpg'))

data['path'] = data_paths

In [6]:
# 데이터 프레임 랜덤하게 분할

# 전체 데이터 중 90%는 학습 데이터 활용
train = data.sample(frac=0.9, random_state=2021)
print('학습 데이터 길이는: ', len(train))

# 전체 데이터 중 10%는 검증 데이터 활용
valid = data.drop(train.index)
print('검증 데이터 길이는: ', len(valid))

학습 데이터 길이는:  3776
검증 데이터 길이는:  419


In [7]:
train.reset_index(drop=True, inplace=True)
valid.reset_index(drop=True, inplace=True)
train

Unnamed: 0,image,nose_x,nose_y,left_eye_x,left_eye_y,right_eye_x,right_eye_y,left_ear_x,left_ear_y,right_ear_x,...,right_palm_y,spine2(back)_x,spine2(back)_y,spine1(waist)_x,spine1(waist)_y,left_instep_x,left_instep_y,right_instep_x,right_instep_y,path
0,186-1-1-08-Z36_E-0000025.jpg,798.933008,373.245639,793.866015,359.459874,807.169574,357.482205,806.000000,355.000000,855.000000,...,340.000000,865.280391,460.337474,879.516120,535.617865,838.000000,913.000000,875.967760,903.528542,./train_imgs/186-1-1-08-Z36_E-0000025.jpg
1,474-1-2-21-Z134_A-0000009.jpg,1174.947912,639.230112,1174.871199,659.153408,1194.947920,650.871224,1144.282200,698.564400,1195.714959,...,579.956605,1037.861370,639.306809,959.634094,612.817037,706.561671,497.947990,732.270802,485.008540,./train_imgs/474-1-2-21-Z134_A-0000009.jpg
2,001-1-1-01-Z17_C-0000013.jpg,953.109409,328.531353,954.000000,313.000000,933.000000,325.000000,969.312380,322.468647,921.000000,...,355.359180,985.357813,451.768530,992.703545,514.421905,995.766173,876.202971,856.124302,750.687572,./train_imgs/001-1-1-01-Z17_C-0000013.jpg
3,282-2-1-14-Z57_A-0000021.jpg,966.449343,331.632895,959.572700,312.490462,972.000000,319.000000,938.895719,306.041119,955.000000,...,589.367105,909.000000,448.000000,907.000000,514.000000,896.000000,856.000000,982.000000,874.000000,./train_imgs/282-2-1-14-Z57_A-0000021.jpg
4,642-2-4-31-Z148_D-0000011.jpg,852.178815,404.919392,863.435893,375.317445,830.081597,416.593408,846.341823,377.819042,873.442188,...,806.838679,1084.408063,592.954292,1205.317386,662.164418,1226.580743,878.550402,1215.323665,851.450037,./train_imgs/642-2-4-31-Z148_D-0000011.jpg
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3771,546-1-2-25-Z134_D-0000013.jpg,645.000000,586.000000,636.000000,591.000000,625.000000,596.000000,646.000000,620.000000,623.000000,...,745.000000,711.615351,633.488674,814.195946,652.573006,1147.000000,745.000000,1025.000000,814.000000,./train_imgs/546-1-2-25-Z134_D-0000013.jpg
3772,186-1-1-08-Z36_E-0000023.jpg,767.000000,590.000000,754.933008,578.325890,752.236567,579.325890,760.504536,566.223308,766.504536,...,526.370551,852.794836,543.696441,916.085136,578.089323,831.712212,909.426797,874.019819,902.101745,./train_imgs/186-1-1-08-Z36_E-0000023.jpg
3773,282-2-1-14-Z57_A-0000017.jpg,1024.490462,546.000000,1027.756253,530.898686,1037.632895,543.325986,1025.756253,506.082238,1040.000000,...,730.917762,939.000000,497.000000,882.000000,530.000000,898.000000,855.000000,983.000000,874.000000,./train_imgs/282-2-1-14-Z57_A-0000017.jpg
3774,177-1-1-07-Z36_D-0000029.jpg,744.000000,284.000000,755.000000,264.000000,735.000000,271.000000,798.000000,258.000000,800.000000,...,533.000000,813.000000,423.000000,807.772054,501.428767,833.000000,926.000000,797.000000,911.000000,./train_imgs/177-1-1-07-Z36_D-0000029.jpg


In [8]:
# Augmentation Setting
pixel_shifts = [12]
rotation_angles = [12]
inc_brightness_ratio = 1.2
dec_brightness_ratio = 0.8
noise_ratio = 0.008

In [9]:
# 좌우 반전
def left_right_flip(images, keypoints):
    flipped_keypoints = []
    flipped_images = np.flip(images, axis=1)
    for idx, sample_keypoints in enumerate(keypoints):
        if idx % 2 == 0:
            flipped_keypoints.append(480.-sample_keypoints)
        else:
            flipped_keypoints.append(sample_keypoints)

    # left_right_keypoints_convert
    for i in range(8):
        flipped_keypoints[2+(4*i):4+(4*i)], flipped_keypoints[4+(4*i):6+(
            4*i)] = flipped_keypoints[4+(4*i):6+(4*i)], flipped_keypoints[2+(4*i):4+(4*i)]
    flipped_keypoints[36:38], flipped_keypoints[38:
                                                40] = flipped_keypoints[38:40], flipped_keypoints[36:38]
    flipped_keypoints[44:46], flipped_keypoints[46:
                                                48] = flipped_keypoints[46:48], flipped_keypoints[44:46]

    return flipped_images, flipped_keypoints

In [10]:
# 수직/수평 동시 이동
# forloop에서 shift_x, shift_y 중 하나만 놓으면
# 수직 또는 수평 이동만 따로 시행 가능
def shift_images(images, keypoints):
    # tensor -> numpy
    images = images.numpy()
    shifted_images = []
    shifted_keypoints = []
    for shift in pixel_shifts:
        for (shift_x, shift_y) in [(-shift, -shift), (-shift, shift), (shift, -shift), (shift, shift)]:
            # 이동할 matrix 생성
            M = np.float32([[1, 0, shift_x], [0, 1, shift_y]])
            shifted_keypoint = np.array([])
            shifted_x_list = np.array([])
            shifted_y_list = np.array([])
            # 이미지 이동
            shifted_image = cv2.warpAffine(
                images, M, (480, 270), flags=cv2.INTER_CUBIC)
            # 이동한만큼 keypoint 수정
            for idx, point in enumerate(keypoints):
                if idx % 2 == 0:
                    shifted_keypoint = np.append(
                        shifted_keypoint, point+shift_x)
                    shifted_x_list = np.append(shifted_x_list, point+shift_x)
                else:
                    shifted_keypoint = np.append(
                        shifted_keypoint, point+shift_y)
                    shifted_y_list = np.append(shifted_y_list, point+shift_y)
            # 수정된 keypoint가 이미지 사이즈를 벗어나지 않으면 append
            if np.all(0.0 < shifted_x_list) and np.all(shifted_x_list < 480) and np.all(0.0 < shifted_y_list) and np.all(shifted_y_list < 270):
                shifted_images.append(shifted_image.reshape(270, 480, 3))
                shifted_keypoints.append(shifted_keypoint)

    return shifted_images, shifted_keypoints

In [11]:
# 이미지 회전
def rotate_augmentation(images, keypoints):
    # tensor -> numpy
    images = images.numpy()
    rotated_images = []
    rotated_keypoints = []

    for angle in rotation_angles:
        for angle in [angle, -angle]:
            # 회전할 matrix 생성
            M = cv2.getRotationMatrix2D((240, 135), angle, 1.0)
            # cv2_imshow로는 문제없지만 추후 plt.imshow로 사진을 확인할 경우 black screen 생성...
            # 혹시 몰라 matrix를 ndarray로 변환
            M = np.array(M, dtype=np.float32)
            angle_rad = -angle*pi/180
            rotated_image = cv2.warpAffine(images, M, (480, 270))
            rotated_images.append(rotated_image)

            # keypoint를 copy하여 forloop상에서 값이 계속 없데이트 되는 것을 회피
            rotated_keypoint = keypoints.copy()
            rotated_keypoint[0::2] = rotated_keypoint[0::2] - 240
            rotated_keypoint[1::2] = rotated_keypoint[1::2] - 135

            for idx in range(0, len(rotated_keypoint), 2):
                rotated_keypoint[idx] = rotated_keypoint[idx] * \
                    cos(angle_rad)-rotated_keypoint[idx+1]*sin(angle_rad)
                rotated_keypoint[idx+1] = rotated_keypoint[idx] * \
                    sin(angle_rad)+rotated_keypoint[idx+1]*cos(angle_rad)

            rotated_keypoint[0::2] = rotated_keypoint[0::2] + 240
            rotated_keypoint[1::2] = rotated_keypoint[1::2] + 135
            rotated_keypoints.append(rotated_keypoint)

    return rotated_images, rotated_keypoints

In [12]:
# 이미지 해상도 조절
def alter_brightness(images):
    altered_brightness_images = []
    inc_brightness_images = np.clip(images*inc_brightness_ratio, 0.0, 1.0)
    dec_brightness_images = np.clip(images*dec_brightness_ratio, 0.0, 1.0)
    altered_brightness_images.append(inc_brightness_images)
    altered_brightness_images.append(dec_brightness_images)
    return altered_brightness_images

In [13]:
# Random 노이즈 추가
def add_noise(images):
    images = images.numpy()
    noise = noise_ratio * np.random.randn(270, 480, 3)
    noise = noise.astype(np.float32)
    # 생성한 noise를 원본에 add
    noisy_image = cv2.add(images, noise)
    return noisy_image

In [14]:
def trainGenerator():
    # 원본 이미지 resize
    for i in range(len(train)):
        img = tf.io.read_file(train['path'][i])  # path(경로)를 통해 이미지 읽기
        # 경로를 통해 불러온 이미지를 tensor로 변환
        img = tf.image.decode_jpeg(img, channels=3)
        img = tf.image.resize(img, [270, 480])   # 이미지 resize
        img = img/255                            # 이미지 rescaling
        target = train.iloc[:, 1:49].iloc[i, :]  # keypoint 뽑아주기
        # image size를 1920x1080 -> 480x270으로 바꿔줬으므로 keypoint도 변경
        target = target/4
        
        yield (img, target)

    # horizontal flip
    for i in range(len(train)):
        img = tf.io.read_file(train['path'][i])
        img = tf.image.decode_jpeg(img, channels=3)
        img = tf.image.resize(img, [270, 480])
        img = img/255
        target = train.iloc[:, 1:49].iloc[i, :]
        target = target/4
        
        img, target = left_right_flip(img, target)        
        
        yield (img, target)

    # Horizontal & Vertical shift
    for i in range(len(train)):
        img = tf.io.read_file(train['path'][i])
        img = tf.image.decode_jpeg(img, channels=3)
        img = tf.image.resize(img, [270, 480])
        img = img/255
        target = train.iloc[:, 1:49].iloc[i, :]
        target = target/4
        
        img_list, target_list = shift_images(img, target)
        
        for shifted_img, shifted_target in zip(img_list, target_list):
            yield (shifted_img, shifted_target)

    # Rotation
    for i in range(len(train)):
        img = tf.io.read_file(train['path'][i])
        img = tf.image.decode_jpeg(img, channels=3)
        img = tf.image.resize(img, [270, 480])
        img = img/255
        target = train.iloc[:, 1:49].iloc[i, :]
        target = target/4
        
        img_list, target_list = rotate_augmentation(img, target)
        
        for rotated_img, rotated_target in zip(img_list, target_list):
            yield (rotated_img, rotated_target)

#     # Alter_Brightness
#     for i in range(len(train)):
#         img = tf.io.read_file(train['path'][i])
#         img = tf.image.decode_jpeg(img, channels=3)
#         img = tf.image.resize(img, [270, 480])
#         img = img/255
#         target = train.iloc[:, 1:49].iloc[i, :]
#         target = target/4
        
#         img_list = alter_brightness(img)
        
#         for altered_brightness_images in img_list:
#             yield (altered_brightness_images, target)

#     # Adding_Noise
#     for i in range(len(train)):
#         img = tf.io.read_file(train['path'][i])
#         img = tf.image.decode_jpeg(img, channels=3)
#         img = tf.image.resize(img, [270, 480])
#         img = img/255
#         target = train.iloc[:, 1:49].iloc[i, :]
#         target = target/4
        
#         noisy_img = add_noise(img)
        
#         yield (noisy_img, target)

In [15]:
def validGenerator():
    # 원본 이미지 resize
    for i in range(len(valid)):
        img = tf.io.read_file(valid['path'][i])  # path(경로)를 통해 이미지 읽기
        # 경로를 통해 불러온 이미지를 tensor로 변환
        img = tf.image.decode_jpeg(img, channels=3)
        img = tf.image.resize(img, [270, 480])   # 이미지 resize
        img = img/255                            # 이미지 rescaling
        target = valid.iloc[:, 1:49].iloc[i, :]  # keypoint 뽑아주기
        # image size를 1920x1080 -> 480x270으로 바꿔줬으므로 keypoint도 변경
        target = target/4

        yield (img, target)

In [16]:
batch_size = 10

In [17]:
train_dataset = tf.data.Dataset.from_generator(
    trainGenerator, (tf.float32, tf.float32), (tf.TensorShape([270, 480, 3]), tf.TensorShape([48])))
train_dataset = train_dataset.batch(batch_size).prefetch(AUTOTUNE)

valid_dataset = tf.data.Dataset.from_generator(
    validGenerator, (tf.float32, tf.float32), (tf.TensorShape([270, 480, 3]), tf.TensorShape([48])))
valid_dataset = valid_dataset.batch(batch_size).prefetch(AUTOTUNE)

In [18]:
# Callback 설정
earlystop = EarlyStopping(patience=10)

learning_rate_reduction = ReduceLROnPlateau(
    monitor="val_loss",
    patience=10,
    factor=0.9,
    min_lr=0,
    verbose=1)

model_check = ModelCheckpoint(  # 에포크마다 현재 가중치를 저장
    filepath="./210327_aug_Xception_06_{epoch}.h5",  # 모델 파일 경로
    monitor='val_loss',  # val_loss 가 좋아지지 않으면 모델 파일을 덮어쓰지 않음.
    save_best_only=True)

callbacks = [earlystop, learning_rate_reduction, model_check]

In [19]:
base_model = Xception(input_shape=(
   270, 480, 3), include_top=False, weights='imagenet', pooling='avg')

x = base_model.output
x = Dense(512, activation='relu', input_dim=(7*13*1536))(x)
x = Dropout(0.1)(x)
predictions = Dense(48)(x)

model = Model(inputs=base_model.input, outputs=predictions)

model.compile(optimizer=Adam(learning_rate=0.01),
                  loss='mean_squared_error',
                  metrics=['accuracy'])

In [20]:
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 270, 480, 3) 0                                            
__________________________________________________________________________________________________
block1_conv1 (Conv2D)           (None, 134, 239, 32) 864         input_1[0][0]                    
__________________________________________________________________________________________________
block1_conv1_bn (BatchNormaliza (None, 134, 239, 32) 128         block1_conv1[0][0]               
__________________________________________________________________________________________________
block1_conv1_act (Activation)   (None, 134, 239, 32) 0           block1_conv1_bn[0][0]            
______________________________________________________________________________________________

In [21]:
checkpoint_path = "./210326_aug_Xception_05_1.h5"

In [22]:
model.load_weights(checkpoint_path)

In [23]:
history = model.fit(
    train_dataset,
    epochs=100,
    validation_data=valid_dataset,
    callbacks=callbacks,
    verbose=1
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 00045: ReduceLROnPlateau reducing learning rate to 0.008999999798834325.
