In [1]:
import cv2
import numpy as np

import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"

from glob import glob

In [2]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers as L
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping, CSVLogger

# from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.applications import EfficientNetV2M
from tensorflow.keras.applications import EfficientNetV2B0

from tensorflow.keras.optimizers.legacy import Adam

In [3]:
from tensorflow.keras import mixed_precision
mixed_precision.set_global_policy('mixed_float16')

# Set GPU memory growth
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        # Currently, memory growth must be the same across GPUs
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        # Memory growth must be set before GPUs have been initialized
        print(e)

The dtype policy mixed_float16 may run slowly because this machine does not have a GPU. Only Nvidia GPUs with compute capability of at least 7.0 run quickly with mixed_float16.


In [4]:
global image_h
global image_w
global num_landmarks

""" Hyperparameters """
image_h = 224
image_w = 224
num_landmarks = 106
input_shape = (image_h, image_w, 3)
batch_size = 32
lr = 0.001
num_epochs = 50

In [5]:
initial_learning_rate = 0.001
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate,
    decay_steps=100000,
    decay_rate=0.96,
    staircase=True)

In [6]:
 """ Seeding """
np.random.seed(42)
tf.random.set_seed(42)

In [7]:
""" Paths """
path = "./LaPa"

In [8]:
train_img = sorted(glob(os.path.join(path, "train", "images", "*.jpg")))
# 4. train_img = [ '이미지 경로1 ', '이미지경로2', ...]
train_land = sorted(glob(os.path.join(path, "train", "landmarks", "*.txt")))
# 4-1. train_img = [ 'landmarks경로1 ', 'landmarks경로2', ...]
valid_img = sorted(glob(os.path.join(path, "val", "images", "*.jpg")))
valid_land = sorted(glob(os.path.join(path, "val", "landmarks", "*.txt")))
test_img = sorted(glob(os.path.join(path, "test", "images", "*.jpg")))
test_land = sorted(glob(os.path.join(path, "test", "landmarks", "*.txt")))

In [9]:
# reduce size of data
train_img = train_img[0:2000]
train_land = train_land[0:2000]
valid_img = valid_img[0:400]
valid_land = valid_land[0:400]
test_img = test_img[0:400]
test_land = test_land[0:400]

In [10]:
print(len(train_img), len(train_land), len(valid_img), len(valid_land), len(test_img), len(test_land))

2000 2000 400 400 400 400


In [11]:
def read_image_landmarks(image_path, landmark_path):
    # 6. 
    # 6-1 . 이미지경로 뭉탱이, 랜드마크경로 뭉탱이
    """ Image """
    image = cv2.imread(image_path, cv2.IMREAD_COLOR)
    h, w, _ = image.shape
    image = cv2.resize(image, (image_w, image_h))
    
    # Cast the image data to float32
    image = tf.cast(image, dtype=tf.float32)
    image /= 255.0
  #  image = image.astype(np.float32)

    """ Landmarks """
    data = open(landmark_path, "r").read()
    landmarks = []

    for line in data.strip().split("\n")[1:]:
        x, y = line.split(" ")
        x = float(x)/w
        y = float(y)/h

        landmarks.append(x)
        landmarks.append(y)

    landmarks = np.array(landmarks, dtype=np.float32)

    return image, landmarks

In [12]:
def preprocess(x, y):
    # 뭉탱이1[0], 뭉탱이2[0]
    # 이미지경로 하나, 랜드마크 경로 하나 씩
    def f(x, y):
        x = x.decode()
        # b'./lbsdf/image'
        y = y.decode()

        image, landmarks = read_image_landmarks(x, y)
        return image, landmarks

    # 5. x = <'이미지경로1', '2'> / y= <'랜드마크경로1', '2'>
    image, landmarks = tf.numpy_function(f, [x, y], [tf.float32, tf.float32])
    image.set_shape([image_h, image_w, 3])
    landmarks.set_shape([num_landmarks * 2])

    return image, landmarks

In [13]:
def build_model(input_shape, num_landmarks):
    inputs = L.Input(input_shape)

#   backbone = MobileNetV2(include_top=False, weights="imagenet", input_tensor=inputs, alpha=0.5)
    backbone = EfficientNetV2B0(include_top=False, weights="imagenet", input_tensor=inputs)
    backbone.trainable = True

    x = backbone.output
    x = L.GlobalAveragePooling2D()(x)
    x = L.Dropout(0.2)(x)
    outputs = L.Dense(num_landmarks*2, activation="sigmoid")(x)

    model = tf.keras.models.Model(inputs, outputs)
    return model

In [14]:
ds_train = tf.data.Dataset.from_tensor_slices((train_img, train_land))
# 3. ds_train = ( 텐서'이미지경로', 텐서'랜드마크')
ds_valid = tf.data.Dataset.from_tensor_slices((valid_img, valid_land))

# for i in ds_train:
#     print("type(i) : ",type(i))
#     print(i)
#     print(i[0])
#     print(i[1])
#     print('-----------------------------------------------------------------------------------------') 

In [15]:
ds_train = ds_train.shuffle(buffer_size=100)
# 2 ds_train= 튜플 / x = tensor1 , y=tensor2
ds_valid = ds_valid.shuffle(buffer_size=100)
ds_train

<_ShuffleDataset element_spec=(TensorSpec(shape=(), dtype=tf.string, name=None), TensorSpec(shape=(), dtype=tf.string, name=None))>

In [16]:
ds_train = ds_train.map(preprocess)
# 1. preprocess(x,y) #가정 map 하나씩 적용
ds_valid = ds_valid.map(preprocess)
# map( lambda x,y :  preprocess(x,y) , ds_valid)
# map(preprocess , ds_valid)
# ds_valid.map(preprocess)
ds_train

<_MapDataset element_spec=(TensorSpec(shape=(224, 224, 3), dtype=tf.float32, name=None), TensorSpec(shape=(212,), dtype=tf.float32, name=None))>

In [17]:
ds_train = ds_train.batch(batch_size).prefetch(2)
ds_valid = ds_valid.batch(batch_size).prefetch(2)
# Most dataset input pipelines should end with a call to prefetch. 
# This allows later elements to be prepared while the current element is being processed. 
# This often improves latency and throughput, at the cost of using additional memory to store prefetched elements.
ds_train

<_PrefetchDataset element_spec=(TensorSpec(shape=(None, 224, 224, 3), dtype=tf.float32, name=None), TensorSpec(shape=(None, 212), dtype=tf.float32, name=None))>

In [18]:
model_path = os.path.join("model_result", "model_effi.h5")
csv_path = os.path.join("model_result", "data_effi.csv")

In [19]:
""" Model """
model = build_model(input_shape, num_landmarks)
model.compile(loss="binary_crossentropy", optimizer=tf.keras.optimizers.Adam(lr_schedule))

""" Training """
callbacks = [
        ModelCheckpoint(model_path, verbose=1, save_best_only=True, monitor='val_loss'),
        ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5, min_lr=1e-7, verbose=1),
        CSVLogger(csv_path, append=True),
        EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=False)
    ]

In [None]:
model.fit(ds_train,
        validation_data=ds_valid,
        epochs=num_epochs,
        callbacks=callbacks
    )


Epoch 1/50

In [None]:
# 29:58:42  -> 3:12:48 after reducing data size (224*224)

# learning rate schedule (224*224) 2:39:53, 
# but loss:2.8818 -> ini lr = 0.01 then loss 0.7004 -> 0.7806->0.7242->0.7200->0.7073