# 필요한 LIB 호출

In [None]:
!pip install pandas scipy tensorflow-hub tensorflow-datasets

# Import

In [1]:
from pathlib import Path
from functools import reduce
import pandas as pd
import os
import random
import shutil

### 경로지정

In [2]:
!pwd

/tf/notebook


In [3]:
target = 'header'; # block | header | footer

# 원본 이미지 디렉토리 경로
source_dir = 'efficientlearning/header/'
# 나눈 이미지를 저장할 디렉토리 경로
destination_dir = 'efficientlearning_new/header/'


# 각 세트의 비율 (train: 70%, test: 15%, validation: 15%)
train_ratio = 0.7
test_ratio = 0.15
validation_ratio = 0.15

# 각 세트에 해당하는 디렉토리 생성
train_dir = os.path.join(destination_dir, 'train')
test_dir = os.path.join(destination_dir, 'test')
validation_dir = os.path.join(destination_dir, 'validation')

os.makedirs(train_dir, exist_ok=True)
os.makedirs(test_dir, exist_ok=True)
os.makedirs(validation_dir, exist_ok=True)

## 파일 Image 

In [4]:
# 폴더와 파일을 순회하며 이미지 파일을 각 세트 디렉토리로 복사
for root, dirs, files in os.walk(source_dir):
    # 폴더 경로 생성
    relative_path = os.path.relpath(root, source_dir)
    train_path = os.path.join(train_dir, relative_path)
    test_path = os.path.join(test_dir, relative_path)
    validation_path = os.path.join(validation_dir, relative_path)
    os.makedirs(train_path, exist_ok=True)
    os.makedirs(test_path, exist_ok=True)
    os.makedirs(validation_path, exist_ok=True)

    # 파일 복사
    for file in files:
        source_path = os.path.join(root, file)
        if random.random() < train_ratio:
            destination_path = os.path.join(train_path, file)
        elif random.random() < test_ratio:
            destination_path = os.path.join(test_path, file)
        else:
            destination_path = os.path.join(validation_path, file)
        shutil.copyfile(source_path, destination_path)

print("Images split into train, test, and validation sets.")

Images split into train, test, and validation sets.


# 학습데이터 전처리 과정

In [19]:
step2_train_path = Path('/tf/datasets/step2_new/header/train')
step2_validation_path = Path('/tf/datasets/step2_new/header/validation/')

In [20]:
labels = [path.name for path in step2_train_path.iterdir()]
def prepare_data_frame(step2_path):
    files = {label.name: [file for file in label.iterdir()] for label in step2_path.iterdir()} # file의 이름을 가져와서  files에 딕셔너리 형태로 저장
    
    file_paths = reduce(lambda x,y: x+y, files.values()) # reduce함수를 사용해 files values값을 확인하여 lambda함수로 x에 대입
    file_names = list(map(lambda x: x.name, file_paths)) # list화 해서 path들을 [] file_name에 입력
    name_to_path_dict = dict(zip(file_names, map(lambda x: str(x), file_paths))) #filename하고 file_path를 딕셔너리로 저장
    file_path_series = pd.Series(name_to_path_dict, dtype='string', name='file_path') #  #pandas의 Series 형태로 저장
    
    label_series_list = [
        pd.Series(1.0, index=map(lambda x: x.name, files[label]), name=label)  
        for label in labels
    ]
    df = reduce(lambda x, y: pd.merge(x, y, how='outer', left_index=True, right_index=True), label_series_list)
    df = df.fillna(value=0.0)
    
    df_with_file_path = pd.merge(df, file_path_series, left_index=True, right_index=True) #dataframe형태로 저장해서 붙여넣고 리턴
    
    return df_with_file_path

In [21]:
train_df = prepare_data_frame(step2_train_path)   # train_dataframe 
validation_df = prepare_data_frame(step2_validation_path) #valid dataframe
print(train_df, validation_df)

                                                    logo_center  logo_left  \
cys_20220928_181059_01.jpg                                  1.0        0.0   
cys_20220929_101001_01.jpg                                  1.0        0.0   
cys_20220929_110353_01.jpg                                  0.0        1.0   
cys_20220929_111453_02.jpg                                  1.0        0.0   
cys_20220929_112345_02.jpg                                  1.0        0.0   
...                                                         ...        ...   
hyw_screencapture-wpbingosite-wordpress-ruper-h...          0.0        1.0   
hyw_screencapture-wpbingosite-wordpress-ruper-h...          0.0        1.0   
hyw_screencapture-wpbingosite-wordpress-vatage-...          0.0        1.0   
hyw_screencapture-yonifit-co-kr-2022-09-30-09_1...          1.0        0.0   
hyw_screencapture-zierli-2022-09-28-16_29_23_01...          1.0        0.0   

                                                               

### 필요한 LIB 호출

In [22]:
import tensorflow as tf
import tensorflow_hub as hub

In [34]:
train_ds = tf.data.Dataset.from_tensor_slices({**train_df})
print( train_ds )
validation_ds = tf.data.Dataset.from_tensor_slices({**validation_df})
print( validation_ds )

<_TensorSliceDataset element_spec={'logo_center': TensorSpec(shape=(), dtype=tf.float64, name=None), 'logo_left': TensorSpec(shape=(), dtype=tf.float64, name=None), 'file_path': TensorSpec(shape=(), dtype=tf.string, name=None)}>
<_TensorSliceDataset element_spec={'logo_center': TensorSpec(shape=(), dtype=tf.float64, name=None), 'logo_left': TensorSpec(shape=(), dtype=tf.float64, name=None), 'file_path': TensorSpec(shape=(), dtype=tf.string, name=None)}>


In [35]:
def load_image(sample):
    file_path = sample['file_path']
    image_bytes = tf.io.read_file(file_path)
    image = tf.io.decode_image(image_bytes, channels=3, expand_animations=False)
    sample['image'] = image
    return sample

def preprocess_image(sample):
    image = tf.image.convert_image_dtype(sample['image'], tf.float32)
    image = tf.image.resize(image, [224, 224])
    sample['image'] = image
    return sample

def transform_data_for_keras_fit(sample):
    return sample['image'], tuple(sample[label] for label in labels)

In [36]:
trainable_ds = train_ds.map(load_image).map(preprocess_image).map(transform_data_for_keras_fit).batch(32)
testable_ds = validation_ds.map(load_image).map(preprocess_image).map(transform_data_for_keras_fit).batch(32)
print( trainable_ds )
print( testable_ds )

<_BatchDataset element_spec=(TensorSpec(shape=(None, 224, 224, 3), dtype=tf.float32, name=None), (TensorSpec(shape=(None,), dtype=tf.float64, name=None), TensorSpec(shape=(None,), dtype=tf.float64, name=None)))>
<_BatchDataset element_spec=(TensorSpec(shape=(None, 224, 224, 3), dtype=tf.float32, name=None), (TensorSpec(shape=(None,), dtype=tf.float64, name=None), TensorSpec(shape=(None,), dtype=tf.float64, name=None)))>


In [37]:
inputs = tf.keras.Input(shape=(224, 224, 3), name="image") #image size를 resize를 거쳐 224로 된것을 입력 layer로 설정
efficientnet_v2 = hub.KerasLayer('https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_b0/feature_vector/2', trainable=False)  # 학습용 layer 및 모델 불러오기 tfhub에서
net = efficientnet_v2(inputs)
#outputs = [tf.keras.layers.Dense(1, name='dense_'+label)(net) for label in labels] #output layer를 추가
outputs = [tf.keras.layers.Dense(1, name=label, activation='sigmoid')(net) for label in labels]

efficientnet_v2_step2 = tf.keras.Model(inputs=inputs, outputs=outputs)  # 모델 병합

In [38]:
efficientnet_v2_step2.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.01),   # adam을 사용하여 0.01 learningrate를 줌
    loss=[tf.keras.losses.BinaryCrossentropy(from_logits=True, name='binary_crossentropy_'+label) for label in labels], #loss는 binary_crossentropy 사용
    metrics=tf.keras.metrics.BinaryAccuracy(),
)

In [39]:
efficientnet_v2_step2.summary()

Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 image (InputLayer)          [(None, 224, 224, 3)]        0         []                            
                                                                                                  
 keras_layer_1 (KerasLayer)  (None, 1280)                 5919312   ['image[0][0]']               
                                                                                                  
 logo_center (Dense)         (None, 1)                    1281      ['keras_layer_1[0][0]']       
                                                                                                  
 logo_left (Dense)           (None, 1)                    1281      ['keras_layer_1[0][0]']       
                                                                                            

In [40]:
history = efficientnet_v2_step2.fit(
    trainable_ds, epochs=100, validation_data=testable_ds,
    callbacks=[tf.keras.callbacks.TensorBoard(log_dir='logs/fit')]
)

Epoch 1/100


  output, from_logits = _get_logits(
2023-07-17 05:31:50.432879: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:432] Loaded cuDNN version 8600
2023-07-17 05:31:57.512605: I tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:606] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.
2023-07-17 05:32:00.481163: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x7f5248013d50 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2023-07-17 05:32:00.481198: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): NVIDIA GeForce RTX 3060 Ti, Compute Capability 8.6
2023-07-17 05:32:00.488897: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:255] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2023-07-17 05:32:00.583989: I ./tensorflow/compiler/jit/device_compiler.h:186] Compiled cluster using XLA!  This line is logge

Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100


Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100


Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100


Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100


Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


In [43]:
# efficientnet_v2_step2.save_weights('/tf/datasets/step2_multi/'+target+'/'+target)
efficientnet_v2_step2.save('/tf/datasets/step2_multi/'+target+'/'+'4')

INFO:tensorflow:Assets written to: /tf/datasets/step2_multi/header/4/assets


INFO:tensorflow:Assets written to: /tf/datasets/step2_multi/header/4/assets


In [69]:
import tensorflow as tf
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.efficientnet import EfficientNetB0

# 저장된 모델 로드
model = tf.keras.models.load_model('/tf/datasets/step2_multi/header/4')

# 이미지 불러오기
img_path = '/tf/datasets/step2_new/data/header/logo_left/hyw_screencapture-demo-theme-sky-zamona-fashion-fashion-02-2022-09-27-15_27_20926_01.jpg'
img = image.load_img(img_path, target_size=(224, 224))
img = image.img_to_array(img)
img = img / 255.0  # Normalize the image
img = tf.expand_dims(img, axis=0)  # Add batch dimension

# Make predictions
predictions = model.predict(img)

print(predictions)
class_0_probability = predictions[0][0]
class_1_probability = predictions[1][0]

print("logo_center:", class_0_probability)
print("logo_left:", class_1_probability)

[array([[7.356843e-10]], dtype=float32), array([[1.]], dtype=float32)]
logo_center: [7.356843e-10]
logo_left: [1.]


In [45]:
!pip install tensorflow-addons


Collecting tensorflow-addons
  Downloading tensorflow_addons-0.21.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (612 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m612.0/612.0 kB[0m [31m25.2 MB/s[0m eta [36m0:00:00[0m
Collecting typeguard<3.0.0,>=2.7 (from tensorflow-addons)
  Downloading typeguard-2.13.3-py3-none-any.whl (17 kB)
Installing collected packages: typeguard, tensorflow-addons
Successfully installed tensorflow-addons-0.21.0 typeguard-2.13.3
[0m

In [53]:
class_0_probability = predictions[0][0]
class_1_probability = predictions[1][0]

print("logo_center:", class_0_probability)
print("logo_left:", class_1_probability)

logo_center: [5.7939576e-07]
logo_left: [0.99999857]
