# 1. Import Packages

In [None]:
# -*- coding: utf-8 -*-

import tensorflow as tf 
from tensorflow.keras.applications.efficientnet import preprocess_input, EfficientNetB0
from sklearn.model_selection import train_test_split
from PIL import Image
import numpy as np

import pickle
import os
import re

from osAdvanced import File_Control
from ProgressBar import Progress_Bar

import matplotlib.pyplot as plt
import matplotlib.font_manager as fm

# 2. Data Load

## 2.1 데이터 경로를 불러온 뒤 label 정보를 dictionary 형태로 저장하기.

In [None]:
# Collect the image paths and derive a human-readable label for every class index.
# Assumes `dataset` is a list of per-directory lists of file paths, so that
# dataset[i] holds every image of class i — TODO confirm against File_Control.
dataset = File_Control.searchAllFilesInDirectoryByDir("/raid/korean_food/", "jpg")
label_dict = {}
# Raw string: "\/" is an invalid escape sequence in a normal string literal.
p = re.compile(r"\/[가-힣]*\/.*\/")
for i, class_files in enumerate(dataset):
    if not class_files:
        # No image for this index, so there is no path to derive a label from.
        continue
    match = p.search(class_files[0])
    if match is None:
        raise ValueError(f"cannot derive a label from path: {class_files[0]!r}")
    # "/a/b/" -> "|a|b|" -> "a|b": join the matched directory names with "|".
    label = match.group().replace("/", "|")
    label_dict[str(i)] = label[1:-1]

In [None]:
label_dict

## 2.3 데이터 전처리
나중에 전처리 과정을 생략 가능하게끔 Pickle형태로 전처리된 데이터를 저장.

In [None]:
def elementCount(list):
    """Return the combined length of every element contained in ``list``.

    ``list`` is an iterable of sized objects (e.g. a list of lists); the
    result is the sum of ``len()`` over its elements, or 0 when it is empty.
    The parameter name shadows the builtin and is kept only so existing
    callers continue to work.
    """
    return sum(map(len, list))

# Resize every image, run the EfficientNet preprocessing on it and store the
# result (plus one-hot labels) in a pickle so later runs can skip this step.
RESIZE_WIDTH = 224
RESIZE_HEIGHT = 224
CHANNEL = 3

# Count once instead of re-walking the dataset for every use.
dataset_len = elementCount(dataset)
print("총 데이터 개수 :", dataset_len)

# Every slot is written in the loop below, so uninitialised buffers are fine.
y = np.empty(dataset_len, dtype=np.int32)
x = np.empty((dataset_len, RESIZE_HEIGHT, RESIZE_WIDTH, CHANNEL), dtype=np.float32)

index = 0
for i, class_files in enumerate(dataset):
    for data in class_files:
        # The context manager closes the underlying file; convert() returns a
        # fully loaded copy, so it stays usable after the file is closed.
        with Image.open(data) as source_img:
            img = source_img.convert('RGB')
        img = img.resize((RESIZE_WIDTH, RESIZE_HEIGHT))
        img = preprocess_input(np.array(img))
        y[index] = i
        x[index] = img
        index = index + 1
        Progress_Bar.printProgressBar(index, dataset_len, data)

# Labels are indices into `dataset`, so the one-hot width must cover every
# index. Counting distinct labels would be too small whenever a class has no
# images, making np.eye(...)[y] fail or disagree with the label indices.
y_unique_num = len(dataset)

y_encoded = np.eye(y_unique_num)[y] # One-Hot Encoding

pickle_list = [x, y_encoded]
with open('/raid/korean_food_pkl/preprocessed_data_0530_EfficientNet.pkl', 'wb') as f:
    pickle.dump(pickle_list, f)
print("pickle saved.")

## 2.4 전처리된 Pickle 데이터 불러오기.


In [None]:
# Restore the preprocessed data from disk. pickle.load() deserialises the whole
# stored object in one call; it is expected to be a two-item sequence of
# (inputs, labels) — presumably the list written by the preprocessing step.
# NOTE(review): only unpickle files you produced yourself; pickle can execute
# arbitrary code while loading.
with open("/raid/korean_food_pkl/preprocessed_data_0530_EfficientNet.pkl", 'rb') as f:
    data = pickle.load(f)

x, y = data[0], data[1]
print(x.shape, y.shape, sep="\n")

## 2.5 불러온 데이터를 Train/Test/Validation Set으로 나누기.

In [None]:
# First hold out 30% of the samples, then cut that hold-out in half:
# 70% train / 15% validation / 15% test. The fixed random_state keeps the
# partition reproducible.
x_train, x_valtest, y_train, y_valtest = train_test_split(
    x, y, test_size=0.3, random_state=1
)
x_val, x_test, y_val, y_test = train_test_split(
    x_valtest, y_valtest, test_size=0.5, random_state=1
)

# Drop the names of the unsplit arrays so they can be garbage-collected.
del x, y, x_valtest, y_valtest

print("train size : ", y_train.shape[0])
print("test size : ", y_test.shape[0])
print("validation size : ", y_val.shape[0])

## 2.6 데이터 로드가 잘 되었는지 확인
matplotlib의 기본 폰트는 한글을 표시하지 못하므로, 한글 폰트 파일(NanumGothic)을 불러와 제목에 사용함.

In [None]:
import random

# Sanity check: draw a few random training images with their label as title.
# Titles are Korean, so an explicit font file with Hangul glyphs is supplied.
path = '/usr/share/fonts/truetype/nanum/NanumGothic.ttf'
prop = fm.FontProperties(fname=path, size=18)

columns = 4
rows = 2
fig = plt.figure(figsize=(20, 10))

ax = []
for i in range(columns * rows):
    # randrange excludes the upper bound; randint(0, n) could return n itself
    # and index one past the end of the training set.
    img_index = random.randrange(len(y_train))
    img = x_train[img_index]
    ax.append(fig.add_subplot(rows, columns, i + 1))
    # Labels are one-hot encoded; argmax recovers the class index.
    y_str = str(np.argmax(y_train[img_index]))
    ax[-1].set_title(label_dict[y_str], fontproperties=prop)  # set title
    # imshow interprets float arrays as [0, 1] and clips everything above, so
    # convert back to 8-bit pixels for display.
    # NOTE(review): assumes the stored pixels are still in [0, 255], i.e. that
    # the EfficientNet preprocess_input did not rescale them — confirm.
    ax[-1].imshow(np.clip(img, 0, 255).astype(np.uint8))
plt.show()


# 3. 모델 로드

## 3.1 EfficientNetB0모델 로드

In [None]:
 def fc_layers(model, num_classes=150):
    """Attach a fully connected classification head to ``model``.

    The head flattens ``model.output`` and applies two
    Dense -> BatchNormalization -> ReLU -> Dropout(0.3) stages (1024 and 512
    units) followed by a softmax layer.

    Args:
        model: Keras model whose ``input`` and ``output`` tensors are reused.
        num_classes: Number of units in the final softmax layer. Defaults to
            150, the value that was previously hard-coded.

    Returns:
        A new ``tf.keras.Model`` mapping ``model.input`` to the softmax output.
    """
    # Only `tf` is imported by this file; bare `layers` / `keras` are undefined.
    layers = tf.keras.layers

    x = model.output
    x = layers.Flatten()(x)
    x = layers.Dense(1024, name="dense_1")(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation(activation='relu')(x)
    x = layers.Dropout(0.3)(x)

    x = layers.Dense(512, name="dense_2")(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation(activation='relu')(x)
    x = layers.Dropout(0.3)(x)

    x = layers.Dense(num_classes, activation='softmax', name='dense_3')(x)
    model = tf.keras.Model(model.input, x)
    return model

# Report the array shapes of the three splits before building the network.
print("train shape : ", x_train.shape)
print("test shape : ", x_test.shape)
print("validation shape : ", x_val.shape)

# Expose only GPU index 2 to CUDA.
# NOTE(review): this presumably only takes effect if TensorFlow has not yet
# initialised its GPU devices in this process — confirm.
os.environ["CUDA_VISIBLE_DEVICES"] = "2"

IMAGE_SHAPE = (224, 224, 3)

# EfficientNetB0 without its built-in classifier (include_top=False) and with
# no pretrained weights (weights=None), wrapped with the custom head.
model = fc_layers(
    EfficientNetB0(
        include_top=False,
        input_shape=IMAGE_SHAPE,
        weights=None,
        classes=150,
    )
)
model.summary()

## 3.1 모델 컴파일 및 하이퍼 파라미터 설정

In [None]:
"""
tf.debugging.set_log_device_placement(True)

global_average_layer = tf.keras.layers.GlobalAveragePooling2D()
prediction_layer = tf.keras.layers.Dense(150)

inputs = tf.keras.Input(shape=(224, 224, 3))
x = resnet_101_model(x)
x = global_average_layer(x)
x = tf.keras.layers.Dropout(0.2)(x)
outputs = prediction_layer(x)
resnet_101_model = tf.keras.Model(inputs, outputs)
"""
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

## 3.2 모델 학습

In [None]:

CHECKPOINT_PATH = './checkpoints_0530_efficientNet'

# Write the weights (not the full model) to CHECKPOINT_PATH, and only when
# val_accuracy reaches a new maximum.
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=CHECKPOINT_PATH,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    save_weights_only=True,
)

# Stop when the training loss has not improved for 3 consecutive epochs.
# NOTE(review): this monitors 'loss' (training), not 'val_loss' — confirm
# that is intended.
model_early_stopping_callback = tf.keras.callbacks.EarlyStopping(
    monitor='loss',
    patience=3,
)

training_callbacks = [model_checkpoint_callback, model_early_stopping_callback]

# Train for at most 50 epochs, validating on the validation split each epoch.
hist = model.fit(
    x=x_train,
    y=y_train,
    batch_size=32,
    epochs=50,
    validation_data=(x_val, y_val),
    callbacks=training_callbacks,
)

In [None]:
# Score the model on the held-out test split and show the returned values.
# NOTE(review): this evaluates the in-memory model as it is; no checkpoint
# weights are reloaded in this step.
eval_result = model.evaluate(x=x_test, y=y_test)
print(eval_result)