## Import

In [1]:
# Tensorflow
import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.python.client import device_lib

# for keras
from classification_models.keras import Classifiers

# model
import tensorflow_addons as tfa
from tensorflow.keras import layers, models
from tensorflow.keras.models import Sequential, Model, load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array

# image processing, callbacks
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau

from sklearn.metrics import classification_report

# file
import zipfile
import os
import shutil

# sub
from tqdm import tqdm
import random

# basic
import pandas as pd
import numpy as np

# Plot
from concurrent.futures import ThreadPoolExecutor
from PIL import Image
import cv2
import matplotlib.pyplot as plt
import seaborn as sns


TensorFlow Addons (TFA) has ended development and introduction of new features.
TFA has entered a minimal maintenance and release mode until a planned end of life in May 2024.
Please modify downstream libraries to take dependencies from other repositories in our TensorFlow community (e.g. Keras, Keras-CV, and Keras-NLP). 

For more information see: https://github.com/tensorflow/addons/issues/2807 

 The versions of TensorFlow you are currently using is 2.10.0 and is not supported. 
Some things might work, some things might not.
If you were to encounter a bug, do not file an issue.
If you want to make sure you're using a tested and supported configuration, either change the TensorFlow version or the TensorFlow Addons's version. 
You can find the compatibility matrix in TensorFlow Addon's readme:
https://github.com/tensorflow/addons


## GPU 연결 확인 및 할당 메모리 제한

In [2]:
# Show the TensorFlow version, then every local device TensorFlow reports.
print(tf.__version__)
visible_devices = device_lib.list_local_devices()
print(visible_devices)

2.10.0
[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 12646702193184146134
xla_global_id: -1
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 9383706624
locality {
  bus_id: 1
  links {
  }
}
incarnation: 6708710404353872730
physical_device_desc: "device: 0, name: NVIDIA GeForce RTX 2080 Ti, pci bus id: 0000:b3:00.0, compute capability: 7.5"
xla_global_id: 416903419
]


In [3]:
# Cap the memory TensorFlow may claim on the first GPU.
# memory_limit is 10 * 1024 (documented by TensorFlow as megabytes).
gpus = tf.config.experimental.list_physical_devices('GPU')
if len(gpus) > 0:
    try:
        first_gpu_limits = [
            tf.config.experimental.VirtualDeviceConfiguration(memory_limit=10 * 1024)
        ]
        tf.config.experimental.set_virtual_device_configuration(gpus[0], first_gpu_limits)
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        # Report the configuration failure instead of aborting the notebook.
        print(e)

1 Physical GPUs, 1 Logical GPUs


In [4]:
# Seed NumPy, Python's `random` and TensorFlow with one shared value.
seed_value = 42
for apply_seed in (np.random.seed, random.seed, tf.random.set_seed):
    apply_seed(seed_value)

- 총 8번의 학습 및 비교 분석 진행
- 해당 ipynb파일에선 가장 좋은 방식인 2-4차의 학습 진행 코드를 포함

#### 모델 학습 진행

In [5]:
import tensorflow_addons as tfa
from tensorflow.keras import models
from tensorflow.keras import layers

#Vision Transformer
from vit_keras import vit

from sklearn.metrics import classification_report

In [6]:
# Dataset locations.
train_data_dir = './Kfood/Kfood/kfood_health_train/'  # training images, one sub-folder per class
val_data_dir = './Kfood/Kfood/kfood_health_val/'  # validation images

# Count only sub-directories: stray files in the training folder
# (e.g. .DS_Store, archives) are not classes and must not inflate num_classes.
all_items = sorted(
    entry for entry in os.listdir(train_data_dir)
    if os.path.isdir(os.path.join(train_data_dir, entry))
)
num_classes = len(all_items)

In [14]:
def mosaic_augmentation(image):
    """Blank out one random patch of an image with the patch's mean colour.

    A patch of size (height // 2, width // 2) is placed at a random position
    drawn from NumPy's global random state, and every pixel inside it is
    replaced by the per-channel mean of that patch.

    Args:
        image: NumPy array of rank 3, indexed as (row, column, channel).

    Returns:
        A new array with the same shape and dtype as `image`. The input array
        is left untouched, so callers that keep a reference to the original
        pixels still see them unmodified.
    """
    height, width, _ = image.shape
    patch_height, patch_width = height // 2, width // 2

    augmented = image.copy()

    # Images thinner than 2 pixels have no room for a patch; randint(0, 0)
    # would raise, so return the unchanged copy instead.
    if patch_height == 0 or patch_width == 0:
        return augmented

    # Top-left corner of the patch (rows first, then columns).
    top = np.random.randint(0, patch_height)
    left = np.random.randint(0, patch_width)
    bottom = top + patch_height
    right = left + patch_width

    # Per-channel mean over the patch, broadcast back over the whole patch.
    patch = augmented[top:bottom, left:right, :]
    augmented[top:bottom, left:right, :] = np.mean(patch, axis=(0, 1), keepdims=True)

    return augmented

In [15]:
# Input resolution used by the generators and the network.
img_width, img_height = 224, 224

# Random geometric / colour perturbations applied to training images only.
train_augmentation = dict(
    rotation_range=20,          # random rotation
    width_shift_range=0.2,      # horizontal translation
    height_shift_range=0.2,     # vertical translation
    shear_range=0.2,            # shear
    zoom_range=0.2,             # zoom in / out
    channel_shift_range=40,     # colour channel shift
    horizontal_flip=True,       # left-right flip
    vertical_flip=True,         # up-down flip
)

# Training pipeline: augmentation + mean-fill patch + scaling by 1/255.
train_datagen = ImageDataGenerator(
    rescale=1. / 255,
    preprocessing_function=mosaic_augmentation,
    **train_augmentation,
)

# Validation pipeline: scaling by 1/255 only, no augmentation.
val_datagen = ImageDataGenerator(rescale=1. / 255)

In [16]:
# Stream training images from disk with augmentation.
# Keras expects target_size as (height, width); both generators now use the
# same order so a non-square input size cannot silently transpose the images.
train_generator = train_datagen.flow_from_directory(
    train_data_dir,
    target_size=(img_height, img_width),
    batch_size=32,
    class_mode='categorical',  # one-hot labels for multi-class classification
)

# Stream validation images from disk (rescaling only).
validation_generator = val_datagen.flow_from_directory(
    val_data_dir,
    target_size=(img_height, img_width),
    batch_size=32,
    class_mode='categorical')

Found 14115 images belonging to 13 classes.
Found 1764 images belonging to 13 classes.


In [17]:
# ViT-B/32 backbone built without pretrained weights and without its own
# classification head (include_top=False); the head is added separately.
vit_backbone_options = dict(
    image_size=224,
    classes=num_classes,
    activation='softmax',
    include_top=False,
    pretrained=False,
    pretrained_top=False,
)
vit_model = vit.vit_b32(**vit_backbone_options)

In [18]:
# Classifier = ViT backbone + softmax head.
# The head width comes from the classes the training generator actually
# discovered, so it always matches the one-hot labels it yields (no hard-coded 13).
model = models.Sequential()
model.add(vit_model)
model.add(layers.Flatten())  # presumably a no-op if the backbone already returns a flat vector — TODO confirm
model.add(layers.Dense(len(train_generator.class_indices), activation='softmax'))

In [19]:
# Optimiser: Rectified Adam from TensorFlow Addons.
learning_rate = 1e-4
optimizer = tfa.optimizers.RectifiedAdam(learning_rate=learning_rate)

# Cross-entropy with label smoothing of 0.2.
smoothed_cross_entropy = tf.keras.losses.CategoricalCrossentropy(label_smoothing=0.2)

# Top-1 accuracy. Kept as TopKCategoricalAccuracy because the checkpoint
# callback monitors 'val_top_k_categorical_accuracy'.
top1_accuracy = tf.keras.metrics.TopKCategoricalAccuracy(k=1)

model.compile(
    optimizer=optimizer,
    loss=smoothed_cross_entropy,
    metrics=[top1_accuracy],
)

In [None]:
# Where the best model is written.
checkpoint_filepath = './Checkpoint/Mission3_VIT_zoom.h5'

# Save only when validation top-1 accuracy improves.
model_checkpoint_callback = ModelCheckpoint(
    filepath=checkpoint_filepath,
    monitor='val_top_k_categorical_accuracy',
    mode='max',
    save_best_only=True,
    verbose=1
)

# Multiply the learning rate by 0.2 after 5 epochs without a lower
# validation loss, never going below 1e-6.
reduce_lr_callback = ReduceLROnPlateau(
    monitor='val_loss',
    patience=5,
    factor=0.2,
    min_lr=1e-6
)

training_callbacks = [model_checkpoint_callback, reduce_lr_callback]

# Train for 50 epochs on the first GPU, validating after every epoch.
with tf.device('/device:GPU:0'):
    history = model.fit(
        train_generator,
        validation_data=validation_generator,
        epochs=50,
        callbacks=training_callbacks,
        verbose=1
    )

#### 검증 데이터에 대한 평가 진행

In [13]:
# Checkpoint written by the training run.
checkpoint_filepath = './Checkpoint/Mission3_VIT_zoom.h5'

# The model is only used for predict() below, so skip compilation:
# compile=False avoids rebuilding the saved optimizer (TensorFlow Addons'
# RectifiedAdam, a library that has reached end of life) and the loss.
loaded_model = load_model(checkpoint_filepath, compile=False)

# Validation data location and input size (restated so this section can run
# without the training cells).
val_data_dir = './Kfood/Kfood/kfood_health_val/'
img_width, img_height = 224, 224

# Same preprocessing as validation during training: scaling by 1/255 only.
validation_datagen = ImageDataGenerator(rescale=1.0 / 255)

# shuffle=False keeps the prediction order aligned with
# validation_generator.classes, which the report below relies on.
validation_generator = validation_datagen.flow_from_directory(
    val_data_dir,
    target_size=(img_height, img_width),
    batch_size=1,
    class_mode='categorical',
    shuffle=False
)

Found 1764 images belonging to 13 classes.


In [14]:
# Predict class probabilities for every validation image (generator order).
with tf.device('/device:GPU:0'):
    validation_predict = loaded_model.predict(validation_generator)

# Most probable class index per image.
validation_pred_classes = np.argmax(validation_predict, axis=1)

# Class names ordered by their integer index, plus the matching index list.
# Passing labels= explicitly keeps target_names aligned even if some class
# never appears in either the ground truth or the predictions.
class_indices = validation_generator.class_indices
target_names = sorted(class_indices, key=class_indices.get)
label_ids = [class_indices[name] for name in target_names]

# Per-class precision / recall / F1 as a DataFrame (one row per class).
validation_results = pd.DataFrame(
    classification_report(
        validation_generator.classes,
        validation_pred_classes,
        labels=label_ids,
        target_names=target_names,
        output_dict=True,
    )
).transpose()
print(validation_results)

              precision    recall  f1-score      support
가리비            0.877551  0.834951  0.855721   103.000000
갈비찜            0.788462  0.814570  0.801303   151.000000
고등어            0.915493  0.948905  0.931900   137.000000
김치국            0.946746  0.969697  0.958084   165.000000
낚지볶음           0.958333  0.942623  0.950413   122.000000
돼지갈비찜          0.807339  0.822430  0.814815   107.000000
된장찌개           0.916667  0.908257  0.912442   109.000000
떡국             0.943548  0.991525  0.966942   118.000000
모듬초밥           0.985714  0.857143  0.916944   161.000000
배추김치           0.938462  0.953125  0.945736   128.000000
부대찌개           0.925926  0.930233  0.928074   215.000000
순대             0.884211  0.965517  0.923077    87.000000
오리로스구이         0.987261  0.962733  0.974843   161.000000
accuracy       0.916667  0.916667  0.916667     0.916667
macro avg      0.913516  0.915516  0.913869  1764.000000
weighted avg   0.918033  0.916667  0.916699  1764.000000


In [None]:
# Attention-rollout heatmap for one sample image, overlaid on the image.
#
# NOTE(review): the previous version of this cell had never been executed. It
# looked up a layer called 'attention' and called .predict() on it, which a
# Keras layer does not provide. This version reads the attention weights that
# each vit-keras TransformerBlock returns as its second output — confirm this
# against the installed vit-keras version.
# NOTE(review): vit_model carries the weights of the current session; the best
# checkpoint is in loaded_model — decide which one should be explained.
from vit_keras import layers as vit_layers  # aliased: `layers` already names tensorflow.keras.layers

# Image to explain (placeholder path — point it at a real file).
sample_image_path = 'path/to/your/sample/image.jpg'

# cv2.imread returns None instead of raising when the file cannot be read.
original_image = cv2.imread(sample_image_path)
if original_image is None:
    raise FileNotFoundError(f"Could not read sample image: {sample_image_path}")
original_image = cv2.resize(original_image, (img_width, img_height))  # cv2 takes (width, height)

# Model input: OpenCV loads BGR, so convert to RGB, then apply the same 1/255
# scaling as the data generators and add the batch axis.
sample_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0
sample_image = np.expand_dims(sample_image, axis=0)

# Sub-model that outputs the attention weights of every Transformer block.
attention_outputs = [
    layer.output[1]
    for layer in vit_model.layers
    if isinstance(layer, vit_layers.TransformerBlock)
]
if not attention_outputs:
    raise ValueError("vit_model contains no TransformerBlock layers to read attention from")
attention_model = tf.keras.Model(inputs=vit_model.inputs, outputs=attention_outputs)

per_block_weights = attention_model.predict(sample_image)
if not isinstance(per_block_weights, (list, tuple)):
    per_block_weights = [per_block_weights]  # single-block models return a bare array

# assumes each output is (batch, heads, tokens, tokens) — TODO confirm
attention_map = np.stack([np.asarray(weights)[0] for weights in per_block_weights])
attention_map = attention_map.mean(axis=1)  # average over heads -> (blocks, tokens, tokens)

# Attention rollout: add the identity for the residual connections,
# re-normalise each row, then chain the blocks from first to last.
attention_map = attention_map + np.eye(attention_map.shape[-1])
attention_map = attention_map / attention_map.sum(axis=-1, keepdims=True)
rollout = attention_map[0]
for block_attention in attention_map[1:]:
    rollout = block_attention @ rollout

# Attention from the class token to every image patch, as a square grid.
# assumes the class token is token 0 and the patches form a square — TODO confirm
grid_size = int(round(np.sqrt(rollout.shape[-1] - 1)))
heatmap = rollout[0, 1:].reshape(grid_size, grid_size)

# Normalise to [0, 1]; skip the division when the map is all zeros.
heatmap = np.maximum(heatmap, 0)
heatmap_peak = heatmap.max()
if heatmap_peak > 0:
    heatmap = heatmap / heatmap_peak

# Upsample to the image size and colourise.
heatmap = cv2.resize(heatmap.astype(np.float32), (original_image.shape[1], original_image.shape[0]))
heatmap = np.uint8(255 * np.clip(heatmap, 0.0, 1.0))
heatmap = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET)

# Blend at 40% heatmap strength; clip and cast back to uint8 because
# cv2.cvtColor does not accept a float64 image with values above 255.
superimposed_img = np.clip(heatmap * 0.4 + original_image, 0, 255).astype(np.uint8)

# Show the result (matplotlib expects RGB).
plt.imshow(cv2.cvtColor(superimposed_img, cv2.COLOR_BGR2RGB))
plt.axis('off')
plt.show()
