# 흉부X선 사진을 통한 병변 탐지
- 방사선 노출은 암 발생 위험을 높인다.
- X선은 CT의 2~3% 정도로 방사선량이 적다.
- 방사선량이 적은 X선 사진을 통해 병변 탐지를 극대화 하고자한다.
## 가설
- 세상에는 상대적으로 많은 양의 정상 데이터와 적은 양의 비정상 데이터가 존재할 것이다.
- autoencoder를 통해 비정상 데이터를 생성할 수 있을 것이다.
- autoencoder GAN을 이용해 abnomaly detection을 해낼 수 있을 것이다.

# TensorFlow 및 기타 라이브러리 가져오기

In [81]:
import random
import tensorflow as tf
import numpy as np

from tensorflow.keras.models import Sequential
from tensorflow.keras import layers
from tensorflow import keras

random.seed(10)

# 데이터세트 다운로드 및 탐색하기
흉부x선 사진 데이터세트 사용

In [82]:
import pathlib
from google.colab import drive
drive.mount('/content/gdrive')
data_dir_train = "gdrive/MyDrive/chest_xray/train"
data_dir_train = pathlib.Path(data_dir_train)
data_dir_val = "gdrive/MyDrive/chest_xray/val"
data_dir_val = pathlib.Path(data_dir_val)
data_dir_test = "gdrive/MyDrive/chest_xray/test"
data_dir_test = pathlib.Path(data_dir_test)

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


# 데이터세트 만들기

In [83]:
batch_size = 32
img_height = 180
img_width = 180

In [84]:
train_ds = tf.keras.preprocessing.image_dataset_from_directory(
    data_dir_train,
    seed=123,
    image_size=(img_height, img_width),
    batch_size=batch_size)

Found 5216 files belonging to 2 classes.


In [85]:
val_ds = tf.keras.preprocessing.image_dataset_from_directory(
    data_dir_val,
    seed=123,
    image_size=(img_height, img_width),
    batch_size=batch_size)

Found 16 files belonging to 2 classes.


In [86]:
test_ds = tf.keras.preprocessing.image_dataset_from_directory(
    data_dir_test,
    seed=123,
    image_size=(img_height, img_width),
    batch_size=batch_size)

Found 624 files belonging to 2 classes.


In [87]:
class_names = train_ds.class_names
print(class_names)

['NORMAL', 'PNEUMONIA']


# 모델 만들기
- 데이터 증강
- 3개 컨볼루션 블록
- Fully connected layer
- 드롭아웃

In [88]:
data_augmentation = keras.Sequential(
    [
     layers.experimental.preprocessing.RandomFlip('horizontal',
                                                  input_shape=(img_height,
                                                               img_width,
                                                               3)),
     layers.experimental.preprocessing.RandomRotation(0.1),
     layers.experimental.preprocessing.RandomZoom(0.1)
    ]
)

In [89]:
model = Sequential([
                    data_augmentation,
                    layers.experimental.preprocessing.Rescaling(1./255),
                    layers.Conv2D(16, 3, padding='same', activation='relu'),
                    layers.MaxPooling2D(),
                    layers.Conv2D(32, 3, padding='same', activation='relu'),
                    layers.MaxPooling2D(),
                    layers.Conv2D(64, 3, padding='same', activation='relu'),
                    layers.MaxPooling2D(),
                    layers.Dropout(0.2),
                    layers.Flatten(),
                    layers.Dense(128, activation='relu'),
                    layers.Dense(1, activation='sigmoid')
])

# 모델 컴파일하기
- optimizers.Adam 옵티마이저
- losses.BinaryCrossentropy 손실 함수

In [90]:
model.compile(optimizer='adam',
              loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
              metrics=['accuracy'])

# 모델 요약

In [91]:
model.summary()

Model: "sequential_14"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
sequential_13 (Sequential)   (None, 180, 180, 3)       0         
_________________________________________________________________
rescaling_8 (Rescaling)      (None, 180, 180, 3)       0         
_________________________________________________________________
conv2d_21 (Conv2D)           (None, 180, 180, 16)      448       
_________________________________________________________________
max_pooling2d_24 (MaxPooling (None, 90, 90, 16)        0         
_________________________________________________________________
conv2d_22 (Conv2D)           (None, 90, 90, 32)        4640      
_________________________________________________________________
max_pooling2d_25 (MaxPooling (None, 45, 45, 32)        0         
_________________________________________________________________
conv2d_23 (Conv2D)           (None, 45, 45, 64)      

# 모델 훈련하기

In [92]:
epochs=15
callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=3),
checkpoint_filepath = 'gdrive/MyDrive/chest_xray/tmp/checkpoint'
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath,
    save_weights_only=True,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True
)

history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=epochs,
    callbacks=[callback, model_checkpoint_callback]
)

Epoch 1/15


  '"`binary_crossentropy` received `from_logits=True`, but the `output`'


Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


# 예측하기
- 이미지 분류

In [94]:
model.load_weights(checkpoint_filepath)

predictions = model.predict(test_ds)
score = tf.nn.sigmoid(predictions[0])

print(
    "This image most likely belongs to {} with a {:.2f} percent confidence.".format(class_names[np.argmax(score)], 100 * np.max(score))
)

This image most likely belongs to NORMAL with a 52.77 percent confidence.


# Future work
- autoencoder를 통한 abnomal image 생성
- autoencoder GAN을 이용한 abnomaly detection