In [None]:
!unzip '/content/drive/MyDrive/ESG 2024/교안/실습/datasets/Pneumonia.zip' -d '/content/drive/MyDrive/ESG 2024/교안/실습/datasets/Pneumonia'

### Data loading and preprocessing

In [None]:
import os
import cv2
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, classification_report

def load_and_preprocess_data(base_dir, img_size=(299, 299)):
    """Load every image under ``base_dir`` and return model-ready arrays.

    The directory layout read is ``base_dir/<subset>/<category>/<file>`` for
    subsets ``train``, ``val``, ``test`` and categories ``NORMAL`` (label 0)
    and ``PNEUMONIA`` (label 1). All subsets are pooled together.

    Each image is read as grayscale, histogram-equalized, resized and scaled
    to the range [0, 1].

    Args:
        base_dir: Root directory containing the subset folders.
        img_size: Target ``(height, width)`` of each image. For the default
            square size the order makes no difference.

    Returns:
        A tuple ``(images, labels)`` where ``images`` is a float32 array of
        shape ``(n, height, width, 1)`` and ``labels`` is an array of ``n``
        integer class indices.

    Raises:
        FileNotFoundError: If one of the expected subset/category folders is
            missing (raised by ``os.listdir``).
    """
    categories = ['NORMAL', 'PNEUMONIA']
    height, width = img_size
    images = []
    labels = []

    for class_num, category in enumerate(categories):
        for subset in ['train', 'val', 'test']:
            subset_dir = os.path.join(base_dir, subset, category)
            # os.listdir order is arbitrary; sorting keeps the sample order
            # (and therefore any seeded split done later) reproducible.
            for img_name in sorted(os.listdir(subset_dir)):
                try:
                    img_path = os.path.join(subset_dir, img_name)
                    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
                    # cv2.imread signals failure by returning None rather than
                    # raising, so report it explicitly.
                    if img is None:
                        raise ValueError("file could not be decoded as an image")

                    img = cv2.equalizeHist(img)
                    # cv2.resize takes dsize as (width, height), while the
                    # final reshape below is (height, width).
                    img = cv2.resize(img, (width, height))
                    # float32 halves memory compared with the float64 that
                    # `uint8 / 255.0` would produce.
                    img = img.astype(np.float32) / 255.0

                    images.append(img)
                    labels.append(class_num)
                except Exception as e:
                    # Best effort: skip files that cannot be processed.
                    print(f"Error processing image {img_name}: {e}")

    images = np.array(images, dtype=np.float32).reshape(-1, height, width, 1)
    return images, np.array(labels)

def split_data(X, y, test_size=0.2, val_size=0.2):
    """Split ``X``/``y`` into stratified train, validation and test subsets.

    ``test_size`` and ``val_size`` are fractions of the full dataset. Both
    splits are stratified on the labels and use ``random_state=42``.

    Returns:
        ``(X_train, y_train, X_val, y_val, X_test, y_test)``.
    """
    # Stage 1: hold out the test portion from the full dataset.
    X_rest, X_test, y_rest, y_test = train_test_split(
        X,
        y,
        test_size=test_size,
        stratify=y,
        random_state=42,
    )

    # Stage 2: the validation share is expressed relative to what remains
    # after the test portion has been removed.
    X_train, X_val, y_train, y_val = train_test_split(
        X_rest,
        y_rest,
        test_size=val_size/(1-test_size),
        stratify=y_rest,
        random_state=42,
    )

    return X_train, y_train, X_val, y_val, X_test, y_test

# Load and preprocess the data
base_dir = '/content/drive/MyDrive/ESG 2024/교안/실습/datasets/Pneumonia'
X, y = load_and_preprocess_data(base_dir)

# Split the data into train / validation / test subsets
X_train, y_train, X_val, y_val, X_test, y_test = split_data(X, y)

# NOTE(review): the one-hot conversion below is commented out, so y_train,
# y_val and y_test remain 1-D arrays of integer class indices. Downstream
# loss and metric code must accept integer labels.
# # Convert the labels to one-hot encoding
# y_train = to_categorical(y_train, 2)
# y_val = to_categorical(y_val, 2)
# y_test = to_categorical(y_test, 2)

# Report completion and the shape of each subset
print("데이터 로딩 및 전처리 완료")
print(f"훈련 데이터 형태: {X_train.shape}, 레이블 형태: {y_train.shape}")
print(f"검증 데이터 형태: {X_val.shape}, 레이블 형태: {y_val.shape}")
print(f"테스트 데이터 형태: {X_test.shape}, 레이블 형태: {y_test.shape}")

### Modeling

In [None]:
def create_simple_cnn(input_shape):
    """Build an (uncompiled) small CNN with a two-unit softmax output.

    Args:
        input_shape: Shape of one input sample, passed to the first
            convolution layer.

    Returns:
        A ``Sequential`` model made of three 3x3 ReLU convolutions (the first
        two each followed by 2x2 max pooling), then flatten, a 64-unit ReLU
        dense layer and a 2-unit softmax dense layer.
    """
    # Convolutional feature extractor.
    feature_extractor = [
        Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
        MaxPooling2D((2, 2)),
        Conv2D(64, (3, 3), activation='relu'),
        MaxPooling2D((2, 2)),
        Conv2D(64, (3, 3), activation='relu'),
    ]

    # Dense classification head.
    classifier_head = [
        Flatten(),
        Dense(64, activation='relu'),
        Dense(2, activation='softmax'),
    ]

    return Sequential(feature_extractor + classifier_head)

In [None]:
# Disabled alternative: everything in this cell is commented out and does not run.
# Unlike the active create_simple_cnn, this variant adds a max-pooling layer
# after every convolution, including the last one.
# # Build the model with a loop
# def create_simple_cnn(input_shape, num_conv_layers=3, filters=(32, 64, 64)):
#     model = Sequential()
#     model.add(Conv2D(filters[0], (3, 3), activation='relu', input_shape=input_shape))
#     model.add(MaxPooling2D((2, 2)))

#     for i in range(1, num_conv_layers):
#         model.add(Conv2D(filters[i], (3, 3), activation='relu'))
#         model.add(MaxPooling2D((2, 2)))

#     model.add(Flatten())
#     model.add(Dense(64, activation='relu'))
#     model.add(Dense(2, activation='softmax'))

#     return model

# # Create the model
# input_shape = (299, 299, 1)
# model = create_simple_cnn(input_shape, num_conv_layers=3, filters=(32, 64, 128))

In [None]:
# Build the CNN for single-channel 299x299 inputs.
input_shape = (299, 299, 1)
model = create_simple_cnn(input_shape)

# The labels are integer class indices (the one-hot conversion step in the
# data cell is commented out), so the sparse variant of the loss is required.
# 'categorical_crossentropy' expects one-hot targets of shape (n, 2) and does
# not match 1-D integer labels.
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

model.summary()

### Model training

In [None]:
# Fit on the training split and monitor the validation split after each epoch.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=20,
    validation_data=(X_val, y_val),
)

# Score the trained model once on the held-out test split.
test_loss, test_acc = model.evaluate(x=X_test, y=y_test)
print(f'Test accuracy: {test_acc:.4f}')


In [None]:
# Visualize the training history: accuracy on the left, loss on the right.
plt.figure(figsize=(12, 4))

# (history key, validation key, train legend, validation legend, title, y-axis label)
history_panels = [
    ('accuracy', 'val_accuracy', 'Training Accuracy', 'Validation Accuracy', 'Model Accuracy', 'Accuracy'),
    ('loss', 'val_loss', 'Training Loss', 'Validation Loss', 'Model Loss', 'Loss'),
]

for panel_index, (train_key, val_key, train_label, val_label, panel_title, y_label) in enumerate(history_panels, start=1):
    plt.subplot(1, 2, panel_index)
    plt.plot(history.history[train_key], label=train_label)
    plt.plot(history.history[val_key], label=val_label)
    plt.title(panel_title)
    plt.xlabel('Epoch')
    plt.ylabel(y_label)
    plt.legend()

plt.tight_layout()
plt.show()

# Save the trained model
model.save('simple_cnn_pneumonia_model.h5')

In [None]:
# model.predict returns one softmax probability per class, shape (n, 2).
y_pred = model.predict(X_test)
# Take the most probable class per sample to get 1-D integer predictions.
# Thresholding the (n, 2) matrix at 0.5 would yield an (n, 2) indicator
# matrix that cannot be compared against the 1-D integer labels in y_test.
y_pred_classes = np.argmax(y_pred, axis=1)
y_true = y_test

In [None]:
# Confusion matrix of true vs. predicted classes, drawn as an annotated heatmap.
class_names = ['Normal', 'Pneumonia']
cm = confusion_matrix(y_true, y_pred_classes)

plt.figure(figsize=(10,7))
heatmap_ax = sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
)
heatmap_ax.set_title('Confusion Matrix')
heatmap_ax.set_ylabel('True label')
heatmap_ax.set_xlabel('Predicted label')
plt.show()

# Per-class precision / recall / F1 report
print(classification_report(y_true, y_pred_classes))