<a href="https://colab.research.google.com/github/Joonyoung-Song/Kaggle-Chest-X-Ray-Images-Pneumonia-/blob/main/CNN_base.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
from tqdm.auto import tqdm

from keras.preprocessing import image
from keras.utils import np_utils
import os
from google.colab import drive
from pathlib import Path

import cv2

In [None]:
# Mount Google Drive at /gdrive so the dataset stored there can be read;
# force_remount=True is passed to request a fresh mount on every run.
drive.mount('/gdrive', force_remount=True)

In [None]:
# Root of the dataset on Google Drive.
# The drive is mounted at '/gdrive', so the path has to be absolute and start
# at that mount point. A relative 'drive/MyDrive/...' is resolved against the
# current working directory and silently matches no files.
data_dir = Path('/gdrive/MyDrive/Colab Notebooks/kaggle_pneumonia_classification')

# One sub-directory per split; each is expected to hold NORMAL/ and PNEUMONIA/
train_dir = data_dir / 'train'
val_dir = data_dir / 'val'
test_dir = data_dir / 'test'

In [None]:
# Class sub-directories of the training split
normal_cases_dir = train_dir / 'NORMAL'
pneumonia_cases_dir = train_dir / 'PNEUMONIA'

# Collect the image paths. glob() yields files in an arbitrary,
# filesystem-dependent order, so sort them to start from the same order on
# every run.
normal_cases = sorted(normal_cases_dir.glob('*.jpeg'))
pneumonia_cases = sorted(pneumonia_cases_dir.glob('*.jpeg'))

# (img_path, label) records: label 0 = normal, label 1 = pneumonia
train_data = [(img, 0) for img in normal_cases] + [(img, 1) for img in pneumonia_cases]

# Turn the records into a dataframe
train_data = pd.DataFrame(train_data, columns=['image', 'label'], index=None)

# Shuffle the rows; the fixed seed makes the row order (and therefore the
# batches seen during training) reproducible across kernel restarts
train_data = train_data.sample(frac=1., random_state=42).reset_index(drop=True)

# Preview the first rows
train_data.head()

In [None]:
img_size = 224

# Pre-allocate the image tensor: (n_images, height, width, RGB).
# float32 is requested explicitly: np.zeros defaults to float64, which doubles
# the memory footprint of this array without adding any precision the images
# actually carry.
X_train = np.zeros(shape=(len(train_data), img_size, img_size, 3), dtype=np.float32)

# Load every image resized to img_size x img_size and store it in its row.
# Pixel values are stored as loaded (no rescaling is applied here).
for idx, fname in enumerate(tqdm(train_data.image)):
  img = image.load_img(fname, target_size=(img_size, img_size))
  X_train[idx] = image.img_to_array(img)

In [None]:
# Class sub-directories of the validation split
normal_cases_dir = val_dir / 'NORMAL'
pneumonia_cases_dir = val_dir / 'PNEUMONIA'

# Collect the image paths. glob() yields files in an arbitrary,
# filesystem-dependent order, so sort them to start from the same order on
# every run.
normal_cases = sorted(normal_cases_dir.glob('*.jpeg'))
pneumonia_cases = sorted(pneumonia_cases_dir.glob('*.jpeg'))

# (img_path, label) records: label 0 = normal, label 1 = pneumonia
val_data = [(img, 0) for img in normal_cases] + [(img, 1) for img in pneumonia_cases]

# Turn the records into a dataframe
val_data = pd.DataFrame(val_data, columns=['image', 'label'], index=None)

# Shuffle the rows with a fixed seed so the row order is reproducible
val_data = val_data.sample(frac=1., random_state=42).reset_index(drop=True)

# Preview the first rows
val_data.head()

In [None]:
img_size = 224

# Pre-allocate the image tensor: (n_images, height, width, RGB).
# float32 is requested explicitly: np.zeros defaults to float64, which doubles
# the memory footprint of this array without adding any precision the images
# actually carry.
X_val = np.zeros(shape=(len(val_data), img_size, img_size, 3), dtype=np.float32)

# Load every image resized to img_size x img_size and store it in its row.
# Pixel values are stored as loaded (no rescaling is applied here).
for idx, fname in enumerate(tqdm(val_data.image)):
  img = image.load_img(fname, target_size=(img_size, img_size))
  X_val[idx] = image.img_to_array(img)

In [None]:
# Class sub-directories of the test split
normal_cases_dir = test_dir / 'NORMAL'
pneumonia_cases_dir = test_dir / 'PNEUMONIA'

# Collect the image paths. glob() yields files in an arbitrary,
# filesystem-dependent order, so sort them to start from the same order on
# every run.
normal_cases = sorted(normal_cases_dir.glob('*.jpeg'))
pneumonia_cases = sorted(pneumonia_cases_dir.glob('*.jpeg'))

# (img_path, label) records: label 0 = normal, label 1 = pneumonia
test_data = [(img, 0) for img in normal_cases] + [(img, 1) for img in pneumonia_cases]

# Turn the records into a dataframe
test_data = pd.DataFrame(test_data, columns=['image', 'label'], index=None)

# Shuffle the rows with a fixed seed so the row order is reproducible
test_data = test_data.sample(frac=1., random_state=42).reset_index(drop=True)

# Preview the first rows
test_data.head()

In [None]:
img_size = 224

# Pre-allocate the image tensor: (n_images, height, width, RGB).
# float32 is requested explicitly: np.zeros defaults to float64, which doubles
# the memory footprint of this array without adding any precision the images
# actually carry.
X_test = np.zeros(shape=(len(test_data), img_size, img_size, 3), dtype=np.float32)

# Load every image resized to img_size x img_size and store it in its row.
# Pixel values are stored as loaded (no rescaling is applied here).
for idx, fname in enumerate(tqdm(test_data.image)):
  img = image.load_img(fname, target_size=(img_size, img_size))
  X_test[idx] = image.img_to_array(img)

In [None]:
# Build the CNN

def conv_block(filters):
    """Return a convolutional stage as a ``tf.keras.Sequential``.

    The stage stacks two 3x3 separable convolutions (ReLU, 'same' padding),
    a batch-normalization layer and a max-pooling layer, in that order.

    Args:
        filters: Number of output filters for both separable convolutions.

    Returns:
        The un-built ``tf.keras.Sequential`` holding the four layers.
    """
    layers = tf.keras.layers
    conv_options = dict(activation='relu', padding='same')

    stage = [
        layers.SeparableConv2D(filters, 3, **conv_options),
        layers.SeparableConv2D(filters, 3, **conv_options),
        layers.BatchNormalization(),
        layers.MaxPool2D(),
    ]
    return tf.keras.Sequential(stage)

def dense_block(units, dropout_rate):
    """Return a fully connected stage as a ``tf.keras.Sequential``.

    The stage is a ReLU dense layer followed by batch normalization and
    dropout, in that order.

    Args:
        units: Width of the dense layer.
        dropout_rate: Rate passed to the trailing ``Dropout`` layer.

    Returns:
        The un-built ``tf.keras.Sequential`` holding the three layers.
    """
    layers = tf.keras.layers

    stage = [
        layers.Dense(units, activation='relu'),
        layers.BatchNormalization(),
        layers.Dropout(dropout_rate),
    ]
    return tf.keras.Sequential(stage)

def build_model():
    """Build and compile the CNN classifier.

    The input resolution is taken from the module-level ``img_size``
    (RGB images of shape ``(img_size, img_size, 3)``).

    Architecture: two plain 3x3 convolutions + max-pool, four ``conv_block``
    stages (32, 64, 128, 256 filters; dropout 0.2 after the last two), then
    three ``dense_block`` stages and a 2-unit softmax output.

    Returns:
        A compiled ``tf.keras.Sequential``. Because the output is a 2-unit
        softmax, the model expects one-hot encoded labels of width 2.
    """
    model = tf.keras.Sequential([
        tf.keras.Input(shape=(img_size, img_size, 3)),

        tf.keras.layers.Conv2D(16, 3, activation='relu', padding='same'),
        tf.keras.layers.Conv2D(16, 3, activation='relu', padding='same'),
        tf.keras.layers.MaxPool2D(),

        conv_block(32),
        conv_block(64),

        conv_block(128),
        tf.keras.layers.Dropout(0.2),

        conv_block(256),
        tf.keras.layers.Dropout(0.2),

        tf.keras.layers.Flatten(),
        dense_block(512, 0.7),
        dense_block(128, 0.5),
        dense_block(64, 0.3),

        tf.keras.layers.Dense(2, activation='softmax'),
    ])

    # A softmax over mutually exclusive classes with one-hot targets pairs
    # with categorical cross-entropy (binary cross-entropy is meant for
    # independent sigmoid outputs). `metrics` is passed as a list, which is
    # the form Keras documents and newer versions require.
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model






In [None]:
# Class balancing

# Per-class sample counts of the training set, computed once and reused below
# (label 0 = normal, label 1 = pneumonia)
label_counts = train_data['label'].value_counts()
n_normal = label_counts[0]
n_pneumonia = label_counts[1]

# Log of the pneumonia/normal count ratio. It is not consumed by the cells
# visible here; kept so any later use of `initial_bias` still works.
initial_bias = np.log([n_pneumonia / n_normal])

# Weight each class inversely to its frequency; the len/2 scaling keeps the
# total weight over all samples equal to the number of samples.
weight_for_0 = (1 / n_normal) * (len(train_data)) / 2.0
weight_for_1 = (1 / n_pneumonia) * (len(train_data)) / 2.0

class_weight = {0: weight_for_0, 1: weight_for_1}

print('Weight for class 0: {:.2f}'.format(weight_for_0))
print('Weight for class 1: {:.2f}'.format(weight_for_1))

In [None]:
import tensorflow as tf
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.utils import plot_model, to_categorical

In [None]:
# Integer class labels (0/1) as NumPy arrays, row-aligned with the dataframes
# the image tensors were built from
y_train = train_data["label"].to_numpy()
y_val = val_data["label"].to_numpy()

In [None]:
clf = build_model()

# Stop when validation accuracy has not improved by at least 0.001 for 5
# consecutive epochs, then roll back to the best epoch's weights.
# Accuracy is a higher-is-better metric, so mode must be 'max': with 'min'
# the callback treats a *drop* in accuracy as progress and restores the
# weights of the worst epoch.
es = EarlyStopping(monitor='val_accuracy', min_delta=0.001, patience=5,
                   verbose=1, mode='max', baseline=None, restore_best_weights=True)

# Labels are one-hot encoded to match the 2-unit softmax output. num_classes
# is pinned so the encoding stays 2 columns wide even if a split happens to
# contain a single class.
clf.fit(X_train,
        to_categorical(y_train, num_classes=2),
        validation_data=(X_val, to_categorical(y_val, num_classes=2)),
        epochs=100,
        batch_size=128,
        class_weight=class_weight,
        callbacks=[es]
        )