In [None]:
library(keras)
library(tensorflow)

library(OpenImageR)
library(imager)
library(EBImage)

library(tidyverse)
library(rsample)
library(readxl)
# Make the black-and-white ggplot2 theme the default for all plots that follow.
ggplot2::theme_set(theme_bw())

## 데이터 로드

In [None]:
# Read the metadata workbook of each class; every workbook keeps its records
# on the sheet named "Sheet1".
Covid_meta <- read_xlsx(
  path = "../input/covid19-radiography-dataset/COVID.metadata.xlsx",
  sheet = "Sheet1"
)
Lung_meta <- read_xlsx(
  path = "../input/covid19-radiography-dataset/Lung_Opacity.metadata.xlsx",
  sheet = "Sheet1"
)
Normal_meta <- read_xlsx(
  path = "../input/covid19-radiography-dataset/Normal.metadata.xlsx",
  sheet = "Sheet1"
)
Viral_meta <- read_xlsx(
  path = "../input/covid19-radiography-dataset/Viral Pneumonia.metadata.xlsx",
  sheet = "Sheet1"
)

In [None]:
# Collect the full path of every image file, one character vector per class.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned.
Covid_img <- list.files(
  "../input/covid19-radiography-dataset/COVID/images",
  full.names = TRUE
)
Lung_img <- list.files(
  "../input/covid19-radiography-dataset/Lung_Opacity/images",
  full.names = TRUE
)
Normal_img <- list.files(
  "../input/covid19-radiography-dataset/Normal/images",
  full.names = TRUE
)
Viral_img <- list.files(
  "../input/covid19-radiography-dataset/Viral Pneumonia/images",
  full.names = TRUE
)

In [None]:
# Class labels; they are also the names of the image sub-directories under
# the dataset root.
classes <- c("COVID", "Lung_Opacity", "Normal", "Viral Pneumonia")

# Tag every metadata row with its class label (a length-1 value is recycled
# by mutate()) and sort each table by file name.
Covid_meta <- Covid_meta %>%
  mutate(class = classes[1]) %>%
  arrange(`FILE NAME`)
Lung_meta <- Lung_meta %>%
  mutate(class = classes[2]) %>%
  arrange(`FILE NAME`)
Normal_meta <- Normal_meta %>%
  mutate(class = classes[3]) %>%
  arrange(`FILE NAME`)
Viral_meta <- Viral_meta %>%
  mutate(class = classes[4]) %>%
  arrange(`FILE NAME`)

In [None]:
# Stack the four per-class metadata tables into one data frame and store the
# class label as a factor.
df <- rbind(Covid_meta, Lung_meta, Normal_meta, Viral_meta)
df$class <- as.factor(df$class)

## Ratio of Classes in Dataset

In [None]:
# Bar chart of the share of each class in the dataset, largest share first.
# Each bar's label is computed from that row's own `ratio`, so label and bar
# cannot get out of step (the labels used to come from a separate table that
# was matched to the bars only by position).
df %>%
  count(class) %>%
  mutate(ratio = n / sum(n)) %>%
  ggplot(aes(fct_reorder(class, -ratio), ratio, fill = class)) +
  geom_col() +
  geom_text(
    aes(label = paste(round(ratio, 3) * 100, "%")),
    vjust = 3,
    size = 5
  ) +
  ggtitle("Tag Ratio of Classes in Dataset") +
  xlab("Class") +
  ylab("Ratio") +
  theme(
    legend.position = "none",
    axis.text.x = element_text(size = 11, face = "bold"),
    axis.title.y = element_text(size = 13, face = "bold", angle = 0, vjust = 0.5),
    axis.title.x = element_text(size = 13, face = "bold"),
    plot.title = element_text(size = 20, hjust = 0.5, face = "bold.italic")
  )

## 데이터 확인
- 이미지는 299 X 299의 픽셀 크기를 가짐

In [None]:
# Show the array dimensions of the first COVID image after loading it.
Covid_img[1] %>%
  image_load() %>%
  image_to_array() %>%
  dim()

### Covid Images

In [None]:
# Draw four randomly chosen COVID radiographs in a 2 x 2 grid.
par(mfrow = c(2, 2))
for (i in sample(length(Covid_img), 4)) {
  img <- readImage(Covid_img[i]) %>%
    resize(w = 299, h = 299) %>%
    Image()
  plot(img)
  # Caption each panel with the name of the file actually drawn. The metadata
  # table and the file list are sorted independently, so metadata row `i` is
  # not guaranteed to describe file `i`.
  title(
    xlab = tools::file_path_sans_ext(basename(Covid_img[i])),
    cex.lab = 1.7,
    col.lab = "red"
  )
}

### Lung Opacity Images

In [None]:
# Draw four randomly chosen lung-opacity radiographs in a 2 x 2 grid.
par(mfrow = c(2, 2))
for (i in sample(length(Lung_img), 4)) {
  img <- readImage(Lung_img[i]) %>%
    resize(w = 299, h = 299) %>%
    Image()
  plot(img)
  # Caption each panel with the name of the file actually drawn. The metadata
  # table and the file list are sorted independently, so metadata row `i` is
  # not guaranteed to describe file `i`.
  title(
    xlab = tools::file_path_sans_ext(basename(Lung_img[i])),
    cex.lab = 1.7,
    col.lab = "red"
  )
}

### Normal Images

In [None]:
# Draw four randomly chosen normal radiographs in a 2 x 2 grid.
par(mfrow = c(2, 2))
for (i in sample(length(Normal_img), 4)) {
  img <- readImage(Normal_img[i]) %>%
    resize(w = 256, h = 256) %>%
    Image()
  plot(img)
  # Caption each panel with the name of the file actually drawn. The metadata
  # table and the file list are sorted independently, so metadata row `i` is
  # not guaranteed to describe file `i`.
  title(
    xlab = tools::file_path_sans_ext(basename(Normal_img[i])),
    cex.lab = 1.7,
    col.lab = "red"
  )
}

### Viral Pneumonia Images

In [None]:
# Draw four randomly chosen viral-pneumonia radiographs in a 2 x 2 grid.
par(mfrow = c(2, 2))
for (i in sample(length(Viral_img), 4)) {
  img <- readImage(Viral_img[i]) %>%
    resize(w = 256, h = 256) %>%
    Image()
  plot(img)
  # Caption each panel with the name of the file actually drawn. The metadata
  # table and the file list are sorted independently, so metadata row `i` is
  # not guaranteed to describe file `i`.
  title(
    xlab = tools::file_path_sans_ext(basename(Viral_img[i])),
    cex.lab = 1.7,
    col.lab = "red"
  )
}

## Create Generator

In [None]:
# Echo the class labels and how many there are.
classes
length(classes)

# Mini-batch size and dataset root directory used by the generators.
batch_size <- 32
path <- "../input/covid19-radiography-dataset"

In [None]:
# Generator settings without augmentation: scale pixel values by 1/255 and
# hold out 20% of the images as the validation subset.
# (The trailing comma after the last argument was removed; it passed a stray
# empty argument to the function.)
train_datagen1 <- image_data_generator(
  rescale = 1 / 255,
  validation_split = 0.2
)

In [None]:
# Training subset: shuffled grayscale 299 x 299 batches read from the class
# sub-directories of `path`, with one-hot ("categorical") labels.
train_generator1 <- flow_images_from_directory(
  directory = path,
  generator = train_datagen1,
  subset = "training",
  classes = classes,
  class_mode = "categorical",
  color_mode = "grayscale",
  target_size = c(299, 299),
  batch_size = batch_size,
  shuffle = TRUE
)

In [None]:
# Validation subset of the same directory and generator settings. Shuffling is
# off so that predictions stay in the same order as `val_generator1$classes`.
val_generator1 <- flow_images_from_directory(
  directory = path,
  generator = train_datagen1,
  subset = "validation",
  classes = classes,
  class_mode = "categorical",
  color_mode = "grayscale",
  target_size = c(299, 299),
  batch_size = batch_size,
  shuffle = FALSE
)

In [None]:
# Number of images in each subset; the outer parentheses also print the value.
(train_length1 <- length(train_generator1$classes))
(val_length <- length(val_generator1$classes))

## Model Creation & Fit

In [None]:
# Convolutional feature extractor for 299 x 299 single-channel images.
# Three blocks of (3 x 3 conv + ReLU -> batch norm -> 2 x 2 max-pool with
# stride 2) using 32, 64 and 128 filters, followed by 50% dropout.
model <- keras_model_sequential() %>%
  # Block 1: 32 filters; also declares the input shape.
  layer_conv_2d(
    filters = 32,
    kernel_size = c(3, 3),
    activation = "relu",
    padding = "same",
    input_shape = c(299, 299, 1)
  ) %>%
  layer_batch_normalization() %>%
  layer_max_pooling_2d(pool_size = c(2, 2), strides = 2, padding = "same") %>%
  # Block 2: 64 filters.
  layer_conv_2d(
    filters = 64,
    kernel_size = c(3, 3),
    activation = "relu",
    padding = "same"
  ) %>%
  layer_batch_normalization() %>%
  layer_max_pooling_2d(pool_size = c(2, 2), strides = 2, padding = "same") %>%
  # Block 3: 128 filters.
  layer_conv_2d(
    filters = 128,
    kernel_size = c(3, 3),
    activation = "relu",
    padding = "same"
  ) %>%
  layer_batch_normalization() %>%
  layer_max_pooling_2d(pool_size = c(2, 2), strides = 2, padding = "same") %>%
  layer_dropout(rate = 0.5)

In [None]:
# Classification head: flatten, two dense ReLU layers (256 and 128 units) with
# 50% dropout in between, then a 4-unit softmax output.
# NOTE(review): the result is not assigned back to `model`; this relies on
# layer_*() adding layers to a sequential model in place. Running this cell
# more than once would therefore presumably append the head again.
model %>%
  layer_flatten() %>%
  layer_dense(units = 256, activation = "relu") %>%
  layer_dropout(rate = 0.5) %>%
  layer_dense(units = 128, activation = "relu") %>%
  layer_dense(units = 4, activation = "softmax")

In [None]:
# Configure training: categorical cross-entropy loss, RMSprop optimizer with
# its default settings, and accuracy as the reported metric.
compile(
  model,
  loss = "categorical_crossentropy",
  optimizer = optimizer_rmsprop(),
  metrics = "accuracy"
)

In [None]:
# Print the summary of the assembled model.
summary(model)

In [None]:
# Callbacks shared by the training runs below.
# Multiply the learning rate by 0.05 when the validation loss plateaus.
reduce_lr <- callback_reduce_lr_on_plateau(
  monitor = "val_loss",
  factor = 0.05
)
# Stop after 10 epochs without improvement and restore the best weights.
early_stop <- callback_early_stopping(
  patience = 10,
  restore_best_weights = TRUE
)
# Write the model to "checkpoints.h5", keeping only the best one seen.
checkpoint <- callback_model_checkpoint(
  filepath = "checkpoints.h5",
  save_best_only = TRUE
)

In [None]:
# Train on the non-augmented training subset for up to 100 epochs, validating
# on `val_generator1` and applying the callbacks defined earlier.
# `batch_size` is deliberately not passed to fit(): the generators already
# produce batches of `batch_size` images, and Keras documents that the
# argument should not be given for generator input.
history1 <- model %>% fit(
  train_generator1,
  validation_data = val_generator1,
  callbacks = list(reduce_lr, early_stop, checkpoint),
  epochs = 100,
  verbose = 0 # silent; same effect as the former `verbose = FALSE`
)

## Model Evaluation

In [None]:
# Display the training history returned by the first fit.
history1

In [None]:
# Plot the training history of the first fit.
plot(history1)

In [None]:
# Evaluate the compiled loss and metric on the validation subset.
evaluate(model, val_generator1)

In [None]:
# Predicted class for every validation image: index of the largest softmax
# output in each row.
pred1 <- model %>%
  predict(val_generator1) %>%
  ramify::argmax()
# Shift the 1-based argmax to 0-based indices, matching the values in
# `val_generator1$classes`. The levels are listed explicitly so that a class
# the model never predicts still gets its own level; with as.factor() such a
# class was dropped and a later positional relabelling with `classes` would
# silently attach the wrong names.
pred1 <- factor(pred1 - 1, levels = seq_along(classes) - 1)

In [None]:
# True class index of every validation image as a factor. Levels are fixed to
# 0 .. length(classes) - 1 so that the positional relabelling below stays
# correct even if some class is absent from the subset.
truth <- factor(val_generator1$classes, levels = seq_along(classes) - 1)
# Replace the numeric levels by class names; the parentheses print the result.
(levels(truth) <- classes)
# Positional relabelling: assumes the levels of `pred1` are in the same order
# as `classes`.
(levels(pred1) <- classes)

In [None]:
# Confusion matrix and derived statistics of predictions vs. true labels.
caret::confusionMatrix(data = pred1, reference = truth)

In [None]:
# Heat map of the confusion table: true class on the x axis, predicted class
# on the y axis, cell count as both fill colour and text.
# as_tibble() replaces the deprecated as.tibble().
table(truth, pred1) %>%
  as_tibble() %>%
  ggplot(aes(truth, pred1, fill = n)) +
  geom_tile() +
  geom_text(aes(label = n), size = 7) +
  theme(
    legend.position = "none",
    axis.title.x = element_text(size = 15, face = "bold"),
    axis.title.y = element_text(
      size = 15, face = "bold",
      angle = 0, vjust = 0.5
    ),
    axis.text = element_text(size = 13, face = "bold")
  ) +
  scale_fill_distiller(palette = "Oranges", direction = 1)

### Image Augmentation

In [None]:
# Augmentation settings used only for the visual demo below: rescaling plus
# random rotation, shifts, shear, zoom and horizontal flips.
sample_gen <- image_data_generator(
  rescale = 1 / 255,
  # geometric transformations
  rotation_range = 40,
  shear_range = 0.2,
  zoom_range = 0.2,
  width_shift_range = 0.2,
  height_shift_range = 0.2,
  horizontal_flip = TRUE,
  # how pixels uncovered by a transformation are filled
  fill_mode = "nearest"
)

In [None]:
# Load the first COVID image as a batch of one RGB image (1 x 299 x 299 x 3).
# This has to happen before the iterator is built: the iterator used to be
# created from an undefined `sample_img`, while the image was only loaded
# afterwards under the name `sample_img1`.
# `target_size` guarantees the size that the reshape below expects.
sample_img1 <- image_load(Covid_img[1], target_size = c(299, 299)) %>%
  image_to_array() %>%
  array_reshape(c(1, 299, 299, 3))

# Iterator that yields randomly augmented copies of that single image.
sample_aug <- flow_images_from_data(
  sample_img1,
  generator = sample_gen,
  batch_size = 1
)

In [None]:
# Draw four augmented variants of the sample image in a 2 x 2 grid.
par(mfrow = c(2, 2), pty = "s", mar = c(1, 0, 1, 0))
for (i in seq_len(4)) {
  aug_img <- generator_next(sample_aug)
  plot(as.raster(aug_img[1, , , ]))
}

### Fit Augmented Images

In [None]:
# Generator settings with augmentation: rescaling, random rotation, shifts,
# shear, zoom and horizontal flips, plus a 20% validation hold-out.
train_datagen2 <- image_data_generator(
  rescale = 1 / 255,
  validation_split = 0.2,
  # geometric transformations
  rotation_range = 40,
  shear_range = 0.2,
  zoom_range = 0.2,
  width_shift_range = 0.2,
  height_shift_range = 0.2,
  horizontal_flip = TRUE,
  # how pixels uncovered by a transformation are filled
  fill_mode = "nearest"
)

In [None]:
# Augmented training subset: shuffled grayscale 299 x 299 batches read from
# the class sub-directories of `path`, with one-hot ("categorical") labels.
train_generator2 <- flow_images_from_directory(
  directory = path,
  generator = train_datagen2,
  subset = "training",
  classes = classes,
  class_mode = "categorical",
  color_mode = "grayscale",
  target_size = c(299, 299),
  batch_size = batch_size,
  shuffle = TRUE
)

In [None]:
# Size of the augmented training subset (printed by the outer parentheses),
# followed by the validation subset size computed earlier.
(train_length2 <- length(train_generator2$classes))
val_length

In [None]:
# Train on the augmented training subset for up to 100 epochs, validating on
# the non-augmented `val_generator1` with the same callbacks as before.
# `batch_size` is deliberately not passed to fit(): the generators already
# produce batches of `batch_size` images, and Keras documents that the
# argument should not be given for generator input.
# NOTE(review): `model` is not re-created before this call, so this run
# presumably continues from the weights left by the previous fit — confirm
# that this is intended when comparing the two runs.
history2 <- model %>% fit(
  train_generator2,
  validation_data = val_generator1,
  callbacks = list(reduce_lr, early_stop, checkpoint),
  epochs = 100,
  verbose = 0 # silent; same effect as the former `verbose = FALSE`
)

In [None]:
# Display the training history of the augmented run, then plot it.
history2
plot(history2)

In [None]:
# Evaluate the compiled loss and metric on the validation subset again.
evaluate(model, val_generator1)

In [None]:
# Predicted class for every validation image after the augmented run: index
# of the largest softmax output in each row.
pred2 <- model %>%
  predict(val_generator1) %>%
  ramify::argmax()
# Shift the 1-based argmax to 0-based indices, matching the values in
# `val_generator1$classes`. The levels are listed explicitly so that a class
# the model never predicts still gets its own level; with as.factor() such a
# class was dropped and a later positional relabelling with `classes` would
# silently attach the wrong names.
pred2 <- factor(pred2 - 1, levels = seq_along(classes) - 1)

In [None]:
# Replace the numeric levels of `pred2` by the class names; the parentheses
# print the assigned value. The relabelling is positional, so it assumes the
# levels of `pred2` are in the same order as `classes`.
(levels(pred2) <- classes)

In [None]:
# Confusion matrix and derived statistics for the augmented run.
caret::confusionMatrix(data = pred2, reference = truth)

In [None]:
# Heat map of the confusion table for the augmented run: true class on the
# x axis, predicted class on the y axis, cell count as fill colour and text.
# as_tibble() replaces the deprecated as.tibble().
table(truth, pred2) %>%
  as_tibble() %>%
  ggplot(aes(truth, pred2, fill = n)) +
  geom_tile() +
  geom_text(aes(label = n), size = 7) +
  theme(
    legend.position = "none",
    axis.title.x = element_text(size = 15, face = "bold"),
    axis.title.y = element_text(
      size = 15, face = "bold",
      angle = 0, vjust = 0.5
    ),
    axis.text = element_text(size = 13, face = "bold")
  ) +
  scale_fill_distiller(palette = "Greens", direction = 1)