In [None]:
import os
import cv2
import imageio
import pandas as pd
import numpy as np
from sklearn.utils import shuffle
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
import tensorflow
from tensorflow.keras.models import Sequential
# from keras.layers.normalization import BatchNormalization
from tensorflow.keras.layers import Dense, Dropout, Conv2D, MaxPooling2D, Flatten, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import categorical_crossentropy
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.metrics import binary_accuracy
from tensorflow.keras.layers import Activation
import shutil
import matplotlib.pyplot as plt
import plotly.offline as py
import plotly.figure_factory as ff
from keras.applications.vgg16 import VGG16
from keras import layers
import tensorflow as tf
%matplotlib inline

In [None]:
import os
# Summarise what is mounted under /kaggle/input.
# The joined path used to be computed and thrown away, so the walk had no
# visible effect. One line per directory keeps the output short even though
# the image folders hold thousands of files.
for dirname, _, filenames in os.walk('/kaggle/input'):
    if filenames:
        print(f'{dirname}: {len(filenames)} files')

In [None]:
# Metadata sheet shipped with the dataset for the COVID class.
covid_metadata_file = '../input/covid19-radiography-database/COVID-19_Radiography_Dataset/COVID.metadata.xlsx'
covid = pd.read_excel(io=covid_metadata_file)

# Preview the first rows as the cell output.
covid.head(n=5)

In [None]:
# Metadata sheet shipped with the dataset for the Normal class.
normal_metadata_file = '../input/covid19-radiography-database/COVID-19_Radiography_Dataset/Normal.metadata.xlsx'
normal = pd.read_excel(io=normal_metadata_file)

# Preview the first rows as the cell output.
normal.head(n=5)

In [None]:
# Number of rows drawn from each class when the balanced data set is built.
SAMPLE_SIZE = 3_616

In [None]:
# Attach the numeric class id to each metadata frame: 0 = covid, 1 = normal.
for frame, class_id in ((covid, 0), (normal, 1)):
    frame['label'] = class_id

In [None]:
# Keep only the image name and its class id; the other metadata columns are not used.
kept_columns = ['FILE NAME', 'label']
covid, normal = covid[kept_columns], normal[kept_columns]

In [None]:
# Draw the same number of rows from each class (seeded, so the draw is repeatable).
df_0, df_1 = (
    frame.sample(n=SAMPLE_SIZE, random_state=26) for frame in (covid, normal)
)

# Stack both samples into one frame with a fresh 0..n-1 index.
data = pd.concat([df_0, df_1], axis=0, ignore_index=True)

# Cell output: number of rows per label.
data['label'].value_counts()

In [None]:
# Shuffle the rows. The seed matches the one used for sampling and splitting,
# so a Restart & Run All yields the same row order every time.
data = shuffle(data, random_state=26)
data.head()

In [None]:
# 80% train / 20% hold-out, stratified so both classes keep their ratio.
df_train, df_holdout = train_test_split(
    data, test_size=0.20, random_state=26, stratify=data['label']
)
# Split the hold-out in half: one part for testing, one for validation.
df_test, df_val = train_test_split(
    df_holdout, test_size=0.50, random_state=26, stratify=df_holdout['label']
)

# Report the size of every split (shapes only, not the frames themselves).
print(df_train.shape)
print(df_val.shape)
print(df_test.shape)

In [None]:
# Root folder that will hold the train / val / test image copies.
# exist_ok=True makes the cell safe to re-run: os.mkdir raised FileExistsError
# as soon as the folder was already there.
base_dir = 'base_dir'
os.makedirs(base_dir, exist_ok=True)

# Folder for the training images.
train_dir = os.path.join(base_dir, 'train_dir')
os.makedirs(train_dir, exist_ok=True)

# Folder for the validation images.
val_dir = os.path.join(base_dir, 'val_dir')
os.makedirs(val_dir, exist_ok=True)

In [None]:
# Folder for the test images; exist_ok=True keeps the cell re-runnable.
test_dir = os.path.join(base_dir, 'test_dir')
os.makedirs(test_dir, exist_ok=True)

In [None]:
# One sub-folder per class inside train_dir. flow_from_directory later derives
# the class names from these folder names.
# exist_ok=True keeps the cell re-runnable (os.mkdir failed on existing folders).
train_covid = os.path.join(train_dir, 'covid')
os.makedirs(train_covid, exist_ok=True)
train_normal = os.path.join(train_dir, 'normal')
os.makedirs(train_normal, exist_ok=True)

# Same layout inside val_dir.
val_covid = os.path.join(val_dir, 'covid')
os.makedirs(val_covid, exist_ok=True)
val_normal = os.path.join(val_dir, 'normal')
os.makedirs(val_normal, exist_ok=True)

In [None]:
# One sub-folder per class inside test_dir; exist_ok=True keeps the cell re-runnable.
test_covid = os.path.join(test_dir, 'covid')
os.makedirs(test_covid, exist_ok=True)
test_normal = os.path.join(test_dir, 'normal')
os.makedirs(test_normal, exist_ok=True)

In [None]:
# Plain Python lists of the image names that belong to each split.
train_list, val_list, test_list = (
    split['FILE NAME'].tolist() for split in (df_train, df_val, df_test)
)

In [None]:
# Show how many test images there are plus a short preview, instead of
# dumping every file name into the cell output.
print(len(test_list), test_list[:5])

In [None]:
# Root of the Kaggle dataset; it holds one "<class>/images" folder per class.
DATASET_DIR = '../input/covid19-radiography-database/COVID-19_Radiography_Dataset'

# label id -> (destination sub-folder name, source folder inside DATASET_DIR)
CLASS_FOLDERS = {
    0: ('covid', 'COVID/images'),
    1: ('normal', 'Normal/images'),
}


def copy_split_images(image_names, split_dir):
    """Copy the images of one split into ``split_dir/<class>/``.

    Parameters
    ----------
    image_names : iterable of str
        Values of the 'FILE NAME' column, without file extension.
    split_dir : str
        Destination folder that already contains the 'covid' and 'normal'
        sub-folders.

    Raises
    ------
    KeyError
        If a name is missing from ``data`` or carries a label other than 0 / 1.
        The earlier pair of independent ``if`` statements silently reused the
        previous iteration's source and label in that case.
    """
    # Resolve every label once. This replaces a full DataFrame scan per image
    # and the int() conversion of a 2-D array, which NumPy deprecates.
    label_by_name = dict(zip(data['FILE NAME'], data['label']))

    for image in image_names:
        target = int(label_by_name[image])
        label, src_subdir = CLASS_FOLDERS[target]

        # add .png extension
        filename = image + '.png'
        if target == 1:
            # 'FILE NAME' values in the .xlsx begin with 'NORMAL' but the real
            # files begin with 'Normal'.
            filename = filename.capitalize()

        src = os.path.join(DATASET_DIR, src_subdir, filename)
        dst = os.path.join(split_dir, label, filename)

        # copy the image from the source to the destination
        shutil.copyfile(src, dst)


# Copy images to the train_dir and val_dir folders.
copy_split_images(train_list, train_dir)
copy_split_images(val_list, val_dir)

In [None]:
# Resolve every label once. This replaces a full DataFrame scan per image and
# the int() conversion of a 2-D array, which NumPy deprecates.
label_by_name = dict(zip(data['FILE NAME'], data['label']))
images_root = '../input/covid19-radiography-database/COVID-19_Radiography_Dataset'

# Copy images to the test_dir folder.
for image in test_list:

    # add .png extension
    filename = image + '.png'
    # get the label for a certain image
    target = int(label_by_name[image])

    # match the target with the folder's name and source path of the image
    if target == 0:
        label = 'covid'
        src = os.path.join(images_root, 'COVID/images', filename)
    elif target == 1:
        label = 'normal'
        # 'FILE NAME' values in the .xlsx begin with 'NORMAL' but the real
        # files begin with 'Normal'.
        filename = filename.capitalize()
        src = os.path.join(images_root, 'Normal/images', filename)
    else:
        # Fail loudly instead of reusing the previous iteration's src/label.
        raise ValueError(f'unexpected label {target!r} for image {image!r}')

    # destination path to image
    dst = os.path.join(test_dir, label, filename)

    # copy the image from the source to the destination
    shutil.copyfile(src, dst)

In [None]:
# Sanity check: number of files copied into each training class folder.
for class_name in ('covid', 'normal'):
    copied_files = os.listdir(f'base_dir/train_dir/{class_name}')
    print(len(copied_files))

In [None]:
# Folder of each split, as consumed by flow_from_directory.
train_path, val_path, test_path = (
    f"base_dir/{split}_dir" for split in ("train", "val", "test")
)

In [None]:
# Resolution every image is resized to, and the batch size shared by all generators.
IMAGE_HEIGHT = 256
IMAGE_WIDTH = 256
BATCH_SIZE = 32

# Validation and test images are only rescaled to [0, 1].
datagen = ImageDataGenerator(rescale=1.0/255)

# Training images are rescaled and lightly augmented.
# NOTE(review): rotation_range is given in degrees, so 0.2 is an almost
# invisible rotation - confirm this is the intended strength.
train_datagen = ImageDataGenerator(rescale=1./255,
        rotation_range=0.2,
        shear_range=0.2,
        horizontal_flip=True)

# flow_from_directory shuffles by default; shuffle=True is spelled out here so
# the intent is visible without toggling attributes afterwards.
train_gen = train_datagen.flow_from_directory(train_path,
                                        target_size=(IMAGE_HEIGHT, IMAGE_WIDTH),
                                        batch_size=BATCH_SIZE,
                                        class_mode='binary',
                                        shuffle=True)

val_gen = datagen.flow_from_directory(val_path,
                                        target_size=(IMAGE_HEIGHT, IMAGE_WIDTH),
                                        batch_size=BATCH_SIZE,
                                        class_mode='binary',
                                        shuffle=True)

# The test generator must NOT be shuffled: its predictions are later compared
# with test_gen.classes, which is always in directory order. Shuffling here
# pairs each prediction with the label of a different image.
test_gen = datagen.flow_from_directory(test_path,
                                        target_size=(IMAGE_HEIGHT, IMAGE_WIDTH),
                                        batch_size=BATCH_SIZE,
                                        class_mode='binary',
                                        shuffle=False)

In [None]:
# Integer class id of every test image, as stored on the iterator.
test_labels = test_gen.classes
print(test_labels)

In [None]:
# Shape of the image tensor in the first training batch.
first_batch = train_gen[0]
print(first_batch[0].shape)

In [None]:
# Display the test iterator object as the cell output.
test_gen

In [None]:
# Pre-trained VGG16 convolutional base without its classifier head, loaded
# from a local weights file. The input shape reuses the constants that size
# the generators' images, so the two cannot drift apart.
base_model = VGG16(
    weights='/kaggle/input/keras-pretrained-models/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5',
    include_top=False,
    input_shape=(IMAGE_HEIGHT, IMAGE_WIDTH, 3),
)

# Freeze the layers of the VGG16 base so only the new head is trained.
for layer in base_model.layers:
    layer.trainable = False

# Build the new classification head on top of the frozen base.
model = Sequential()
model.add(base_model)
model.add(Flatten())
model.add(Dense(1024))
model.add(Activation("relu"))
model.add(Dense(512))
model.add(Activation("relu"))
# Single sigmoid unit: one score in [0, 1] for the binary covid / normal task.
model.add(Dense(1))
model.add(Activation("sigmoid"))

# Print the layer-by-layer summary of the model.
model.summary()

In [None]:
# Hyper-parameters of the learning-rate schedule.
# NOTE(review): a decay_rate of 1 appears to leave the exponential schedule at
# its initial learning rate, and the Keras schedule counts decay_steps in
# optimizer steps rather than epochs - confirm both are intended.
learning_rate = 1e-5      # initial learning rate
decay_steps = 10          # interval over which one decay factor is applied
decay_rate = 1            # multiplicative decay factor

# Create the learning-rate scheduler and hand it to Adam.
lr_scheduler = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=learning_rate,
    decay_steps=decay_steps,
    decay_rate=decay_rate,
)
optimizer1 = tf.keras.optimizers.Adam(learning_rate=lr_scheduler)
model.compile(optimizer=optimizer1, loss='binary_crossentropy', metrics=['accuracy'])

# Whole batches per epoch; a trailing partial batch is left out by the floor division.
train_steps = train_gen.n // train_gen.batch_size
val_steps = val_gen.n // val_gen.batch_size

history = model.fit(
    train_gen,
    epochs=20,
    steps_per_epoch=train_steps,
    validation_data=val_gen,
    validation_steps=val_steps,
)

In [None]:
from matplotlib import pyplot  as plt
def plot_history_metric(history, metric, title):
    """Plot the training and validation curves of one metric.

    Parameters
    ----------
    history : object returned by ``model.fit``
        Its ``history`` dict must contain ``metric`` and ``'val_' + metric``.
    metric : str
        Key of the training metric, e.g. 'accuracy' or 'loss'.
    title : str
        Title of the figure.
    """
    fig, ax = plt.subplots()
    ax.plot(history.history[metric])
    ax.plot(history.history['val_' + metric])
    ax.set_title(title)
    ax.set_ylabel(metric)
    ax.set_xlabel('epoch')
    # The second curve is the validation split; it was labelled 'test' on the
    # accuracy plot only.
    ax.legend(['train', 'val'], loc='upper left')
    plt.show()


# summarize history for accuracy
plot_history_metric(history, 'accuracy', 'model accuracy')
# summarize history for loss
plot_history_metric(history, 'loss', 'model loss')

In [None]:
# Score the trained model on the test generator; the returned values are shown as the cell output.
model.evaluate(x=test_gen)

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix

In [None]:
def predict(y_hat):
    """Turn sigmoid scores into hard class ids using a 0.5 threshold.

    Parameters
    ----------
    y_hat : array-like of float
        Scores produced by the model.

    Returns
    -------
    numpy.ndarray of int
        1 where the score is >= 0.5, otherwise 0, with the shape of the input.
        The input is not modified.
    """
    return (np.asarray(y_hat) >= 0.5).astype(int)


# Predict in directory order so that row i of the predictions belongs to the
# same image as test_gen.classes[i]. Turning shuffling off and rebuilding the
# index array guarantees this even if the iterator was shuffled earlier.
test_gen.shuffle = False
test_gen.on_epoch_end()
Y_pred = model.predict(test_gen)
y_pred = predict(Y_pred).ravel()

# Ground-truth class ids (this name was never defined before it was used).
y_true = test_gen.classes

# Compute the confusion matrix
cm = confusion_matrix(y_true, y_pred)

# Class names ordered by class index, used as axis tick labels
class_names = test_gen.class_indices
class_labels = list(class_names.keys())

# Draw the confusion matrix as a heatmap. Passing the labels to heatmap puts
# them at the centre of each cell; ticks placed at 0..n-1 land on cell edges.
fig, ax = plt.subplots()
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=class_labels, yticklabels=class_labels, ax=ax)
ax.tick_params(axis='x', labelrotation=45)
ax.set_xlabel('Predicted labels')
ax.set_ylabel('True labels')
ax.set_title('Confusion Matrix')

# Show the confusion matrix
plt.show()

In [None]:
from sklearn.metrics import confusion_matrix, classification_report
# Display names for class ids 0 and 1, in that order.
labels = ['Covid', 'Normal']

# NOTE(review): test_gen.classes is in directory order; y_pred must come from
# an unshuffled pass over test_gen for the rows to line up - confirm.
report = classification_report(
    y_true=test_gen.classes,
    y_pred=y_pred,
    target_names=labels,
)
print(report)