In [None]:
import matplotlib.pyplot as plt
import os
import seaborn as sns
import numpy as np
import tensorflow as tf
from sklearn.metrics import confusion_matrix
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.preprocessing import image_dataset_from_directory
from tensorflow.keras.models import Sequential
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from tensorflow.keras.models import save_model

In [None]:
# Class balance of the training split: count the directory entries under each
# class folder and show them side by side as a bar chart.
colors = ['#808080', '#FFA500']

normal_image_count = len(os.listdir('chest_xray/train/NORMAL/'))
pneumonia_image_count = len(os.listdir('chest_xray/train/PNEUMONIA/'))

data = dict(
    Category=['Normal', 'Pneumonia'],
    Count=[normal_image_count, pneumonia_image_count],
)

sns.set_style('darkgrid')
# `hue` repeats the x variable so that each bar picks up its own palette colour.
sns.barplot(x='Category', y='Count', hue='Category', palette=colors, data=data)

In [None]:
# Loader options shared by the training and validation splits: labels are
# inferred from the sub-directory names, images are resized to 128x128 and
# delivered in batches of 32.
_loader_options = dict(
    labels='inferred',
    seed=123,
    batch_size=32,
    image_size=[128,128],
)

# Training data is shuffled by the loader.
ds_train_ = image_dataset_from_directory('chest_xray/train', shuffle=True, **_loader_options)

# Validation data is read in a fixed order.
ds_valid_ = image_dataset_from_directory('chest_xray/val', shuffle=False, **_loader_options)

In [None]:
# Class names must be captured here: later cells replace `ds_train_` with a
# cached/prefetched pipeline object and still rely on `class_names`.
class_names = ds_train_.class_names

# Show the first nine images of one training batch in a 3x3 grid, each titled
# with its class name.
plt.figure(figsize=(10, 10))
for batch_images, batch_labels in ds_train_.take(1):
    for slot in range(9):
        pixels = batch_images[slot].numpy().astype('uint8')
        caption = class_names[batch_labels[slot]]
        plt.subplot(3, 3, slot + 1)
        plt.imshow(pixels)
        plt.title(caption)
        plt.axis('off')

In [None]:
# Let tf.data choose the prefetch buffer size at runtime.
AUTOTUNE = tf.data.AUTOTUNE

# Training pipeline: cache the loaded batches, reshuffle them through a
# 100-element buffer (elements are whole batches, since the loader already
# batched the data), and prefetch.
ds_train_ = (
    ds_train_
    .cache()
    .shuffle(100)
    .prefetch(buffer_size=AUTOTUNE)
)

# Validation pipeline: cache and prefetch only, order is left untouched.
ds_valid_ = (
    ds_valid_
    .cache()
    .prefetch(buffer_size=AUTOTUNE)
)

In [None]:
# Random augmentation stack, used as a layer inside the model.
# Applied in order: flip, rotate, zoom, translate, contrast jitter.
data_augmentation = tf.keras.Sequential([
    layers.RandomFlip(mode="horizontal_and_vertical"),
    layers.RandomRotation(factor=0.2),
    layers.RandomZoom(height_factor=0.1),
    layers.RandomTranslation(
        height_factor=(-0.2, 0.2),
        width_factor=(-0.2, 0.2),
        interpolation="nearest",
    ),
    layers.RandomContrast(factor=0.1),
])

In [None]:
num_classes = len(class_names)

# Small CNN: rescale pixels to [0, 1], augment, then three Conv/MaxPool stages
# with 32, 64 and 128 filters, followed by a dense head that ends in a single
# sigmoid unit (one score per image).
model = Sequential()
model.add(layers.Rescaling(1./255, input_shape=(128,128,3)))
model.add(data_augmentation)

for n_filters in (32, 64, 128):
    model.add(layers.Conv2D(n_filters, 3, padding='same', activation='relu'))
    model.add(layers.MaxPooling2D())

model.add(layers.Flatten())
model.add(layers.Dense(128, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))

In [None]:
# Binary classification: a single sigmoid output trained with binary cross-entropy.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Stop once validation loss has not improved for 5 consecutive epochs.
# restore_best_weights=True rolls the model back to the epoch with the lowest
# val_loss; without it the model keeps the weights of the last epoch run, i.e.
# up to `patience` epochs past the best one, and those are the weights every
# later evaluation cell would measure.
early_stop = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    mode='min',
    verbose=1,
    patience=5,
    restore_best_weights=True,
)

In [None]:
# Print the layer-by-layer architecture as a sanity check before training.
model.summary()

In [None]:
# Upper bound on training length; the early-stopping callback may end the run sooner.
epochs = 20

# Train on the training pipeline and validate on the validation pipeline after
# every epoch. `history` keeps the per-epoch metrics used for the curves below.
history = model.fit(
    x=ds_train_,
    epochs=epochs,
    validation_data=ds_valid_,
    callbacks=[early_stop],
)

In [None]:
# Per-epoch metrics recorded by model.fit.
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']

loss = history.history['loss']
val_loss = history.history['val_loss']

# Take the x-axis from the number of epochs actually recorded.
# early_stop.stopped_epoch stays 0 when early stopping never triggers, so
# range(stopped_epoch + 1) would have length 1 while the histories hold one
# entry per epoch run, and plt.plot would fail with a shape mismatch.
epochs_range = range(len(acc))

plt.figure(figsize=(8, 8))

# Left panel: accuracy.
plt.subplot(1, 2, 1)
plt.plot(epochs_range, acc, label='Training Accuracy')
plt.plot(epochs_range, val_acc, label='Validation Accuracy')
plt.xlabel('Epoch')
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy')

# Right panel: loss.
plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss, label='Training Loss')
plt.plot(epochs_range, val_loss, label='Validation Loss')
plt.xlabel('Epoch')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')
plt.show()

In [None]:
conf_matrix = confusion_matrix(true_labels, predicted_labels)

plt.figure(figsize=(8, 6))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Oranges',
            xticklabels=class_names, yticklabels=class_names)
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Confusion Matrix')
plt.show()

In [None]:
img = tf.keras.utils.load_img('chest_xray/test/NORMAL/IM-0010-0001.jpeg', target_size=[128,128])
img_array=tf.keras.utils.img_to_array(img)
img_array=tf.expand_dims(img_array, 0)

predictions = model.predict(img_array)

print("This image most likely belongs to {} with a {:.2f} precent confidence.".format(class_names[0 if predictions[0][0] <= 0.5 else 1], max(predictions[0][0], 1 - predictions[0][0]) * 100 ))

In [None]:
ds_test = image_dataset_from_directory(
    'chest_xray/test',
    labels='inferred',
    seed=123,
    batch_size=32,
    image_size=[128,128],
    shuffle=False,
)

predictions = model.predict(ds_test)

true_labels = np.concatenate([y for x, y in ds_test], axis=0)
predicted_labels = [0 if szam <= 0.5 else 1 for szam in predictions]

accuracy = accuracy_score(true_labels, predicted_labels)

print(f'Accuracy score: {accuracy * 100:.2f}%')

In [None]:
# Precision on the test set: of the images predicted as label 1, the fraction
# whose true label is 1.
precision = precision_score(y_true=true_labels, y_pred=predicted_labels)
print(f'Precision score: {precision * 100:.2f}%')

In [None]:
# Recall on the test set: of the images whose true label is 1, the fraction
# the model predicted as 1.
recall = recall_score(y_true=true_labels, y_pred=predicted_labels)
print(f'Recall score: {recall * 100:.2f}%')

In [None]:
# F1 on the test set: harmonic mean of precision and recall for label 1.
f1 = f1_score(y_true=true_labels, y_pred=predicted_labels)
print(f'F1 score: {f1 * 100:.2f}%')

In [None]:
# ROC-AUC measures how well the scores rank positives above negatives, so it
# needs the continuous sigmoid outputs. Passing thresholded 0/1 labels reduces
# the ROC curve to a single operating point and misreports the AUC.
# `predictions` must be the test-set model output, in the same order as `true_labels`.
roc_auc = roc_auc_score(true_labels, np.ravel(predictions))
print(f'ROC-AUC score: {roc_auc * 100:.2f}%')