# Aplicação de Aprendizagem Profunda na Detecção de Pneumonia


Alunos:

*   Lucas Van-Lume Lima (lvll)
*   Jorge Guilherme Luna de Vasconcelos Cabral (jglvc)

## Importação do dataset
Instalando Kaggle e o Pandas:

In [None]:
!pip install kaggle
!pip install pandas



In [None]:
from google.colab import files

Upload do arquivo que contém a chave da API do Kaggle:

In [None]:
# Bind the result instead of leaving the call as the cell's last expression:
# the notebook would otherwise display the returned mapping of file names to
# file contents, i.e. the Kaggle API key, in the cell output.
uploaded = files.upload()
# Show only the names so the user can confirm that kaggle.json arrived.
print(f"Uploaded files: {list(uploaded)}")

TypeError: 'NoneType' object is not subscriptable

Criação do diretório para a leitura da chave Kaggle:

In [None]:
!mkdir ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

Autenticação e download do dataset:

In [None]:
# Authenticate using your Kaggle API credentials
import zipfile

from kaggle.api.kaggle_api_extended import KaggleApi

# Folder that receives the downloaded archive and the extracted dataset.
DOWNLOAD_DIR = '/content'

api = KaggleApi()
api.authenticate()

# Download into DOWNLOAD_DIR explicitly: without `path` the archive lands in
# the current working directory, which is not guaranteed to be /content.
api.dataset_download_files('paultimothymooney/chest-xray-pneumonia', path=DOWNLOAD_DIR)

# The dataset is downloaded as a zip file; extract it to a specific folder.
with zipfile.ZipFile(f'{DOWNLOAD_DIR}/chest-xray-pneumonia.zip', 'r') as zip_ref:
    zip_ref.extractall(f'{DOWNLOAD_DIR}/dataset-folder')

Importação de bibliotecas:

In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from tensorflow import keras

Separação das pastas de treinamento, teste e validação:

In [None]:
# Absolute root of the extracted archive. It matches the
# '/content/dataset-folder' extraction target, so the split paths no longer
# depend on the current working directory (the former '../content/...' form did).
DATASET_ROOT = "/content/dataset-folder/chest_xray/chest_xray"

# One folder per split; the trailing slash is kept from the original values.
train_dir = f"{DATASET_ROOT}/train/"
test_dir = f"{DATASET_ROOT}/test/"
val_dir = f"{DATASET_ROOT}/val/"

## Visualização dos dados

In [None]:
def _count_entries(split_dir, class_name):
    """Return the number of directory entries in split_dir/class_name."""
    return len(os.listdir(os.path.join(split_dir, class_name)))


banner = "========================================"

# The training counts are kept in variables because the class-weight
# computation further down reads num_pneumonia and num_normal.
print(f"{banner}\nTrain set:")
num_pneumonia = _count_entries(train_dir, 'PNEUMONIA')
num_normal = _count_entries(train_dir, 'NORMAL')
print(f"PNEUMONIA={num_pneumonia}")
print(f"NORMAL={num_normal}")

# Test and validation counts are only reported, never stored.
for split_title, split_dir in (("Test set:", test_dir), ("Validation set:", val_dir)):
    print(f"{banner}\n{split_title}")
    for class_name in ('PNEUMONIA', 'NORMAL'):
        print(f"{class_name}={_count_entries(split_dir, class_name)}")

Imagens de raio-X com diagnóstico de pneumonia

In [None]:
# Folder with the pneumonia-labelled training images, derived from train_dir
# instead of repeating the hard-coded path.
pneumonia_dir = os.path.join(train_dir, 'PNEUMONIA')

# Keep regular files only and skip hidden entries (names starting with '.').
pneumonia = [
    f for f in os.listdir(pneumonia_dir)
    if os.path.isfile(os.path.join(pneumonia_dir, f)) and not f.startswith('.')
]

plt.figure(figsize=(20, 10))

# Show up to nine images in a 3x3 grid. Slicing (rather than indexing with
# range(9)) avoids an IndexError when the folder holds fewer than nine files;
# the list is already filtered, so no per-item file check is needed here.
for i, file_name in enumerate(pneumonia[:9]):
    plt.subplot(3, 3, i + 1)
    img = plt.imread(os.path.join(pneumonia_dir, file_name))
    plt.imshow(img, cmap='gray')
    plt.axis('off')

plt.tight_layout()

Imagens de raio-X saudáveis

In [None]:
# Folder with the normal-labelled training images, derived from train_dir
# instead of repeating the hard-coded path.
normal_dir = os.path.join(train_dir, 'NORMAL')

# Keep regular files only and filter out entries starting with '.'.
normal = [
    f for f in os.listdir(normal_dir)
    if os.path.isfile(os.path.join(normal_dir, f)) and not f.startswith('.')
]

plt.figure(figsize=(20, 10))

# Show up to nine images in a 3x3 grid; slicing avoids an IndexError when
# fewer than nine files are available.
for i, file_name in enumerate(normal[:9]):
    plt.subplot(3, 3, i + 1)
    img = plt.imread(os.path.join(normal_dir, file_name))
    plt.imshow(img, cmap='gray')
    plt.axis('off')

plt.tight_layout()

Informações sobre as imagens

In [None]:
# Take the sample from the already filtered `normal` listing: an unfiltered
# os.listdir(...)[0] could return a hidden, non-image entry.
normal_img = normal[0]

sample_img = plt.imread(os.path.join(normal_dir, normal_img))
plt.imshow(sample_img, cmap='gray')
plt.colorbar()
plt.title('Raw Chest X Ray Image')

# An image array is indexed (rows, columns): shape[0] is the height and
# shape[1] is the width. The message previously reported them swapped.
print(f"The dimensions of the image are {sample_img.shape[1]} pixels width and {sample_img.shape[0]} pixels height, one single color channel.\n")
print(f"The maximum pixel value is {sample_img.max():.4f} and the minimum is {sample_img.min():.4f}.\n")
print(f"The mean value of the pixels is {sample_img.mean():.4f} and the standard deviation is {sample_img.std():.4f}.\n")

Distribuição dos valores dos pixels

In [None]:
# Histogram of all pixel intensities of the sample image; the legend entry
# carries the mean and standard deviation of the same pixels.
pixel_values = sample_img.ravel()
legend_text = f"Pixel Mean {np.mean(sample_img):.4f} & Standard Deviation {np.std(sample_img):.4f}"

sns.histplot(pixel_values, bins=50, label=legend_text, kde=False)
plt.legend(loc='upper center')
plt.title('Distribution of Pixel Intensities in the Image')
plt.xlabel('Pixel Intensity')
plt.ylabel('Pixels in Image')
plt.show()

## Tratamento dos dados

Configurando o gerador de imagens

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Random geometric augmentation (rotation, horizontal shift, shear, zoom)
# plus per-sample centering and std normalisation.
augmentation_options = {
    "rotation_range": 20,
    "width_shift_range": 0.1,
    "shear_range": 0.1,
    "zoom_range": 0.1,
    "samplewise_center": True,
    "samplewise_std_normalization": True,
}
image_generator = ImageDataGenerator(**augmentation_options)

Gerador de dados para treino, validação e teste

In [None]:
# Validation and test images must not be randomly augmented, otherwise the
# reported metrics change from run to run. They get their own generator that
# applies only the per-sample normalisation also used for training.
eval_image_generator = ImageDataGenerator(
    samplewise_center=True,
    samplewise_std_normalization=True
)

# Spatial size every image is resized to; the models below are built for
# 180x180 inputs.
TARGET_SIZE = (180, 180)

print("===============\nTrain:")
# Training batches: augmented, shuffled, 8 images each.
train = image_generator.flow_from_directory(train_dir,
                                            batch_size=8,
                                            shuffle=True,
                                            class_mode='binary',
                                            target_size=TARGET_SIZE)

print("===============\nValidation:")
# One image per batch and a fixed order, without augmentation.
validation = eval_image_generator.flow_from_directory(val_dir,
                                                      batch_size=1,
                                                      shuffle=False,
                                                      class_mode='binary',
                                                      target_size=TARGET_SIZE)

print("===============\nTest:")
# shuffle=False keeps the predictions aligned with test.classes, which the
# confusion-matrix and classification-report cells rely on.
test = eval_image_generator.flow_from_directory(test_dir,
                                                batch_size=1,
                                                shuffle=False,
                                                class_mode='binary',
                                                target_size=TARGET_SIZE)

Informações das imagens tratadas

In [None]:
sns.set_style('white')
# Index the iterator directly instead of calling the dunder method; the
# result is one batch of images together with its labels.
generated_image, label = train[0]
# Display the first image of the batch.
# NOTE(review): the batch is standardised per sample, so values presumably
# fall outside the range imshow expects for colour data — confirm whether
# the image should be rescaled for display.
plt.imshow(generated_image[0], cmap='gray')
plt.colorbar()
# This image went through the generator, so it is no longer the raw file.
plt.title('Preprocessed Chest X Ray Image')

# Axis 0 is the batch; axes 1 and 2 are reported as height and width
# (assumes the usual rows-first layout — TODO confirm). The channel count is
# read from the last axis instead of being hard-coded as a single channel.
height, width = generated_image.shape[1], generated_image.shape[2]
channels = generated_image.shape[-1]
print(f"The dimensions of the image are {width} pixels width and {height} pixels height, {channels} color channel(s).\n")
# The statistics below are computed over the whole batch.
print(f"The maximum pixel value is {generated_image.max():.4f} and the minimum is {generated_image.min():.4f}\n")
print(f"The mean value of the pixels is {generated_image.mean():.4f} and the standard deviation is {generated_image.std():.4f}\n")

Distribuição dos valores dos pixels das imagens tratadas

In [None]:
# Histogram of every pixel value in the generated batch; the legend entry
# carries the mean and standard deviation of the same values.
batch_pixels = generated_image.ravel()
batch_legend = f"Pixel Mean {np.mean(generated_image):.4f} & Standard Deviation {np.std(generated_image):.4f}"

sns.histplot(batch_pixels, label=batch_legend, kde=False)
plt.legend(loc='upper center')
plt.title('Distribution of Pixel Intensities in the Image')
plt.xlabel('Pixel Intensity')
plt.ylabel('# Pixels in Image')
plt.show()

## Tratamento dos dados 2

In [None]:
IMAGE_SIZE = (180, 180)

# Options shared by the three dataset loaders.
shared_options = dict(seed=123, image_size=IMAGE_SIZE, batch_size=32)
load_images = tf.keras.utils.image_dataset_from_directory

# Training subset of train_dir, with 10% of it reserved for validation.
# NOTE(review): the reserved 10% is not loaded anywhere in this cell;
# validation_ds is read from val_dir instead — confirm this is intended.
train_ds = load_images(
    train_dir,
    validation_split=0.1,
    subset='training',
    **shared_options,
)

# Validation data comes from its own directory.
validation_ds = load_images(val_dir, **shared_options)

# Test data comes from its own directory.
test_ds = load_images(test_dir, **shared_options)

# Report the size of each dataset in batches.
for split_label, dataset in (
    ("Train", train_ds),
    ("Validation", validation_ds),
    ("Test", test_ds),
):
    print(f"{split_label} Dataset Size: {len(dataset)} batches")

## CNN Model

### Inception V3

In [None]:
from keras.applications import InceptionV3
from keras.layers import Dense, GlobalAveragePooling2D
from keras.models import Sequential
from keras.layers import Dense, Conv2D, MaxPool2D, Dropout, Flatten, BatchNormalization

In [None]:
# InceptionV3 backbone with ImageNet weights and no top layers
# (include_top=False), built for 180x180 three-channel inputs.
inception_base_model = InceptionV3(
    weights='imagenet',
    include_top=False,
    input_shape=(180, 180, 3),
)

In [None]:
# Class weights: each class is weighted by the share of the *other* class in
# the training set, so the less frequent class gets the larger weight.
# NOTE(review): presumably index 0 is NORMAL and index 1 is PNEUMONIA —
# verify against train.class_indices.
total_train_images = num_normal + num_pneumonia

weight_for_0 = num_pneumonia / total_train_images
weight_for_1 = num_normal / total_train_images

class_weight = dict(enumerate((weight_for_0, weight_for_1)))

for class_index, weight in class_weight.items():
    print(f"Weight for class {class_index}: {weight:.2f}")

In [None]:
# Classification head: three Dense -> BatchNormalization -> Dropout stages
# of shrinking width, followed by one sigmoid unit for the binary output.
inception_head = []
for hidden_units, dropout_rate in ((512, 0.6), (128, 0.4), (64, 0.3)):
    inception_head += [
        Dense(hidden_units, activation="relu"),
        BatchNormalization(),
        Dropout(dropout_rate),
    ]

inception_model = tf.keras.Sequential([
    inception_base_model,
    GlobalAveragePooling2D(),
    *inception_head,
    Dense(1, activation="sigmoid"),
])

# Adam optimiser; accuracy, precision and recall are tracked during training.
opt = tf.keras.optimizers.Adam(learning_rate=0.001)
precision_metric = tf.keras.metrics.Precision(name='precision')
recall_metric = tf.keras.metrics.Recall(name='recall')
METRICS = ['accuracy', precision_metric, recall_metric]

inception_model.compile(
    loss='binary_crossentropy',
    optimizer=opt,
    metrics=METRICS,
)

In [None]:
# 10 epochs of 100 training steps each, with 16 validation steps per epoch
# and the class weights computed earlier.
inception_fit_options = dict(
    epochs=10,
    validation_data=validation,
    class_weight=class_weight,
    steps_per_epoch=100,
    validation_steps=16,
)
r = inception_model.fit(train, **inception_fit_options)

In [None]:
# Training vs. validation curves, one panel per metric, read from the fit
# history `r`.
plt.figure(figsize=(12, 8))

for panel, (history_key, pretty_name) in enumerate(
    (("loss", "Loss"), ("accuracy", "Accuracy")), start=1
):
    plt.subplot(2, 2, panel)
    plt.plot(r.history[history_key], label=pretty_name)
    plt.plot(r.history[f"val_{history_key}"], label=f"Val_{pretty_name}")
    plt.legend()
    plt.title(f"{pretty_name} Evolution")

In [None]:
# Evaluate on the test iterator first, then on the training iterator.
# The value at index 1 of the result is reported as the accuracy.
for split_name, split_data in (("Test", test), ("Train", train)):
    evaluation = inception_model.evaluate(split_data)
    print(f"{split_name} Accuracy: {evaluation[1] * 100:.2f}%")

In [None]:
from sklearn.metrics import confusion_matrix

# Number of batches needed to cover the test iterator once. len(test)
# replaces np.math.ceil(test.samples / test.batch_size): the np.math alias
# no longer exists in NumPy 2.0, and the other prediction cells already use
# steps=len(test).
test_steps_per_epoch = len(test)
predictions = inception_model.predict(test, steps=test_steps_per_epoch)

# Threshold the sigmoid outputs at 0.5 and flatten them to a 1-D label
# vector, the layout confusion_matrix works with.
predicted_classes = np.where(predictions > 0.5, 1, 0).ravel()

# Ground-truth labels as stored by the iterator.
true_classes = test.classes

# Class names used as tick labels on both axes.
class_labels = list(test.class_indices.keys())

conf_matrix = confusion_matrix(true_classes, predicted_classes)

plt.figure(figsize=(8, 6))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', xticklabels=class_labels, yticklabels=class_labels)
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Confusion Matrix')
plt.show()

### VGG16


In [None]:
from keras.models import Sequential
from keras.layers import GlobalAveragePooling2D
from keras.applications import VGG16
# from tensorflow.keras.regularizers import l2

In [None]:
# VGG16 backbone with ImageNet weights and no top layers
# (include_top=False), built for 180x180 three-channel inputs.
vgg16_base_model = VGG16(
    weights='imagenet',
    include_top=False,
    input_shape=(180, 180, 3),
)

In [None]:
 # Classification head: three Dense -> BatchNormalization -> Dropout(0.6)
 # stages of shrinking width, followed by one sigmoid output unit.
 # (The cell's original one-space indentation is kept.)
 vgg16_head = []
 for hidden_units in (512, 128, 64):
     vgg16_head += [
         Dense(hidden_units, activation="relu"),
         BatchNormalization(),
         Dropout(0.6),
     ]

 vgg16_model = tf.keras.Sequential([
     vgg16_base_model,
     GlobalAveragePooling2D(),
     *vgg16_head,
     Dense(1, activation="sigmoid"),
 ])

In [None]:
# Adam optimiser with binary cross-entropy loss; accuracy, precision and
# recall are tracked during training and evaluation.
opt = tf.keras.optimizers.Adam(learning_rate=0.001)

precision_metric = tf.keras.metrics.Precision(name='precision')
recall_metric = tf.keras.metrics.Recall(name='recall')
METRICS = ['accuracy', precision_metric, recall_metric]

vgg16_model.compile(
    loss='binary_crossentropy',
    optimizer=opt,
    metrics=METRICS,
)

In [None]:
# 10 epochs of 100 training steps each, with 16 validation steps per epoch
# and the class weights computed earlier.
vgg16_fit_options = {
    "epochs": 10,
    "validation_data": validation,
    "class_weight": class_weight,
    "steps_per_epoch": 100,
    "validation_steps": 16,
}
r = vgg16_model.fit(train, **vgg16_fit_options)

In [None]:
# Training vs. validation curves, one panel per metric, read from the fit
# history `r`.
plt.figure(figsize=(12, 8))

curves = {"loss": "Loss", "accuracy": "Accuracy"}
for panel, (history_key, pretty_name) in enumerate(curves.items(), start=1):
    plt.subplot(2, 2, panel)
    plt.plot(r.history[history_key], label=pretty_name)
    plt.plot(r.history["val_" + history_key], label="Val_" + pretty_name)
    plt.legend()
    plt.title(pretty_name + " Evolution")

In [None]:
# Evaluate on the training iterator first, then on the test iterator.
# The value at index 1 of the result is reported as the accuracy.
for split_name, split_data in (("Train", train), ("Test", test)):
    evaluation = vgg16_model.evaluate(split_data)
    print(f"{split_name} Accuracy: {evaluation[1] * 100:.2f}%")



In [None]:
from sklearn.metrics import confusion_matrix, classification_report

# Predict over every batch of the test iterator, threshold the outputs at
# 0.5 and display the per-class report as a table.
predicted_vals = vgg16_model.predict(test, steps=len(test))
vgg16_report = classification_report(
    test.classes,
    predicted_vals > 0.5,
    output_dict=True,
)
pd.DataFrame(vgg16_report)

### Resnet


In [None]:
from keras.applications import ResNet50

# ResNet50 backbone with ImageNet weights and no top layers
# (include_top=False), built for 180x180 three-channel inputs.
resnet_base_model = ResNet50(
    weights='imagenet',
    include_top=False,
    input_shape=(180, 180, 3),
)

In [None]:
    # Classification head: three Dense -> BatchNormalization -> Dropout
    # stages of shrinking width, followed by one sigmoid output unit.
    # (The cell's original four-space indentation is kept.)
    resnet_head = []
    for hidden_units, dropout_rate in ((512, 0.6), (128, 0.4), (64, 0.3)):
        resnet_head += [
            Dense(hidden_units, activation="relu"),
            BatchNormalization(),
            Dropout(dropout_rate),
        ]

    resnet_model = tf.keras.Sequential([
        resnet_base_model,
        GlobalAveragePooling2D(),
        *resnet_head,
        Dense(1, activation="sigmoid"),
    ])

    # Adam optimiser; accuracy, precision and recall are tracked.
    opt = tf.keras.optimizers.Adam(learning_rate=0.001)
    precision_metric = tf.keras.metrics.Precision(name='precision')
    recall_metric = tf.keras.metrics.Recall(name='recall')
    METRICS = ['accuracy', precision_metric, recall_metric]

    resnet_model.compile(
        loss='binary_crossentropy',
        optimizer=opt,
        metrics=METRICS,
    )

In [None]:


# 10 epochs of 100 training steps each, with 16 validation steps per epoch
# and the class weights computed earlier.
resnet_fit_options = dict(
    epochs=10,
    validation_data=validation,
    class_weight=class_weight,
    steps_per_epoch=100,
    validation_steps=16,
)
r = resnet_model.fit(train, **resnet_fit_options)

In [None]:
# Training vs. validation curves, one panel per metric, read from the fit
# history `r`.
plt.figure(figsize=(12, 8))

for panel, (history_key, pretty_name) in enumerate(
    [("loss", "Loss"), ("accuracy", "Accuracy")], start=1
):
    train_curve = r.history[history_key]
    val_curve = r.history[f"val_{history_key}"]

    plt.subplot(2, 2, panel)
    plt.plot(train_curve, label=pretty_name)
    plt.plot(val_curve, label=f"Val_{pretty_name}")
    plt.legend()
    plt.title(f"{pretty_name} Evolution")

In [None]:
# Evaluate on the test iterator first, then on the training iterator.
# The value at index 1 of the result is reported as the accuracy.
for split_name, split_data in (("Test", test), ("Train", train)):
    evaluation = resnet_model.evaluate(split_data)
    print(f"{split_name} Accuracy: {evaluation[1] * 100:.2f}%")

In [None]:
from sklearn.metrics import confusion_matrix, classification_report

# Predict over every batch of the test iterator, threshold the outputs at
# 0.5 and display the per-class report as a table.
predicted_vals = resnet_model.predict(test, steps=len(test))
resnet_report = classification_report(
    test.classes,
    predicted_vals > 0.5,
    output_dict=True,
)
pd.DataFrame(resnet_report)

### DenseNet

In [None]:
from keras.applications.densenet import DenseNet121
from keras.layers import Dense, GlobalAveragePooling2D
from keras.models import Model
from keras import backend as K


In [None]:
# DenseNet121 backbone with ImageNet weights, no top layers and
# pooling='avg', built for 180x180 three-channel inputs.
base_model = DenseNet121(
    weights='imagenet',
    include_top=False,
    pooling='avg',
    input_shape=(180, 180, 3),
)


In [None]:
# Report how many layers the DenseNet121 backbone contains.
layers = base_model.layers
layer_count = len(layers)
print(f"The model has {layer_count} layers")

In [None]:
# Print the backbone's input and output tensors.
for endpoint, tensor in (("input", base_model.input), ("output", base_model.output)):
    print(f"The {endpoint} shape {tensor}")

In [None]:

# Rebuild the backbone without the pooling option so that an explicit
# GlobalAveragePooling2D layer can be stacked on its output.
base_model = DenseNet121(weights='imagenet', include_top=False)

# Head: global average pooling followed by one sigmoid unit.
pooled_features = GlobalAveragePooling2D()(base_model.output)
predictions = Dense(1, activation="sigmoid")(pooled_features)

model = Model(inputs=base_model.input, outputs=predictions)

# Binary cross-entropy with the Adam optimiser; only accuracy is tracked.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# 10 epochs of 100 training steps each, with 16 validation steps per epoch
# and the class weights computed earlier.
densenet_fit_options = {
    "epochs": 10,
    "validation_data": validation,
    "class_weight": class_weight,
    "steps_per_epoch": 100,
    "validation_steps": 16,
}
r = model.fit(train, **densenet_fit_options)

In [None]:
# Training vs. validation curves, one panel per metric, read from the fit
# history `r`.
plt.figure(figsize=(12, 8))

panels = (("loss", "Loss"), ("accuracy", "Accuracy"))
for panel, (history_key, pretty_name) in enumerate(panels, start=1):
    plt.subplot(2, 2, panel)
    plt.plot(r.history[history_key], label=pretty_name)
    plt.plot(r.history[f"val_{history_key}"], label=f"Val_{pretty_name}")
    plt.legend()
    plt.title(f"{pretty_name} Evolution")

In [None]:
# Evaluate on the test iterator first, then on the training iterator.
# The value at index 1 of the result is reported as the accuracy.
for split_name, split_data in (("Test", test), ("Train", train)):
    evaluation = model.evaluate(split_data)
    print(f"{split_name} Accuracy: {evaluation[1] * 100:.2f}%")

In [None]:
from sklearn.metrics import confusion_matrix, classification_report

# Predict over every batch of the test iterator, threshold the outputs at
# 0.5 and display the per-class report as a table.
predicted_vals = model.predict(test, steps=len(test))
densenet_report = classification_report(
    test.classes,
    predicted_vals > 0.5,
    output_dict=True,
)
pd.DataFrame(densenet_report)

In [None]:
from sklearn.metrics import confusion_matrix

# Number of batches needed to cover the test iterator once. len(test)
# replaces np.math.ceil(test.samples / test.batch_size): the np.math alias
# no longer exists in NumPy 2.0, and the classification-report cells already
# use steps=len(test).
test_steps_per_epoch = len(test)
predictions = model.predict(test, steps=test_steps_per_epoch)

# Threshold the sigmoid outputs at 0.5 and flatten them to a 1-D label
# vector, the layout confusion_matrix works with.
predicted_classes = np.where(predictions > 0.5, 1, 0).ravel()

# Ground-truth labels as stored by the iterator.
true_classes = test.classes

# Class names used as tick labels on both axes.
class_labels = list(test.class_indices.keys())

conf_matrix = confusion_matrix(true_classes, predicted_classes)

plt.figure(figsize=(8, 6))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', xticklabels=class_labels, yticklabels=class_labels)
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Confusion Matrix')
plt.show()