In [2]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers, models
import matplotlib.pyplot as plt
%matplotlib inline 

In [29]:
train_n = "/kaggle/input/labeled-chest-xray-images/chest_xray/train/NORMAL"
test_n = "/kaggle/input/labeled-chest-xray-images/chest_xray/test/NORMAL"
train_p = "/kaggle/input/labeled-chest-xray-images/chest_xray/train/PNEUMONIA"
test_p = "/kaggle/input/labeled-chest-xray-images/chest_xray/test/PNEUMONIA"
test = "/kaggle/input/labeled-chest-xray-images/chest_xray/test"
train = "/kaggle/input/labeled-chest-xray-images/chest_xray/train"

In [4]:
train_generator = ImageDataGenerator(rescale=1./255, 
                                   rotation_range=40,
                                   width_shift_range=0.2, 
                                   height_shift_range=0.2,
                                   shear_range=0.2,
                                   zoom_range=0.2, 
                                   horizontal_flip=True, 
                                   fill_mode='nearest')


In [30]:
import os 
from PIL import Image
img = Image.open("/kaggle/input/labeled-chest-xray-images/chest_xray/train/NORMAL/NORMAL-1003233-0001.jpeg")
print(img)


<PIL.JpegImagePlugin.JpegImageFile image mode=L size=2572x2476 at 0x7A341466A740>


In [6]:
#test_generator = ImageDataGenerator(rescale=1./225)

In [7]:
#train_generator = train_generator.flow_from_directory(train, target_size=(),)

Found 5232 images belonging to 2 classes.


In [8]:
#validation_generator = test_generator.flow_from_directory(test, target_size=(),)

Found 624 images belonging to 2 classes.


In [31]:
def load_preprocess_and_threshold_image(file_path, target_size, threshold):
    image = tf.io.read_file(file_path)
    image = tf.image.decode_image(image, channels=1)  # Converte para escala de cinza
    image = tf.image.resize(image, target_size)
    image = tf.cast(image, tf.float32) / 255.0  # Normalizar para [0, 1]
    image = tf.where(image > threshold, 1.5, 0.0)  # Aplicar threshold
    return image


In [32]:
image_folder_normal = train_n
image_folder_cancer = train_p

In [33]:
image_paths_normal = [os.path.join(image_folder_normal, filename) for filename in os.listdir(image_folder_normal) if filename.endswith('.jpeg') or filename.endswith('.png')]
image_paths_cancer = [os.path.join(image_folder_cancer, filename) for filename in os.listdir(image_folder_cancer) if filename.endswith('.jpeg') or filename.endswith('.png')]


In [34]:
target_size = (224, 224)
threshold = 1.5 

In [35]:
images_normal = [load_preprocess_and_threshold_image(path, target_size, threshold) for path in image_paths_normal]
images_cancer = [load_preprocess_and_threshold_image(path, target_size, threshold) for path in image_paths_cancer]


In [36]:
images_normal_tensor = tf.stack(images_normal)
images_cancer_tensor = tf.stack(images_cancer)

In [37]:
print(images_normal_tensor.shape)
print(images_cancer_tensor.shape)


(1349, 224, 224, 1)
(3883, 224, 224, 1)


In [38]:
image_folder_tn = test_n
image_folder_tp = test_p

In [39]:
image_paths_tn = [os.path.join(image_folder_tn, filename) for filename in os.listdir(image_folder_tn) if filename.endswith('.jpeg') or filename.endswith('.png')]
image_paths_tp  = [os.path.join(image_folder_tp, filename) for filename in os.listdir(image_folder_tp) if filename.endswith('.jpeg') or filename.endswith('.png')]

In [40]:
images_normaln = [load_preprocess_and_threshold_image(path, target_size, threshold) for path in image_paths_tn]
images_cancerp = [load_preprocess_and_threshold_image(path, target_size, threshold) for path in image_paths_tp]


In [41]:
images_normal_tensorn = tf.stack(images_normaln)
images_cancer_tensorp = tf.stack(images_cancerp)

In [42]:
print(images_normal_tensorn.shape)
print(images_cancer_tensorp.shape)


(234, 224, 224, 1)
(390, 224, 224, 1)


In [63]:
model = models.Sequential()
input_shape = (224, 224, 1)
inputs = tf.keras.Input(shape=input_shape)
from tensorflow.keras.callbacks import EarlyStopping

In [64]:
x = layers.Conv2D(32, (3, 3), activation='relu')(inputs)
x = layers.BatchNormalization()(x)
x = layers.MaxPooling2D((2, 2))(x)
x = layers.Dropout(0.3)(x)
# Segunda camada convolucional + MaxPooling
x = layers.Conv2D(64, (3, 3), activation='relu')(x)
x = layers.MaxPooling2D((2, 2))(x)
x = layers.Dropout(0.3)(x)

# Terceira camada convolucional + MaxPooling
x = layers.Conv2D(128, (3, 3), activation='relu')(x)
x = layers.BatchNormalization()(x)
x = layers.MaxPooling2D((2, 2))(x)
x = layers.Dropout(0.3)(x)
# Flatten para conectar as camadas densas
x = layers.Flatten()(x)

# Camada densa com 128 neurônios
x = layers.Dense(128, activation='relu')(x)

# Camada de saída (binária, então 1 neurônio e ativação sigmoide)
outputs = layers.Dense(1, activation='sigmoid')(x)
x = layers.Dropout(0.3)(x)
# Definir o modelo
model = models.Model(inputs=inputs, outputs=outputs)

In [69]:
from tensorflow.keras.optimizers import Adam
model.compile(optimizer=Adam(learning_rate=0.0001), loss='binary_crossentropy', metrics=['accuracy'])
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

In [70]:
model.summary()

In [71]:
train_images = np.concatenate([images_normal_tensor, images_cancer_tensor])

# Rótulos de treino: 0 para normal, 1 para câncer
train_labels = np.concatenate([
    np.zeros(len(images_normal_tensor)),  # 0 para normal
    np.ones(len(images_cancer_tensor))    # 1 para câncer
])

# 2. Combinar as imagens de teste/validação e criar os rótulos
# Imagens de teste
val_images = np.concatenate([images_normal_tensorn, images_cancer_tensorp])

# Rótulos de validação: 0 para normal, 1 para câncer
val_labels = np.concatenate([
    np.zeros(len(images_normal_tensorn)),  # 0 para normal
    np.ones(len(images_cancer_tensorp))    # 1 para câncer
])

# Shuffle os dados de treino para garantir boa mistura
#shuffle_indices_train = np.random.permutation(len(train_images))
#train_images = train_images[shuffle_indices_train]
#train_labels = train_labels[shuffle_indices_train]

# Shuffle os dados de validação para garantir boa mistura
shuffle_indices_val = np.random.permutation(len(val_images))
val_images = val_images[shuffle_indices_val]
val_labels = val_labels[shuffle_indices_val]

In [None]:
datagen = ImageDataGenerator(
    rotation_range=10,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True
)

datagen.fit(train_images)
history = model.fit(
    train_images,   # Tensores de treino
    train_labels,   # Rótulos de treino
    epochs=3,      # Ajuste o número de épocas conforme necessário
    batch_size=32,  # Ajuste o batch size conforme necessário
    validation_data=(val_images, val_labels)  # Conjunto de validação/teste
)

# 6. Avaliar o modelo                  
print("Perda de treino:", train_loss)
print("Perda de validação:", val_loss)

Epoch 1/3
[1m 50/164[0m [32m━━━━━━[0m[37m━━━━━━━━━━━━━━[0m [1m5:09[0m 3s/step - accuracy: 0.7282 - loss: 0.6894