In [None]:
# Instalações necessárias
!pip install -q tensorflow kagglehub

In [None]:
# Silence TensorFlow log messages. This cell is placed before the cell that imports tensorflow.
%env TF_CPP_MIN_LOG_LEVEL=3

In [None]:
# Imports
import tensorflow as tf
from tensorflow.keras import layers, models
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from PIL import Image
import kagglehub
import os

In [None]:
# Download the dataset from Kaggle and report where the files were stored
kaggle_dataset = "tarunparuchur/pneumonia-classification-from-chest-x-rays"
path = kagglehub.dataset_download(kaggle_dataset)
print("Path to dataset files:", path)

In [None]:
# Main directories: one sub-folder of the downloaded dataset per split
train_dir, val_dir, test_dir = (
    os.path.join(path, subpasta)
    for subpasta in ("chest_xray/train", "chest_xray/val", "chest_xray/test")
)

In [None]:
# Load the images through ImageDataGenerator
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Pixel normalisation factor shared by the three generators
fator_escala = 1./255

# Data augmentation settings, used for the training images only
aumento_treino = dict(
    rotation_range=15,
    zoom_range=0.1,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
)

train_datagen = ImageDataGenerator(rescale=fator_escala, **aumento_treino)

# Validation and test images are only rescaled (two independent generator objects)
val_datagen, test_datagen = (ImageDataGenerator(rescale=fator_escala) for _ in range(2))

In [None]:
# Directory iterators
batch_size = 32
img_size = (150, 150)

# Arguments shared by the three iterators
flow_args = dict(target_size=img_size, batch_size=batch_size, class_mode='binary')

train_generator = train_datagen.flow_from_directory(train_dir, **flow_args)
val_generator = val_datagen.flow_from_directory(val_dir, **flow_args)

# Only the test iterator is created with shuffle=False
test_generator = test_datagen.flow_from_directory(test_dir, shuffle=False, **flow_args)

In [None]:
# Convert the Kaggle generators into arrays
def generator_to_numpy(generator):
    """Read every batch of ``generator`` and join them into two arrays.

    Args:
        generator: Object supporting ``len()`` and integer indexing, where
            ``generator[i]`` is an ``(images, labels)`` pair of arrays.

    Returns:
        Tuple ``(x, y)``: all image batches concatenated along the first
        axis, and all label batches concatenated the same way.
    """
    # Fetch the batches in index order, exactly once each
    lotes = [generator[k] for k in range(len(generator))]
    imagens = np.concatenate([lote_x for lote_x, _ in lotes])
    rotulos = np.concatenate([lote_y for _, lote_y in lotes])
    return imagens, rotulos

# Materialise each Kaggle split (train, validation, test) as in-memory arrays
(
    (x_train_kaggle, y_train_kaggle),
    (x_val_kaggle, y_val_kaggle),
    (x_test_kaggle, y_test_kaggle),
) = [generator_to_numpy(g) for g in (train_generator, val_generator, test_generator)]

In [None]:
!pip install datasets

In [None]:
# Download the dataset from Hugging Face
from datasets import load_dataset

hf_dataset_id = "hf-vision/chest-xray-pneumonia"
ds = load_dataset(hf_dataset_id)

def preprocess(example, size=None):
    """Turn one dataset record into an ``(image_array, label)`` pair.

    Args:
        example: Mapping with an ``"image"`` entry that supports
            ``convert("RGB")`` and ``resize(...)`` and a ``"label"`` entry.
        size: Target size handed to ``resize``. When omitted, the
            notebook-level ``img_size`` is used.

    Returns:
        Tuple ``(image, label)``: ``image`` is a float32 array holding the
        resized RGB pixels divided by 255, ``label`` is ``example["label"]``
        unchanged.
    """
    if size is None:
        size = img_size
    image = example["image"].convert("RGB").resize(size)
    # float32 rather than NumPy's default float64: halves the memory of the
    # stacked image arrays. Presumably this also matches the dtype of the
    # batches produced by the Keras generators - TODO confirm.
    image = np.asarray(image, dtype=np.float32) / 255.0
    return image, example["label"]

def _split_to_arrays(split):
    """Apply ``preprocess`` to every record of ``split`` and return ``(images, labels)`` arrays."""
    imagens, rotulos = zip(*[preprocess(ex) for ex in split])
    return np.array(imagens), np.array(rotulos)

# One (x, y) array pair per Hugging Face split
x_train_hf, y_train_hf = _split_to_arrays(ds["train"])
x_val_hf, y_val_hf     = _split_to_arrays(ds["validation"])
x_test_hf, y_test_hf   = _split_to_arrays(ds["test"])

In [None]:
# Helper that caps the number of examples per class
def limitar_por_classe(x, y, limite_por_classe=1000, seed=None):
    """Return at most ``limite_por_classe`` randomly chosen examples of each class.

    Args:
        x: Array of examples, indexable by an integer index array.
        y: 1-D array of labels aligned with ``x``.
        limite_por_classe: Maximum number of examples kept for each distinct
            value found in ``y``.
        seed: Optional seed. When given, a dedicated ``np.random.default_rng(seed)``
            drives the sampling so the selection is repeatable. When ``None``
            the global ``np.random`` state is used, as before.

    Returns:
        Tuple ``(x_sel, y_sel)``. The output is grouped by class, in the order
        given by ``np.unique(y)``; it is NOT shuffled across classes.
    """
    rng = None if seed is None else np.random.default_rng(seed)
    x_final, y_final = [], []

    for classe in np.unique(y):
        idxs = np.where(y == classe)[0]
        # Shuffle in place so the slice below keeps a random subset
        if rng is None:
            np.random.shuffle(idxs)
        else:
            rng.shuffle(idxs)
        idxs = idxs[:limite_por_classe]

        x_final.append(x[idxs])
        y_final.append(y[idxs])

    return np.concatenate(x_final), np.concatenate(y_final)

# ====== Apply the limits ======

# Seed NumPy's global generator so the random subsampling below is repeatable
# for a given ordering of the input arrays.
np.random.seed(42)

# Training
x_train_kaggle, y_train_kaggle = limitar_por_classe(x_train_kaggle, y_train_kaggle, limite_por_classe=1000)
x_train_hf,     y_train_hf     = limitar_por_classe(x_train_hf,     y_train_hf,     limite_por_classe=1000)

# Validation
x_val_kaggle, y_val_kaggle = limitar_por_classe(x_val_kaggle, y_val_kaggle, limite_por_classe=500)
x_val_hf,     y_val_hf     = limitar_por_classe(x_val_hf,     y_val_hf,     limite_por_classe=500)

# Test
x_test_kaggle, y_test_kaggle = limitar_por_classe(x_test_kaggle, y_test_kaggle, limite_por_classe=500)
x_test_hf,     y_test_hf     = limitar_por_classe(x_test_hf,     y_test_hf,     limite_por_classe=500)


In [None]:
# Concatenate the two datasets: Kaggle examples first, Hugging Face examples after.
# NOTE(review): if the two sources share images, the same picture could land in
# both the training and the test arrays - confirm the sources do not overlap.

x_train = np.concatenate((x_train_kaggle, x_train_hf), axis=0)
x_val   = np.concatenate((x_val_kaggle,   x_val_hf),   axis=0)
x_test  = np.concatenate((x_test_kaggle,  x_test_hf),  axis=0)

y_train = np.concatenate((y_train_kaggle, y_train_hf), axis=0)
y_val   = np.concatenate((y_val_kaggle,   y_val_hf),   axis=0)
y_test  = np.concatenate((y_test_kaggle,  y_test_hf),  axis=0)

In [None]:
# Class names of the problem, in the iteration order of the generator's class_indices mapping
nomes_classes = list(train_generator.class_indices)
print("Classes:", nomes_classes)

In [None]:
# Preview a few images
def visualiza_imagens(generator):
    """Plot up to 25 images of the next batch of ``generator`` on a 5x5 grid.

    Args:
        generator: Iterator whose ``next()`` yields an ``(images, labels)``
            batch. Each label is converted with ``int()`` and used as an index
            into the notebook-level ``nomes_classes`` list for the title.
    """
    images, labels = next(generator)
    # A batch may hold fewer than 25 images (small batch size, final partial
    # batch); never index past what was actually returned.
    total = min(25, len(images))

    plt.figure(figsize=(10,10))
    for i in range(total):
        plt.subplot(5,5,i+1)
        plt.imshow(images[i])
        plt.title(nomes_classes[int(labels[i])])
        plt.axis("off")
    plt.show()

visualiza_imagens(train_generator)

In [None]:
# Input shape derived from img_size so the model cannot drift from the data pipeline
input_shape = (*img_size, 3)

# Load a backbone pre-trained on ImageNet
base_model = tf.keras.applications.MobileNetV2(
    input_shape=input_shape,
    include_top=False,      # drop the original classification head
    weights='imagenet'      # load the weights trained on ImageNet
)

# Freeze the backbone (its weights are not trained again)
base_model.trainable = False

# Build the final model
modelo_lia = models.Sequential([
    layers.Input(shape=input_shape),
    # The notebook feeds pixels scaled to [0, 1], while the MobileNetV2
    # ImageNet weights were trained on inputs in [-1, 1]; map one range onto
    # the other inside the model so every caller is covered.
    layers.Rescaling(2.0, offset=-1.0),
    base_model,
    layers.GlobalAveragePooling2D(),   # averages each feature map over its spatial positions
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(1, activation='sigmoid')
])


In [None]:
# Compile the model: Adam optimizer, binary cross-entropy loss, accuracy as the tracked metric
config_compilacao = {
    'optimizer': 'adam',
    'loss': 'binary_crossentropy',
    'metrics': ['accuracy'],
}
modelo_lia.compile(**config_compilacao)

In [None]:
# Shuffle over the WHOLE training set. The arrays are built by concatenating
# the two sources one after the other, each of them grouped by class, and the
# combined set can hold more than 2000 examples - a smaller shuffle buffer
# would only mix neighbouring examples and produce class-skewed batches.
train_ds = tf.data.Dataset.from_tensor_slices((x_train, y_train)).shuffle(len(x_train)).batch(batch_size)
val_ds   = tf.data.Dataset.from_tensor_slices((x_val, y_val)).batch(batch_size)
test_ds  = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(batch_size)

from tensorflow.keras.callbacks import EarlyStopping

# Stop when val_loss has not improved for 3 epochs and restore the best weights
early_stop = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

# Training
history = modelo_lia.fit(
    train_ds,
    epochs=5,
    validation_data=val_ds,
    callbacks=[early_stop]
)

In [None]:
# Evaluation on the combined test set; evaluate() returns the loss followed by the accuracy metric
metricas_teste = modelo_lia.evaluate(test_ds, verbose=2)
erro_teste, acc_teste = metricas_teste
print("\nAcurácia com dados de Teste (Kaggle + HF):", acc_teste)

In [None]:
# Predictions on the combined test set (Kaggle + HF)

# Decision threshold on the sigmoid output. 0.5 is the same cut-off the
# single-image prediction cell uses, so the confusion matrix describes the same
# classifier as the rest of the notebook (a 0.8 cut-off here disagreed with it).
limiar_decisao = 0.5

y_pred = modelo_lia.predict(test_ds)
y_pred_classes = (y_pred > limiar_decisao).astype("int32").flatten()
y_true = y_test   # already concatenated; test_ds is built from it without shuffling


In [None]:
# Confusion matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_true, y_pred_classes)

# Draw the matrix as an annotated heatmap on an explicit Axes
fig, ax = plt.subplots(figsize=(6,6))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=nomes_classes,
    yticklabels=nomes_classes,
    ax=ax,
)
ax.set_title('Matriz de Confusão - Pneumonia')
ax.set_xlabel('Previsto')
ax.set_ylabel('Real')
plt.show()

In [None]:
# Trying the model on a new image
# convert("RGB") forces three channels: the model is built for (150, 150, 3)
# inputs, so a single-channel (grayscale) or RGBA file would otherwise yield an
# array of the wrong shape. The Hugging Face preprocessing does the same conversion.
nova_imagem = Image.open("/content/teste2.jpg").convert("RGB")  # replace with the path of the image
nova_imagem = nova_imagem.resize(img_size)

plt.imshow(nova_imagem)
plt.axis("off")
plt.show()

In [None]:
# Prepare the image for prediction: scale pixels by 1/255 and add a leading batch axis
nova_imagem_array = (np.array(nova_imagem) / 255.0)[np.newaxis, ...]

# Run the prediction; the model has a single sigmoid output per image
previsao = modelo_lia.predict(nova_imagem_array)
probabilidade = previsao[0][0]

# Pick the class and the confidence from the 0.5 cut-off.
# nomes_classes[1] is presumably the pneumonia class and nomes_classes[0] the
# normal one - this depends on the order of class_indices; confirm.
acima_do_limiar = probabilidade > 0.5
classe_prevista = nomes_classes[1] if acima_do_limiar else nomes_classes[0]
confianca = (probabilidade if acima_do_limiar else (1 - probabilidade)) * 100

print(f"A nova imagem foi classificada como: {classe_prevista} com {confianca:.2f}% de confiança")