In [1]:
import kagglehub

# Download latest version
path = kagglehub.dataset_download("alaminbhuyan/breast-histopathology-images")

print("Path to dataset files:", path)

Downloading from https://www.kaggle.com/api/v1/datasets/download/alaminbhuyan/breast-histopathology-images?dataset_version_number=1...


100%|██████████| 929M/929M [00:21<00:00, 44.3MB/s]

Extracting model files...





Path to dataset files: /root/.cache/kagglehub/datasets/alaminbhuyan/breast-histopathology-images/versions/1


In [2]:
import tensorflow as tf
print("Versión de TensorFlow:", tf.__version__)
print("GPU Disponible:", tf.config.list_physical_devices('GPU'))

Versión de TensorFlow: 2.17.0
GPU Disponible: []


In [3]:
!pip install keras



In [4]:
import cv2
import os
import pandas as pd
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing import image
from tensorflow.keras.utils import to_categorical
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
import numpy as np
import glob

import random

In [5]:
# Define los directorios de los datos
non_cancer_image_dir = os.path.join(path, 'IDC_regular_ps50_idx5/negative_IDC')
cancer_image_dir = os.path.join(path, "IDC_regular_ps50_idx5/positive_IDC")

# Cargar imágenes y etiquetas
non_cancer_images = glob.glob(f'{non_cancer_image_dir}/**/*.png', recursive=True)
cancer_images = glob.glob(f'{cancer_image_dir}/**/*.png', recursive=True)

non_temp = []
can_temp = []

# Añadir datos de las imágenes sin cáncer
non_temp.extend([(img_path, 0) for img_path in non_cancer_images])  # 0 indica no cáncer

# Añadir datos de las imágenes con cáncer
can_temp.extend([(img_path, 1) for img_path in cancer_images])  # 1 indica cáncer

# Crear el DataFrame
df_non_cancer = pd.DataFrame(non_temp, columns=['image_path', 'label'])
df_cancer = pd.DataFrame(can_temp, columns=['image_path','label'])

In [6]:
non_cancer_images = df_non_cancer['image_path'].tolist()
cancer_images = df_cancer['image_path'].tolist()



some_non_img = random.sample(non_cancer_images, len(non_cancer_images))  # Aleatoriza todas las imágenes de no cáncer
some_can_img = random.sample(cancer_images, len(cancer_images))  # Aleatoriza todas las imágenes de cáncer

non_img_arr = []  # Array para imágenes no cáncer
can_img_arr = []  # Array para imágenes con cáncer

for img in some_non_img:
    n_img = cv2.imread(img, cv2.IMREAD_COLOR)
    n_img_size = cv2.resize(n_img, (50, 50), interpolation=cv2.INTER_LINEAR)
    non_img_arr.append([n_img_size, 0])

for img in some_can_img:

    c_img = cv2.imread(img, cv2.IMREAD_COLOR)
    c_img_size = cv2.resize(c_img, (50, 50), interpolation=cv2.INTER_LINEAR)
    can_img_arr.append([c_img_size, 1])

In [7]:
# 6. Concatenar las imágenes y etiquetas de ambas clases
breast_img_arr = non_img_arr + can_img_arr

# 7. Aleatorizar el orden de las imágenes
random.shuffle(breast_img_arr)

# 8. Separar las características (imágenes) y las etiquetas
X = [feature for feature, label in breast_img_arr]
y = [label for feature, label in breast_img_arr]

# Convertir a arrays de NumPy
X = np.array(X)
y = np.array(y)

In [8]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)

# 10. Convertir las etiquetas a formato one-hot encoding
y_train = to_categorical(y_train, 2)
y_test = to_categorical(y_test, 2)

datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

train_datagen = datagen.flow(X_train, y_train, batch_size=32)

# Para el conjunto de prueba, sin aumentos
test_datagen = datagen.flow(X_test, y_test, batch_size=32, shuffle=False)

In [9]:
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=5,
    min_delta=1e-7,
    restore_best_weights=True
)

plateau = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.2,
    patience=2,
    min_delta=1e-7,
    cooldown=0,
    verbose=1
)

tf.random.set_seed(42)

In [10]:
model = tf.keras.Sequential([

    tf.keras.layers.Input(shape=(50, 50, 3)),
    tf.keras.layers.Conv2D(32, (3, 3), padding='same', activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.MaxPooling2D(strides=2),

    tf.keras.layers.Conv2D(64, (3, 3), padding='same', activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.MaxPooling2D((3, 3), strides=2),

    tf.keras.layers.Conv2D(128, (3, 3), padding='same', activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.MaxPooling2D((3, 3), strides=2),

    tf.keras.layers.Conv2D(128, (3, 3), padding='same', activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.MaxPooling2D((3, 3), strides=2),

    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(2, activation='softmax')
])

In [11]:
model.summary()

model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
              loss='categorical_crossentropy',
              metrics=['accuracy','precision'])

In [None]:
history = model.fit(
    train_datagen,
    epochs=20,
    validation_data=test_datagen,
    callbacks=[early_stopping, plateau]
)

Epoch 1/20


  self._warn_if_super_not_called()


[1m 260/3940[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m20:16[0m 331ms/step - accuracy: 0.7532 - loss: 0.7010 - precision: 0.7532

In [None]:
model.save('model.keras')

In [None]:
tf.saved_model.save(model, 'salida')

In [None]:
!pip install tensorflowjs


In [None]:
!mkdir output_model

In [None]:
!tensorflowjs_converter --input_format keras model.h5 output_model