# Libraries

In [None]:
# Install the Kaggle CLI and download the brain-MRI dataset archive into /tmp.
# NOTE(review): the download presumably needs Kaggle API credentials to be configured first — confirm.
!pip install kaggle
!kaggle datasets download -d navoneel/brain-mri-images-for-brain-tumor-detection -p /tmp
# Remaining third-party packages installed for this notebook.
# NOTE(review): mplcursors is not imported anywhere in the visible notebook — confirm it is needed.
!pip install keras
!pip install -q -U keras-tuner
!pip install mplcursors

## Python Libraries

In [None]:
import os
import shutil
import string
import itertools
import zipfile

## Data & matrix manipulation

In [None]:
import numpy as np
from tqdm import tqdm

## Visualization

In [None]:
import matplotlib.pyplot as plt
import matplotlib.image as mping
from plotly.offline import init_notebook_mode, iplot
import plotly.graph_objs as go
from plotly import tools
%matplotlib inline

## Image processing

In [None]:
import cv2
import imutils

## TensorFlow & Keras

In [None]:
import tensorflow as tf
from tensorflow.keras import layers, regularizers
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import Adam, RMSprop
from tensorflow.keras.callbacks import Callback, ModelCheckpoint, EarlyStopping
from keras.applications.vgg16 import VGG16, preprocess_input
from keras.models import Model, Sequential
import tensorflow_hub as hub
from tensorflow.keras.layers import Input, Flatten, Dropout, Dense

## Hyperparameters

In [None]:
import kerastuner as kt

## Scikit-Learn

In [None]:
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix


# Functions

### plot Image function

In [None]:
def plotImage(images_arr):
  """Show up to five images side by side, drawing only channel 0 of each.

  Args:
    images_arr: iterable of image arrays that can be indexed as
      ``img[:, :, 0]``. Only the first five entries are drawn because the
      figure has exactly five axes.
  """
  fig, axes = plt.subplots(1, 5, figsize=(20, 20))
  axes = axes.flatten()
  for img, ax in zip(images_arr, axes):
    ax.imshow(img[:,:,0])
  # Hide the frame of every axis, including the ones left empty when fewer
  # than five images are supplied.
  for ax in axes:
    ax.axis('off')
  plt.tight_layout()
  # The original wrote `plt.show` without parentheses, which only references
  # the function and never calls it.
  plt.show()

In [None]:
def visualizacion_resultados(history):
  """Plot training vs. validation curves: accuracy on the left, loss on the right.

  Args:
    history: object whose ``history`` attribute is a dict holding the
      per-epoch sequences "accuracy", "val_accuracy", "loss" and "val_loss".
  """
  metrics = history.history
  epochs = list(range(len(metrics["accuracy"])))
  fig, ax = plt.subplots(1,2)
  # Read all four curves up front so a missing key fails before anything is drawn.
  curves = (
    ("accuracy", "Accuracy", metrics["accuracy"], metrics["val_accuracy"]),
    ("loss", "Loss", metrics["loss"], metrics["val_loss"]),
  )
  fig.set_size_inches(16,9)

  for axis, (key, y_label, train_curve, val_curve) in zip(ax, curves):
    axis.plot(epochs, train_curve, "go-", label = "Entrenamiento " + key)
    axis.plot(epochs, val_curve, "ro-", label = "Validación " + key)
    axis.set_title("Entrenamiento y validación " + key)
    axis.legend()
    axis.set_xlabel("Epochs")
    axis.set_ylabel(y_label)

  plt.show()

In [None]:
def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    Plot a confusion matrix as a colored grid with each cell's value on top.

    NOTE: a second ``plot_confusion_matrix`` with the same signature is
    defined further down in this notebook; when the cells run top to bottom
    that later definition replaces this one.

    Args:
        cm: 2-D array, rows = true classes, columns = predicted classes.
        classes: sequence of tick labels, one per class.
        normalize: when True every row is divided by its row sum.
        title: figure title.
        cmap: matplotlib colormap used for the grid.
    """
    # Normalize before drawing so that the colors produced by imshow and the
    # numbers written in the cells describe the same matrix (the original
    # drew the raw counts and only normalized the text).
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]

    plt.figure(figsize = (6,6))
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=90)
    plt.yticks(tick_marks, classes)

    # Cells darker than half the maximum get white text for contrast.
    thresh = cm.max() / 2.
    cm = np.round(cm,2)
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, cm[i, j],
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")
    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.show()

In [None]:
def plot_samples(X, y, labels_dict, n=50):
    """
    Draw one figure per class showing up to ``n`` images of that class.

    Args:
        X: array of images, indexable by integer position.
        y: 1-D array of integer class indices aligned with ``X``.
        labels_dict: maps class index (0, 1, ...) to the name used in the title.
        n: maximum number of images drawn per class.
    """
    cols = 10
    # Round up so the grid always has room for n images. The original
    # int(n / 10) dropped the partial last row, so plt.subplot failed
    # whenever n was not a multiple of 10 (and for any n < 10).
    rows = max(1, int(np.ceil(n / cols)))
    for index in range(len(labels_dict)):
        # Positions of the samples of this class, limited to the first n.
        positions = np.flatnonzero(y == index)[:n]

        plt.figure(figsize=(15,6))
        for slot, pos in enumerate(positions, start=1):
            plt.subplot(rows, cols, slot)
            plt.imshow(X[pos])

            plt.xticks([])
            plt.yticks([])
        plt.suptitle('Tumor: {}'.format(labels_dict[index]))
        plt.show()

In [None]:
def model_constructor(hp):
  """Build and compile the small CNN whose hyperparameters are sampled from ``hp``.

  Two hyperparameters are drawn: 'units' (32..512 in steps of 32, shared by
  both hidden dense layers) and 'learning_rate' (1e-2, 1e-3 or 1e-4).

  Args:
    hp: hyperparameter container exposing ``Int`` and ``Choice``.

  Returns:
    The compiled model: 28x28x1 input, two-way softmax output.
  """
  net = tf.keras.models.Sequential()
  feature_layers = (
    tf.keras.layers.Conv2D(75, (3,3), activation = 'relu', input_shape = (28,28,1)),
    tf.keras.layers.MaxPooling2D((2,2)),
    tf.keras.layers.Flatten(),
  )
  for layer in feature_layers:
    net.add(layer)

  hp_units = hp.Int('units', min_value = 32, max_value = 512, step = 32)
  # Two identical dense + dropout stages sharing the sampled width.
  for _ in range(2):
    net.add(tf.keras.layers.Dense(units = hp_units, activation = 'relu', kernel_regularizer= regularizers.l2(1e-5)))
    net.add(tf.keras.layers.Dropout(0.2))
  net.add(tf.keras.layers.Dense(2, activation = 'softmax'))

  hp_learning_rate = hp.Choice('learning_rate', values = [1e-2, 1e-3, 1e-4])
  optimizer = tf.keras.optimizers.Adam(learning_rate = hp_learning_rate)

  net.compile(optimizer = optimizer,
              loss = 'categorical_crossentropy',
              metrics = ['accuracy'])
  return net

In [None]:
def plot_confusion_matrix(cm, classes, normalize=False, title='Confusion matrix', cmap=plt.cm.Blues):
    """
    Draw a confusion matrix as a heat map with every cell's value printed on it.

    Args:
        cm: 2-D array, rows = true classes, columns = predicted classes.
        classes: tick labels, one per class.
        normalize: divide each row by its sum and print two decimals;
            otherwise the raw integer counts are printed.
        title: figure title.
        cmap: matplotlib colormap for the heat map.
    """
    if not normalize:
        print('Matriz de confusión sin normalizar')
    else:
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        cm = cm.astype('float') / row_totals
        print("Matriz de confusión normalizada")

    plt.figure(figsize=(8, 6))
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    positions = np.arange(len(classes))
    plt.xticks(positions, classes, rotation=45)
    plt.yticks(positions, classes)

    cell_format = '.2f' if normalize else 'd'
    # Values above half the maximum sit on a dark cell and get white text.
    cutoff = cm.max() / 2.
    for row in range(cm.shape[0]):
        for col in range(cm.shape[1]):
            value = cm[row, col]
            text_color = "white" if value > cutoff else "black"
            plt.text(col, row, format(value, cell_format),
                     horizontalalignment="center",
                     color=text_color)

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.tight_layout()
    plt.show()

### Processing functions

In [None]:
def load_data(dir_path, img_size=(100,100)):
    """
    Load every image below ``dir_path`` as a resized array.

    ``dir_path`` must contain one sub-directory per class. Sub-directories
    are numbered 0, 1, ... in alphabetical order; hidden entries (names
    starting with '.') and plain files at the class level are ignored.

    Args:
        dir_path: root folder of the split (with or without trailing slash).
        img_size: size tuple passed to ``cv2.resize``.

    Returns:
        (X, y, labels): array of images, array of class indices aligned
        with ``X``, and a dict mapping class index -> sub-directory name.
    """
    X = []
    y = []
    labels = dict()
    class_dirs = [
        name for name in sorted(os.listdir(dir_path))
        if not name.startswith('.') and os.path.isdir(os.path.join(dir_path, name))
    ]
    for i, class_name in enumerate(tqdm(class_dirs)):
        labels[i] = class_name
        class_path = os.path.join(dir_path, class_name)
        # Sorted so the sample order does not depend on the file system.
        for file in sorted(os.listdir(class_path)):
            if file.startswith('.'):
                continue
            file_path = os.path.join(class_path, file)
            img = cv2.imread(file_path)
            if img is None:
                # cv2.imread signals an unreadable file by returning None;
                # passing that to cv2.resize would abort the whole load.
                print(f'Skipping unreadable image: {file_path}')
                continue
            X.append(cv2.resize(img, img_size))
            y.append(i)
    X = np.array(X)
    y = np.array(y)
    print(f'{len(X)} images loaded from {dir_path} directory.')
    return X, y, labels

In [None]:
def preprocess_imgs(set_name, img_size):
    """
    Resize every image to ``img_size`` (bicubic) and apply the VGG-16
    ``preprocess_input`` transform imported at the top of the notebook.

    Args:
        set_name: iterable of image arrays.
        img_size: target size passed to ``cv2.resize`` as ``dsize``.

    Returns:
        A numpy array stacking the preprocessed images.
    """
    return np.array([
        preprocess_input(
            cv2.resize(img, dsize=img_size, interpolation=cv2.INTER_CUBIC)
        )
        for img in set_name
    ])

In [None]:
import cv2
import numpy as np
import imutils

def crop_imgs(set_name, add_pixels_value=0):
    """
    Crop each image to the bounding box of its largest contour.

    The image is blurred, thresholded at 45 and cleaned with two erosions and
    two dilations; the largest external contour of the result defines the
    crop box. Images with no contour, or whose box is empty, are returned
    unchanged.

    Args:
        set_name: iterable of 3-channel image arrays.
        add_pixels_value: margin (in pixels) added on every side of the box,
            clamped to the image borders.

    Returns:
        1-D numpy array of dtype object with one (possibly differently
        sized) image array per input image.
    """
    set_new = []
    for img in set_name:
        gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
        gray = cv2.GaussianBlur(gray, (5, 5), 0)

        # threshold the image, then perform a series of erosions +
        # dilations to remove any small regions of noise
        thresh = cv2.threshold(gray, 45, 255, cv2.THRESH_BINARY)[1]
        thresh = cv2.erode(thresh, None, iterations=2)
        thresh = cv2.dilate(thresh, None, iterations=2)

        # find contours in thresholded image, then grab the largest one
        cnts = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        cnts = imutils.grab_contours(cnts)

        # No contour found: keep the original image.
        if len(cnts) == 0:
            set_new.append(img)
            continue

        c = max(cnts, key=cv2.contourArea)

        # Extreme x / y coordinates of the contour, widened by the margin and
        # clamped to the image borders.
        xs = c[:, :, 0]
        ys = c[:, :, 1]
        left = max(0, int(xs.min()) - add_pixels_value)
        right = min(img.shape[1], int(xs.max()) + add_pixels_value)
        top = max(0, int(ys.min()) - add_pixels_value)
        bottom = min(img.shape[0], int(ys.max()) + add_pixels_value)

        new_img = img[top:bottom, left:right].copy()

        # A one-pixel-wide or one-pixel-tall contour gives an empty slice,
        # which later resize / imwrite calls cannot handle.
        if new_img.size == 0:
            set_new.append(img)
            continue

        set_new.append(new_img)

    # Fill a pre-allocated object array element by element.
    # np.array(list, dtype=object) tries to broadcast when the crops share
    # their first dimension (raising ValueError) and builds an N-D object
    # array when all crops have the same shape.
    result = np.empty(len(set_new), dtype=object)
    for k, cropped in enumerate(set_new):
        result[k] = cropped
    return result

### Training and saving functions

In [None]:
class TrainingCallback(Callback):
  """Keras callback that stops training once training accuracy exceeds 0.95."""

  def on_epoch_end(self, epoch, logs = None):
    """Check the 'accuracy' entry of ``logs`` at the end of each epoch.

    Args:
      epoch: index of the epoch that just finished (unused).
      logs: dict of metric values for the epoch; may be None.
    """
    # Avoid a shared mutable default, and tolerate a missing metric:
    # comparing None with a float raises TypeError.
    accuracy = (logs or {}).get('accuracy')
    if accuracy is not None and accuracy > 0.95:
      print("El modelo llego al 95% de accuracy, STOP")
      self.model.stop_training = True

In [None]:
def save_new_images(x_set, y_set, folder_name):
    """
    Write every image to ``<folder_name>/NO`` (class 0) or
    ``<folder_name>/YES`` (any other class) as ``<position>.jpg``.

    Args:
        x_set: iterable of image arrays.
        y_set: iterable of class indices aligned with ``x_set``.
        folder_name: destination root, with or without trailing slash.

    Raises:
        OSError: if OpenCV reports that an image could not be written.
    """
    for i, (img, imclass) in enumerate(zip(x_set, y_set)):
        class_dir = os.path.join(folder_name, 'NO' if imclass == 0 else 'YES')
        # cv2.imwrite does not create missing folders; it just returns False.
        os.makedirs(class_dir, exist_ok=True)
        target = os.path.join(class_dir, str(i) + '.jpg')
        if not cv2.imwrite(target, img):
            raise OSError(f'Could not write image to {target}')

# Data

In [None]:
# Unpack the downloaded Kaggle archive into /tmp/brain-tumor.
with zipfile.ZipFile('/tmp/brain-mri-images-for-brain-tumor-detection.zip', 'r') as zip_ref:
    zip_ref.extractall('/tmp/brain-tumor')

## Create and organize folders

In [None]:
# Create TRAIN / TEST / VAL folders (YES and NO each) under ./brain_tumor_dataset.
# NOTE(review): the cells below read and write /tmp/brain-tumor/... instead; these relative folders look unused — confirm.
!mkdir -p brain_tumor_dataset/TRAIN/YES brain_tumor_dataset/TRAIN/NO \
brain_tumor_dataset/TEST/YES brain_tumor_dataset/TEST/NO \
brain_tumor_dataset/VAL/YES brain_tumor_dataset/VAL/NO

In [None]:
# Switch plotly to notebook rendering.
init_notebook_mode(connected=True)
# Seed passed to the Keras directory iterators created further down.
RANDOM_SEED = 123

In [None]:
IMG_PATH = '/tmp/brain-tumor/brain_tumor_dataset/'
SPLIT_ROOT = '/tmp/brain-tumor/'
N_TEST_PER_CLASS = 5     # first files of each class go to TEST
TRAIN_FRACTION = 0.8     # files up to this fraction of the class go to TRAIN, the rest to VAL

# Split the data by train/val/test.
# Both listings are sorted: os.listdir returns entries in arbitrary order, so
# without sorting the split differs between machines and a re-run can copy
# the same file into more than one split.
for CLASS in sorted(os.listdir(IMG_PATH)):
    class_dir = os.path.join(IMG_PATH, CLASS)
    if CLASS.startswith('.') or not os.path.isdir(class_dir):
        continue
    file_names = sorted(os.listdir(class_dir))
    IMG_NUM = len(file_names)
    for (n, FILE_NAME) in enumerate(file_names):
        if n < N_TEST_PER_CLASS:
            split = 'TEST'
        elif n < TRAIN_FRACTION*IMG_NUM:
            split = 'TRAIN'
        else:
            split = 'VAL'
        # Create the class folder of the split if it does not exist yet.
        target_dir = os.path.join(SPLIT_ROOT, split, CLASS.upper())
        os.makedirs(target_dir, exist_ok=True)
        shutil.copy(os.path.join(class_dir, FILE_NAME), os.path.join(target_dir, FILE_NAME))

In [None]:
# Root folders of the train / test / validation splits.
TRAIN_DIR = '/tmp/brain-tumor/TRAIN/'
TEST_DIR = '/tmp/brain-tumor/TEST/'
VAL_DIR = '/tmp/brain-tumor/VAL/'

# Size every image is resized to before it reaches a model.
# NOTE(review): VGG16's usual ImageNet input is 224x224 — confirm 244 is intended.
IMG_SIZE = (244,244)

In [None]:
import os
# Number of validation files per class, NO first and then YES.
for class_suffix in ('/NO', '/YES'):
    print(len(os.listdir(VAL_DIR + class_suffix)))


In [None]:
# Load each split into memory as arrays resized to IMG_SIZE.
# `labels` maps class index -> class folder name (taken from the TRAIN split).
X_train, y_train, labels = load_data(TRAIN_DIR, IMG_SIZE)
x_test, y_test, _ = load_data(TEST_DIR, IMG_SIZE)
x_val, y_val, _ = load_data(VAL_DIR, IMG_SIZE)

In [None]:
# Generators that only rescale pixel values by 1/255 (no augmentation).
# NOTE(review): train_datagen and test_datagen are redefined later in the notebook.
train_datagen = ImageDataGenerator(rescale=1/255)
test_datagen = ImageDataGenerator(rescale=1/255)
val_datagen = ImageDataGenerator(rescale=1/255)

In [None]:
# Options shared by the three grayscale directory iterators.
grayscale_flow_options = dict(
    target_size=IMG_SIZE,
    batch_size=32,
    class_mode='categorical',
    color_mode='grayscale',
)

train_generator = train_datagen.flow_from_directory(
    TRAIN_DIR, subset='training', **grayscale_flow_options)

test_generator = test_datagen.flow_from_directory(
    TEST_DIR, **grayscale_flow_options)

validation_generator = val_datagen.flow_from_directory(
    VAL_DIR, **grayscale_flow_options)

In [None]:
# Pull one batch from the training generator and show its first five images.
sample_trining_images, _ = next(train_generator)
plotImage(sample_trining_images[:5])

In [None]:
# Early stopping on the training loss with a patience of 3 epochs.
# NOTE(review): not passed to any fit() call in the visible notebook.
callback_early = tf.keras.callbacks.EarlyStopping(monitor = 'loss', patience = 3, mode = 'auto')

# **Project**

In [None]:
# Load the three splits as arrays resized to IMG_SIZE.
# NOTE(review): repeats the earlier loading cell verbatim — probably redundant.
X_train, y_train, labels = load_data(TRAIN_DIR, IMG_SIZE)
x_test, y_test, _ = load_data(TEST_DIR, IMG_SIZE)
x_val, y_val, _ = load_data(VAL_DIR, IMG_SIZE)

In [None]:
# Per-class sample counts for each split, in the order train / validation / test.
label_sets = (y_train, y_val, y_test)
y = {cls: [np.sum(split_labels == cls) for split_labels in label_sets] for cls in (0, 1)}

# Tick labels, in the same order as the counts above.
sets = ['Train Set', 'Validation Set', 'Test Set']

fig, ax = plt.subplots()

# Two bars per split: class 0 at the tick position, class 1 right next to it.
bar_width = 0.35
index = np.arange(len(sets))
bars1 = ax.bar(index, y[0], bar_width, label='No', color='#33cc33', alpha=0.7)
bars2 = ax.bar(index + bar_width, y[1], bar_width, label='Yes', color='#ff3300', alpha=0.7)

# Center each tick between its two bars.
ax.set_xticks(index + bar_width / 2)
ax.set_xticklabels(sets)
ax.set_xlabel('Set')
ax.set_ylabel('Count')
ax.set_title('Count of classes in each set')
ax.legend()

plt.show()



In [None]:
# Show up to 30 raw training images of each class.
plot_samples(X_train, y_train, labels, 30)

In [None]:
# Candidate sample images; the first one that can be read is used.
file_paths = [
    '/tmp/brain-tumor/TRAIN/YES/Y106.jpg',
    '/tmp/brain-tumor/TRAIN/YES/Y107.jpg',
    '/tmp/brain-tumor/TRAIN/YES/Y108.jpg'
]

img = None
for file_path in file_paths:
    # cv2.imread returns None for a missing or unreadable file.
    img = cv2.imread(file_path)
    if img is not None:
        print(f"Imagen cargada exitosamente: {file_path}")
        break

# Stop here instead of letting cv2.resize fail on None with a cryptic error.
if img is None:
    raise FileNotFoundError("No se pudo cargar ninguna imagen de los archivos especificados.")

img = cv2.resize(
            img,
            dsize=IMG_SIZE,
            interpolation=cv2.INTER_CUBIC
        )
gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
gray = cv2.GaussianBlur(gray, (5, 5), 0)

# threshold the image, then perform a series of erosions +
# dilations to remove any small regions of noise
thresh = cv2.threshold(gray, 45, 255, cv2.THRESH_BINARY)[1]
thresh = cv2.erode(thresh, None, iterations=2)
thresh = cv2.dilate(thresh, None, iterations=2)

# find contours in thresholded image, then grab the largest one
cnts = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = imutils.grab_contours(cnts)
if len(cnts) == 0:
    raise ValueError("No contour found in the sample image.")
c = max(cnts, key=cv2.contourArea)

# find the extreme points (as plain Python ints so they can be used both as
# drawing coordinates and as slice bounds)
extLeft = tuple(int(v) for v in c[c[:, :, 0].argmin()][0])
extRight = tuple(int(v) for v in c[c[:, :, 0].argmax()][0])
extTop = tuple(int(v) for v in c[c[:, :, 1].argmin()][0])
extBot = tuple(int(v) for v in c[c[:, :, 1].argmax()][0])

# add contour on the image
img_cnt = cv2.drawContours(img.copy(), [c], -1, (0, 255, 255), 4)

# add extreme points
img_pnt = cv2.circle(img_cnt.copy(), extLeft, 8, (0, 0, 255), -1)
img_pnt = cv2.circle(img_pnt, extRight, 8, (0, 255, 0), -1)
img_pnt = cv2.circle(img_pnt, extTop, 8, (255, 0, 0), -1)
img_pnt = cv2.circle(img_pnt, extBot, 8, (255, 255, 0), -1)

# crop; the bounds are clamped so that a positive ADD_PIXELS can never yield
# a negative start index (which would slice from the opposite edge)
ADD_PIXELS = 0
crop_top = max(0, extTop[1] - ADD_PIXELS)
crop_bottom = min(img.shape[0], extBot[1] + ADD_PIXELS)
crop_left = max(0, extLeft[0] - ADD_PIXELS)
crop_right = min(img.shape[1], extRight[0] + ADD_PIXELS)
new_img = img[crop_top:crop_bottom, crop_left:crop_right].copy()

In [None]:
# The four stages of the cropping demo, drawn left to right.
crop_stages = (
    ('Step 1. Get the original image', img),
    ('Step 2. Find the biggest contour', img_cnt),
    ('Step 3. Find the extreme points', img_pnt),
    ('Step 4. Crop the image', new_img),
)

plt.figure(figsize=(15,6))
for position, (stage_title, stage_image) in enumerate(crop_stages, start=1):
    plt.subplot(1, 4, position)
    plt.imshow(stage_image)
    plt.xticks([])
    plt.yticks([])
    plt.title(stage_title)
plt.show()

In [None]:
# Apply the contour-based crop to every image of each split.
X_train_crop = crop_imgs(set_name=X_train)
X_val_crop = crop_imgs(set_name=x_val)
X_test_crop = crop_imgs(set_name=x_test)

In [None]:
# Show up to 30 cropped training images of each class.
plot_samples(X_train_crop, y_train, labels, 30)

In [None]:
# Resize the crops to IMG_SIZE and apply preprocess_input to each split.
X_train_prep = preprocess_imgs(set_name=X_train_crop, img_size=IMG_SIZE)
X_test_prep = preprocess_imgs(set_name=X_test_crop, img_size=IMG_SIZE)
X_val_prep = preprocess_imgs(set_name=X_val_crop, img_size=IMG_SIZE)

In [None]:
# Create the YES / NO folders for the cropped version of each split.
!mkdir -p /tmp/brain-tumor/TRAIN_CROP/YES /tmp/brain-tumor/TRAIN_CROP/NO \
/tmp/brain-tumor/TEST_CROP/YES /tmp/brain-tumor/TEST_CROP/NO \
/tmp/brain-tumor/VAL_CROP/YES /tmp/brain-tumor/VAL_CROP/NO

# Write the cropped images to disk so directory-based generators can read them.
save_new_images(X_train_crop, y_train, folder_name='/tmp/brain-tumor/TRAIN_CROP/')
save_new_images(X_val_crop, y_val, folder_name='/tmp/brain-tumor/VAL_CROP/')
save_new_images(X_test_crop, y_test, folder_name='/tmp/brain-tumor/TEST_CROP/')

In [None]:
#plot_samples(X_train_prep, y_train, labels, 30)

In [None]:
# Augmentation settings used only to produce the preview images below:
# random rotation, shifts, shear, brightness changes and flips, plus 1/255 rescaling.
demo_datagen = ImageDataGenerator(
    rotation_range=15,
    width_shift_range=0.05,
    height_shift_range=0.05,
    rescale=1./255,
    shear_range=0.05,
    brightness_range=[0.1, 1.5],
    horizontal_flip=True,
    vertical_flip=True
)

In [None]:
# Start from an empty preview folder. os.mkdir raised FileExistsError when
# the cell was run a second time, and leftovers from an earlier run would be
# mixed into the preview grid.
shutil.rmtree('preview', ignore_errors=True)
os.makedirs('preview', exist_ok=True)

# flow() expects a batch, so add a leading batch axis to the single image.
x = X_train_crop[0]
x = x.reshape((1,) + x.shape)

# The iterator never ends on its own; stop after 21 augmented images have
# been written to the preview folder.
i = 0
for batch in demo_datagen.flow(x, batch_size=1, save_to_dir='preview', save_prefix='aug_img', save_format='jpg'):
    i += 1
    if i > 20:
        break

In [None]:
# The un-augmented source image.
plt.imshow(X_train_crop[0])
plt.xticks([])
plt.yticks([])
plt.title('Original Image')
plt.show()

# Up to 21 augmented versions in a 3 x 7 grid.
grid_rows, grid_cols = 3, 7
plt.figure(figsize=(15,6))
preview_files = os.listdir('preview/')[:grid_rows * grid_cols]
for i, file_name in enumerate(preview_files, start=1):
    # OpenCV loads BGR; matplotlib expects RGB.
    img = cv2.cvtColor(cv2.imread('preview/' + file_name), cv2.COLOR_BGR2RGB)
    plt.subplot(grid_rows, grid_cols, i)
    plt.imshow(img)
    plt.xticks([])
    plt.yticks([])
plt.suptitle('Augemented Images')
plt.show()

In [None]:
# Delete the temporary folder holding the augmented preview images.
!rm -rf preview/

In [None]:
# From here on the generators read the cropped images.
# NOTE(review): this rebinds TRAIN_DIR / VAL_DIR, so re-running earlier cells
# afterwards would use the cropped folders — consider separate names.
TRAIN_DIR = '/tmp/brain-tumor/TRAIN_CROP/'
VAL_DIR = '/tmp/brain-tumor/VAL_CROP/'

# Training images: random augmentation followed by preprocess_input.
train_datagen = ImageDataGenerator(
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    brightness_range=[0.5, 1.5],
    horizontal_flip=True,
    vertical_flip=True,
    preprocessing_function=preprocess_input
)

# Validation images: preprocess_input only, no augmentation.
test_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input
)


# Binary labels (one 0/1 value per image) to match the single sigmoid output
# of the model built below.
train_generator = train_datagen.flow_from_directory(
    TRAIN_DIR,
    color_mode='rgb',
    target_size=IMG_SIZE,
    batch_size=32,
    class_mode='binary',
    seed=RANDOM_SEED
)


validation_generator = test_datagen.flow_from_directory(
    VAL_DIR,
    color_mode='rgb',
    target_size=IMG_SIZE,
    batch_size=16,
    class_mode='binary',
    seed=RANDOM_SEED
)

In [None]:
from tensorflow.keras.layers import Input

# A single sigmoid unit: the output is read as the probability of class 1.
NUM_CLASSES = 1
input_tensor = Input(shape=IMG_SIZE + (3,))

# Load the VGG16 convolutional base (ImageNet weights, no classifier head)
# on top of the input tensor defined above.
base_model = VGG16(
    weights='imagenet',
    include_top=False,
    input_tensor=input_tensor  # Use input_tensor here
)

# Freeze the pretrained base; only the new head is trained.
base_model.trainable = False

# Build the rest of the model: flatten -> dropout -> sigmoid output.
x = base_model.output
x = Flatten()(x)
x = Dropout(0.5)(x)
output = Dense(NUM_CLASSES, activation='sigmoid')(x)

# Create the final model
model = Model(inputs=input_tensor, outputs=output)

model.compile(
    loss='binary_crossentropy',
    optimizer=RMSprop(learning_rate=1e-4),
    metrics=['accuracy']
)

model.summary()

In [None]:
import matplotlib.pyplot as plt

# Fetch one batch of images and labels from the training generator.
# The labels get their own name: the original unpacked them into `labels`,
# overwriting the class-index -> name dict used by the plotting helpers.
batch = next(train_generator)
images, batch_labels = batch

# The generator applies VGG16's preprocess_input (RGB -> BGR, then per-channel
# ImageNet mean subtraction, no scaling). Casting that straight to uint8 wraps
# negative values, so undo the transform before display: add the means back,
# clip to the valid range and flip the channels back to RGB.
VGG_BGR_MEANS = np.array([103.939, 116.779, 123.68])
display_images = np.clip(images + VGG_BGR_MEANS, 0, 255)[..., ::-1].astype("uint8")

# Show up to 9 images in a 3 x 3 grid (the last batch of an epoch may be smaller).
plt.figure(figsize=(10, 10))
for i in range(min(9, len(display_images))):
    plt.subplot(3, 3, i + 1)
    plt.imshow(display_images[i])
    plt.title(f"Label: {'Tumor' if batch_labels[i] == 1 else 'No Tumor'}")
    plt.axis('off')

plt.show()


In [None]:
import math

# One full pass over each generator per epoch: ceil(samples / batch_size).
# The original doubled both values, which asks Keras for more batches than
# the generators hold and triggers "Your input ran out of data".
steps_per_epoch = math.ceil(train_generator.samples / train_generator.batch_size)
validation_steps = math.ceil(validation_generator.samples / validation_generator.batch_size)

print(f'Steps per epoch: {steps_per_epoch}')
print(f'Validation steps: {validation_steps}')

In [None]:
# Iterate over a few batches to check that the generator works.
for i in range(1):  # Change the count to the number of batches you want to check
    batch = next(train_generator)
    print(f"Lote {i + 1} cargado correctamente.")


In [None]:
EPOCHS = 30
# Early stopping on the validation loss, restoring the best weights.
# It is defined but left disconnected, as in the original cell; pass
# callbacks=[es] to fit() to enable it.
es = EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=5,
    restore_best_weights=True
)

# No steps_per_epoch / validation_steps: Keras then runs exactly one pass
# over each generator per epoch. The hard-coded steps_per_epoch=50 asked for
# more batches than the training generator provides, so training was
# interrupted with "Your input ran out of data".
history = model.fit(
    train_generator,
    epochs=EPOCHS,
    validation_data=validation_generator,
    #callbacks=[es]
)

Epoch 1/30



Your `PyDataset` class should call `super().__init__(**kwargs)` in its constructor. `**kwargs` can include `workers`, `use_multiprocessing`, `max_queue_size`. Do not pass these arguments to `fit()`, as they will be ignored.



[1m 7/50[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m16:36[0m 23s/step - accuracy: 0.4653 - loss: 6.6129


Your input ran out of data; interrupting training. Make sure that your dataset or generator can generate at least `steps_per_epoch * epochs` batches. You may need to use the `.repeat()` function when building your dataset.



[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m186s[0m 4s/step - accuracy: 0.5464 - loss: 5.9548 - val_accuracy: 0.6000 - val_loss: 3.0555
Epoch 2/30


In [None]:
# Training curves: accuracy on the left, loss on the right.
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs_range = range(1, len(history.epoch) + 1)

panels = (
    ('Accuracy', acc, val_acc),
    ('Loss', loss, val_loss),
)

plt.figure(figsize=(15,5))
for panel_number, (metric_name, train_curve, val_curve) in enumerate(panels, start=1):
    plt.subplot(1, 2, panel_number)
    plt.plot(epochs_range, train_curve, label='Train Set')
    plt.plot(epochs_range, val_curve, label='Val Set')
    plt.legend(loc="best")
    plt.xlabel('Epochs')
    plt.ylabel(metric_name)
    plt.title('Model ' + metric_name)

plt.tight_layout()
plt.show()

In [None]:

# Validate on the val set: threshold the sigmoid outputs at 0.5 to get 0/1 classes.
predictions = model.predict(X_val_prep)
predictions = [1 if x > 0.5 else 0 for x in predictions]

accuracy = accuracy_score(y_val, predictions)
print('Val Accuracy = %.2f' % accuracy)

# Class-index -> display-name mapping used for the plots below
# (rebinds the global `labels`).
labels = {0: 'NO', 1: 'YES'}  # Make sure the labels are defined correctly

# List of class names, in class-index order
class_names = list(labels.values())

# Compute the confusion matrix
confusion_mtx = confusion_matrix(y_val, predictions)

# Plot the row-normalized confusion matrix with the custom class names
plot_confusion_matrix(confusion_mtx, classes=class_names, normalize=True)

In [None]:
# Evaluate on the held-out test set: threshold the sigmoid outputs at 0.5.
predictions = model.predict(X_test_prep)
predictions = [1 if x>0.5 else 0 for x in predictions]

accuracy = accuracy_score(y_test, predictions)
# This is the test split; the original message labelled it "Val".
print('Test Accuracy = %.2f' % accuracy)

confusion_mtx = confusion_matrix(y_test, predictions)
plot_confusion_matrix(confusion_mtx, classes=class_names, normalize=False)

In [None]:
# Loss and accuracy on the validation generator (cropped VAL images).
# Note: rebinds `val_loss`, which previously held the per-epoch history list.
val_loss, val_accuracy = model.evaluate(validation_generator)
print(f"Validation Accuracy: {val_accuracy:.2f}")
print(f"Validation Loss: {val_loss:.2f}")

In [None]:
from ipywidgets import interact, IntSlider

# Interactive browser over the test set. Requires the trained `model`,
# X_test_prep / X_test_crop / y_test and the `labels` name dict.

def predict_and_show(image_index):
  """
  Predict the class of one test image and show it with the predicted and
  true class names in the title.

  Args:
    image_index: position of the image in the test arrays.
  """
  prediction = model.predict(np.expand_dims(X_test_prep[image_index], axis=0))
  predicted_class = 1 if float(prediction.ravel()[0]) > 0.5 else 0
  true_class = int(y_test[image_index])

  # Display the cropped image rather than X_test_prep: the preprocessed array
  # is mean-centred float data that imshow clips to [0, 1]. This also matches
  # the misclassified-images cell, which shows X_test_crop.
  plt.imshow(X_test_crop[image_index])
  plt.title(f"Prediction: {labels[predicted_class]}, True: {labels[true_class]}")
  plt.show()

# Create a slider widget for selecting an image index
image_slider = IntSlider(min=0, max=len(X_test_prep)-1, step=1, value=0)

# Use interact to link the slider to the predict_and_show function
interact(predict_and_show, image_index=image_slider);

In [None]:
# Positions of the test images whose predicted class differs from the true one.
ind_list = np.flatnonzero(np.asarray(y_test) != np.asarray(predictions))
if ind_list.size == 0:
    print('There are no missclassified images.')
else:
    for i in ind_list:
        plt.figure()
        plt.imshow(X_test_crop[i])
        plt.xticks([])
        plt.yticks([])
        # The indices refer to the test set, so the true class must come from
        # y_test (the original read y_val, showing labels of unrelated images).
        plt.title(f'Actual class: {y_test[i]}\nPredicted class: {predictions[i]}')
        plt.show()

In [None]:
# Persist the trained model to an HDF5 file in the working directory.
# NOTE(review): newer Keras releases presumably prefer the native `.keras` format — confirm.
model.save('2024-11-13_VGG_model.h5')