# **Red neuronal convolucional para el diagnóstico de nódulos tiroideos según la clasificación EU-TIRADS**

## Por Alejandro Martínez Hernández

### Notebook 3/3

# **Creación de modelos**

Para la siguiente parte se crearán los siguientes modelos de clasificación simple:
- model1
- model2

## **MobileNetV3**

### **Small Version**

#### **Adam Optimizer**

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, GlobalAveragePooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import MobileNetV3Small
from sklearn.utils.class_weight import compute_class_weight
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam

# Configurar generadores de imágenes con y sin aumento de datos
def setup_image_generators(base_dir, input_size=(224, 224), batch_size=5):
    """
    Build the training and validation image generators for one dataset folder.

    Both generators read from ``base_dir`` through ``flow_from_directory`` with
    ``class_mode='binary'`` and share the same 80/20 ``validation_split``. Only
    the training generator applies data augmentation.

    Pixels are deliberately left unscaled: these generators feed the
    MobileNetV3 model built by ``create_model``, and Keras' MobileNetV3
    includes its own preprocessing layer by default and expects inputs in the
    [0, 255] range. Rescaling to [0, 1] here would be applied on top of it.

    Args:
    base_dir (str): Directory passed to ``flow_from_directory``.
    input_size (tuple): Target (height, width) the images are resized to.
    batch_size (int): Number of images per batch, used by both generators.

    Returns:
    train_generator, validation_generator: Iterators over the 'training' and
        'validation' subsets respectively.
    """
    validation_fraction = 0.2  # share of the images held out for validation

    # Augmentation is applied to the training subset only.
    train_datagen = ImageDataGenerator(
        rotation_range=20,
        width_shift_range=0.1,
        height_shift_range=0.1,
        shear_range=5,
        zoom_range=[0.5, 1.5],
        horizontal_flip=True,
        vertical_flip=True,
        fill_mode='constant',
        validation_split=validation_fraction
    )

    # Validation images are served untouched; the split fraction matches the
    # training generator so both partition the files the same way.
    validation_datagen = ImageDataGenerator(validation_split=validation_fraction)

    # Options common to both subsets; batch_size is now actually honoured.
    flow_options = dict(
        target_size=input_size,
        batch_size=batch_size,
        class_mode='binary',
    )

    train_generator = train_datagen.flow_from_directory(
        base_dir, subset='training', **flow_options
    )
    validation_generator = validation_datagen.flow_from_directory(
        base_dir, subset='validation', **flow_options
    )

    return train_generator, validation_generator

def create_model(input_shape=(224, 224, 3)):
    """
    Build and compile a binary classifier on top of a frozen MobileNetV3Small.

    The ImageNet-pretrained backbone (without its classification top) is
    followed by global average pooling, a 10-unit ReLU layer, 50% dropout and
    a single sigmoid output unit.

    Args:
    input_shape (tuple): Shape of the input images passed to the backbone.

    Returns:
    model: Sequential model compiled with Adam (learning rate 0.001),
        binary cross-entropy loss and the accuracy metric.
    """
    backbone = MobileNetV3Small(
        input_shape=input_shape,
        weights='imagenet',
        include_top=False,
    )
    # Freeze the backbone so that only the new head is trained.
    backbone.trainable = False

    classifier = Sequential()
    classifier.add(backbone)
    classifier.add(GlobalAveragePooling2D())
    classifier.add(Dense(10, activation='relu'))
    classifier.add(Dropout(0.5))
    classifier.add(Dense(1, activation='sigmoid'))  # single probability output

    classifier.compile(
        loss='binary_crossentropy',
        optimizer=Adam(learning_rate=0.001),
        metrics=['accuracy'],
    )
    return classifier

# Path to the image directory
base_directory = 'db_unal/organized/images/cropped'
train_gen, val_gen = setup_image_generators(base_directory)

# Build and compile the model
model = create_model()

# Compute 'balanced' class weights to compensate for class imbalance in the
# training subset.
class_labels = np.unique(train_gen.classes)
weights = compute_class_weight(
    class_weight='balanced',
    classes=class_labels,
    y=train_gen.classes)

# Key each weight by its actual class label (not by its position in the
# array), so a class missing from the training subset cannot silently shift
# the weights onto the wrong label. Plain int/float keep the dict free of
# NumPy scalar types.
class_weights = {
    int(label): float(weight) for label, weight in zip(class_labels, weights)
}

# Stop training once the validation loss has stopped improving.
early_stopping = EarlyStopping(
    monitor='val_loss',         # watch the validation loss
    min_delta=0.01,             # smallest change that counts as an improvement
    patience=50,                # epochs without improvement before stopping
    verbose=1,                  # report when training is stopped
    mode='min',                 # lower loss is better
    restore_best_weights=True   # roll back to the best epoch instead of keeping
                                # the weights from `patience` epochs later
)

# Train the model
history = model.fit(
    train_gen,
    validation_data=val_gen,
    epochs=500,
    class_weight=class_weights,  # per-class loss weighting
    callbacks=[early_stopping]
)

# Performance Metrics

import matplotlib.pyplot as plt

# Pull the per-epoch curves that Keras recorded during training.
recorded = history.history
acc, val_acc = recorded['accuracy'], recorded['val_accuracy']
loss, val_loss = recorded['loss'], recorded['val_loss']

# One x-axis position per recorded epoch (zero-based).
epochs = range(len(acc))

# Draw one figure per metric, overlaying the training and validation curves.
for metric_name, train_curve, val_curve in (
    ('Accuracy', acc, val_acc),
    ('Loss', loss, val_loss),
):
    plt.figure(figsize=(10, 6))
    plt.plot(epochs, train_curve, label=f'Training {metric_name}')
    plt.plot(epochs, val_curve, label=f'Validation {metric_name}')
    plt.title(f'Training and Validation {metric_name}')
    plt.xlabel('Epochs')
    plt.ylabel(metric_name)
    plt.legend()

#### **SGD optimizer**

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import os
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import MobileNetV3Small
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import SGD

# Training-time augmentation. No `rescale` is applied: Keras' MobileNetV3
# includes its own preprocessing layer by default and expects raw pixel
# values in the [0, 255] range.
train_datagen = ImageDataGenerator(
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
    zoom_range=0.2,
    fill_mode='constant',
    validation_split=0.2  # hold out 20% of the data for validation
)

# Validation images must not be augmented, otherwise the validation metrics
# are computed on randomly distorted data. The split fraction matches the
# training generator so both partition the files the same way.
validation_datagen = ImageDataGenerator(validation_split=0.2)

data_dir = 'db_unal/organized/images/raw'

train_generator = train_datagen.flow_from_directory(
    data_dir,
    target_size=(224, 224),
    class_mode='binary',
    subset='training'  # training portion of the split
)

validation_generator = validation_datagen.flow_from_directory(
    data_dir,
    target_size=(224, 224),
    class_mode='binary',
    subset='validation'  # validation portion of the split
)

# Load the ImageNet-pretrained backbone without its classification top
base_model = MobileNetV3Small(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Freeze every backbone layer so only the new head is trained
for layer in base_model.layers:
    layer.trainable = False

# Add the new classification head
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation='relu')(x)
predictions = Dense(1, activation='sigmoid')(x)

model = Model(inputs=base_model.input, outputs=predictions)

# Compile the model
# NOTE(review): learning_rate=0.5 with momentum=0.9 is unusually high for SGD
# and may not converge; left unchanged here, confirm it is intentional.
model.compile(optimizer=SGD(learning_rate=0.5, momentum=0.9), loss='binary_crossentropy', metrics=['accuracy'])

# Train the model; the generators define the number of steps per epoch
history = model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=25
)

# Pull the per-epoch curves that Keras recorded during training.
recorded = history.history
acc, val_acc = recorded['accuracy'], recorded['val_accuracy']
loss, val_loss = recorded['loss'], recorded['val_loss']

# One x-axis position per recorded epoch (zero-based).
epochs = range(len(acc))

# Draw one figure per metric, overlaying the training and validation curves.
for metric_name, train_curve, val_curve in (
    ('Accuracy', acc, val_acc),
    ('Loss', loss, val_loss),
):
    plt.figure(figsize=(10, 6))
    plt.plot(epochs, train_curve, label=f'Training {metric_name}')
    plt.plot(epochs, val_curve, label=f'Validation {metric_name}')
    plt.title(f'Training and Validation {metric_name}')
    plt.xlabel('Epochs')
    plt.ylabel(metric_name)
    plt.legend()

### **Large Version**

#### **Adam Optimizer**

#### **SGD optimizer**

## **ResNet**

### **ResNet50**

#### **Adam Optimizer**

#### **SGD Optimizer**

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import os
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import SGD

# ImageNet-pretrained ResNet50 expects the preprocessing it was trained with,
# not a plain 1/255 rescale, so the matching Keras helper is applied instead.
from tensorflow.keras.applications.resnet50 import preprocess_input

# Training-time augmentation
train_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
    validation_split=0.2  # hold out 20% of the data for validation
)

# Validation images get the same preprocessing but no augmentation, so the
# validation metrics are computed on undistorted data. The split fraction
# matches the training generator so both partition the files the same way.
validation_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    validation_split=0.2
)

data_dir = 'db_unal/organized/images/cropped'

train_generator = train_datagen.flow_from_directory(
    data_dir,
    target_size=(224, 224),
    batch_size=20,
    class_mode='binary',
    subset='training'  # training portion of the split
)

validation_generator = validation_datagen.flow_from_directory(
    data_dir,
    target_size=(224, 224),
    batch_size=6,
    class_mode='binary',
    subset='validation'  # validation portion of the split
)

# Load the ImageNet-pretrained backbone without its classification top
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Freeze every backbone layer so only the new head is trained
for layer in base_model.layers:
    layer.trainable = False

# Add the new classification head
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation='relu')(x)
predictions = Dense(1, activation='sigmoid')(x)

model = Model(inputs=base_model.input, outputs=predictions)

# Compile the model
model.compile(optimizer=SGD(learning_rate=0.001, momentum=0.9), loss='binary_crossentropy', metrics=['accuracy'])

# Number of batches needed to cover each subset once (last batch may be partial)
train_steps = int(np.ceil(train_generator.samples / train_generator.batch_size))
val_steps = int(np.ceil(validation_generator.samples / validation_generator.batch_size))

# Train the model
history = model.fit(
    train_generator,
    steps_per_epoch=train_steps,
    validation_data=validation_generator,
    validation_steps=val_steps,
    epochs=25
)

# Pull the per-epoch curves that Keras recorded during training.
recorded = history.history
acc, val_acc = recorded['accuracy'], recorded['val_accuracy']
loss, val_loss = recorded['loss'], recorded['val_loss']

# One x-axis position per recorded epoch (zero-based).
epochs = range(len(acc))

# Draw one figure per metric, overlaying the training and validation curves.
for metric_name, train_curve, val_curve in (
    ('Accuracy', acc, val_acc),
    ('Loss', loss, val_loss),
):
    plt.figure(figsize=(10, 6))
    plt.plot(epochs, train_curve, label=f'Training {metric_name}')
    plt.plot(epochs, val_curve, label=f'Validation {metric_name}')
    plt.title(f'Training and Validation {metric_name}')
    plt.xlabel('Epochs')
    plt.ylabel(metric_name)
    plt.legend()

### **ResNet101**

#### **Adam Optimizer**

#### **SGD Optimizer**

## **VGG**

### **VGG16**

### **VGG19**

## **Xception**

## **DenseNet**