In [9]:
import os
import numpy as np
import plotly.express as px

from sklearn.neighbors import KNeighborsClassifier
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA

import tensorflow as tf
from tensorflow.keras.applications import VGG16, InceptionV3, ResNet50
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Dropout, GlobalAveragePooling2D, BatchNormalization, Add, ReLU, Lambda
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.models import Model

AUTOTUNE = tf.data.AUTOTUNE
IMG_SIZE = (224, 224)
EMBED_DIM = 128
BATCH_SIZE = 128
EPOCHS = 150
TEMPERATURE = 0.1


## Importación de datos

In [10]:
data_dir = "../data/processed/train"

def get_generators(data_dir, preprocess_fn, target_size=(224, 224), batch_size=128, validation_split=0.15):
    datagen = ImageDataGenerator(
        preprocessing_function=preprocess_fn,
        rotation_range=0.2,
        width_shift_range=0.1,
        height_shift_range=0.1,
        zoom_range=0.2,
        brightness_range=[0.8, 1.2],
        shear_range=0.2,
        horizontal_flip=True,
        validation_split=validation_split
    )

    train_generator = datagen.flow_from_directory(
        data_dir,
        target_size=target_size,
        batch_size=batch_size,
        class_mode='categorical',
        subset='training',
        shuffle=True
    )

    val_generator = datagen.flow_from_directory(
        data_dir,
        target_size=target_size,
        batch_size=batch_size,
        class_mode='categorical',
        subset='validation',
        shuffle=False
    )

    return train_generator, val_generator

In [11]:
train_generator, val_generator = get_generators(data_dir, lambda x: x/255.)
num_classes = len(train_generator.class_indices)
class_names = list(train_generator.class_indices.keys())

Found 7662 images belonging to 2 classes.
Found 1351 images belonging to 2 classes.


## Modelo generador de embeddings

In [12]:
def contrastive_encoder(input_shape=(224, 224, 3), embedding_dim=128):
    inputs = Input(shape=input_shape)

    # Bloque 1
    x = Conv2D(64, 3, padding='same', use_bias=False)(inputs)
    x = BatchNormalization()(x)
    x = ReLU()(x)
    x = Conv2D(64, 3, padding='same', use_bias=False)(x)
    x = BatchNormalization()(x)
    s = Conv2D(64, 1, padding='same', use_bias=False)(inputs)
    s = BatchNormalization()(s)
    x = Add()([x, s])
    x = ReLU()(x)
    x = MaxPooling2D()(x)

    # Bloque 2
    y = Conv2D(128, 3, padding='same', use_bias=False)(x)
    y = BatchNormalization()(y)
    y = ReLU()(y)
    y = Conv2D(128, 3, padding='same', use_bias=False)(y)
    y = BatchNormalization()(y)
    s2 = Conv2D(128, 1, padding='same', use_bias=False)(x)
    s2 = BatchNormalization()(s2)
    y = Add()([y, s2])
    y = ReLU()(y)
    y = MaxPooling2D()(y)

    # Bloque 3
    z = Conv2D(256, 3, padding='same', use_bias=False)(y)
    z = BatchNormalization()(z)
    z = ReLU()(z)
    z = Conv2D(256, 3, padding='same', use_bias=False)(z)
    z = BatchNormalization()(z)
    s3 = Conv2D(256, 1, padding='same', use_bias=False)(y)
    s3 = BatchNormalization()(s3)
    z = Add()([z, s3])
    z = ReLU()(z)

    z = GlobalAveragePooling2D()(z)
    z = Dense(512, activation='relu')(z)
    z = BatchNormalization()(z)

    # Proyección (cabeza contrastiva)
    p = Dense(EMBED_DIM, activation='relu')(z)
    p = Dense(EMBED_DIM)(p)
    outputs = Lambda(lambda t: tf.math.l2_normalize(t, axis=1), name="proj_norm")(p)

    return Model(inputs, outputs, name="ContrastiveEncoder")


In [13]:
class SupConLoss(tf.keras.losses.Loss):
    def __init__(self, temperature=0.1, name="supcon"):
        super().__init__(name=name)
        self.temperature = temperature

    def call(self, y_true, features):
        # features: [batch, dim]; y_true: [batch]
        features = tf.math.l2_normalize(features, axis=1)
        batch_size = tf.shape(features)[0]

        sim = tf.matmul(features, features, transpose_b=True)  # [B, B]
        sim = sim / self.temperature

        labels = tf.reshape(y_true, [-1, 1])  # [B, 1]
        mask = tf.equal(labels, tf.transpose(labels))  # [B, B]
        mask = tf.cast(mask, tf.float32)

        # Remove self-contrast
        logits_mask = tf.ones_like(mask) - tf.eye(batch_size)
        mask = mask * logits_mask

        # Log-softmax denom con exclusión self
        sim_max = tf.reduce_max(sim, axis=1, keepdims=True)
        sim = sim - sim_max
        exp_sim = tf.exp(sim) * logits_mask
        denom = tf.reduce_sum(exp_sim, axis=1, keepdims=True) + 1e-9
        log_prob = sim - tf.math.log(denom)

        # Promedio de log-prob de positivos por ancla
        pos_count = tf.reduce_sum(mask, axis=1) + 1e-9
        mean_log_pos = tf.reduce_sum(mask * log_prob, axis=1) / pos_count

        loss = -tf.reduce_mean(mean_log_pos)
        return loss


## Entrenar representaciones

In [16]:
def train_supcon(model, train_generator, val_generator, loss_fn, optimizer, epochs=50):
    train_loss = tf.keras.metrics.Mean(name="train_loss")
    val_loss = tf.keras.metrics.Mean(name="val_loss")

    for epoch in range(epochs):
        train_loss.reset_state()
        val_loss.reset_state()

        # Entrenamiento
        for images, labels in train_generator:
            with tf.GradientTape() as tape:
                embeddings = model(images, training=True)
                loss = loss_fn(labels, embeddings)
            grads = tape.gradient(loss, model.trainable_variables)
            optimizer.apply_gradients(zip(grads, model.trainable_variables))
            train_loss.update_state(loss)

        # Validación
        for images, labels in val_generator:
            embeddings = model(images, training=False)
            loss = loss_fn(labels, embeddings)
            val_loss.update_state(loss)

        print(f"Epoch {epoch+1}/{epochs} - Train Loss: {train_loss.result():.4f} - Val Loss: {val_loss.result():.4f}")


In [17]:
encoder = contrastive_encoder(input_shape=(224,224,3), embedding_dim=EMBED_DIM)
loss_fn = SupConLoss(temperature=TEMPERATURE)
optimizer = Adam(learning_rate=8e-4)
train_supcon(encoder, train_generator, val_generator, loss_fn, optimizer, epochs=120)

InvalidArgumentError: {{function_node __wrapped__Sub_device_/job:localhost/replica:0/task:0/device:CPU:0}} Incompatible shapes: [256,256] vs. [128,128] [Op:Sub] name: 

## Entrenar clasificador

In [None]:
x = encoder.output
clf = Dense(num_classes, activation="softmax")(x)
classifier = Model(encoder.input, clf)
classifier.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])
classifier.fit(train_generator, validation_data=val_generator, epochs=20)





NameError: name 'num_classes' is not defined

## Evaluate KNN

In [None]:
def evaluate_knn(model, val_generator, class_names, k=5):
    embs, labs = [], []
    # recorrer todo el generador de validación
    for imgs, labels in val_generator:
        e = model(imgs, training=False).numpy()
        embs.append(e)
        # si labels es one-hot, convertir a entero con argmax
        if labels.ndim > 1:
            labs.append(np.argmax(labels, axis=1))
        else:
            labs.append(labels)

    # concatenar embeddings y etiquetas
    X = np.concatenate(embs, axis=0)
    y = np.concatenate(labs, axis=0)

    # entrenar y evaluar KNN
    knn = KNeighborsClassifier(n_neighbors=k, metric='cosine')
    knn.fit(X, y)
    y_pred = knn.predict(X)

    print(classification_report(y, y_pred, target_names=class_names))
    print(confusion_matrix(y, y_pred))


In [None]:
data_dir = "/ruta/a/HAM10000"  # carpetas por clase
evaluate_knn(encoder, val_generator, class_names, k=7)  # reutiliza val_ds y class_names


In [None]:
def visualize_embeddings_3d(model, val_ds, class_names, method="tsne"):
    # 1. Extraer embeddings y etiquetas
    embs, labs = [], []
    for imgs, labels in val_ds:
        e = model(imgs, training=False).numpy()
        embs.append(e)
        labs.append(labels.numpy())
    X = np.concatenate(embs, axis=0)
    y = np.concatenate(labs, axis=0)

    # 2. Reducir a 3D
    if method == "tsne":
        reducer = TSNE(n_components=3, perplexity=30, learning_rate=200, random_state=42)
    else:
        reducer = PCA(n_components=3)
    X_reduced = reducer.fit_transform(X)

    # 3. Visualizar con Plotly
    fig = px.scatter_3d(
        x=X_reduced[:,0], y=X_reduced[:,1], z=X_reduced[:,2],
        color=[class_names[i] for i in y],
        title=f"Embeddings en 3D ({method.upper()})",
        opacity=0.7
    )
    fig.show()
