In [1]:
# ==== CELL 0: Mount Google Drive ====
from google.colab import drive
# If Drive is already mounted, Colab will show a message. Change force_remount=True if you want to force a remount.
# NOTE(review): the call below already passes force_remount=True.
drive.mount('/content/drive', force_remount=True)

# Adjust if needed:
DATASET_DIR = '/content/drive/MyDrive/p_1_image'  # <-- folder that contains 'benign/' and 'malignant/'


Mounted at /content/drive


In [2]:
# ==== CELL 0: Mount Google Drive ====
# NOTE(review): this cell is an exact duplicate of the previous cell; it mounts Drive
# and assigns DATASET_DIR a second time with the same values.
from google.colab import drive
# If Drive is already mounted, Colab will show a message. Change force_remount=True if you want to force a remount.
# NOTE(review): the call below already passes force_remount=True.
drive.mount('/content/drive', force_remount=True)

# Adjust if needed:
DATASET_DIR = '/content/drive/MyDrive/p_1_image'  # <-- folder that contains 'benign/' and 'malignant/'


Mounted at /content/drive


In [3]:
# ==== CELL 1: Install dependencies (run only once) ====
# Note: opencv-python-headless is installed so that cv2 can be used in Colab
!pip install -q tensorflow scikit-image imutils reportlab opencv-python-headless


[?25l   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m0.0/2.0 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[90m‚ï∫[0m[90m‚îÅ‚îÅ‚îÅ[0m [32m1.8/2.0 MB[0m [31m53.5 MB/s[0m eta [36m0:00:01[0m[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m2.0/2.0 MB[0m [31m39.2 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
# ==== CELL 2: Imports ====
import os, sys, math, random, io
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from glob import glob
from tqdm import tqdm
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.utils.class_weight import compute_class_weight
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
from skimage import io as skio, color, filters, feature, measure
from skimage.transform import resize as skresize
import cv2
import tensorflow as tf
from tensorflow.keras import layers, models, callbacks
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.preprocessing import image as kimage
from reportlab.lib.pagesizes import landscape, A4
from reportlab.lib.units import mm
from reportlab.pdfgen import canvas
from reportlab.lib import utils
from reportlab.lib.styles import ParagraphStyle
from reportlab.platypus import Paragraph, Frame
from datetime import datetime
from IPython.display import display
from google.colab import files
import seaborn as sns
import matplotlib

# Report the TensorFlow version and the GPU devices TensorFlow can see
# (an empty list means no GPU is visible to this runtime).
print("TF version:", tf.__version__)
print("GPU available:", tf.config.list_physical_devices('GPU'))


In [None]:
# ==== CELL 3: Global parameters ====
IMAGE_SIZE = (224, 224)  # (height, width) used by load_img, the tf.data pipeline and the model input
BATCH_SIZE = 32  # batch size passed to image_dataset_from_directory
classes = ['benign', 'malignant']  # class sub-folder names; the list index is used as the label id


In [None]:
# ==== CELL 4: Utilidades - load_img, compute_image_features, ensure_img_tensor, draw_image_keep_aspect ====
def load_img(path, target_size=IMAGE_SIZE):
    """Load an image from disk and return it as a uint8 ``H x W x 3`` numpy array.

    Args:
        path: Location of the image file, passed to ``skimage.io.imread``.
        target_size: ``(height, width)`` the image is resized to.

    Returns:
        A ``uint8`` array of shape ``target_size + (3,)`` with values in 0-255.

    Raises:
        ValueError: If the reader returns ``None`` for ``path``.
    """
    img = skio.imread(path)
    if img is None:
        raise ValueError(f"Imagen {path} no pudo ser leida.")
    if img.ndim == 2:  # grayscale -> replicate into three channels
        img = color.gray2rgb(img)
    elif img.ndim == 3 and img.shape[-1] == 4:
        # RGBA (e.g. PNG with transparency): drop alpha so the documented
        # 3-channel contract holds and downstream models get the shape they expect.
        img = img[..., :3]
    elif img.ndim == 3 and img.shape[-1] == 2:
        # Gray + alpha: keep the luminance plane only and expand it to RGB.
        img = color.gray2rgb(img[..., 0])
    # skimage's resize returns floats in [0, 1]; scale back to the 0-255 uint8 range.
    img_resized = skresize(img, target_size, anti_aliasing=True)
    img_resized = (img_resized * 255).astype('uint8')
    return img_resized

def compute_image_features(img):
    """Compute the ten simple per-image descriptors used by the EDA.

    Args:
        img: Image as an array-like, ``H x W`` (grayscale) or ``H x W x 3``.

    Returns:
        A list of ten floats, in the order expected by ``feat_cols``:
        ``mean_r``, ``mean_g``, ``mean_b`` (per-channel means), ``brightness``
        (mean of the grayscale image), ``std`` (std of the grayscale image),
        ``entropy`` (Shannon entropy of the 8-bit grayscale image),
        ``edge_sum`` (sum of the ``cv2.Canny`` edge map, falling back to the
        sum of a Sobel filter if Canny raises), ``lbp_var`` (variance of the
        normalised histogram of uniform LBP codes, falling back to the variance
        of the 8-bit grayscale image), ``contrast`` (std of the grayscale image
        scaled by 255) and ``aspect`` (height / width).

    Raises:
        ValueError: If the input is not 2-D or 3-D.
    """
    pixels = np.array(img)
    if pixels.ndim == 2:
        pixels = np.stack([pixels, pixels, pixels], axis=-1)
    if pixels.ndim != 3:
        raise ValueError(f"Imagen inv√°lida para features: shape {pixels.shape}")

    # Grayscale as produced by skimage's rgb2gray, plus an 8-bit copy shared by several features.
    gray = color.rgb2gray(pixels)
    gray_u8 = (gray * 255).astype('uint8')

    channel_means = [float(np.mean(pixels[:, :, ch])) for ch in range(3)]
    brightness = float(np.mean(gray))
    std_int = float(np.std(gray))
    entropy = float(measure.shannon_entropy(gray_u8))

    # Edge strength: Canny when available, Sobel magnitude otherwise.
    try:
        edge_sum = float(np.sum(cv2.Canny(gray_u8, 100, 200)))
    except Exception:
        edge_sum = float(np.sum(filters.sobel(gray)))

    # Texture: spread of the uniform-LBP code histogram.
    try:
        lbp_codes = feature.local_binary_pattern(gray_u8, P=8, R=1, method='uniform')
        lbp_hist, _ = np.histogram(lbp_codes.ravel(), bins=np.arange(0, 12), density=True)
        lbp_var = float(np.var(lbp_hist))
    except Exception:
        lbp_var = float(np.var(gray_u8))

    contrast = float(np.std((gray * 255).astype('float32')))
    height, width = pixels.shape[:2]
    aspect = float(height / (width + 1e-9))  # epsilon guards a zero width

    return channel_means + [brightness, std_int, entropy, edge_sum, lbp_var, contrast, aspect]

def ensure_img_tensor(img):
    """Convert a PIL image or array-like into a rank-3 float32 TensorFlow tensor.

    A 2-D input is replicated into three channels; a 4-D input is reduced to
    its first element.

    Raises:
        ValueError: If the array is not rank 3 after those adjustments.
    """
    pixels = np.array(img)
    if pixels.ndim == 2:
        # Grayscale: copy the single plane into three identical channels.
        pixels = np.repeat(pixels[..., np.newaxis], 3, axis=-1)
    elif pixels.ndim == 4:
        # Batched input: keep only the first item.
        pixels = pixels[0]
    if pixels.ndim != 3:
        raise ValueError(f"Imagen inv√°lida: dimensiones {pixels.shape}, se esperaba (H,W,3)")
    return tf.convert_to_tensor(pixels, dtype=tf.float32)

def draw_image_keep_aspect(c, img_array, x, y, max_width, max_height):
    """Draw an image on a ReportLab canvas, scaled to fit a box while keeping its aspect ratio.

    Args:
        c: ReportLab canvas to draw on.
        img_array: Either a numpy array (``H x W`` or ``H x W x C``, cast to
            uint8) or something ``PIL.Image.open`` accepts (e.g. a file path).
        x, y: Top-left corner of the target box (ReportLab's origin is
            bottom-left, so the image is drawn from ``y - draw_h`` upwards).
        max_width, max_height: Size of the box the image must fit in.

    Returns:
        ``(draw_w, draw_h)``: the size the image was actually drawn at.
    """
    import tempfile
    from PIL import Image

    img = img_array
    if isinstance(img, np.ndarray):
        if img.ndim == 2:
            img = np.stack([img] * 3, axis=-1)
        pil = Image.fromarray((img).astype('uint8'))
    else:
        pil = Image.open(img)

    # A unique temp file avoids clashes between concurrent/re-entrant calls that
    # a fixed "/tmp/..." name would cause.
    fd, tmp_path = tempfile.mkstemp(prefix="_tmp_report_image_", suffix=".png")
    os.close(fd)
    try:
        pil.save(tmp_path, format='PNG')
        iw, ih = utils.ImageReader(tmp_path).getSize()
        # Largest uniform scale that still fits inside the box.
        ratio = min(max_width / iw, max_height / ih)
        draw_w = iw * ratio
        draw_h = ih * ratio
        c.drawImage(tmp_path, x, y - draw_h, width=draw_w, height=draw_h, preserveAspectRatio=True, mask='auto')
    finally:
        # Always release the image handle and the temp file, even if drawing failed.
        pil.close()
        try:
            os.remove(tmp_path)
        except OSError:
            pass  # best effort: a leftover temp file must not break report generation
    return draw_w, draw_h


In [None]:
# ==== CELL 5: Collect image paths and build the DataFrame ====
# Only regular files with these extensions are indexed, so stray entries
# (sub-folders, .DS_Store, notes, ...) never reach load_img(). This is the same
# set of extensions the per-class file counts in the EDA cell look for.
IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg')

paths = []
labels = []
for cls in classes:
    folder = os.path.join(DATASET_DIR, cls)
    if not os.path.exists(folder):
        print(f"Atenci√≥n: carpeta {folder} no existe. Revisa DATASET_DIR y nombres de clases.")
        continue
    # sorted(): glob order depends on the filesystem; sorting keeps the DataFrame
    # (and every seeded split derived from it) reproducible between runs.
    files_list = sorted(
        f for f in glob(os.path.join(folder, '*'))
        if os.path.isfile(f) and f.lower().endswith(IMAGE_EXTENSIONS)
    )
    paths.extend(files_list)
    labels.extend([cls] * len(files_list))

df = pd.DataFrame({'path': paths, 'label': labels})
print("Total im√°genes:", len(df))
print(df['label'].value_counts())


In [None]:
# ==== CELL 6: EDA completo (llamada autom√°tica) ====
# 6.1 - Mostrar ejemplos
def show_examples(df, n_per_class=4):
    """Plot a grid of sample images, one row per entry of the global ``classes``.

    Args:
        df: DataFrame with a ``path`` and a ``label`` column.
        n_per_class: Maximum number of images drawn per class (fewer are shown
            when a class has fewer rows). Sampling uses ``random_state=42``.
    """
    # One row per class; with the default two classes this gives the same
    # 2-row grid and 6-inch height as before.
    n_rows = max(1, len(classes))
    plt.figure(figsize=(n_per_class * 3, 3 * n_rows))
    for i, cls in enumerate(classes):
        cls_df = df[df['label'] == cls]
        sample = cls_df.sample(n=min(n_per_class, len(cls_df)), random_state=42).reset_index(drop=True)
        for j, row in sample.iterrows():
            plt.subplot(n_rows, n_per_class, i * n_per_class + j + 1)
            img = load_img(row['path'])
            plt.imshow(img)
            plt.axis('off')
            if j == 0:
                # Label only the first image of each row with the class name.
                plt.title(cls)
    plt.suptitle('Ejemplos por clase')
    plt.show()

# Show the example grid automatically
show_examples(df, n_per_class=4)

# 6.2 - Extract features for the EDA
features_list = []
for idx, row in tqdm(df.iterrows(), total=len(df), desc="Extrayendo features para EDA"):
    try:
        img = load_img(row['path'])
        feats = compute_image_features(img)
    except Exception as e:
        # Keep the row (as NaNs) so features stay aligned with df, but report the failure.
        print("Error leyendo", row['path'], e)
        feats = [np.nan] * 10
    features_list.append(feats)

feat_cols = ['mean_r', 'mean_g', 'mean_b', 'brightness', 'std', 'entropy', 'edge_sum', 'lbp_var', 'contrast', 'aspect']
feat_df = pd.DataFrame(features_list, columns=feat_cols)
eda_df = pd.concat([df.reset_index(drop=True), feat_df], axis=1)

# 6.3 - Statistics and distribution
print("\nEstad√≠sticas de features:")
display(eda_df[feat_cols].describe().T)

print("\nDistribuci√≥n de clases:")
display(eda_df['label'].value_counts())

# Class folders, derived from DATASET_DIR so that adjusting DATASET_DIR in the
# first cell is enough (previously these were hard-coded absolute paths).
path_benign = os.path.join(DATASET_DIR, "benign")
path_malign = os.path.join(DATASET_DIR, "malignant")

print("üìÇ ESTRUCTURA DE DIRECTORIOS:")
print(f" ‚Ä¢ Ruta benigno: {path_benign}")
print(f" ‚Ä¢ Ruta maligno: {path_malign}")

# Count images (by file extension)
benign_files = [f for f in os.listdir(path_benign) if f.lower().endswith(('png','jpg','jpeg'))]
malign_files = [f for f in os.listdir(path_malign) if f.lower().endswith(('png','jpg','jpeg'))]

# Class distribution
plt.figure(figsize=(6,4))
plt.bar(["Benigno","Maligno"], [len(benign_files), len(malign_files)])
plt.title("üìä DISTRIBUCI√ìN POR CLASE")
plt.ylabel("Cantidad de im√°genes")
plt.show()

# ===============================================
# üìå NUEVO: AN√ÅLISIS DE CONTRASTE POR CLASE
# ===============================================

def calcular_contraste(path, files, n=50):
    """Return the grayscale pixel standard deviation of the first ``n`` images.

    Args:
        path: Folder that contains the images.
        files: File names inside ``path``.
        n: Number of leading entries of ``files`` to read.

    Returns:
        A list with one ``std()`` value per image that ``cv2.imread`` could
        read; entries for which it returns ``None`` are skipped.
    """
    grayscale_images = (
        cv2.imread(os.path.join(path, img_name), cv2.IMREAD_GRAYSCALE)
        for img_name in files[:n]
    )
    return [gray.std() for gray in grayscale_images if gray is not None]

# Per-image contrast samples for each class (calcular_contraste reads at most
# its default of 50 files per class).
ben_contrast = calcular_contraste(path_benign, benign_files)
mal_contrast = calcular_contraste(path_malign, malign_files)

# Side-by-side box plot of the contrast values of both classes.
plt.figure(figsize=(8,5))
plt.boxplot([ben_contrast, mal_contrast], labels=["Benigno","Maligno"])
plt.title("üìà Contraste por Clase (Desviaci√≥n Est√°ndar de Intensidad)")
plt.ylabel("Contraste (std p√≠xeles)")
plt.show()

# ===============================================
# üìå NUEVO: HISTOGRAMAS DE INTENSIDAD POR CLASE
# ===============================================

def plot_histograms(image_paths, title, base_dir=None, n=50):
    """Plot a 30-bin grayscale intensity histogram pooled over several images.

    Args:
        image_paths: File names (relative to ``base_dir``) of the images.
        title: Figure title.
        base_dir: Folder that contains the files. When omitted, the legacy
            behaviour is kept: ``path_benign`` is used if ``"Benigno"`` appears
            in ``title``, otherwise ``path_malign``.
        n: Number of leading entries of ``image_paths`` to read.
    """
    if base_dir is None:
        # Backward-compatible fallback for callers that only pass a title.
        base_dir = path_benign if "Benigno" in title else path_malign

    plt.figure(figsize=(8,5))
    pixel_chunks = []
    for img_name in image_paths[:n]:
        img = cv2.imread(os.path.join(base_dir, img_name), cv2.IMREAD_GRAYSCALE)
        if img is not None:
            pixel_chunks.append(img.ravel())

    # Concatenate once instead of extending a Python list pixel by pixel.
    all_pixels = np.concatenate(pixel_chunks) if pixel_chunks else []

    plt.hist(all_pixels, bins=30, alpha=0.7)
    plt.title(title)
    plt.xlabel("Intensidad")
    plt.ylabel("Frecuencia")
    plt.show()

# Pooled intensity histogram for each class (plot_histograms picks the folder
# from the word "Benigno" in the title).
plot_histograms(benign_files, "üìä Histograma Intensidad ‚Äì Benigno")
plot_histograms(malign_files, "üìä Histograma Intensidad ‚Äì Maligno")

# 6.4 - Correlation matrix of the EDA features, with the coefficient written in each cell (annotated heatmap)
corr = eda_df[feat_cols].corr()
plt.figure(figsize=(10, 8))
sns.heatmap(corr, annot=True, fmt=".2f", cmap='coolwarm', xticklabels=feat_cols, yticklabels=feat_cols)
plt.title('Matriz de correlaci√≥n (con pesos en cada celda)')
plt.show()


In [None]:
# ==== CELL 7: Build the tf.data training / validation datasets ====
# image_dataset_from_directory infers the labels from the class sub-folders.
# Both calls use the same seed, shuffle flag and validation_split; they differ
# only in `subset`.
train_ds = tf.keras.preprocessing.image_dataset_from_directory(
    DATASET_DIR,
    labels='inferred',
    label_mode='categorical',
    class_names=classes,
    image_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    shuffle=True,
    seed=42,
    validation_split=0.2,
    subset='training'
)

val_ds = tf.keras.preprocessing.image_dataset_from_directory(
    DATASET_DIR,
    labels='inferred',
    label_mode='categorical',
    class_names=classes,
    image_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    shuffle=True,
    seed=42,
    validation_split=0.2,
    subset='validation'
)

AUTOTUNE = tf.data.AUTOTUNE
train_ds = train_ds.cache().prefetch(AUTOTUNE)
val_ds = val_ds.cache().prefetch(AUTOTUNE)

# Class balance of the training subset (labels are one-hot, so argmax gives the class id)
labels_list = []
for x, y in train_ds.unbatch().as_numpy_iterator():
    labels_list.append(np.argmax(y))
# Explicit int dtype so an empty list does not become a float array that bincount rejects;
# minlength so a class that is absent from train still shows up as 0.
y_vals = np.array(labels_list, dtype=int)
counts = np.bincount(y_vals, minlength=len(classes))
print("Conteo train (benign, malignant):", counts)

# Class weights: start from neutral weights and overwrite the classes that are
# actually present. Keys are the real class ids (not the position inside
# np.unique), so a class missing from train cannot shift another class's weight.
class_weight_dict = {i: 1.0 for i in range(len(classes))}
present_classes = np.unique(y_vals)
if present_classes.size > 0:
    class_weights = compute_class_weight('balanced', classes=present_classes, y=y_vals)
    class_weight_dict.update({int(c): float(w) for c, w in zip(present_classes, class_weights)})
print("Class weights:", class_weight_dict)


In [None]:
# ==== CELL 8: Modelos base (CNN transfer + RF embeddings) ====
def build_transfer_model(input_shape=IMAGE_SIZE + (3,), base_trainable=False, dropout=0.3):
    """Build an EfficientNetB0 transfer-learning classifier with a 2-way softmax head.

    Args:
        input_shape: Shape of one input image, ``(height, width, channels)``.
        base_trainable: Value assigned to the backbone's ``trainable`` flag.
        dropout: Dropout rate applied right after global average pooling.

    Returns:
        An uncompiled Keras ``Model`` mapping an image batch to two class
        probabilities.
    """
    backbone = EfficientNetB0(weights='imagenet', include_top=False, input_shape=input_shape)
    backbone.trainable = base_trainable

    image_in = layers.Input(shape=input_shape)
    preprocessed = tf.keras.applications.efficientnet.preprocess_input(image_in)
    # training=False: the backbone is always called in inference mode.
    head = backbone(preprocessed, training=False)

    # Classification head: pooling -> dropout -> dense(128) -> dropout.
    for head_layer in (
        layers.GlobalAveragePooling2D(),
        layers.Dropout(dropout),
        layers.Dense(128, activation='relu'),
        layers.Dropout(0.2),
    ):
        head = head_layer(head)

    class_probs = layers.Dense(2, activation='softmax')(head)
    return models.Model(image_in, class_probs)

# Baseline CNN (transfer learning, backbone frozen)
model = build_transfer_model(base_trainable=False)
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4), loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

# Both callbacks monitor val_loss: EarlyStopping uses patience=3 and restores the
# best weights; ReduceLROnPlateau multiplies the learning rate by 0.5 with patience=2.
es = callbacks.EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
rlp = callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2)

EPOCHS = 50  # upper bound on epochs; the EarlyStopping callback may end training sooner
history = model.fit(train_ds, validation_data=val_ds, epochs=EPOCHS, callbacks=[es, rlp], class_weight=class_weight_dict)


In [None]:
# ==== CELL 9: Evaluate the baseline CNN on the validation data (val_ds converted to arrays) ====
# Materialise the dataset once as (image, one-hot label) pairs, then split it
# into an image array and a class-id array.
val_pairs = list(val_ds.unbatch().as_numpy_iterator())
val_images = np.array([image for image, _ in val_pairs])
val_labels = np.array([np.argmax(one_hot) for _, one_hot in val_pairs])
del val_pairs  # only the two arrays are needed from here on

preds = model.predict(val_images)
pred_labels = preds.argmax(axis=1)
acc_cnn_base = accuracy_score(val_labels, pred_labels)
print("Accuracy CNN base:", acc_cnn_base)


In [None]:
# ==== CELL 10: Extract embeddings for the Random Forest ====
# Frozen EfficientNetB0 backbone + global average pooling -> one embedding vector per image.
feature_extractor_backbone = EfficientNetB0(include_top=False, input_shape=IMAGE_SIZE + (3,), weights='imagenet')
feature_extractor = models.Model(feature_extractor_backbone.input, layers.GlobalAveragePooling2D()(feature_extractor_backbone.output))
feature_extractor.trainable = False

# Run the extractor on batches of BATCH_SIZE images instead of calling
# predict() once per image: same embeddings, far fewer predict() calls.
X_feats = []
y_labels = []
for start in tqdm(range(0, len(df), BATCH_SIZE), desc="Extrayendo embeddings para RF"):
    chunk = df.iloc[start:start + BATCH_SIZE]
    batch = np.stack([load_img(p).astype('float32') for p in chunk['path']])
    batch = tf.keras.applications.efficientnet.preprocess_input(batch)
    emb = feature_extractor.predict(batch, verbose=0)
    X_feats.extend(emb.reshape(len(chunk), -1))
    y_labels.extend(0 if lbl == 'benign' else 1 for lbl in chunk['label'])

X_feats = np.array(X_feats)
y_labels = np.array(y_labels)

# Train-test split for RF
# NOTE(review): this is a separate stratified split over `df`; it is not the
# same split as the train_ds/val_ds used by the CNN.
X_tr, X_te, y_tr, y_te = train_test_split(X_feats, y_labels, test_size=0.2, random_state=42, stratify=y_labels)

# Baseline Random Forest (fast)
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_tr, y_tr)
rf_pred = rf.predict(X_te)
acc_rf_base = accuracy_score(y_te, rf_pred)
print("Accuracy RF base:", acc_rf_base)


In [None]:
# ==== CELL 11: Balancear validaci√≥n para matriz de confusi√≥n CNN (1:1) ====
def make_balanced_val(images, labels):
    """Oversample the minority classes so every class has as many samples as the largest one.

    Every original sample is kept; classes with fewer samples are topped up by
    drawing from their own samples with replacement (``np.random.choice``), and
    the result is shuffled with ``np.random.permutation``.

    Args:
        images: Array whose first axis indexes samples.
        labels: 1-D array of class ids, aligned with ``images``.

    Returns:
        ``(X, Y)``: the balanced, shuffled images and labels. Dtypes of the
        inputs are preserved. Empty input is returned as empty copies.
    """
    images = np.asarray(images)
    labels = np.asarray(labels)
    if labels.size == 0:
        return images.copy(), labels.copy()

    class_ids, counts = np.unique(labels, return_counts=True)
    max_cnt = counts.max()

    # Collect sample *indices* and gather once at the end; converting image
    # tensors to nested Python lists is extremely slow and upcasts to float64.
    selected = []
    for cls, cnt in zip(class_ids, counts):
        idxs = np.where(labels == cls)[0]
        selected.append(idxs)
        reps = max_cnt - cnt
        if reps > 0:
            selected.append(np.random.choice(idxs, reps, replace=True))

    selected = np.concatenate(selected)
    order = selected[np.random.permutation(len(selected))]
    return images[order], labels[order]

# Build the class-balanced copy of the validation arrays and print its per-class counts.
val_images_bal, val_labels_bal = make_balanced_val(val_images, val_labels)
print("Validaci√≥n balanceada para matriz: ", np.bincount(val_labels_bal))


In [None]:
# ==== CELL 13: Simple optimisation (CNN fine-tuning + RF GridSearch) ====
# Keep the baseline accuracies for the before/after comparison table
before_accuracies = {'CNN_base': acc_cnn_base, 'RF_base': acc_rf_base}

# Fine-tuning CNN
def build_transfer_model_trainable(input_shape=IMAGE_SIZE + (3,), dropout=0.4):
    """Build the transfer model with the backbone's ``trainable`` flag set to True."""
    return build_transfer_model(input_shape=input_shape, base_trainable=True, dropout=dropout)

# NOTE(review): model_ft is a freshly built model; it does not reuse the head
# weights trained on `model` in the previous cell.
model_ft = build_transfer_model_trainable(dropout=0.4)

# Try to locate the backbone so only its last layers are unfrozen
try:
    base_backbone = model_ft.get_layer('efficientnetb0')
except Exception:
    base_backbone = None
    for layer in model_ft.layers:
        if isinstance(layer, tf.keras.Model) and 'efficientnet' in layer.name.lower():
            base_backbone = layer
            break

if base_backbone is None:
    # Last resort: first layer that reports a 4-D output shape.
    for layer in model_ft.layers:
        try:
            if hasattr(layer.output_shape, '__len__') and len(layer.output_shape) == 4:
                base_backbone = layer
                break
        except Exception:
            continue

num_unlock = 20
if base_backbone is not None and hasattr(base_backbone, 'layers'):
    total_layers = len(base_backbone.layers)
    cutoff = max(1, total_layers - num_unlock)
    for i, layer in enumerate(base_backbone.layers):
        # Unfreeze only the last `num_unlock` layers, and keep BatchNormalization
        # layers frozen even inside that range, as the Keras EfficientNet
        # fine-tuning guide recommends.
        layer.trainable = (i >= cutoff) and not isinstance(layer, layers.BatchNormalization)
else:
    print("Warning: no se encontr√≥ backbone por nombre; dejando todo como trainable=False por seguridad.")
    for layer in model_ft.layers:
        layer.trainable = False

# Compile after changing the trainable flags, with a lower learning rate than the baseline run.
model_ft.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=5e-5), loss='categorical_crossentropy', metrics=['accuracy'])
EPOCHS_FT = 50
history_ft = model_ft.fit(train_ds, validation_data=val_ds, epochs=EPOCHS_FT, callbacks=[es, rlp], class_weight=class_weight_dict)

# Evaluate the fine-tuned CNN on the same validation arrays as the baseline
preds_ft = model_ft.predict(val_images)
pred_labels_ft = np.argmax(preds_ft, axis=1)
acc_cnn_ft = accuracy_score(val_labels, pred_labels_ft)
print("Accuracy CNN fine-tuned:", acc_cnn_ft)

# RF tuning (GridSearch): 3-fold cross-validated search over n_estimators and
# max_depth on the RF training split, scored by accuracy.
param_grid = {'n_estimators': [100, 200], 'max_depth': [None, 10, 20]}
gs = GridSearchCV(RandomForestClassifier(random_state=42), param_grid, cv=3, n_jobs=2, scoring='accuracy')
gs.fit(X_tr, y_tr)
print("Mejores params RF:", gs.best_params_)
# Best estimator found by the search, evaluated on the held-out RF test split.
rf_best = gs.best_estimator_
rf_pred_best = rf_best.predict(X_te)
acc_rf_best = accuracy_score(y_te, rf_pred_best)
print("Accuracy RF optimizado:", acc_rf_best)


In [None]:
# ==== CELL 13.5: Confusion matrix of the optimised Random Forest (RF only) ====
from sklearn.metrics import ConfusionMatrixDisplay

# Guard against running this cell before rf_best, X_te and y_te exist:
# a NameError is caught and reported instead of aborting the notebook.
try:
    preds_rf_opt = rf_best.predict(X_te)
    cm_rf = confusion_matrix(y_te, preds_rf_opt)
    print("\nMatriz de confusi√≥n - Random Forest (optimizado):")
    disp = ConfusionMatrixDisplay(confusion_matrix=cm_rf, display_labels=['benign','malignant'])
    fig, ax = plt.subplots(figsize=(6,5))
    disp.plot(ax=ax, cmap='Blues', values_format='d')
    plt.title('Confusion matrix - RF optimizado')
    plt.show()

    # Show the per-class classification metrics
    print("\nReporte de clasificaci√≥n (RF optimizado):")
    print(classification_report(y_te, preds_rf_opt, target_names=['benign','malignant']))
except NameError as e:
    print("Variable no encontrada (rf_best o X_te/y_te). Aseg√∫rate de ejecutar la celda de GridSearchCV antes de esta celda.")
    print(e)


In [None]:
# ==== CELL 14: Comparative results (before / after) ====
# Accuracies after optimisation; the baseline values come from before_accuracies.
after_accuracies = {'CNN_finetune': acc_cnn_ft, 'RF_opt': acc_rf_best}
# One row per model variant; the single accuracy column holds the "before"
# value for the *_base rows and the "after" value for the optimised rows.
results_table = pd.DataFrame({
    'Modelo': ['CNN_base', 'CNN_finetune', 'RF_base', 'RF_opt'],
    'Accuracy_before_after': [
        before_accuracies['CNN_base'],
        after_accuracies['CNN_finetune'],
        before_accuracies['RF_base'],
        after_accuracies['RF_opt']
    ]
})
print("\nCuadro antes/despues (accuracy):")
display(results_table)



# ============================
# Select the final model to persist
# ============================
# Priority order:
#   1. `best_model`            - explicit override, if the user defined one
#   2. `rf_best`               - tuned Random Forest from the grid-search cell
#   3. `grid` / `gs`           - a fitted search object (its best_estimator_)
#   4. `rf`                    - baseline Random Forest
#   5. `model`                 - baseline CNN
# Previously the tuned estimator was never considered (this notebook names the
# search object `gs`, not `grid`), so the baseline `rf` ended up saved as
# "best_model.pkl". The hasattr() checks also replace a bare `except: pass`
# that could leave model_to_save undefined.
if 'best_model' in globals():
    model_to_save = best_model

elif 'rf_best' in globals():
    model_to_save = rf_best

elif 'grid' in globals() and hasattr(grid, 'best_estimator_'):
    model_to_save = grid.best_estimator_

elif 'gs' in globals() and hasattr(gs, 'best_estimator_'):
    model_to_save = gs.best_estimator_

elif 'rf' in globals():
    model_to_save = rf

# NOTE(review): pickling a Keras model is not guaranteed to work on every
# TensorFlow version; this branch is only reached when no Random Forest exists.
elif 'model' in globals():
    model_to_save = model

else:
    raise NameError("‚ùå No se encontr√≥ ning√∫n modelo entrenado para guardar. Ejecuta primero la celda de entrenamiento.")

print("‚úÖ Modelo final alineado como: model_to_save")

# ============================
# Save the trained model
# ============================

import pickle
from datetime import datetime
import os

# Create the output folder if it does not exist
save_dir = "/content/drive/MyDrive/modelos_entrenados"
os.makedirs(save_dir, exist_ok=True)

# Timestamped (versioned) file name
version_path = os.path.join(save_dir, f"model_v{datetime.now().strftime('%Y%m%d_%H%M%S')}.pkl")

with open(version_path, 'wb') as f:
    pickle.dump(model_to_save, f)

print(f"üìÅ Modelo guardado como versi√≥n: {version_path}")

# Also save it under the fixed name "best_model.pkl" (overwritten on every run)
best_path = os.path.join(save_dir, "best_model.pkl")

with open(best_path, 'wb') as f:
    pickle.dump(model_to_save, f)

print(f"üèÜ Modelo guardado como best_model.pkl")




In [None]:
# ==== CELL 15: Upload an image for diagnosis (high resolution) and predict ====
from google.colab import files
print("Sube una imagen para diagn√≥stico (se mostrar√° en alta resoluci√≥n y se generar√° un informe A4 horizontal).")
uploaded = files.upload()

if len(uploaded) > 0:
    # Only the first uploaded file is analysed.
    fname = list(uploaded.keys())[0]
    img_pil = skio.imread(fname)
    # Show the uploaded image at high resolution
    plt.figure(figsize=(8, 8))
    if img_pil.ndim == 2:
        plt.imshow(img_pil, cmap='gray')
    else:
        plt.imshow(img_pil)
    plt.axis('off')
    plt.title(f'Imagen subida: {fname} (alta resoluci√≥n)')
    plt.show()

    # --- Robust preprocessing (always ends with an H x W x 3 array) ---
    img_np = np.array(img_pil)
    if img_np.ndim == 2:
        img_np = np.stack([img_np, img_np, img_np], axis=-1)
    if img_np.ndim == 4:
        img_np = img_np[0]
    if img_np.ndim != 3:
        raise ValueError(f"Imagen inv√°lida: dimensiones {img_np.shape}, se esperaba (H,W,3)")
    # The models expect exactly three channels: drop the alpha channel of RGBA
    # uploads and replicate a single-channel plane.
    channels = img_np.shape[-1]
    if channels == 4:
        img_np = img_np[..., :3]
    elif channels == 1:
        img_np = np.repeat(img_np, 3, axis=-1)

    # Resize (skimage returns floats in [0, 1]) and scale back to 0-255 before preprocess_input
    img_resized = skresize(img_np, IMAGE_SIZE, anti_aliasing=True)
    img_resized = (img_resized * 255.0).astype('float32')
    img_proc = tf.keras.applications.efficientnet.preprocess_input(img_resized)

    # Add the batch dimension
    img_batch = np.expand_dims(img_proc, axis=0).astype('float32')

    # Fine-tuned CNN prediction (final model used)
    prob_cnn = model_ft(img_batch, training=False).numpy()[0]
    pred_idx_cnn = int(np.argmax(prob_cnn))
    pred_label_cnn = classes[pred_idx_cnn]
    prob_value_cnn = float(prob_cnn[pred_idx_cnn])

    # RF prediction on the backbone embeddings
    emb = feature_extractor.predict(img_batch)
    rf_prob = rf_best.predict_proba(emb)[0]
    pred_idx_rf = int(np.argmax(rf_prob))
    pred_label_rf = classes[pred_idx_rf]
    prob_value_rf = float(rf_prob[pred_idx_rf])

    # Combined confidence (plain average of both models' probability for class index 1, "malignant")
    prob_malignant_combined = (prob_cnn[1] + rf_prob[1]) / 2.0
    prob_benign_combined = 1.0 - prob_malignant_combined

    print(f"\nPredicci√≥n CNN (fine-tuned): {pred_label_cnn} (p={prob_value_cnn:.3f})")
    print(f"Predicci√≥n RF: {pred_label_rf} (p={prob_value_rf:.3f})")
    print(f"Probabilidad combinada malignidad: {prob_malignant_combined:.3f}")

    # ==== CELL 16: Generate the A4-landscape diagnostic report with a percentage bar ====
    out_pdf_path = '/content/drive/MyDrive/diagnostico_tiroides_informe_A4_landscape.pdf'
    hospital_name = "Cl√≠nica de An√°lisis Avanzado de Im√°genes (IA)"
    firma_text = "Especialista"
    doctor_name = "Dr./Dra. Especialista"
    fecha_str = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    # Create the PDF (A4 landscape)
    W, H = landscape(A4)
    c = canvas.Canvas(out_pdf_path, pagesize=(W, H))
    left_margin = 15 * mm
    right_margin = 15 * mm
    top_margin = 12 * mm
    bottom_margin = 12 * mm

    # Header: clinic name, subtitle, separator line and date
    header_y = H - top_margin
    c.setFont("Helvetica-Bold", 16)
    c.drawCentredString(W / 2, header_y, hospital_name)
    c.setFont("Helvetica", 10)
    c.drawCentredString(W / 2, header_y - 16, "Informe Asistido por IA - An√°lisis de Imagen Tiroidea")
    c.line(left_margin, header_y - 22, W - right_margin, header_y - 22)
    c.setFont("Helvetica", 8)
    c.drawRightString(W - right_margin, header_y - 6, f"Fecha: {fecha_str}")

    # Image area: left 60% of the usable width
    img_area_x = left_margin
    img_area_w = (W - left_margin - right_margin) * 0.60
    img_area_y_top = header_y - 40
    img_area_h = H - top_margin - bottom_margin - 40

    try:
        draw_image_keep_aspect(c, img_pil if 'img_pil' in globals() else img_np, img_area_x, img_area_y_top, img_area_w, img_area_h)
    except Exception:
        # Fallback: draw an empty frame with a notice instead of failing the whole report.
        c.setStrokeColorRGB(0.7, 0.7, 0.7)
        c.rect(img_area_x, img_area_y_top - img_area_h, img_area_w, img_area_h)
        c.setFont("Helvetica-Oblique", 9)
        c.drawString(img_area_x + 6, img_area_y_top - 20, "Imagen no disponible (error al renderizar).")

    # Text column to the right of the image
    text_x = left_margin + img_area_w + 12 * mm
    text_w = W - text_x - right_margin
    text_y_top = img_area_y_top

    c.setFont("Helvetica-Bold", 14)
    c.drawString(text_x, text_y_top, "Informe diagn√≥stico")

    from reportlab.lib.styles import getSampleStyleSheet
    styles = getSampleStyleSheet()
    ps = ParagraphStyle(name='normal', fontName='Helvetica', fontSize=10, leading=12)
    bold_ps = ParagraphStyle(name='bold', fontName='Helvetica-Bold', fontSize=11, leading=13)

    result_html = f"<b>Resultado combinado:</b> Malignidad {prob_malignant_combined * 100:.1f}% / Benigno {(1.0 - prob_malignant_combined) * 100:.1f}%"
    p = Paragraph(result_html, ps)

    cnn_line = f"<b>CNN (fine-tuned):</b> {pred_label_cnn} (p={prob_value_cnn:.3f})"
    rf_line = f"<b>RandomForest (embeddings):</b> {pred_label_rf} (p={prob_value_rf:.3f})"
    p_cnn = Paragraph(cnn_line, ps)
    p_rf = Paragraph(rf_line, ps)

    # Recommendation text switches at a combined malignancy probability of 0.5
    if prob_malignant_combined >= 0.5:
        suggested_action = ("Sospecha de malignidad: Correlacionar con hallazgos cl√≠nicos y considerar biopsia (FNAB) para confirmaci√≥n histol√≥gica. "
                            "Si FNAB confirma malignidad, referir a equipo oncol√≥gico/cirug√≠a seg√∫n protocolo institucional.")
    else:
        suggested_action = ("Caracter√≠sticas sugestivas de benignidad: recomendar seguimiento ecogr√°fico peri√≥dico y evaluaci√≥n cl√≠nica. "
                            "Si existe crecimiento o s√≠ntomas, considerar FNAB para confirmaci√≥n.")
    p_reco = Paragraph(f"<b>Recomendaci√≥n cl√≠nica:</b> {suggested_action}", ps)

    frame_height = img_area_h
    frame = Frame(text_x, img_area_y_top - frame_height, text_w, frame_height, showBoundary=0)
    story = [p, Paragraph("<br/>", ps), p_cnn, Paragraph("<br/>", ps), p_rf, Paragraph("<br/><br/>", ps), p_reco]
    frame.addFromList(story, c)

    # Percentage bar near the bottom of the text column
    bar_x = text_x
    bar_y = img_area_y_top - frame_height + 14 * mm
    bar_w = text_w
    bar_h = 10 * mm

    c.setFillColorRGB(0.95, 0.95, 0.95)
    c.rect(bar_x, bar_y, bar_w, bar_h, fill=1, stroke=0)

    mal_pct = prob_malignant_combined * 100.0
    ben_pct = 100.0 - mal_pct
    mal_w = bar_w * (mal_pct / 100.0)
    ben_w = bar_w - mal_w

    c.setFillColorRGB(0.82, 0.1, 0.1)  # red: malignant share
    c.rect(bar_x, bar_y, mal_w, bar_h, fill=1, stroke=0)
    c.setFillColorRGB(0.12, 0.55, 0.12)  # green: benign share
    c.rect(bar_x + mal_w, bar_y, ben_w, bar_h, fill=1, stroke=0)

    c.setFont("Helvetica-Bold", 9)
    c.setFillColorRGB(0, 0, 0)
    c.drawString(bar_x, bar_y + bar_h + 4, f"Malignidad: {mal_pct:.1f}%    Benigno: {ben_pct:.1f}%")

    # Signature block (bottom right)
    sig_x = W - right_margin - 70 * mm
    sig_y = bottom_margin + 18 * mm
    c.setFont("Times-Italic", 12)
    c.drawString(sig_x, sig_y + 12, firma_text)
    c.setFont("Helvetica", 9)
    c.drawString(sig_x, sig_y - 2, doctor_name)
    c.drawString(sig_x, sig_y - 14, "Especialidad: Radiolog√≠a/Imagen")

    # Disclaimer footer
    c.setFont("Helvetica-Oblique", 7.5)
    disclaimer = ("Este informe es una ayuda al diagn√≥stico adquirido por un modelo de IA y no reemplaza la evaluaci√≥n cl√≠nica ni el informe histopatol√≥gico. "
                  "Correlacionar con antecedentes y pruebas complementarias.")
    c.drawCentredString(W / 2, bottom_margin, disclaimer)

    c.showPage()
    c.save()
    print(f"\nInforme PDF guardado en: {out_pdf_path}")

    # Try to embed the PDF in the notebook; fall back to a browser download.
    try:
        from IPython.display import IFrame
        display(IFrame(out_pdf_path, width=900, height=500))
    except Exception:
        display(files.download(out_pdf_path))

else:
    print("No se subi√≥ ninguna imagen; por favor vuelve a ejecutar la celda y carga una imagen.")


# ==== (inside the same upload-and-predict cell) ====
# The code above has already handled the upload, preprocessing, predictions and
# the creation of the PDF. This part ADDITIONALLY renders the diagnosis inline in
# the notebook (image + text + bar). It relies on the names set by that code:
# prob_malignant_combined, prob_benign_combined, pred_label_cnn, pred_label_rf,
# prob_value_cnn, prob_value_rf, img_np, img_pil, out_pdf_path.

try:
    # Prepare the left-hand image
    disp_img = img_np.copy() if 'img_np' in globals() else (img_pil if 'img_pil' in globals() else None)
    if disp_img is None:
        print("No hay imagen para mostrar inline.")
    else:
        # Convert to uint8 for display
        disp_img = (np.array(disp_img)).astype('uint8')
        # Matplotlib figure laid out like the PDF: image left, text + bar right.
        # Named grid_spec (not gs) so the GridSearchCV object `gs` from the
        # optimisation cell is not overwritten.
        fig = plt.figure(figsize=(11,6))
        grid_spec = fig.add_gridspec(1, 2, width_ratios=[3,2], wspace=0.12)

        # Left: image
        ax0 = fig.add_subplot(grid_spec[0,0])
        ax0.imshow(disp_img)
        ax0.axis('off')
        ax0.set_title(f'Imagen analizada: {fname}' if 'fname' in globals() else 'Imagen analizada')

        # Right: text and bar
        ax1 = fig.add_subplot(grid_spec[0,1])
        ax1.axis('off')

        # Text block
        lines = [
            f"Informe Asistido por IA - Cl√≠nica de An√°lisis Avanzado de Im√°genes (IA)",
            "",
            f"Fecha: {fecha_str if 'fecha_str' in globals() else datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
            "",
            f"Resultado combinado: Malignidad {prob_malignant_combined*100:.1f}%  /  Benigno {prob_benign_combined*100:.1f}%",
            f"CNN (fine-tuned): {pred_label_cnn} (p={prob_value_cnn:.3f})",
            f"RandomForest (embeddings): {pred_label_rf} (p={prob_value_rf:.3f})",
            "",
            "Recomendaci√≥n cl√≠nica:",
            ( "Sospecha de malignidad: Correlacionar con hallazgos cl√≠nicos y considerar biopsia (FNAB) para confirmaci√≥n histol√≥gica."
              if prob_malignant_combined >= 0.5
              else "Caracter√≠sticas sugestivas de benignidad: recomendar seguimiento ecogr√°fico peri√≥dico y evaluaci√≥n cl√≠nica.")
        ]

        # Render the text lines top-down in axes coordinates
        y0 = 0.95
        for line in lines:
            ax1.text(0, y0, line, fontsize=10, va='top')
            y0 -= 0.095

        # Horizontal percentage bar (malignant vs benign) below the text
        bar_ax = fig.add_axes([0.70, 0.10, 0.22, 0.05])  # figure-relative coords (adjust to fit)
        # Malignant share in red, matching the colour used in the PDF bar.
        bar_ax.barh([0], [prob_malignant_combined*100], height=0.6, color=(0.82, 0.1, 0.1))
        bar_ax.barh([0], [prob_benign_combined*100], left=[prob_malignant_combined*100], height=0.6, color='green')
        bar_ax.set_xlim(0,100)
        bar_ax.set_yticks([])
        bar_ax.set_xticks([0,25,50,75,100])
        bar_ax.set_xlabel('Porcentaje (%) - Malignidad (izq) / Benigno (der)')
        bar_ax.text(prob_malignant_combined*100 + 1, 0, f"{prob_malignant_combined*100:.1f}% maligno", va='center')

        plt.suptitle("Diagn√≥stico profesional (visualizado en notebook y guardado en PDF)", fontsize=12, weight='bold')
        plt.show()

except Exception as e:
    print("Error mostrando diagn√≥stico inline:", e)

# Finally show the saved PDF (IFrame, falling back to a download).
# Guarded: when no image was uploaded out_pdf_path does not exist, and the old
# fallback raised a second, uncaught NameError from inside the except handler.
if 'out_pdf_path' in globals():
    try:
        from IPython.display import IFrame
        display(IFrame(out_pdf_path, width=900, height=500))
    except Exception:
        display(files.download(out_pdf_path))
else:
    print("No hay informe PDF para mostrar; sube una imagen primero.")


In [None]:
# ==== CELL 17: Final notes ====
# Prints a short summary of what the notebook produced.
# NOTE(review): the first note mentions a CNN confusion matrix, but no cell in
# the visible notebook plots one (only the RF confusion matrix is drawn) - confirm.
print("\nNotas finales:")
print("- Se presenta la matriz de confusi√≥n del CNN (balanceada 1:1) con colores por cuadrante.")
print("- La matriz de correlaci√≥n muestra pesos (valores num√©ricos) en cada celda.")
print("- Se aplic√≥ fine-tuning simple al CNN y GridSearch al RF; cuadro antes/despu√©s mostrado.")
print("- El informe profesional se genera en A4 horizontal (guardado como PDF en Drive).")

