In [13]:
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.optimizers import Adam

def _conv_block(x, filters):
    """Apply two 3x3, same-padded, ReLU-activated convolutions with `filters` channels."""
    x = layers.Conv2D(filters, (3, 3), activation='relu', padding='same')(x)
    x = layers.Conv2D(filters, (3, 3), activation='relu', padding='same')(x)
    return x


def unet_model(input_size=(224, 224, 3)):
    """Build and compile a U-Net for binary (single-channel) segmentation.

    The network has four encoder stages (64, 128, 256, 512 filters), a
    1024-filter bottleneck and four decoder stages that upsample and
    concatenate the matching encoder feature map (skip connection). The final
    1x1 sigmoid convolution yields one value in [0, 1] per pixel.

    Args:
        input_size: ``(height, width, channels)`` of the input image. Height
            and width must be multiples of 16 (or ``None``), because the
            encoder halves the resolution four times and the decoder doubles
            it back; otherwise the skip connections cannot be concatenated.

    Returns:
        A Keras ``Model`` compiled with the Adam optimizer,
        ``binary_crossentropy`` loss and the ``accuracy`` metric.

    Raises:
        ValueError: If a known height or width is not a multiple of 16.
    """
    encoder_filters = (64, 128, 256, 512)

    # Each encoder stage ends in a 2x2 max-pool, so the total downsampling
    # factor is 2 ** number_of_stages. Fail early with a readable message
    # instead of a shape mismatch raised from inside a Concatenate layer.
    downsampling = 2 ** len(encoder_filters)
    for axis_name, dim in zip(('height', 'width'), input_size[:2]):
        if dim is not None and dim % downsampling != 0:
            raise ValueError(
                f"input_size {axis_name} must be a multiple of {downsampling}, got {dim}"
            )

    inputs = layers.Input(input_size)

    # Encoder: keep every stage's pre-pooling output for the skip connections.
    skips = []
    x = inputs
    for filters in encoder_filters:
        x = _conv_block(x, filters)
        skips.append(x)
        x = layers.MaxPooling2D((2, 2))(x)

    # Bottleneck
    x = _conv_block(x, 1024)

    # Decoder: upsample, merge with the encoder output of the same resolution
    # (deepest first), then convolve.
    for filters, skip in zip(reversed(encoder_filters), reversed(skips)):
        x = layers.UpSampling2D((2, 2))(x)
        x = layers.Concatenate()([x, skip])
        x = _conv_block(x, filters)

    # Output layer: per-pixel probability of the foreground class (hand vs. background).
    outputs = layers.Conv2D(1, (1, 1), activation='sigmoid')(x)

    model = models.Model(inputs, outputs)

    model.compile(optimizer=Adam(), loss='binary_crossentropy', metrics=['accuracy'])

    return model

# Build the U-Net with its default input size (224, 224, 3).
# The returned model is compiled but has freshly initialised weights.
model = unet_model()

# Print the layer-by-layer summary of the model.
model.summary()


Model: "model_6"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_7 (InputLayer)        [(None, 224, 224, 3)]        0         []                            
                                                                                                  
 conv2d_114 (Conv2D)         (None, 224, 224, 64)         1792      ['input_7[0][0]']             
                                                                                                  
 conv2d_115 (Conv2D)         (None, 224, 224, 64)         36928     ['conv2d_114[0][0]']          
                                                                                                  
 max_pooling2d_24 (MaxPooli  (None, 112, 112, 64)         0         ['conv2d_115[0][0]']          
 ng2D)                                                                                      

In [14]:
import cv2
import numpy as np

# Load the image.
image_path = '/home/ale/Documentos/PID/genero_manos/dataset/image/train/Hand_0000003.jpg'
image = cv2.imread(image_path)
# cv2.imread does not raise on a missing/unreadable file; it returns None,
# which would otherwise surface as an opaque error inside cv2.resize.
if image is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")

# Resize the image to 224x224 (must match the input size the model was built with).
image_resized = cv2.resize(image, (224, 224))

# Scale pixel values from [0, 255] to [0, 1].
# NOTE(review): cv2.imread yields channels in BGR order; confirm the training
# pipeline feeds images in the same order, otherwise convert before predicting.
image_normalized = image_resized.astype('float32') / 255.0

# Add the batch dimension -> (1, 224, 224, 3).
image_batch = np.expand_dims(image_normalized, axis=0)

# Run the U-Net on the image.
# NOTE(review): in the cells visible here `model` is only built and compiled,
# never trained and no weights are loaded. With random initial weights the
# sigmoid outputs hover around 0.5 and the thresholded mask is meaningless;
# train the model or load weights before relying on this prediction.
segmentation_output = model.predict(image_batch)

# Inspect the raw per-pixel probabilities before thresholding.
print(segmentation_output[0])

# Threshold at 0.5 to obtain a binary mask (1 = foreground, 0 = background).
segmentation_mask = (segmentation_output[0] > 0.5).astype(np.uint8)

# Scale the mask to 0/255 so it is visible as a black-and-white image.
mask_image = segmentation_mask * 255

# Save the generated mask; cv2.imwrite reports failure by returning False.
mask_path = "Hand_0000003_mask.png"
if not cv2.imwrite(mask_path, mask_image):
    raise IOError(f"Could not write mask to: {mask_path}")

# Show the mask in a native OpenCV window; waitKey(0) blocks until a key is pressed.
cv2.imshow("Segmentation Mask", mask_image)
cv2.waitKey(0)
cv2.destroyAllWindows()

[[[0.4928369 ]
  [0.495609  ]
  [0.49395376]
  ...
  [0.49340445]
  [0.50069714]
  [0.5044775 ]]

 [[0.48831594]
  [0.4911253 ]
  [0.49275926]
  ...
  [0.4899694 ]
  [0.49470422]
  [0.49084032]]

 [[0.48428336]
  [0.49932358]
  [0.49640414]
  ...
  [0.49567422]
  [0.49840173]
  [0.49553105]]

 ...

 [[0.48235312]
  [0.49139673]
  [0.48484084]
  ...
  [0.49094924]
  [0.49337214]
  [0.48690045]]

 [[0.49029508]
  [0.49668542]
  [0.49689272]
  ...
  [0.50125086]
  [0.49599266]
  [0.49272853]]

 [[0.48698977]
  [0.48614514]
  [0.48252997]
  ...
  [0.48819333]
  [0.48993576]
  [0.4951504 ]]]
