# Preprocesado

## Dependencias y Constantes

In [1]:
%pip install matplotlib numpy tensorflow-cpu opencv-python scikit-learn

Note: you may need to restart the kernel to use updated packages.


In [2]:
import os
import shutil

import cv2
import numpy as np
import torchvision.transforms as transforms

from PIL import ImageFilter, Image
from torchvision.utils import save_image

In [3]:
IMAGE_DIR = "../storage/raw/blood-cell"
SEGMENTER_DIR = "../storage/clean/blood_cell/segmenter"
TRANSFORMER_DIR = "../storage/clean/blood_cell/transformer"

## Cargar imagenes

In [4]:
min_side = 99999999
count_load = 0

filenames = []
for filename in os.listdir(IMAGE_DIR):
    image_path = os.path.join(IMAGE_DIR, filename)
    
    try:
        pil_image = Image.open(image_path).convert("RGB")

        min_side = min(min(pil_image.size), min_side)
        count_load += 1

        filenames.append(filename)
    except:
        print(f"Error al cargar la imagen {filename}")
        continue

print(f"{count_load} imágenes cargadas.")
print(f"Dimensión más pequeña {min_side}")

5000 imágenes cargadas.
Dimensión más pequeña 360


## Transformer

In [5]:
transform = transforms.Compose([
    transforms.Lambda(lambda img: img.filter(ImageFilter.EDGE_ENHANCE_MORE)),
    transforms.Resize(min_side),
    transforms.CenterCrop(min_side),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

In [6]:
if os.path.exists(TRANSFORMER_DIR):
    shutil.rmtree(TRANSFORMER_DIR)

os.makedirs(TRANSFORMER_DIR)

for filename in filenames:
    image_path = os.path.join(IMAGE_DIR, filename)
    pil_image = Image.open(image_path).convert("RGB")

    image = transform(pil_image)
    image = image.detach().cpu()
    save_image(image, os.path.join(TRANSFORMER_DIR, filename))

## Segmentación

In [7]:
def create_rectangular_mask(shape, center, width, height):
    mask = np.zeros(shape, dtype=np.uint8)
    cX, cY = center
    x1 = max(cX - width // 2, 0)
    y1 = max(cY - height // 2, 0)
    x2 = min(cX + width // 2, shape[1])
    y2 = min(cY + height // 2, shape[0])
    mask[y1:y2, x1:x2] = 255
    return mask

### Con Kmeans

In [8]:
if os.path.exists(SEGMENTER_DIR):
    shutil.rmtree(SEGMENTER_DIR)

os.makedirs(SEGMENTER_DIR)

K = 2
masks = []
for filename in filenames:
    image_path = os.path.join(TRANSFORMER_DIR, filename)
    pil_image = Image.open(image_path)

    image = np.array(pil_image)
    image = cv2.cvtColor(image, cv2.COLOR_RGB2LAB)

    Z = image.reshape((-1, 3))
    Z = np.float32(Z)

    criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10, 1.0)
    _, labels, _ = cv2.kmeans(Z, K, None, criteria, 10, cv2.KMEANS_RANDOM_CENTERS)

    labels = labels.flatten().reshape(image.shape[:2])

    cluster_means = [np.mean(image[labels == i, 0]) for i in range(K)]
    cell_cluster = np.argmin(cluster_means)

    mask = np.uint8((labels == cell_cluster) * 255)

    kernel = np.ones((10, 10), np.uint8)
    mask = cv2.dilate(mask, kernel, iterations=1)

    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if contours:
        largest_contour = max(contours, key=cv2.contourArea)
        M = cv2.moments(largest_contour)
        if M["m00"] != 0:
            cX = int(M["m10"] / M["m00"])
            cY = int(M["m01"] / M["m00"])
        else:
            cX, cY = 0, 0

        mask = create_rectangular_mask(mask.shape, (cX, cY), 230, 230)

    img_bgr = cv2.cvtColor(np.array(pil_image), cv2.COLOR_RGB2BGR)
    result = cv2.bitwise_and(img_bgr, img_bgr, mask=mask)
    result = cv2.cvtColor(result, cv2.COLOR_BGR2RGB)

    masks.append((filename, mask))
    Image.fromarray(result).save(os.path.join(SEGMENTER_DIR, filename))

#### Calcular entropia

In [9]:
mean = 0
for filename, mask_img in masks:
    mask = np.array(mask_img)
    mask_binary = (mask > 128).astype(np.uint8)

    values, counts = np.unique(mask_binary, return_counts=True)
    total = counts.sum()
    probabilities = counts / total

    entropy = -np.sum(probabilities * np.log(probabilities + 1e-10))
    mean += entropy

    print(f"Entropy for {filename}: {entropy:.5f}")

print(f"mean: {(mean/len(masks)):.5f}\n")

Entropy for BA_145728.jpg: 0.67619
Entropy for BA_34424.jpg: 0.67619
Entropy for MYO_1397.jpg: 0.67619
Entropy for ERB_506873.jpg: 0.67619
Entropy for BA_363451.jpg: 0.67619
Entropy for MO_998662.jpg: 0.67619
Entropy for MYO_0362.jpg: 0.67619
Entropy for ERB_312715.jpg: 0.67619
Entropy for MYO_0315.jpg: 0.67619
Entropy for MYO_1225.jpg: 0.67619
Entropy for BA_362723.jpg: 0.67619
Entropy for NGS_6355.jpg: 0.67619
Entropy for MYO_1667.jpg: 0.67619
Entropy for BA_412942.jpg: 0.67619
Entropy for MYO_1612.jpg: 0.67619
Entropy for MYO_1802.jpg: 0.67619
Entropy for NGS_3358.jpg: 0.67619
Entropy for BA_6162.jpg: 0.67619
Entropy for NGS_6809.jpg: 0.67619
Entropy for BA_642041.jpg: 0.67619
Entropy for BA_99303.jpg: 0.67619
Entropy for ERB_425990.jpg: 0.67619
Entropy for ERB_353758.jpg: 0.67619
Entropy for MO_250099.jpg: 0.67619
Entropy for ERB_256257.jpg: 0.67619
Entropy for ERB_54236.jpg: 0.63170
Entropy for BA_709347.jpg: 0.67619
Entropy for BA_260100.jpg: 0.67619
Entropy for NGS_3466.jpg: 0.6

### Con cv2

In [10]:
if os.path.exists(SEGMENTER_DIR):
    shutil.rmtree(SEGMENTER_DIR)

os.makedirs(SEGMENTER_DIR)

masks = []
for filename in filenames:
    image_path = os.path.join(TRANSFORMER_DIR, filename)
    pil_image = Image.open(image_path)

    img = np.array(pil_image)
    img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    blur = cv2.GaussianBlur(gray, (5, 5), 0)
    _, thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

    contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    mask = np.zeros_like(gray)
    rectangular_mask = np.zeros_like(gray)
    if contours:
        largest_contour = max(contours, key=cv2.contourArea)
        cv2.drawContours(mask, [largest_contour], -1, 255, thickness=-1)

        kernel = np.ones((10, 10), np.uint8)
        mask = cv2.dilate(mask, kernel, iterations=1)

        M = cv2.moments(largest_contour)
        if M["m00"] != 0:
            cX = int(M["m10"] / M["m00"])
            cY = int(M["m01"] / M["m00"])
        else:
            cX, cY = 0, 0

        mask = create_rectangular_mask(mask.shape, (cX, cY), 230, 230)

    segmented = cv2.bitwise_and(img, img, mask=mask)
    segmented_rgb = cv2.cvtColor(segmented, cv2.COLOR_BGR2RGB)

    masks.append((filename, mask))
    Image.fromarray(segmented_rgb).save(os.path.join(SEGMENTER_DIR, filename))

#### Calcular entropia

In [11]:
mean = 0
for filename, mask_img in masks:
    mask = np.array(mask_img)
    mask_binary = (mask > 128).astype(np.uint8)

    values, counts = np.unique(mask_binary, return_counts=True)
    total = counts.sum()
    probabilities = counts / total

    entropy = -np.sum(probabilities * np.log(probabilities + 1e-10))
    mean += entropy

    print(f"Entropy for {filename}: {entropy:.5f}")

print(f"mean: {(mean/len(masks)):.5f}\n")

Entropy for BA_145728.jpg: 0.67619
Entropy for BA_34424.jpg: 0.67619
Entropy for MYO_1397.jpg: 0.67619
Entropy for ERB_506873.jpg: 0.67619
Entropy for BA_363451.jpg: 0.67619
Entropy for MO_998662.jpg: 0.67619
Entropy for MYO_0362.jpg: 0.67619
Entropy for ERB_312715.jpg: 0.67619
Entropy for MYO_0315.jpg: 0.67619
Entropy for MYO_1225.jpg: 0.67619
Entropy for BA_362723.jpg: 0.67619
Entropy for NGS_6355.jpg: 0.67619
Entropy for MYO_1667.jpg: 0.67619
Entropy for BA_412942.jpg: 0.67619
Entropy for MYO_1612.jpg: 0.67619
Entropy for MYO_1802.jpg: 0.67619
Entropy for NGS_3358.jpg: 0.67619
Entropy for BA_6162.jpg: 0.67619
Entropy for NGS_6809.jpg: 0.67619
Entropy for BA_642041.jpg: 0.67619
Entropy for BA_99303.jpg: 0.67619
Entropy for ERB_425990.jpg: 0.67619
Entropy for ERB_353758.jpg: 0.67619
Entropy for MO_250099.jpg: 0.67619
Entropy for ERB_256257.jpg: 0.67619
Entropy for ERB_54236.jpg: 0.67619
Entropy for BA_709347.jpg: 0.67619
Entropy for BA_260100.jpg: 0.67619
Entropy for NGS_3466.jpg: 0.6