In [None]:
# Mount Google Drive at /content/drive (requires the Colab runtime).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Using PaddleOCR

In [None]:
# Install the OCR stack: paddleocr is pinned to 2.10.0, paddlepaddle is not pinned.
# NOTE(review): consider pinning paddlepaddle as well so a rerun resolves the same build.
!pip install paddlepaddle
!pip install paddleocr==2.10.0

In [None]:
import os
!pip install segmentation_models==1.0.1
!pip install simple-lama-inpainting
os._exit(0)
!pip uninstall -y numpy
!pip install numpy==1.25.2
os._exit(0)

In [None]:
# Extract an archive into /content/. The archive path is a redacted placeholder
# and must be replaced with a real path before this cell can run.
# NOTE(review): presumably this is the image dataset read from IMAGE_DIR later - confirm.
!unzip "<contact_to_our_corresponding_email>" -d /content/

In [None]:
import cv2
import numpy as np
import os
import gc
import matplotlib.pyplot as plt
from PIL import Image
from simple_lama_inpainting import SimpleLama
import time

# Environment variables are set before tensorflow / paddle / segmentation_models
# are imported below.
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"  # Hide all GPUs
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"  # Reduce TensorFlow logging
os.environ["FLAGS_fraction_of_gpu_memory_to_use"] = "0.0"  # Don't use GPU memory for Paddle
os.environ["SM_FRAMEWORK"] = "tf.keras"  # For segmentation models

import tensorflow as tf
import paddle
import segmentation_models as sm

# Force CPU execution for Paddle and select the tf.keras backend for segmentation_models.
paddle.set_device('cpu')
sm.set_framework('tf.keras')

from paddleocr import PaddleOCR

# Root folder for the per-stage images written by visualize_pipeline.
VIS_DIR = "pipeline_viz"

# Side length (pixels) that non-1072x1920 frames are resized to before border detection.
IMAGE_SIZE = 256
# Fractions of the frame width used as the left / right column-crop bounds for 1072x1920 frames.
LEFT_CUT_SIZE = 0.235
RIGHT_CUT_SIZE = 0.945

# NOTE(review): EndoscopyImageProcessor.__init__ builds its own SimpleLama instance, so this
# module-level one looks redundant (a second model in memory) - confirm no other cell uses it.
simple_lama = SimpleLama()

class EndoscopyImageProcessor:
    def __init__(self):
        """Create the output folder tree under the current working directory
        and initialise the lazily loaded models.

        The OCR engine and the instrument model start as ``None`` and are
        created on first use; the LaMa inpainter is created immediately.
        """
        cwd = os.getcwd()

        # <cwd>/masks holds one sub-folder per mask type.
        masks_dir = os.path.join(cwd, "masks")
        os.makedirs(masks_dir, exist_ok=True)

        self.output_folder_masks = {
            "highlight": os.path.join(masks_dir, "highlight_masks"),
            "instrument": os.path.join(masks_dir, "instrument_masks"),
            "textbox": os.path.join(masks_dir, "textbox_masks"),
            "combined": os.path.join(masks_dir, "combined_masks"),
            "black_frame": os.path.join(masks_dir, "black_frame_masks"),
        }
        self.output_folder_processed = os.path.join(cwd, "processed_images")
        self.output_folder_comparison = os.path.join(cwd, "comparison_images")

        # Make sure every output folder exists (mask folders first, then the rest).
        every_output_dir = list(self.output_folder_masks.values())
        every_output_dir.append(self.output_folder_processed)
        every_output_dir.append(self.output_folder_comparison)
        for out_dir in every_output_dir:
            os.makedirs(out_dir, exist_ok=True)

        # Heavy models are loaded on demand by load_ocr_engine / load_instrument_detector.
        self.ocr_engine = None
        self.instrument_model = None

        self.simple_lama = SimpleLama()

    def load_ocr_engine(self):
        """Build the PaddleOCR engine on first call; later calls do nothing."""
        if self.ocr_engine is not None:
            return

        # Reset Keras global state (when the backend exposes the hook) and
        # collect garbage before the OCR engine is constructed.
        if hasattr(tf.keras.backend, 'clear_session'):
            tf.keras.backend.clear_session()
        gc.collect()

        ocr_options = dict(
            use_angle_cls=True,   # angle classifier enabled; ocr() is called with cls=True
            lang='en',
            show_log=False,
        )
        self.ocr_engine = PaddleOCR(**ocr_options)

    def load_instrument_detector(self):
        """
        Lazy-load the instrument segmentation model, working around the
        'groups' deserialization error and skipping compilation.

        Does nothing when ``self.instrument_model`` is already set.

        Raises:
            FileNotFoundError: if ``model_path`` does not exist.
        """
        if self.instrument_model is not None:
            return

        gc.collect()

        # Custom metric/loss passed to load_model via custom_objects below.
        def dice_coef(y_true, y_pred):
            y_true_f = tf.keras.backend.flatten(y_true)
            y_pred_f = tf.keras.backend.flatten(y_pred)
            inter    = tf.keras.backend.sum(y_true_f * y_pred_f)
            # The +1 terms keep the ratio defined when both tensors sum to zero.
            return (2. * inter + 1) / (tf.keras.backend.sum(y_true_f) +
                                      tf.keras.backend.sum(y_pred_f) + 1)

        def dice_coef_loss(y_true, y_pred):
            return -dice_coef(y_true, y_pred)

        from tensorflow.keras.layers import Conv2DTranspose as KConv2DT

        # Conv2DTranspose variant that accepts a `groups` argument and discards it,
        # both in the constructor and when rebuilding from a saved config.
        class Conv2DTransposeCompat(KConv2DT):
            def __init__(self, *args, groups=1, **kwargs):
                super().__init__(*args, **kwargs)

            @classmethod
            def from_config(cls, config):
                config.pop("groups", None)
                return super().from_config(config)

        # Redacted placeholder: must be replaced with the real model file path.
        model_path = "<contact_to_our_corresponding_email>"
        if not os.path.exists(model_path):
            raise FileNotFoundError(f"Không tìm thấy model: {model_path}")

        from tensorflow.keras.models import load_model
        with tf.device('/CPU:0'):
          self.instrument_model = load_model(
              model_path,
              custom_objects={
                  "dice_coef":        dice_coef,
                  "dice_coef_loss":   dice_coef_loss,
                  "Conv2DTranspose":  Conv2DTransposeCompat
              },
              compile=False            # <-- important
          )
          self.instrument_model.make_predict_function()

    def get_outliers(self, data, m=17.):
        """Return the values of ``data`` lying at least ``m`` median absolute
        deviations away from the median.

        An empty array is returned for empty input and when the median
        absolute deviation is zero.
        """
        if len(data) == 0:
            return np.array([])

        values = np.array(data)
        deviations = np.abs(values - np.median(values))
        mad = np.median(deviations)

        # A zero MAD would make every ratio infinite or undefined.
        if mad == 0:
            return np.array([])

        return values[deviations / mad >= m]

    def create_highlight_mask(self, image):
        """Build a binary (0/255) mask of specular highlights in a BGR image.

        Pixels brighter than 220 are thresholded and dilated. Contours whose
        area is both a statistical outlier (see ``get_outliers``) and larger
        than 7500 px are erased, so that only the remaining bright blobs are
        kept as highlights. The result is then eroded and re-dilated.

        Args:
            image: BGR image array.

        Returns:
            Single-channel uint8 mask with the same height/width as ``image``.
        """
        grey = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        _, thresh = cv2.threshold(grey, 220, 255, cv2.THRESH_BINARY)
        thresh_dilated = cv2.dilate(thresh, None, iterations=3)

        contours, _ = cv2.findContours(thresh_dilated.copy(), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
        if not contours:
            return np.zeros_like(grey)

        areas = [cv2.contourArea(contour) for contour in contours]
        oversized_areas = {float(area) for area in self.get_outliers(areas) if area > 7500}

        mask = thresh_dilated.copy()
        # Match contours to outlier areas by walking both lists together.
        # Looking an area up with list.index() only ever finds the FIRST
        # contour with that area, so equally sized outliers were left in.
        for contour, area in zip(contours, areas):
            if area in oversized_areas:
                cv2.drawContours(mask, [contour], -1, (0, 0, 0), -1)

        # Undo the earlier dilation, then grow slightly again.
        mask = cv2.erode(mask, None, iterations=3)
        return cv2.dilate(mask, None, iterations=2)

    def create_instrument_mask(self, image):
        """Run the instrument model on a BGR image and return a uint8 mask
        resized to the image's height and width."""
        self.load_instrument_detector()
        detector = self.instrument_model

        net_h, net_w = 256, 256  # input size fed to the model

        # BGR -> RGB, resize to the network input size, scale to [0, 1].
        rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        net_input = cv2.resize(rgb, (net_w, net_h), interpolation=cv2.INTER_LINEAR) / 255.0

        # Add the batch axis: (256, 256, 3) -> (1, 256, 256, 3).
        batch = net_input[np.newaxis, ...]
        first_output = detector.predict_on_batch(batch)[0]

        # Average over the channel axis, then binarise at 0.5.
        binary = (np.mean(first_output, axis=2) > 0.5).astype(np.float32)

        # Back to the original resolution, then to 8-bit.
        full_size = cv2.resize(binary, (image.shape[1], image.shape[0]),
                               interpolation=cv2.INTER_LINEAR)
        return (full_size * 255).astype(np.uint8)

    def create_textbox_mask(self, image_path):
        """Return a uint8 mask (255 inside detected text boxes) for the image
        at ``image_path``, using PaddleOCR.

        If the image cannot be read, a 100x100 all-zero mask is returned.
        """
        # Release Keras state and garbage before running OCR.
        if hasattr(tf.keras.backend, 'clear_session'):
            tf.keras.backend.clear_session()
        gc.collect()

        self.load_ocr_engine()

        img = cv2.imread(image_path)
        if img is None:
            return np.zeros((100, 100), dtype=np.uint8)

        text_mask = np.zeros(img.shape[:2], dtype=np.uint8)
        result = self.ocr_engine.ocr(img, cls=True)

        # Nothing detected: hand back the empty mask.
        if result is None or len(result) == 0 or not result[0]:
            return text_mask

        for detection in result[0]:
            if not detection or len(detection) <= 1:
                continue
            quad = detection[0]  # first element is the bounding polygon

            # Only draw well-formed 4-point polygons made of (x, y) pairs.
            if len(quad) != 4:
                continue
            if not all(isinstance(pt, (list, tuple)) and len(pt) == 2 for pt in quad):
                continue
            cv2.fillPoly(text_mask, [np.array(quad, dtype=np.int32)], 255)

        gc.collect()
        return text_mask

    def detect_green_box(self, image):
        """Return a 0/255 mask of pixels whose HSV value lies in the green
        range [35, 50, 40] .. [85, 255, 255], cleaned with a 5x5 opening."""
        hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)

        green_lo = np.array([35, 50, 40])
        green_hi = np.array([85, 255, 255])
        in_range = cv2.inRange(hsv, green_lo, green_hi)

        # Morphological opening removes small isolated responses.
        opening_kernel = np.ones((5, 5), np.uint8)
        return cv2.morphologyEx(in_range, cv2.MORPH_OPEN, opening_kernel)

    def combine_masks(self, highlight_mask, instrument_mask, textbox_mask, green_box_mask=None):
        """Return the highlight mask with instrument, textbox and (optionally)
        green-box regions removed.

        All exclusion masks are resized to the highlight mask's size first.
        """
        height, width = highlight_mask.shape[:2]
        target_size = (width, height)

        excluded = cv2.bitwise_or(
            cv2.resize(instrument_mask, target_size),
            cv2.resize(textbox_mask, target_size),
        )
        if green_box_mask is not None:
            excluded = cv2.bitwise_or(excluded, cv2.resize(green_box_mask, target_size))

        # Keep highlight pixels only where nothing is excluded.
        return cv2.bitwise_and(highlight_mask, cv2.bitwise_not(excluded))

    # === IMAGE PROCESSING FUNCTIONS ===

    def simple_inpaint_highlight(self, image, mask, method=cv2.INPAINT_NS):
        """Remove highlights by blur pre-filling and then OpenCV inpainting.

        The mask and a copy of the image are Gaussian-blurred 20 times; the
        blurred pixels replace the image wherever the softened mask is
        non-zero, and ``cv2.inpaint`` is then run over that same region.

        Args:
            image: BGR image. It is NOT modified; work is done on a copy.
            mask: single-channel uint8 highlight mask.
            method: OpenCV inpainting flag (``cv2.INPAINT_NS`` or
                ``cv2.INPAINT_TELEA``). Previously this argument was ignored
                and ``INPAINT_NS`` was always used; the default is therefore
                ``INPAINT_NS`` so that default calls give the same output.

        Returns:
            The inpainted BGR image.
        """
        work = image.copy()
        blurred = image.copy()
        soft_mask = mask
        for _ in range(20):
            soft_mask = cv2.GaussianBlur(soft_mask, (3, 3), 3)
            blurred = cv2.GaussianBlur(blurred, (3, 3), 3)

        # Pre-fill the (grown) highlight region with blurred content.
        region = soft_mask > 0
        work[region] = blurred[region]
        return cv2.inpaint(work, soft_mask, 3, flags=method)

    def simple_inpaint_border(self, image, mask,
                              morph_k=15, dilate_k=31, dilate_iter=1, feather_k=31):
        """Inpaint the masked border of a BGR image with Simple-LaMa and
        feather the result back into the original.

        Args:
            image: BGR image.
            mask: 1- or 3-channel mask; values above 127 mark pixels to fill.
            morph_k: diameter of the elliptical closing kernel.
            dilate_k: diameter of the elliptical dilation kernel.
            dilate_iter: number of dilation iterations.
            feather_k: Gaussian kernel size used to soften the blend mask.

        Returns:
            BGR image with the masked region replaced.
        """
        # 1) RGB image and a binary mask at the image resolution.
        rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        height, width = rgb.shape[:2]
        gray_mask = mask
        if mask.ndim == 3:
            gray_mask = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY)
        _, binary = cv2.threshold(gray_mask, 127, 255, cv2.THRESH_BINARY)
        binary = cv2.resize(binary, (width, height), interpolation=cv2.INTER_NEAREST)

        # 2) Morphological closing (fills concave corners).
        closing_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (morph_k, morph_k))
        closed = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, closing_kernel)

        # 3) Dilate so the fill region reaches further into the image.
        growth_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (dilate_k, dilate_k))
        fill_region = cv2.dilate(closed, growth_kernel, iterations=dilate_iter)

        # 4) Inpaint with Simple-LaMa; bring the output back to (height, width) if needed.
        lama_output = self.simple_lama(Image.fromarray(rgb), Image.fromarray(fill_region))
        filled = np.array(lama_output)
        if filled.shape[:2] != (height, width):
            filled = cv2.resize(filled, (width, height), interpolation=cv2.INTER_LINEAR)

        # 5) Soft mask and blend.
        alpha = cv2.GaussianBlur(fill_region.astype(np.float32) / 255.0,
                                 (feather_k, feather_k), 0)
        alpha = alpha[..., None]      # shape (h, w, 1)
        blended = (rgb * (1 - alpha) + filled * alpha).astype(np.uint8)

        # 6) Back to BGR.
        return cv2.cvtColor(blended, cv2.COLOR_RGB2BGR)

    def restore_text_without_black(self, processed_img, original_img, text_mask):
        """Copy original pixels back into a copy of ``processed_img`` wherever
        ``text_mask`` is set and the original HSV value channel exceeds 30."""
        restored = processed_img.copy()

        # V channel of the original image (index 2 of HSV).
        brightness = cv2.cvtColor(original_img, cv2.COLOR_BGR2HSV)[:, :, 2]

        keep = (brightness > 30) & (text_mask > 0)
        restored[keep] = original_img[keep]
        return restored

    # === BLACK FRAME DETECTION AND REMOVAL ===

    def process_border_width(self, c, c1):
        """Return the margin to add to a border width measured twice.

        Gives 5 when both measurements are at most 1, otherwise
        ``c1 - c + 6``.
        """
        both_at_edge = c <= 1 and c1 <= 1
        return 5 if both_at_edge else c1 - c + 6

    def get_border_width(self, thresh0):
        """Measure the left/right/top/bottom border widths of a thresholded
        frame, before and after a 5-iteration erosion.

        Returns:
            ``(cl, cr, ct, cb, thresh1)``: the four widths as ints (eroded
            measurement plus the margin from ``process_border_width``; the
            right side gets 2 extra) and the eroded image.

        Raises:
            IndexError: if a sampled band contains no zero-valued pixel.
        """
        def measure(binary):
            # Horizontal band (rows 50%-60%) and vertical band (columns 25%-75%).
            rows, cols = binary.shape[0], binary.shape[1]
            row_band = binary[int(rows * 0.5):int(rows * 0.6), :]
            col_band = binary[:, int(cols * 0.25):int(cols * 0.75)]

            # Indices of columns / rows that contain at least one zero pixel.
            zero_cols = [i for i in range(row_band.shape[1]) if 0 in row_band[:, i]]
            zero_rows = [i for i in range(col_band.shape[0]) if 0 in col_band[i, :]]

            left = zero_cols[0]
            right = row_band.shape[1] - zero_cols[-1]
            top = zero_rows[0]
            bottom = col_band.shape[0] - zero_rows[-1]
            return left, right, top, bottom

        # First pass on the raw threshold, second pass on the eroded one.
        cl1, cr1, ct1, cb1 = measure(thresh0)
        thresh1 = cv2.erode(thresh0, None, iterations=5)
        cl, cr, ct, cb = measure(thresh1)

        # Widen each side by the margin derived from both measurements.
        cl += self.process_border_width(cl, cl1)
        cr += self.process_border_width(cr, cr1) + 2
        ct += self.process_border_width(ct, ct1)
        cb += self.process_border_width(cb, cb1)

        return int(cl), int(cr), int(ct), int(cb), thresh1

    def get_corner_width(self, thresh0):
        """Measure, along both diagonals, how far the first/last zero pixel
        is from each corner.

        Returns:
            ``(ctl, ctr, cbl, cbr)`` for top-left, top-right, bottom-left and
            bottom-right.

        Raises:
            IndexError: if a diagonal contains no zero pixel.
        """
        main_diag = np.diag(thresh0)
        anti_diag = np.diag(np.fliplr(thresh0))

        main_zeros = [idx for idx, value in enumerate(main_diag) if value == 0]
        ctl = main_zeros[0]
        cbr = len(main_diag) - main_zeros[-1]

        anti_zeros = [idx for idx, value in enumerate(anti_diag) if value == 0]
        ctr = anti_zeros[0]
        cbl = len(anti_diag) - anti_zeros[-1]

        return ctl, ctr, cbl, cbr

    def preprocess_border_mask1(self, image, thresh0):
        """Build the black-frame (border) mask for a frame from its threshold image.

        The mask is 255 where the frame border is assumed to be and 0 over the
        content area. It starts as the union of "outside the content rectangle"
        and "outside an enlarged ellipse", then corner triangles are added
        according to the diagonal corner measurements, and finally regions in
        the bottom-left are cleared (set to 0) when a large black box or green
        pixels are detected there.

        Args:
            image: BGR image, used only for green-box detection.
            thresh0: single-channel threshold image used for all measurements.

        Returns:
            Tuple ``(border_mask, None, cl, cr, ct, cb, ctl, ctr, cbl, cbr,
            thresh1, green_mask, FLAG)``; ``FLAG`` is always 0 here.

        Raises:
            IndexError: propagated from ``get_border_width`` when no zero pixel
                is found in a sampled band.
        """
        FLAG = 0

        cl, cr, ct, cb, thresh1 = self.get_border_width(thresh0)

        # White (255) canvas with the content rectangle filled black (0)
        rectangle = 255 * np.ones(thresh0.shape, dtype="uint8")
        cv2.rectangle(rectangle, (cl, ct), (thresh0.shape[1] - cr, thresh0.shape[0] - cb), 0, -1)
        # White (255) canvas with an ellipse filled black (0); the ellipse axes are
        # the content half-width/half-height scaled by 1.08 and 1.18
        oval = 255 * np.ones(thresh0.shape, dtype="uint8")
        center = (int((cl + thresh0.shape[1] - cr) / 2), int((ct + thresh0.shape[0] - cb) / 2))
        axes = (int((thresh0.shape[1] - cl - cr) / 2 * 1.08), int((thresh0.shape[0] - ct - cb) / 2 * 1.18))
        cv2.ellipse(oval, center, axes, 0, 0, 360, 0, -1)

        # Combine rectangle and oval: a pixel stays 0 only if it is inside both
        border_mask = cv2.bitwise_or(rectangle, oval)
        try:
            ctl, ctr, cbl, cbr = self.get_corner_width(thresh0)
        except IndexError:
            # No zero pixel on a diagonal: treat all corners as having no cut
            ctl, ctr, cbl, cbr = 0, 0, 0, 0

        # Corner triangle leg lengths are derived from the content-rectangle area:
        # a right triangle with legs L has area L*L/2, so L = sqrt(2 * area)
        white_rect_width = thresh0.shape[1] - cl - cr
        white_rect_height = thresh0.shape[0] - ct - cb
        white_rect_area = white_rect_width * white_rect_height
        black_rect_size = (border_mask.shape[0] // 3, border_mask.shape[1] // 3)
        triangle_area = white_rect_area / 19
        triangle_base_height = int(np.sqrt(2 * triangle_area))
        triangle_area2 = white_rect_area / 16
        triangle_base_height2 = int(np.sqrt(2 * triangle_area2))
        triangle_area3 = white_rect_area / 13
        triangle_base_height3 = int(np.sqrt(2 * triangle_area3))
        triangle_area4 = white_rect_area / 10
        triangle_base_height4 = int(np.sqrt(2 * triangle_area4))
        # Top-left corner: the larger the measured corner cut, the larger the triangle
        if ctl > 0 and ctl < 36 and border_mask[ctl, ctl] == 0:
            pts = np.array([[0, 0], [triangle_base_height, 0], [0, triangle_base_height]])
            cv2.fillPoly(border_mask, [pts], 255)
        elif ctl >= 36 and ctl < 46 and border_mask[ctl, ctl] == 0:
            pts = np.array([[0, 0], [triangle_base_height2, 0], [0, triangle_base_height2]])
            cv2.fillPoly(border_mask, [pts], 255)
        elif ctl >= 46 and ctl < 55 and border_mask[ctl, ctl] == 0:
            pts = np.array([[0, 0], [triangle_base_height3, 0], [0, triangle_base_height3]])
            cv2.fillPoly(border_mask, [pts], 255)
        elif ctl >= 55 and border_mask[ctl, ctl] == 0:
            pts = np.array([[0, 0], [triangle_base_height4, 0], [0, triangle_base_height4]])
            cv2.fillPoly(border_mask, [pts], 255)
        # Top-right corner
        if ctr > 0 and ctr < 36 and border_mask[ctr, border_mask.shape[1] - ctr - 1] == 0:
            pts = np.array([[border_mask.shape[1] - 1, 0], [border_mask.shape[1] - 1 - triangle_base_height, 0], [border_mask.shape[1] - 1, triangle_base_height]])
            cv2.fillPoly(border_mask, [pts], 255)
        elif ctr >= 36 and ctr < 46 and border_mask[ctr, border_mask.shape[1] - ctr - 1] == 0:
            pts = np.array([[border_mask.shape[1] - 1, 0], [border_mask.shape[1] - 1 - triangle_base_height2, 0], [border_mask.shape[1] - 1, triangle_base_height2]])
            cv2.fillPoly(border_mask, [pts], 255)
        elif ctr >= 46 and ctr < 55 and border_mask[ctr, border_mask.shape[1] - ctr - 1] == 0:
            pts = np.array([[border_mask.shape[1] - 1, 0], [border_mask.shape[1] - 1 - triangle_base_height3, 0], [border_mask.shape[1] - 1, triangle_base_height3]])
            cv2.fillPoly(border_mask, [pts], 255)
        elif ctr >= 55 and border_mask[ctr, border_mask.shape[1] - ctr - 1] == 0:
            pts = np.array([[border_mask.shape[1] - 1, 0], [border_mask.shape[1] - 1 - triangle_base_height4, 0], [border_mask.shape[1] - 1, triangle_base_height4]])
            cv2.fillPoly(border_mask, [pts], 255)
        # Bottom-left corner: a single tier (cuts of 50 or more are handled as a black box below)
        if cbl > 0 and cbl < 50 and border_mask[border_mask.shape[0] - cbl - 1, cbl] == 0:
            pts = np.array([[0, border_mask.shape[0] - 1], [triangle_base_height2, border_mask.shape[0] - 1], [0, border_mask.shape[0] - 1 - triangle_base_height2]])
            cv2.fillPoly(border_mask, [pts], 255)
        # Bottom-right corner
        if cbr > 0 and cbr < 36 and border_mask[border_mask.shape[0] - cbr - 1, border_mask.shape[1] - cbr - 1] == 0:
            pts = np.array([[border_mask.shape[1] - 1, border_mask.shape[0] - 1], [border_mask.shape[1] - 1 - triangle_base_height, border_mask.shape[0] - 1], [border_mask.shape[1] - 1, border_mask.shape[0] - 1 - triangle_base_height]])
            cv2.fillPoly(border_mask, [pts], 255)
        elif cbr >= 36 and cbr < 46 and border_mask[border_mask.shape[0] - cbr - 1, border_mask.shape[1] - cbr - 1] == 0:
            pts = np.array([[border_mask.shape[1] - 1, border_mask.shape[0] - 1], [border_mask.shape[1] - 1 - triangle_base_height2, border_mask.shape[0] - 1], [border_mask.shape[1] - 1, border_mask.shape[0] - 1 - triangle_base_height2]])
            cv2.fillPoly(border_mask, [pts], 255)
        elif cbr >= 46 and cbr < 55 and border_mask[border_mask.shape[0] - cbr - 1, border_mask.shape[1] - cbr - 1] == 0:
            pts = np.array([[border_mask.shape[1] - 1, border_mask.shape[0] - 1], [border_mask.shape[1] - 1 - triangle_base_height3, border_mask.shape[0] - 1], [border_mask.shape[1] - 1, border_mask.shape[0] - 1 - triangle_base_height3]])
            cv2.fillPoly(border_mask, [pts], 255)
        elif cbr >= 55 and border_mask[border_mask.shape[0] - cbr - 1, border_mask.shape[1] - cbr - 1] == 0:
            pts = np.array([[border_mask.shape[1] - 1, border_mask.shape[0] - 1], [border_mask.shape[1] - 1 - triangle_base_height4, border_mask.shape[0] - 1], [border_mask.shape[1] - 1, border_mask.shape[0] - 1 - triangle_base_height4]])
            cv2.fillPoly(border_mask, [pts], 255)
        # Black box detected in the bottom-left corner: clear (set to 0) a block one
        # third of the mask in each dimension, anchored at the content's bottom-left
        if cbl > 0 and cbl >= 50 and border_mask[border_mask.shape[0] - cbl - 1, cbl] == 0:
            black_rect = np.zeros(black_rect_size, dtype="uint8")
            border_mask[border_mask.shape[0] - cb - black_rect_size[0]:border_mask.shape[0] - cb, cl:cl + black_rect_size[1]] = black_rect

        # Detect green regions; if any green pixel lies in the bottom quarter of the
        # rows and left quarter of the columns, clear a square at the content's bottom-left
        green_mask = self.detect_green_box(image)
        height, width = green_mask.shape
        if np.any(green_mask[int(height * 0.75):, :int(width * 0.25)] == 255):
            black_rect = np.zeros((triangle_base_height, triangle_base_height), dtype="uint8")
            border_mask[border_mask.shape[0] - cb - triangle_base_height:border_mask.shape[0] - cb,
                        cl:cl + triangle_base_height] = black_rect

        return border_mask, None, cl, cr, ct, cb, ctl, ctr, cbl, cbr, thresh1, green_mask, FLAG

    def create_text_mask_from_threshold(self, thresh_image, left_crop_ratio=LEFT_CUT_SIZE):
        """Return a copy of ``thresh_image`` in which every column from
        ``int(width * left_crop_ratio)`` to the right edge is set to 255.

        The input is left untouched.
        """
        text_mask = np.array(thresh_image, order='K', copy=True)

        # Columns left of the boundary keep their threshold values.
        boundary = int(text_mask.shape[1] * left_crop_ratio)
        text_mask[:, boundary:] = 255
        return text_mask

    def preprocess_border_mask_rectangle(self, image, thresh0):
        """Build the border mask (255 = border, 0 = content) from the
        threshold image, without any corner handling.

        When the mask is exactly 1072x1920 it is column-cropped to the
        ``LEFT_CUT_SIZE`` .. ``RIGHT_CUT_SIZE`` fraction of its width and the
        returned flag is 1; otherwise the flag is 0. ``image`` is not used.

        Returns:
            ``(border_mask, None, cl, cr, ct, cb, thresh1, FLAG)``.
        """
        cl, cr, ct, cb, thresh1 = self.get_border_width(thresh0)
        rows, cols = thresh0.shape[0], thresh0.shape[1]

        # White canvas with the content rectangle painted black.
        outside_rect = np.full(thresh0.shape, 255, dtype="uint8")
        cv2.rectangle(outside_rect, (cl, ct), (cols - cr, rows - cb), 0, -1)

        # White canvas with an enlarged ellipse painted black.
        outside_oval = np.full(thresh0.shape, 255, dtype="uint8")
        centre = (int((cl + cols - cr) / 2), int((ct + rows - cb) / 2))
        half_axes = (int((cols - cl - cr) / 2 * 1.08), int((rows - ct - cb) / 2 * 1.18))
        cv2.ellipse(outside_oval, centre, half_axes, 0, 0, 360, 0, -1)

        # A pixel stays 0 only if it is inside both shapes.
        border_mask = cv2.bitwise_or(outside_rect, outside_oval)

        flag = 0
        if border_mask.shape == (1072, 1920):
            flag = 1
            full_width = border_mask.shape[1]
            first_col = int(full_width * LEFT_CUT_SIZE)
            last_col = int(full_width * RIGHT_CUT_SIZE)
            border_mask = border_mask[:, first_col:last_col]
        return border_mask, None, cl, cr, ct, cb, thresh1, flag

    def create_rectangle_mask(self, image_path,
                              min_area        = 100,
                              max_area_ratio  = 0.5,
                              corner_tol_ratio= 0.04):   # tolerance as a fraction of the image size
        """
        Detect dark four-sided boxes touching the bottom-left corner.

        A contour of near-black pixels (blurred grey value <= 10) is kept when
        its area lies in [min_area, max_area_ratio * image area], its polygon
        approximation has four vertices, and its bounding box starts within
        the tolerance of the left edge and ends within the tolerance of the
        bottom edge.

        Returns:
            (mask, detected_shapes): a uint8 mask with the kept bounding boxes
            filled with 255, and a list of dicts describing each box.
        """
        gray = cv2.cvtColor(cv2.imread(image_path), cv2.COLOR_BGR2GRAY)
        smoothed = cv2.GaussianBlur(gray, (5, 5), 0)

        # Inverted threshold: dark pixels become foreground.
        _, dark = cv2.threshold(smoothed, 10, 255, cv2.THRESH_BINARY_INV)
        contours, _ = cv2.findContours(dark,
                                       cv2.RETR_EXTERNAL,
                                       cv2.CHAIN_APPROX_SIMPLE)

        height, width = gray.shape
        largest_area = max_area_ratio * (height * width)
        left_limit = int(width * corner_tol_ratio)              # left-edge tolerance
        bottom_limit = height - int(height * corner_tol_ratio)  # bottom-edge tolerance

        mask = np.zeros_like(gray)
        detected_shapes = []

        for contour in contours:
            area = cv2.contourArea(contour)
            if not (min_area <= area <= largest_area):
                continue

            perimeter = cv2.arcLength(contour, True)
            polygon = cv2.approxPolyDP(contour, 0.04 * perimeter, True)
            if len(polygon) != 4:
                continue                                    # not a quadrilateral

            x, y, bw, bh = cv2.boundingRect(polygon)

            # ---- ACCEPT ONLY BOXES IN THE BOTTOM-LEFT CORNER ----
            if x > left_limit or y + bh < bottom_limit:
                continue

            cv2.rectangle(mask, (x, y), (x + bw, y + bh), 255, -1)

            ratio = bw / float(bh)
            shape_kind = 'Square' if 0.9 <= ratio <= 1.1 else 'Rectangle'
            detected_shapes.append({
                'type'         : shape_kind,
                'coordinates'  : (x, y, bw, bh),
                'area'         : int(area),
                'position'     : 'bottem‑Left'
            })

        return mask, detected_shapes

    # === IMAGE PROCESSING PIPELINE ===

    def process_image(self, image_path):
        """Run the full clean-up pipeline on one image.

        Phase 1 removes specular highlights (excluding instrument and text
        regions). Phase 2 detects the black frame and inpaints it; frames of
        exactly 1072x1920 are column-cropped (flag == 1), all other frames are
        analysed at IMAGE_SIZE x IMAGE_SIZE and inpainted at full resolution.

        Args:
            image_path: path of the image to process.

        Returns:
            A 12-tuple: ``(orig, highlight_mask, instrument_mask, textbox_mask,
            mask, inverted green_mask, final_highlight_mask, img_no_highlight,
            th0, border_mask, img_no_blackframe, final_img)``. For 1072x1920
            frames ``textbox_mask``, ``mask`` and the green slot carry the
            cropped text mask, the left crop and the centre crop instead.

        Raises:
            ValueError: if the image cannot be read.
        """
        orig = cv2.imread(image_path)
        if orig is None:
            # Fail with a clear message instead of an AttributeError on .shape.
            raise ValueError(f"Could not read image: {image_path}")

        h0, w0 = orig.shape[:2]
        orig_backup = orig.copy()

        # --- PHASE 1: highlight removal ---
        textbox_mask = self.create_textbox_mask(image_path)
        textbox_mask = cv2.resize(textbox_mask, (w0, h0), interpolation=cv2.INTER_NEAREST)

        highlight_mask = self.create_highlight_mask(orig)

        # Free memory before instrument detection.
        gc.collect()
        instrument_mask = self.create_instrument_mask(orig)

        # Highlights are not removed inside instrument or text regions.
        exclude_mask = cv2.bitwise_or(instrument_mask, textbox_mask)
        final_highlight_mask = cv2.bitwise_and(
            highlight_mask, cv2.bitwise_not(exclude_mask))

        img_no_highlight = self.simple_inpaint_highlight(orig.copy(), final_highlight_mask)

        # Free memory before border processing.
        gc.collect()

        # --- PHASE 2: black-frame removal ---
        is_full_frame = img_no_highlight.shape == (1072, 1920, 3)
        if is_full_frame:
            threshold = 80
            img_border = np.copy(img_no_highlight)
        else:
            threshold = 10
            img_border = cv2.resize(img_no_highlight, (IMAGE_SIZE, IMAGE_SIZE))

        gray = cv2.cvtColor(img_border, cv2.COLOR_BGR2GRAY)
        _, th0 = cv2.threshold(gray, threshold, 255, cv2.THRESH_BINARY_INV)

        # Bottom-left box mask, inverted so the box is 0 and everything else 255.
        mask, _ = self.create_rectangle_mask(image_path)
        mask = cv2.bitwise_not(mask)
        if mask.shape != th0.shape:
            mask = cv2.resize(mask, (th0.shape[1], th0.shape[0]))

        th0 = cv2.bitwise_and(th0, mask)

        if is_full_frame:
            border_mask, _, cl, cr, ct, cb, thresh1, flag = self.preprocess_border_mask_rectangle(img_border, th0)
        else:
            border_mask, _, cl, cr, ct, cb, ctl, ctr, cbl, cbr, thresh1, green_mask, flag = self.preprocess_border_mask1(img_border, th0)

        if 0 in mask:
            box_mask = mask
            if flag == 1:
                # border_mask was column-cropped by preprocess_border_mask_rectangle;
                # crop the box mask identically, otherwise bitwise_and fails on
                # mismatched sizes.
                full_width = mask.shape[1]
                box_mask = np.ascontiguousarray(
                    mask[:, int(full_width * LEFT_CUT_SIZE):int(full_width * RIGHT_CUT_SIZE)])
            border_mask = cv2.bitwise_and(border_mask, box_mask)

        if flag == 1:
            image_copy_1 = np.copy(img_no_highlight)
            image_copy_2 = np.copy(img_no_highlight)
            # Centre crop (what gets inpainted) and left crop (what gets returned).
            image_copy_1 = image_copy_1[:, int(image_copy_1.shape[1] * LEFT_CUT_SIZE):int(image_copy_1.shape[1] * RIGHT_CUT_SIZE)]
            image_copy_2 = image_copy_2[:, :int(image_copy_2.shape[1]) - int(image_copy_2.shape[1] * 0.2905)]
            text_mask = self.create_text_mask_from_threshold(th0)
            textbox_mask = text_mask[:, :int(text_mask.shape[1]) - int(text_mask.shape[1] * 0.2905)]
            img_no_blackframe = self.simple_inpaint_border(image_copy_1, border_mask)
            final_img = image_copy_2.copy()
            # Use ONE selector on both sides so source and destination always
            # pick the same pixels (the mask is binary, so `> 0` is equivalent
            # to the former `> 20` on the right-hand side).
            replace = textbox_mask > 0
            final_img[replace] = img_no_blackframe[replace]
        else:
            border_mask_resized = cv2.resize(border_mask, (img_no_highlight.shape[1], img_no_highlight.shape[0]))
            img_no_blackframe = self.simple_inpaint_border(img_no_highlight, border_mask_resized)
            # Put the original (non-black) text pixels back on top.
            final_img = self.restore_text_without_black(
                img_no_blackframe, orig_backup, textbox_mask)

        # Clean up memory
        gc.collect()
        if flag == 1:
            # No green mask exists for full frames: the green and box slots of
            # the result carry the two crops instead (for visualisation).
            green_mask = cv2.bitwise_not(image_copy_1)
            mask = image_copy_2

        return orig, highlight_mask, instrument_mask, textbox_mask, mask, cv2.bitwise_not(green_mask), final_highlight_mask, img_no_highlight, th0, border_mask, img_no_blackframe, final_img

    def visualize_pipeline(self, image_path):
        """
        Run ``process_image`` and save/show every intermediate result.

        Each stage is written to ``VIS_DIR/<stage>/<image name>.png`` and all
        stages are shown side by side in one matplotlib figure, which is
        closed afterwards so figures do not pile up when many images are
        processed in a loop.
        """
        start_time = time.time()

        # Run full processing
        (orig,
         highlight_mask,
         instrument_mask,
         textbox_mask,
         box_mask,
         green_mask,
         final_highlight_mask,
         img_no_highlight,
         th0,
         border_mask,
         img_no_blackframe,
         final_img) = self.process_image(image_path)

        base_name = os.path.splitext(os.path.basename(image_path))[0]

        # Stage name -> image; insertion order is also the display order.
        stages = {
            'original': orig,
            'highlight_mask': highlight_mask,
            'instrument_mask': instrument_mask,
            'textbox_mask': textbox_mask,
            'box_mask': box_mask,
            'green_mask': green_mask,
            'final_highlight_mask': final_highlight_mask,
            'th0': th0,
            'border_mask': border_mask,
            'img_no_highlight': img_no_highlight,
            'img_no_blackframe': img_no_blackframe,
            'final_restored': final_img,
        }

        # One sub-folder per stage under VIS_DIR.
        os.makedirs(VIS_DIR, exist_ok=True)
        for name, img in stages.items():
            stage_dir = os.path.join(VIS_DIR, name)
            os.makedirs(stage_dir, exist_ok=True)
            # Single-channel masks are expanded to 3 channels before saving.
            img_to_save = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR) if len(img.shape) == 2 else img
            cv2.imwrite(os.path.join(stage_dir, f"{base_name}.png"), img_to_save)

        elapsed = time.time() - start_time
        print(f"Pipeline for {base_name} completed in {elapsed:.2f}s, saved in {VIS_DIR}")

        # Display all steps on a grid with 4 columns.
        n = len(stages)
        cols = 4
        rows = (n + cols - 1) // cols
        fig, axes = plt.subplots(rows, cols, figsize=(4*cols, 3*rows))
        axes = axes.flatten()
        for ax, (name, img) in zip(axes, stages.items()):
            if len(img.shape) == 2:
                display_img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
            else:
                display_img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            ax.imshow(display_img)
            ax.set_title(name)
            ax.axis('off')
        # hide extra axes
        for ax in axes[n:]:
            ax.axis('off')

        plt.tight_layout()
        plt.show()
        # Release the figure explicitly; otherwise figures can accumulate
        # across the per-image loop and exhaust memory.
        plt.close(fig)


IMAGE_DIR = "/content/non_missing_images"
VIS_DIR = "pipeline_viz"
os.makedirs(IMAGE_DIR, exist_ok=True)
os.makedirs(VIS_DIR, exist_ok=True)

processor = EndoscopyImageProcessor()

# Image IDs (file names without extension) of special interest.
list_ids = [
      'cla820gmms603071u3p4l07iv',
]

# Sorted list of the .jpg files only: a stable order, and no stray
# non-image entries that would make cv2.imread return None.
all_files = sorted(f for f in os.listdir(IMAGE_DIR) if f.lower().endswith('.jpg'))

# Resolve each requested ID to its file; report the ones that are missing.
# NOTE(review): selected_files is built but not consumed by the loop below -
# confirm whether the run should be restricted to these files.
selected_files = []
for img_id in list_ids:
    match = next((f for f in all_files if os.path.splitext(f)[0] == img_id), None)
    if match is None:
        print(f"Không tìm thấy ảnh cho ID: {img_id}")
    else:
        selected_files.append(match)

# Skip the first START_INDEX images. The slice is taken from the sorted,
# filtered list rather than from raw os.listdir(), whose order is arbitrary
# and which may contain non-image entries.
START_INDEX = 60
for img_file in all_files[START_INDEX:]:
    img_path = os.path.join(IMAGE_DIR, img_file)
    processor.visualize_pipeline(img_path)