<a href="https://colab.research.google.com/github/CoderAPS/HuBMAP/blob/main/kaggle_submission_20230713.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Mount google drive
from google.colab import drive
drive.mount('/gdrive')

Mounted at /gdrive


In [2]:
!ls /gdrive/MyDrive/DATA/HuBMAP/test/


72e40acccadf.tif  ff434af74304.tif  ffd3d193c71e.tif


In [3]:
%cp /gdrive/MyDrive/python_modules/normalize_image_color.py .

In [4]:
from IPython import display

In [5]:
!pip install ultralytics
display.clear_output()


In [6]:
!pip install pycocotools
display.clear_output()

In [7]:
# Import libraries
import os
import base64
import numpy as np
import torch
from pycocotools import _mask as coco_mask
import typing as t
import zlib
import pandas as pd
import torchvision.transforms as T
from ultralytics import YOLO
from PIL import Image
import cv2

import normalize_image_color as nic



In [8]:
!ls /gdrive/MyDrive/DATA/HuBMAP/test


72e40acccadf.tif  ff434af74304.tif  ffd3d193c71e.tif


In [9]:
class EncodeBinaryMask:
    @staticmethod
    def __checking_mask(mask: np.ndarray) -> np.ndarray:
        if mask.dtype != np.bool:
            raise ValueError(
                "expects a binary mask, received dtype == %s" %
                mask.dtype
            )
        return mask

    @staticmethod
    def __convert_mask(mask: np.ndarray):
        mask_to_encode = mask.astype(np.uint8)
        mask_to_encode = np.asfortranarray(mask_to_encode)
        return mask_to_encode

    @staticmethod
    def __compress_encode(encoded_mask) -> t.Text:
        binary_str = zlib.compress(encoded_mask, zlib.Z_BEST_COMPRESSION)
        base64_str = base64.b64encode(binary_str)
        return base64_str

    def __call__(self, mask: np.ndarray) -> t.Text:
        mask = self.__checking_mask(mask)
        mask_to_encode = self.__convert_mask(mask)
        encoded_mask = coco_mask.encode(mask_to_encode)[0]["counts"]
        base64_str = self.__compress_encode(encoded_mask)
        return base64_str

In [10]:
class Submission:
    def __init__(self, dirpath: str, model: torch.nn.Module):
        self.__eval_transforms = self.get_transforms()
        self.__model = model
        self.__encoder = EncodeBinaryMask()
        self.__dirpath = dirpath
        self.__filenames = os.listdir(dirpath)
        self.height = 512
        self.width = 512

        self.__submission_dict = {
            "id": [],
            "height": [],
            "width": [],
            "prediction_string": []
        }

        self.submission = None

    @staticmethod
    def get_transforms():
        return T.Compose([
            T.ToTensor(),
            T.Resize(size=(512, 512)),
            T.Normalize(mean=[0.485, 0.456, 0.406],
                        std=[0.229, 0.224, 0.225])
        ])

    def __len__(self):
        return len(self.__filenames)

    def __standardize_image_color(self, path: str) -> np.ndarray:
      image = cv2.imread(path)
      n,b,r = nic.process_color(image)
      return r


    def __get_columns(self) -> None:
        for filename in self.__filenames:
            path = self.__get_image_path(filename)

            # standardize image color
            image_standardized = self.__standardize_image_color(path)

            masks = self.__forward(image_standardized)
            identifier, height, width, prediction_string = self.__get_cells(filename, masks)
            self.__update_columns(identifier, height, width, prediction_string)

    def __update_columns(self, identifier: str, height: int, width: int, prediction_string: str) -> None:
        self.__submission_dict["id"].append(identifier)
        self.__submission_dict["height"].append(height)
        self.__submission_dict["width"].append(width)
        self.__submission_dict["prediction_string"].append(prediction_string)

    def __get_cells(self, filename: str, masks: list):
        prediction_string = ""
        prediction_string = self.__get_prediction_string(masks, prediction_string)
        identifier = filename.split(".")[0]
        return identifier, self.height, self.width, prediction_string

    def __get_prediction_string(self, masks: list, prediction_string: str) -> str:
        if masks:
            for outputs in masks:
                mask = outputs["mask"]
                mask = np.where(mask > 0.5, 1, 0).astype(np.bool)
                base64_str = self.__encoder(mask)
                confidence = outputs["confidence"]
                prediction_string += f"0 {confidence} {base64_str.decode('utf-8')} "
        else:
            return ""
        return prediction_string

    def __get_image_path(self, filename: str) -> str:
        return os.path.join(
            self.__dirpath, filename
        )

    def __get_image(self, path: str) -> torch.Tensor:
        image = Image.open(path)
        image = np.asarray(image)
        image = self.__eval_transforms(image)
        return image

    def __forward(self, image: np.ndarray) -> list:
        masks = self.__model(image)
        return masks

    def submit(self) -> None:
        if not self.submission:
            self.__get_columns()
            self.submission = pd.DataFrame(self.__submission_dict)
            self.submission = self.submission.set_index('id')
            self.submission.to_csv("submission.csv")

In [11]:
class BestYolo:
    def __init__(self, conf: float = 0.05):
        self.model_path = "/gdrive/MyDrive/saved_models_weights/HuBMAP/YOLOv8_20230711/best.pt"
        self.model = self.get_model()
        self.conf = conf

    def get_model(self) -> YOLO:
        return YOLO(self.model_path)

    def __call__(self, source) -> list[dict, ...]:
        sublist = []
        result = self.model(source)[0]
        if result.masks:
            for i in range(len(result.masks.data)):
                conf = round(float(result.boxes.conf[i]), 2)
                mask = np.expand_dims(result.masks.data[i].cpu().numpy(), axis=0).transpose(1,2,0)

                if int(result.boxes.cls[i]) == 0 and conf >= self.conf:
                    sublist.append({"mask": mask, "confidence": conf})
                else:
                    continue
            return sublist
        else:
            return None

In [12]:
__TEST_PATH = "/gdrive/MyDrive/DATA/HuBMAP/test/"
model = BestYolo()
sub = Submission(dirpath=__TEST_PATH, model=model)
sub.submit()


0: 512x512 8 blood_vessels, 1 unsure, 4364.5ms
Speed: 6.6ms preprocess, 4364.5ms inference, 91.5ms postprocess per image at shape (1, 3, 512, 512)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  mask = np.where(mask > 0.5, 1, 0).astype(np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  if mask.dtype != np.bool:

0: 512x512 13 blood_vessels, 2 glomeruluss, 5290.3ms
Speed: 9.9ms preprocess, 5290.3ms inference, 39.6ms postprocess per image at shape (1, 3, 512, 512)

0: 512x512 11 blood_vessels, 3790.3ms
Speed: 10.5ms preprocess, 3790.3ms inference, 24.8ms postprocess per image at shape (1, 3, 512, 512)


In [13]:
sub.submission.head()

Unnamed: 0_level_0,height,width,prediction_string
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
72e40acccadf,512,512,0 0.81 eNqLiAgytU6yN/Mx9Dc0MPCHkggWjARBP2M/IA8...
ffd3d193c71e,512,512,0 0.71 eNqLjYswsUq1N/Yz9EdAAwMQhgKooJ+Rv5EfkAR...
ff434af74304,512,512,0 0.67 eNoLjc0wsEyzN/I39DfwNzSAYUM/QzATAiCCMGk...
