# Nucleus Instance Segmentation

In [None]:
# %%bash
# apt-get -y install libopenjp2-7-dev libopenjp2-tools openslide-tools libpixman-1-dev | tail -n 1
# pip install git+https://github.com/TissueImageAnalytics/tiatoolbox.git@develop | tail -n 1
# echo "Installation is done."

In [1]:
"""Import modules required to run the Jupyter notebook."""
# Standard-library modules used for logger cleanup and warning suppression.
import logging
import warnings

from PIL import Image

# Clear any handlers already attached to the root logger so that
# tiatoolbox.logger is the one producing log output in this notebook.
if logging.getLogger().hasHandlers():
    logging.getLogger().handlers.clear()

# Stain-normalization helpers (reference target image + normalizer factory).
from tiatoolbox.data import stain_norm_target
from tiatoolbox.tools.stainnorm import get_normalizer
import shutil
import slideio

import cv2
import joblib
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np



from matplotlib import colormaps
from numpy.typing import ArrayLike
# NOTE: `Image` was already imported from PIL above; this line re-imports it
# together with ImageFilter and ImageOps.
from PIL import Image, ImageFilter, ImageOps
from shapely import speedups
from shapely.geometry import Polygon

from tiatoolbox import logger
from tiatoolbox.annotation.storage import Annotation, AnnotationStore

# Enable shapely's optional speedups when the installed build offers them.
# NOTE(review): presumably only relevant for older shapely releases — confirm
# against the installed version.
if speedups.available:  # pragma: no branch
    speedups.enable()


# NOTE: `logger` is imported from tiatoolbox a second time here (same name as
# the import a few lines above).
from tiatoolbox import logger
from tiatoolbox.models.engine.nucleus_instance_segmentor import NucleusInstanceSegmentor
from tiatoolbox.utils.misc import download_data, imread

# We need this function to visualize the nuclear predictions
from tiatoolbox.utils.visualization import (
    overlay_prediction_contours,
)
from tiatoolbox.wsicore.wsireader import WSIReader

# Silence all Python warnings for the rest of the session.
warnings.filterwarnings("ignore")
mpl.rcParams["figure.dpi"] = 300  # for high resolution figure in notebook
mpl.rcParams["figure.facecolor"] = "white"  # To make sure text is visible in dark mode
plt.rcParams.update({"font.size": 5})

# NOTE: this rebinds `overlay_prediction_contours`, replacing the tiatoolbox
# function imported above with the project-local implementation from
# src.visualize. Code below that calls overlay_prediction_contours therefore
# uses the project-local version.
from src.visualize import overlay_prediction_contours

  @numba.jit()

  @numba.jit()

  @numba.jit()

  from .autonotebook import tqdm as notebook_tqdm

  @numba.jit()

  speedups.enable()



> ![image](https://tiatoolbox.dcs.warwick.ac.uk/notebook/hovernet_samples.PNG)


In [None]:
import pandas as pd

# Load the cleaned TMA annotation table and show which stains / TMA ids it
# contains. (The stain list is printed twice, as in the original cell.)
df2 = pd.read_csv("/home/amrit/data/proj_data/MLG_project/DLBCL-Morph/TMA/annotations_clean.csv")
print(df2['stain'].unique())
print(df2['stain'].unique())
print(df2['tma_id'].unique())

# Optional ad-hoc restrictions can be applied here, e.g.
#   df2 = df2[df2['patient_id'] == 17666]
#   df2 = df2[df2['tma_id'] == 'TA292']

# Order by patient and derive the box area (scaled down by 10000) from the
# annotation corner coordinates.
df2 = df2.sort_values("patient_id")
df2['area'] = (df2['xe'] - df2['xs']) * (df2['ye'] - df2['ys']) / 10000

# Keep only MYC-stained annotations that are large enough and whose corner
# coordinates are all non-negative.
large_enough = df2['area'] >= 150
is_myc = df2['stain'] == 'MYC'
coords_non_negative = (df2[['xs', 'ys', 'xe', 'ye']] >= 0).all(axis=1)
df2 = df2[large_enough & is_myc & coords_non_negative]

# reset_index() keeps the previous row labels in a new 'index' column.
df2 = df2.reset_index()

df2

In [None]:
# Summary statistics of the filtered annotation table.
df2.describe()

In [None]:
# Preview a slice of rows (positions 240 up to, but not including, 260).
df2[240:260]

# Single run

In [None]:


import glob

import pandas as pd

# ---------------------------------------------------------------------------
# Run configuration
# ---------------------------------------------------------------------------
ON_GPU = True
patch_size = 224
output_size = 8 * patch_size  # used as the segmentor tile shape and in output file names

# Range of df2 row positions to process.
# (A single random case could be picked with np.random.choice(df2.index).)
start_index = 10
end_index = len(df2.index)

# Example for listing previously written H&E images:
# glob.glob(f"/home/amrit/data/proj_data/MLG_project/DLBCL-Morph/outputs/images/**/*HE**.png", recursive=True)

# Pretrained weights that can be selected through `model_name`:
#   'hovernet_fast-pannuke'     PanNuke dataset (H/E): 19 different tissue types,
#                               481 visual fields, 20K WSI at different magnifications
#                               https://paperswithcode.com/dataset/pannuke
#   'micronet_hovernet-consep'  CoNSeP dataset: 41 H&E stained image tiles
#                               https://paperswithcode.com/dataset/consep
#   'hovernet_fast-monusac'     MoNuSAC dataset (H/E): 4 different organs
#                               (Lung, Prostate, Kidney, and Breast)
#                               https://monusac-2020.grand-challenge.org/Data/
#   'hovernet_original_kumar'   Kumar dataset (H&E staining)
#                               https://monuseg.grand-challenge.org/Data/
model_name = "hovernet_fast-pannuke"

# Segmentor used for tile-mode prediction in the loop below.
inst_segmentor = NucleusInstanceSegmentor(
    pretrained_model=model_name,
    num_loader_workers=4,
    num_postproc_workers=4,
    batch_size=8,
)

# Collects one record per case that raised during processing.
failed_cases = []

import os

# Contour colour (and label) per predicted nucleus type id, handed to the
# overlay helper.
# NOTE(review): count_dict / selected_count_dict below track type ids 0-5 while
# this mapping only defines ids 0-4 — confirm the ids and labels against the
# type mapping of the selected pretrained model.
color_dict = {
    0: ("neoplastic epithelial", (255, 0, 0)),
    1: ("Inflammatory", (255, 255, 0)),
    2: ("Connective", (0, 255, 0)),
    3: ("Dead", (0, 0, 0)),
    4: ("non-neoplastic epithelial", (0, 0, 255)),
}

# Separator line printed around failure messages so they stand out in the
# (long) cell output.
failure_banner = "***********************************************"

# Process every selected annotation: crop the region from the slide, run
# nucleus instance segmentation on the crop, and write the image, the overlay
# visualisation and the overlay mask to disk. Any exception marks the case as
# failed (recorded in failed_cases.csv) and the loop moves on to the next case.
for index in range(start_index, end_index):
    # Reset the per-case metadata so the failure handler below can never hit a
    # NameError (failure early in the first iteration) or report values left
    # over from the previous case.
    img_file_name = df_index = patient_id = stain = tma_id = None

    try:
        print("index", index)
        print(df2.iloc[index])

        # 'index' holds the row label the annotation had before reset_index().
        df_index = df2['index'][index]
        patient_id = df2['patient_id'][index]
        stain = df2['stain'][index]
        tma_id = df2['tma_id'][index]
        xs, ys, xe, ye = df2[['xs', 'ys', 'xe', 'ye']].iloc[index].to_list()

        print(patient_id, stain, tma_id, xs, ys, xe, ye, xe-xs, ye-ys)

        svs_path = f"/home/amrit/data/proj_data/MLG_project/DLBCL-Morph/TMA/{stain}/{tma_id}.svs"
        save_dir = f"/home/amrit/data/proj_data/MLG_project/DLBCL-Morph/outputs/files/{stain}/{patient_id}/{df_index}/"
        parent_folder = f"/home/amrit/data/proj_data/MLG_project/DLBCL-Morph/outputs/images/{patient_id}/"

        # Create the per-patient image folder (and any missing parents);
        # an already existing folder is fine.
        os.makedirs(parent_folder, exist_ok=True)

        img_file_name = f"{parent_folder}{patient_id}_{stain}_{tma_id}_{output_size}_{df_index}.png"
        kmeans_cluster_vis_file = f"{parent_folder}{patient_id}_{stain}_{tma_id}_{output_size}_{df_index}_kmeans_cluster_vis.png"
        overlay_image_file_name = f"{parent_folder}{patient_id}_{stain}_{tma_id}_{output_size}_{df_index}_overlay.npy"
        overlay_mask_file_name = f"{parent_folder}{patient_id}_{stain}_{tma_id}_{output_size}_{df_index}_mask.npy"
        overlay_vis_file_name = f"{parent_folder}{patient_id}_{stain}_{tma_id}_{output_size}_{df_index}_vis.png"

        reader = WSIReader.open(svs_path)
        info_dict = reader.info.as_dict()
        print(info_dict)  # noqa: T203

        # Extra margin (in level-0 pixels) added on every side of the annotation.
        buffer = 150
        # Specify the bounds in terms of rectangle (left, top, right, bottom)
        bounds = (xs - buffer, ys - buffer, xe + buffer, ye + buffer)

        # Read the region using wsi reader's read bounds at level 0
        img_array = reader.read_bounds(bounds, resolution=0, units="level")
        print(img_array.shape)

        # Persist the crop; the segmentor is run on this file.
        image_pil = Image.fromarray(img_array)
        image_pil.save(img_file_name)

        # Remove results of a previous run for this case before predicting.
        # NOTE(review): presumably predict() refuses to write into an existing
        # save_dir — confirm. Errors (e.g. missing directory) are ignored.
        shutil.rmtree(save_dir, ignore_errors=True)

        inst_segmentor.ioconfig.tile_shape = [output_size, output_size]
        # Other ioconfig knobs that were experimented with:
        # inst_segmentor.ioconfig.patch_input_shape = [patch_size,patch_size]
        # inst_segmentor.ioconfig.patch_output_shape = [patch_size,patch_size]
        # inst_segmentor.ioconfig.margin = [patch_size,patch_size]
        # inst_segmentor.ioconfig.highest_input_resolution = {'units': 'mpp', 'resolution': 0.1}

        # Set to "reinhard", "custom", "ruifrok", "macenko" or "vahadane" to
        # enable stain normalization; the string "None" disables it.
        stain_norm_method = "None"

        if stain_norm_method != "None":
            target_image = stain_norm_target()
            stain_normalizer = get_normalizer(stain_norm_method)
            stain_normalizer.fit(target_image)

            def stain_norm_func(img: np.ndarray) -> np.ndarray:
                """Helper function to perform stain normalization."""
                return stain_normalizer.transform(img)

            inst_segmentor.model.preproc_func = stain_norm_func

        tile_output = inst_segmentor.predict(
            [img_file_name],
            save_dir=save_dir,
            mode="tile",
            on_gpu=ON_GPU,
            crash_on_exception=True,
        )

        # The second element of the first output entry is the path prefix of
        # the saved prediction file.
        tile_preds = joblib.load(f"{tile_output[0][1]}.dat")
        logger.info(f"Number of detected nuclei: {len(tile_preds)}")

        # Extracting the nucleus IDs and select the first one
        # NOTE: if no nucleus was detected, the indexing below raises
        # IndexError and the case is recorded as failed by the handler.
        nuc_id_list = list(tile_preds.keys())
        selected_nuc_id = nuc_id_list[0]
        logger.info(f"Nucleus prediction structure for nucleus ID: {selected_nuc_id}")
        sample_nuc = tile_preds[selected_nuc_id]
        sample_nuc_keys = list(sample_nuc)
        logger.info(
            "Keys in the output dictionary: [%s, %s, %s, %s, %s]",
            sample_nuc_keys[0],
            sample_nuc_keys[1],
            sample_nuc_keys[2],
            sample_nuc_keys[3],
            sample_nuc_keys[4],
        )
        logger.info(
            "Bounding box: (%d, %d, %d, %d)",
            sample_nuc["box"][0],
            sample_nuc["box"][1],
            sample_nuc["box"][2],
            sample_nuc["box"][3],
        )
        logger.info(
            "Centroid: (%d, %d)",
            sample_nuc["centroid"][0],
            sample_nuc["centroid"][1],
        )

        tile_img = imread(img_file_name)

        # Count nuclei per predicted type: all of them, and only those whose
        # probability exceeds 0.5 (also collected in new_tile_preds).
        count_dict = dict.fromkeys(range(6), 0)
        selected_count_dict = dict.fromkeys(range(6), 0)

        cell_prob_list = []
        new_tile_preds = {}
        for nuc_id, nuc in tile_preds.items():
            cell_type = nuc['type']
            cell_prob = nuc['prob']
            cell_prob_list.append(cell_prob)

            count_dict[cell_type] += 1
            if cell_prob > 0.5:
                new_tile_preds[nuc_id] = nuc
                selected_count_dict[cell_type] += 1

        print("count_dict", count_dict)
        print("selected_count_dict", selected_count_dict)

        # The overlay is drawn from the unfiltered predictions (tile_preds).
        overlay_mask, overlay_image = overlay_prediction_contours(
            canvas=tile_img,
            inst_dict=tile_preds,
            draw_dot=False,
            type_colours=color_dict,
            line_thickness=2,
        )

        # Save the original crop, the mask and the overlay side by side.
        fig = plt.figure(figsize=(15, 6))
        try:
            panels = (tile_img, overlay_mask, overlay_image)
            for panel_no, panel in enumerate(panels, start=1):
                plt.subplot(1, 3, panel_no)
                plt.imshow(panel)
                plt.axis("off")

            # The title is attached to the current (last) subplot.
            plt.title(f"Stain_norm_method_{stain_norm_method}")
            plt.tight_layout()
            plt.savefig(overlay_vis_file_name, dpi=200)
        finally:
            # Close (not just clear) the figure: pyplot keeps every figure
            # alive until it is closed, which would accumulate one figure per
            # processed case.
            plt.close(fig)

        np.save(overlay_mask_file_name, overlay_mask)

    except Exception:
        # Report why the case failed instead of silently discarding the error.
        logger.exception("Failed case: %s", index)

        # Record the failed case and rewrite the CSV right away so the list
        # survives an interrupted run. Fields that were not reached before the
        # failure are stored as None.
        failed_cases.append({'file_name': img_file_name, 'index': index, 'df2_index': df_index, 'patient_id': patient_id,
                            "stain": stain, "tma_id": tma_id})
        failed_df = pd.DataFrame(failed_cases)
        failed_df.to_csv('failed_cases.csv', index=False)

        for _ in range(4):
            print(failure_banner)
        for _ in range(4):
            print(f"AAAAA Failed case: {index} ")
        for _ in range(6):
            print(failure_banner)

