In [None]:
# Install the OpenSlide C library and Python bindings
# After installing these libraries, use `Runtime -> restart and run all` on the menu
!apt-get install openslide-tools
!pip install openslide-python

Reading package lists... Done
Building dependency tree       
Reading state information... Done
openslide-tools is already the newest version (3.4.1+dfsg-2).
The following package was automatically installed and is no longer required:
  libnvidia-common-460
Use 'apt autoremove' to remove it.
0 upgraded, 0 newly installed, 0 to remove and 20 not upgraded.
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
#@title Load Package

import gc
import re
import os
import random
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import datasets, layers, models, losses
from tensorflow.keras.applications.inception_v3 import InceptionV3
from tensorflow.keras.utils import plot_model

# Seed every random number generator used in this notebook (Python's `random`,
# NumPy and TensorFlow) with the same value so that runs are repeatable.
seed = 1
for seed_rng in (random.seed, np.random.seed, tf.random.set_seed):
  seed_rng(seed)

%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
import plotly
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

from openslide import open_slide, __library_version__ as openslide_version
from PIL import Image
from skimage.color import rgb2gray

# Mount Google Drive: the project modules and the slide data used below are
# read from paths under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
#@title Load custom functions

import sys
sys.path.insert(0, '/content/drive/MyDrive/Colab Notebooks/ADL/project')

from config import RAW_DATA_DIR, PROCESSED_DATA_DIR, POS_DATA_DIR, NEG_DATA_DIR, MODEL_DIR, RES_DIR
from processing import read_full_image, allocate_ones, allocate_zeros, get_patches_from_center, get_target, delete_gray
from create_model import create_three_zooms_model
from plot_heatmap import label_prediction, label_heatmap_multizooms

# Slide identifiers used to build the training and the testing patch sets.
train_file = ["101", "094", "110", "016", "078", "031", "064"]
test_file = ["091", "075", "094", "084"]

# A slide that appears in both lists leaks training data into the evaluation
# set and inflates test metrics. The lists are left as they were (the intended
# split is not known here), but the overlap is reported so it cannot go unnoticed.
overlapping_slides = sorted(set(train_file) & set(test_file))
if overlapping_slides:
  print(f"WARNING: slides present in both train_file and test_file: {overlapping_slides}")

In [None]:
# Sampling configuration shared by the dataset-building cells below.
level_lst = [5, 6, 7]   # zoom levels read for every slide; one patch is cut per level
nsamples = 200          # patches collected per class in one sampling round
sampling_times = 5      # sampling rounds attempted per slide

In [None]:
#@title Training dataset

# for slide_num in train_file:
#   print("\nStart Processing for", slide_num)
#   multi_slide_images = []
#   multi_mask_images = []
#   for level in level_lst:
#     slide_image = read_full_image(slide_num, level=level, ifmask=False)
#     mask_image = read_full_image(slide_num, level=level, ifmask=True)
#     multi_slide_images.append(slide_image)
#     multi_mask_images.append(mask_image)

#   all_ones = allocate_ones(multi_mask_images[-1])
#   all_zeros = allocate_zeros(multi_slide_images[-1], multi_mask_images[-1])

#   # Generate samples and train model
#   neg_i = 0
#   factor = 2 ** level_lst[-1]

#   for s_i in range(sampling_times):
#     print("Sampling", s_i)
#     start_ind = s_i * nsamples
#     end_ind = (s_i+1) * nsamples

#     if end_ind <= len(all_ones):

#       # positive samples
#       print("Generating positive samples......")
#       pos_file = os.path.join(POS_DATA_DIR, f"{slide_num}_357_{s_i}.npy")

#       if not os.path.exists(pos_file):
#         print("------Process samples-------")
#         selected_centers = all_ones[start_ind: end_ind]

#         pos_pitch_lst = [[] for _ in range(len(level_lst))]
#         for x_cen, y_cen in selected_centers:
#           multi_check_images = get_patches_from_center(multi_mask_images, x_cen*factor, y_cen*factor, level_lst=level_lst)
#           multi_tumor_regions = get_patches_from_center(multi_slide_images, x_cen*factor, y_cen*factor, level_lst=level_lst)
#           assert multi_check_images[0].sum() != 0
#           assert get_target(multi_check_images[0]) == 1.0

#           for i in range(len(level_lst)):
#             pos_pitch_lst[i].append(multi_tumor_regions[i] / 255.0)

#         pos_pitch_lst = np.array(pos_pitch_lst)
#         np.save(pos_file, pos_pitch_lst)

#         del pos_pitch_lst
#         gc.collect()

#       # negative samples
#       print("Generating negative samples......")
#       neg_file = os.path.join(NEG_DATA_DIR, f"{slide_num}_357_{s_i}.npy")

#       if not os.path.exists(neg_file):
#         print("------Process samples-------")
#         neg_pitch_lst = [[] for _ in range(len(level_lst))]

#         while len(neg_pitch_lst[0]) < nsamples:
#           x_cen, y_cen = all_zeros[neg_i]

#           multi_tumor_regions = get_patches_from_center(multi_slide_images, x_cen*factor, y_cen*factor, level_lst=level_lst)

#           # Make sure that minimum zoom level have enough tissue
#           if delete_gray(multi_tumor_regions[0]):
#             multi_check_images = get_patches_from_center(multi_mask_images, x_cen*factor, y_cen*factor, level_lst=level_lst)

#             # Make sure that minimum zoom level have tumor
#             label = get_target(multi_check_images[0])
#             if label == 0.0:
#               for i in range(len(level_lst)):
#                 neg_pitch_lst[i].append(multi_tumor_regions[i] / 255.0)
#           neg_i += 1

#         neg_pitch_lst = np.array(neg_pitch_lst)
#         np.save(neg_file, neg_pitch_lst)
        
#         del neg_pitch_lst
#         gc.collect()



Start Processing for 101
Read WSI from /content/drive/MyDrive/Colab Notebooks/ADL/project/raw_data/Copy of tumor_101.tif with width: 17408, height: 8960
Read WSI from /content/drive/MyDrive/Colab Notebooks/ADL/project/raw_data/Copy of tumor_101_mask.tif with width: 17408, height: 8960
Read WSI from /content/drive/MyDrive/Colab Notebooks/ADL/project/raw_data/Copy of tumor_101.tif with width: 4352, height: 2240
Read WSI from /content/drive/MyDrive/Colab Notebooks/ADL/project/raw_data/Copy of tumor_101_mask.tif with width: 4352, height: 2240
Read WSI from /content/drive/MyDrive/Colab Notebooks/ADL/project/raw_data/Copy of tumor_101.tif with width: 1088, height: 560
Read WSI from /content/drive/MyDrive/Colab Notebooks/ADL/project/raw_data/Copy of tumor_101_mask.tif with width: 1088, height: 560
The number of tumor pixels 11756
The number of tumor pixels after filter 6350
The number of tissue 150818
The number of tumor 11756
The number of normal tissue 139078
The number of normal tissue af

In [None]:
#@title Testing dataset

# For every test slide: read the slide and its tumor mask at each zoom level in
# `level_lst`, then cache batches of positive (tumor) and negative (normal
# tissue) multi-zoom patches as .npy files. A batch whose file already exists
# is skipped, so the cell can be re-run after an interruption.

# Tag used in the cache file names, built from the zoom levels in use
# (e.g. [5, 6, 7] -> "567") so files made with different `level_lst` settings
# do not collide.
level_tag = "".join(str(level) for level in level_lst)

for slide_num in test_file:
  print("\nStart Processing for", slide_num)

  # Slide image and tumor mask at every zoom level, in `level_lst` order.
  multi_slide_images = []
  multi_mask_images = []
  for level in level_lst:
    slide_image = read_full_image(slide_num, level=level, ifmask=False)
    mask_image = read_full_image(slide_num, level=level, ifmask=True)
    multi_slide_images.append(slide_image)
    multi_mask_images.append(mask_image)

  # Candidate patch centers, computed on the images of the last level in `level_lst`.
  all_ones = allocate_ones(multi_mask_images[-1])
  all_zeros = allocate_zeros(multi_slide_images[-1], multi_mask_images[-1])

  # Generate samples
  # `neg_i` walks through `all_zeros` and is shared by all sampling rounds of
  # this slide, so consecutive rounds draw different negative candidates.
  # NOTE(review): `neg_i` only advances while a negative batch is being built.
  # If an earlier round's file already exists, later rounds start again from
  # the beginning of `all_zeros` and may duplicate cached samples - confirm
  # whether resuming a partially processed slide matters.
  neg_i = 0
  # Centers are scaled by 2**(last level) before being handed to
  # get_patches_from_center - presumably converting them to level-0
  # coordinates; verify against `processing.get_patches_from_center`.
  factor = 2 ** level_lst[-1]

  for s_i in range(sampling_times):
    print("Sampling", s_i)
    start_ind = s_i * nsamples
    end_ind = (s_i+1) * nsamples

    # A round (positive AND negative batch) is only produced while the slide
    # still has a full batch of `nsamples` unused tumor centers.
    if end_ind <= len(all_ones):

      # positive samples
      print("Generating positive samples......")
      pos_file = os.path.join(POS_DATA_DIR, f"{slide_num}_{level_tag}_{s_i}.npy")

      if not os.path.exists(pos_file):
        print("------Process samples-------")
        selected_centers = all_ones[start_ind: end_ind]

        # One list of patches per zoom level.
        pos_pitch_lst = [[] for _ in range(len(level_lst))]
        for x_cen, y_cen in selected_centers:
          multi_check_images = get_patches_from_center(multi_mask_images, x_cen*factor, y_cen*factor, level_lst=level_lst)
          multi_tumor_regions = get_patches_from_center(multi_slide_images, x_cen*factor, y_cen*factor, level_lst=level_lst)
          # Sanity checks: the mask patch at the first level must contain tumor
          # and must be labelled positive.
          assert multi_check_images[0].sum() != 0
          assert get_target(multi_check_images[0]) == 1.0

          for i in range(len(level_lst)):
            # Scale pixel values by 1/255 before storing.
            pos_pitch_lst[i].append(multi_tumor_regions[i] / 255.0)

        pos_pitch_lst = np.array(pos_pitch_lst)
        np.save(pos_file, pos_pitch_lst)

        # Release the batch before building the next one.
        del pos_pitch_lst
        gc.collect()

      # negative samples
      print("Generating negative samples......")
      neg_file = os.path.join(NEG_DATA_DIR, f"{slide_num}_{level_tag}_{s_i}.npy")

      if not os.path.exists(neg_file):
        print("------Process samples-------")
        neg_pitch_lst = [[] for _ in range(len(level_lst))]

        # Stop when the batch is full OR the candidates are exhausted; without
        # the second condition `all_zeros[neg_i]` raises IndexError on slides
        # with too few usable normal-tissue centers.
        while len(neg_pitch_lst[0]) < nsamples and neg_i < len(all_zeros):
          x_cen, y_cen = all_zeros[neg_i]

          multi_tumor_regions = get_patches_from_center(multi_slide_images, x_cen*factor, y_cen*factor, level_lst=level_lst)

          # Make sure that the patch at the first zoom level has enough tissue
          if delete_gray(multi_tumor_regions[0]):
            multi_check_images = get_patches_from_center(multi_mask_images, x_cen*factor, y_cen*factor, level_lst=level_lst)

            # Make sure that the patch at the first zoom level has NO tumor
            label = get_target(multi_check_images[0])
            if label == 0.0:
              for i in range(len(level_lst)):
                neg_pitch_lst[i].append(multi_tumor_regions[i] / 255.0)
          neg_i += 1

        n_collected = len(neg_pitch_lst[0])
        if n_collected < nsamples:
          print(f"WARNING: slide {slide_num}, sampling {s_i}: only {n_collected}/{nsamples} negative samples available")

        # Save whatever was collected; an empty batch is not written.
        if n_collected > 0:
          neg_pitch_lst = np.array(neg_pitch_lst)
          np.save(neg_file, neg_pitch_lst)

        del neg_pitch_lst
        gc.collect()



Start Processing for 091
Read WSI from /content/drive/MyDrive/Colab Notebooks/ADL/project/raw_data/Copy of tumor_091.tif with width: 1920, height: 1680
Read WSI from /content/drive/MyDrive/Colab Notebooks/ADL/project/raw_data/Copy of tumor_091_mask.tif with width: 1920, height: 1680
Read WSI from /content/drive/MyDrive/Colab Notebooks/ADL/project/raw_data/Copy of tumor_091.tif with width: 960, height: 840
Read WSI from /content/drive/MyDrive/Colab Notebooks/ADL/project/raw_data/Copy of tumor_091_mask.tif with width: 960, height: 840
Read WSI from /content/drive/MyDrive/Colab Notebooks/ADL/project/raw_data/Copy of tumor_091.tif with width: 480, height: 420
Read WSI from /content/drive/MyDrive/Colab Notebooks/ADL/project/raw_data/Copy of tumor_091_mask.tif with width: 480, height: 420
The number of tumor pixels 2924
The number of tumor pixels after filter 1210
The number of tissue 62589
The number of tumor 2924
The number of normal tissue 59667
The number of normal tissue after filter 1