### 1.1 Preprocessing - Reinhard Normalization and WSI Tiling

As a first preprocessing step, all slides were color-normalized with respect to a reference image selected by an expert neuropathologist. Color normalization was performed using the method described by [Reinhard et al.](https://ieeexplore.ieee.org/document/946629).

The resulting color-normalized whole slide images were tiled using PyVips to generate 1536 x 1536 image patches.

In [1]:
import glob
import os

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pyvips as Vips
from tqdm import tqdm

from utils import vips_utils, normalize

In [2]:
# --- Input directories holding the whole slide images (WSIs) ---
# WSIs in the training set
TRAIN_WSI_DIR = "data/Dataset 1a Development_train/"
# WSIs in the validation set (currently disabled)
#VAL_WSI_DIR = 'data/Dataset 1b Development_validation/'
# Additional WSIs; the processing loop falls back to this directory
TEST_WSI_DIR = "data/box/"

# --- Output directory passed to vips_utils.save_and_tile ---
SAVE_DIR = "data/norm_tiles/"

In [3]:
# Make sure the tile output directory exists before anything is written to it.
# exist_ok=True makes this cell safe to re-run and avoids the race between a
# separate existence check and the directory creation.
os.makedirs(SAVE_DIR, exist_ok=True)

In [4]:
# Filename of the slide used as the color-normalization reference.
# Previously considered alternative: 'NA3777-02_AB.svs'
ref_imagename = "NA5002_2AB.svs"

In [5]:
# Filenames found in each dataset directory.
wsi_train = os.listdir(TRAIN_WSI_DIR)
#wsi_val = os.listdir(VAL_WSI_DIR)
wsi_test = os.listdir(TEST_WSI_DIR)

# Slides to handle: everything in the test directory plus the reference slide,
# in sorted filename order.
# Disabled alternative: imagenames = sorted(wsi_val + wsi_train)
imagenames = sorted([*wsi_test, ref_imagename])

# Disabled special-casing of 'NA5005-02_AB.svs': this WSI was digitized at 40x
# and needs to be resized down to 20x, so it was moved to the end of the list.
#imagenames.remove('NA5005-02_AB.svs')
#imagenames.append('NA5005-02_AB.svs')
print(imagenames)

['NA3777-02_AB.svs', 'NA4077-02_AB.svs', 'NA4092-02_AB.svs', 'NA4107-02_AB.svs', 'NA4160-02_AB.svs', 'NA4195-02_AB.svs', 'NA4256-02_AB.svs', 'NA4299-02_AB.svs', 'NA4391-02_AB.svs', 'NA4450-02_AB.svs', 'NA4463-02_AB.svs', 'NA4471-02_AB.svs', 'NA4553-02_AB.svs', 'NA4626-02_AB.svs', 'NA4672-02_AB.svs', 'NA4675-02_AB.svs', 'NA4691-02_AB.svs', 'NA4695-02_AB.svs', 'NA5002_2AB.svs']


In [6]:
%%time
# Load reference image, fit Reinhard normalizer
ref_image = Vips.Image.new_from_file(TRAIN_WSI_DIR + ref_imagename, level=0)
#ref_image = Vips.Image.new_from_file(TEST_WSI_DIR + ref_imagename, level=0)

normalizer = normalize.Reinhard()
normalizer.fit(ref_image)

CPU times: user 35min 44s, sys: 3min 13s, total: 38min 58s
Wall time: 4min 43s


In [7]:
# Normalize every non-reference slide, tile it into SAVE_DIR, and record the
# statistics the normalizer reports for it.
stats_dict = {}    # imagename -> normalizer.image_stats captured after transforming that slide

# Skip the reference slide by name. Slicing with imagenames[:-1] only excluded
# it because its filename happens to sort last; any slide sorting after it
# would have been silently dropped instead.
slides_to_process = [name for name in imagenames if name != ref_imagename]

for imagename in tqdm(slides_to_process):
    # Try the training directory first and fall back to the test directory.
    # Only pyvips load failures trigger the fallback, so unrelated errors and
    # KeyboardInterrupt are no longer swallowed by a bare `except:`.
    wsi_path = TRAIN_WSI_DIR + imagename
    try:
        vips_img = Vips.Image.new_from_file(wsi_path, level=0)
    except Vips.Error:
        # Disabled alternative fallback: VAL_WSI_DIR + imagename
        wsi_path = TEST_WSI_DIR + imagename
        vips_img = Vips.Image.new_from_file(wsi_path, level=0)
    print("Loaded Image: " + wsi_path)

    out = normalizer.transform(vips_img)
    # Carry the source filename over to the normalized image; presumably
    # save_and_tile uses it to name the output -- TODO confirm in vips_utils.
    out.filename = vips_img.filename
    vips_utils.save_and_tile(out, SAVE_DIR)
    # image_stats is read after transform(); presumably a (means, stds) pair
    # for the slide just processed -- TODO confirm in normalize.Reinhard.
    stats_dict[imagename] = normalizer.image_stats

  0%|          | 0/18 [00:00<?, ?it/s]

Loaded Image: data/box/NA3777-02_AB.svs


  6%|▌         | 1/18 [07:04<2:00:09, 424.07s/it]

Loaded Image: data/box/NA4077-02_AB.svs


 11%|█         | 2/18 [16:37<2:05:01, 468.82s/it]

Loaded Image: data/box/NA4092-02_AB.svs


 17%|█▋        | 3/18 [25:19<2:01:14, 484.95s/it]

Loaded Image: data/box/NA4107-02_AB.svs


 22%|██▏       | 4/18 [32:10<1:47:56, 462.61s/it]

Loaded Image: data/box/NA4160-02_AB.svs


 28%|██▊       | 5/18 [39:10<1:37:27, 449.77s/it]

Loaded Image: data/box/NA4195-02_AB.svs


 33%|███▎      | 6/18 [45:29<1:25:43, 428.60s/it]

Loaded Image: data/box/NA4256-02_AB.svs


 39%|███▉      | 7/18 [53:47<1:22:23, 449.45s/it]

Loaded Image: data/box/NA4299-02_AB.svs


 44%|████▍     | 8/18 [1:01:01<1:14:09, 444.96s/it]

Loaded Image: data/box/NA4391-02_AB.svs


 50%|█████     | 9/18 [1:07:49<1:05:02, 433.65s/it]

Loaded Image: data/box/NA4450-02_AB.svs


 56%|█████▌    | 10/18 [1:15:37<59:13, 444.16s/it] 

Loaded Image: data/box/NA4463-02_AB.svs


 61%|██████    | 11/18 [1:22:40<51:03, 437.71s/it]

Loaded Image: data/Dataset 1a Development_train/NA4471-02_AB.svs


 67%|██████▋   | 12/18 [1:29:29<42:54, 429.10s/it]

Loaded Image: data/box/NA4553-02_AB.svs


 72%|███████▏  | 13/18 [1:38:14<38:09, 457.94s/it]

Loaded Image: data/box/NA4626-02_AB.svs


 78%|███████▊  | 14/18 [1:43:56<28:11, 422.99s/it]

Loaded Image: data/box/NA4672-02_AB.svs


 83%|████████▎ | 15/18 [1:48:59<19:21, 387.17s/it]

Loaded Image: data/box/NA4675-02_AB.svs


 89%|████████▉ | 16/18 [1:56:03<13:16, 398.20s/it]

Loaded Image: data/box/NA4691-02_AB.svs


 94%|█████████▍| 17/18 [2:02:25<06:33, 393.11s/it]

Loaded Image: data/box/NA4695-02_AB.svs


100%|██████████| 18/18 [2:09:02<00:00, 394.54s/it]


In [8]:
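# Disabled: resize the single 40x image down to 20x before normalizing and tiling it.
# NOTE(review): with the current image list, imagenames[-1:] is the reference slide,
# not a 40x slide -- adjust the selection before re-enabling this cell.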
# for imagename in tqdm(imagenames[-1:]):
#     vips_img = Vips.Image.new_from_file(TRAIN_WSI_DIR + imagename, level=0)
#     vips_img = vips_img.resize(0.5)
#     out = normalizer.transform(vips_img)
#     out.filename = vips_img.filename
#     vips_utils.save_and_tile(out, SAVE_DIR)
#     stats_dict[imagename] = normalizer.image_stats

In [9]:
# Gather the per-slide statistics into a DataFrame. At this point each slide
# name is a column; the frame is reoriented in a later cell.
# (pandas is imported with the other dependencies at the top of the notebook.)
stats = pd.DataFrame(stats_dict)

In [10]:
# Flip the frame so that each slide becomes a row.
# NOTE: re-running this cell on its own flips the frame back.
stats = stats.T

In [11]:
# Label the two statistics columns.
stats.columns = ['means', 'stds']

In [12]:
# Plain-text dump of the per-slide statistics table (one row per slide,
# columns 'means' and 'stds').
print(stats)

                                                              means  \
NA3777-02_AB.svs  (84.8768307838968, 1.8838342760856104, 6.54897...   
NA4077-02_AB.svs  (91.3131558033885, 0.7904653477813409, 0.51400...   
NA4092-02_AB.svs  (87.88871953616047, 1.1075666993207716, 0.8033...   
NA4107-02_AB.svs  (90.94294739155603, 0.5887239399026774, 0.9401...   
NA4160-02_AB.svs  (88.89899260669029, 0.9974418293237802, 0.3308...   
NA4195-02_AB.svs  (86.47197838382841, 1.6366618861228261, -0.850...   
NA4256-02_AB.svs  (87.56450328065948, 0.8650946384410637, 0.6600...   
NA4299-02_AB.svs  (86.76713213578492, 1.450629252548881, -1.0392...   
NA4391-02_AB.svs  (82.77101907259605, 1.8684124681141927, -2.024...   
NA4450-02_AB.svs  (88.85473144493126, 0.6891529168668056, 0.2871...   
NA4463-02_AB.svs  (87.63791836872299, 1.2089569343004831, -2.268...   
NA4471-02_AB.svs  (91.11351936053985, 0.6167910832749284, 1.1381...   
NA4553-02_AB.svs  (89.44513337546904, 0.8088347968056662, -2.102...   
NA4626