### 1.1 Preprocessing - Reinhard Normalization and WSI Tiling

As a first preprocessing step, all slides were color normalized with respect to a reference image selected by an expert neuropathologist. Color normalization was performed using the method described by [Reinhard et al.](https://ieeexplore.ieee.org/document/946629).

The resulting color-normalized whole slide images were tiled using PyVips to generate 1536 x 1536 image patches.

In [1]:
import glob
import os

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pyvips as Vips
from tqdm import tqdm

import config as cfg
from utils import vips_utils, normalize

In [2]:
# Directories holding the whole-slide images (WSIs) of the training and
# validation sets, both located under the configured data directory.
TRAIN_WSI_DIR = os.path.join(cfg.data_dir, 'Dataset 1a Development_train')
VAL_WSI_DIR = os.path.join(cfg.data_dir, 'Dataset 1b Development_validation')

# Output directory handed to the save-and-tile step further down.
SAVE_DIR = os.path.join(cfg.data_dir, 'norm_tiles')

In [3]:
# Create the output directory (including missing parents). `exist_ok=True`
# keeps the cell safe to re-run and removes the race between checking for the
# directory and creating it.
os.makedirs(SAVE_DIR, exist_ok=True)

In [4]:
# Filename of the reference slide used to fit the color normalizer; it is
# loaded from TRAIN_WSI_DIR.
ref_imagename = 'NA5002_2AB.svs'

In [5]:
# Quick look at the training slide filenames (os.listdir returns them in
# arbitrary order).
print(os.listdir(TRAIN_WSI_DIR))

['NA4229-02_AB.svs', 'NA5001_2AB.svs', 'NA4749-02_AB.svs', 'NA_4865_02_AB1-40.svs', 'NA_4871_02_AB.svs', 'NA5005-02_AB.svs', 'NA5003_2AB.svs', 'NA4312-02_AB.svs', 'NA4757-02_AB.svs', 'NA4471-02_AB.svs', 'NA4751-02_AB.svs', 'NA4185-02_AB.svs', 'NA4898-02_AB17-24.svs', 'NA4711-02_AB.svs', 'NA4885-02_AB17-24.svs', 'NA4137-02_AB.svs', 'NA4722-02_AB.svs', 'NA5002_2AB.svs', 'NA4072-02_AB.svs', 'NA_4882_02_AB.svs', 'NA5004_02_AB.svs', 'NA4144-02_AB.svs', 'NA_4883_02_AB.svs', 'NA4259-02_AB.svs', 'NA_4888_02_AB17-24.svs', 'NA4009-02_AB.svs', 'NA4918-02_AB17-24.svs', 'NA4619-02_AB.svs', 'NA4951-02_AB17-24.svs']


In [6]:
# Slide filenames of both splits.
wsi_train = os.listdir(TRAIN_WSI_DIR)
wsi_val = os.listdir(VAL_WSI_DIR)

# One alphabetically ordered list covering validation and training slides.
imagenames = sorted(wsi_val + wsi_train)

# This WSI was digitized at 40x and needs to be resized down to 20x, so it is
# moved to the very end of the list where it can be sliced off and handled
# separately from the other slides.
imagenames.append(imagenames.pop(imagenames.index('NA5005-02_AB.svs')))

In [7]:
%%time
# Open the reference slide from the training directory at level 0.
ref_image = Vips.Image.new_from_file(
    os.path.join(TRAIN_WSI_DIR, ref_imagename),
    level=0,
)

# Fit the Reinhard normalizer on the reference slide; the fitted object is
# reused to transform every other slide.
normalizer = normalize.Reinhard()
normalizer.fit(ref_image)

CPU times: user 48min 40s, sys: 1min, total: 49min 41s
Wall time: 1min 24s


In [8]:
stats_dict = {}
for imagename in tqdm(imagenames[:-1]):
    try:
        vips_img = Vips.Image.new_from_file(os.path.join(TRAIN_WSI_DIR, imagename), level=0)
    except:
        print('Exception {}'.format(imagename))
        vips_img = Vips.Image.new_from_file(os.path.join(VAL_WSI_DIR, imagename), level=0)
    out = normalizer.transform(vips_img)
    out.filename = vips_img.filename
    vips_utils.save_and_tile(out, SAVE_DIR)
    stats_dict[imagename] = normalizer.image_stats

 88%|████████▊ | 28/32 [59:25<08:42, 130.50s/it] 

Exception NA_4896_02_AB17-24.svs


 91%|█████████ | 29/32 [1:01:44<06:39, 133.15s/it]

Exception NA_4928_02_AB17-24.svs


 94%|█████████▍| 30/32 [1:04:12<04:35, 137.53s/it]

Exception NA_4930_02_AB14-24.svs


 97%|█████████▋| 31/32 [1:06:12<02:12, 132.24s/it]

Exception NA_4933_02_AB17-24.svs


100%|██████████| 32/32 [1:07:57<00:00, 127.43s/it]


In [9]:
# Resize the single 40x image down to 20x
for imagename in tqdm(imagenames[-1:]):
    vips_img = Vips.Image.new_from_file(os.path.join(TRAIN_WSI_DIR, imagename), level=0)
    vips_img = vips_img.resize(0.5)
    out = normalizer.transform(vips_img)
    out.filename = vips_img.filename
    vips_utils.save_and_tile(out, SAVE_DIR)
    stats_dict[imagename] = normalizer.image_stats

100%|██████████| 1/1 [04:52<00:00, 292.61s/it]


In [10]:
# Gather the collected statistics into a DataFrame; the dict keys (slide
# filenames) become the columns at this point. pandas is imported in the
# notebook's top import cell rather than mid-notebook.
stats = pd.DataFrame(stats_dict)

In [11]:
# Flip the table so that each slide becomes a row.
stats = stats.T

In [12]:
# Name the two statistics columns.
stats.columns = ['means', 'stds']

In [13]:
# Display the per-slide means and standard deviations.
stats

Unnamed: 0,means,stds
NA4009-02_AB.svs,"(88.79989640676122, 0.7796811779427335, 1.5307...","(13.309693197292201, 3.614022951491436, 5.6388..."
NA4072-02_AB.svs,"(85.45899856981691, 1.0290791360964993, -1.003...","(18.809617640025696, 3.386271428449547, 4.0524..."
NA4137-02_AB.svs,"(89.37619838926987, 1.1594221235255688, -1.090...","(9.656529501479323, 3.717221278802593, 4.60034..."
NA4144-02_AB.svs,"(87.65374174713389, 1.5541811566135384, -0.218...","(10.825812591988118, 4.0374352279298185, 6.132..."
NA4185-02_AB.svs,"(90.2536016711242, 0.7139148105981887, 0.49942...","(9.081633753173017, 3.532839740620599, 5.06540..."
NA4229-02_AB.svs,"(89.39547476784755, 0.4691240658320603, 1.1783...","(8.686648322680137, 3.555328175935113, 6.07685..."
NA4259-02_AB.svs,"(88.9692294878472, 0.6069457818993456, -0.0603...","(10.01264175613563, 3.5489507210693523, 5.4370..."
NA4312-02_AB.svs,"(91.08530689405714, 0.40543170409731627, 1.256...","(7.662059392580917, 2.3359701345727513, 4.3557..."
NA4471-02_AB.svs,"(91.11351936053985, 0.6167910832749283, 1.1381...","(7.327277629001215, 2.154893433506231, 3.47540..."
NA4619-02_AB.svs,"(87.36757718025666, 0.9721181358150474, -0.514...","(14.552723842003907, 3.4633622879807118, 5.078..."
