### 1.1 Preprocessing - Reinhard Normalization and WSI Tiling

As a first preprocessing step, all slides were color normalized with respect to a reference image selected by an expert neuropathologist. Color normalization was performed using the method described by [Reinhard et al.](https://ieeexplore.ieee.org/document/946629).

The resulting color normalized whole slide images were tiled using PyVips to generate 1536 x 1536 image patches.

In [15]:
import glob
import os

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pyvips as Vips
from tqdm import tqdm

import config as cfg
from utils import vips_utils, normalize

In [16]:
# Source directories holding the whole slide images (WSIs) of each split.
TRAIN_WSI_DIR = os.path.join(cfg.data_dir, 'Dataset 1a Development_train')
VAL_WSI_DIR = os.path.join(cfg.data_dir, 'Dataset 1b Development_validation')

# Destination directory passed to the tiling step for the normalized output.
SAVE_DIR = os.path.join(cfg.data_dir, 'norm_tiles')

In [17]:
# Create the output directory (and any missing parents). `exist_ok=True` makes
# this a no-op when the directory is already there and avoids the race between
# a separate existence check and the creation.
os.makedirs(SAVE_DIR, exist_ok=True)

In [18]:
# File name of the reference slide; it is loaded from TRAIN_WSI_DIR to fit the normalizer.
ref_imagename = 'NA5002_2AB.svs'

In [19]:
# Sanity check: show the entries found in the training WSI directory.
print(os.listdir(TRAIN_WSI_DIR))

['NA4229-02_AB.svs', 'NA5001_2AB.svs', 'NA4749-02_AB.svs', 'NA_4865_02_AB1-40.svs', 'NA_4871_02_AB.svs', 'NA5005-02_AB.svs', 'NA5003_2AB.svs', 'NA4312-02_AB.svs', 'NA4757-02_AB.svs', 'NA4471-02_AB.svs', 'NA4751-02_AB.svs', 'NA4185-02_AB.svs', 'NA4898-02_AB17-24.svs', 'NA4711-02_AB.svs', 'NA4885-02_AB17-24.svs', 'NA4137-02_AB.svs', 'NA4722-02_AB.svs', 'NA5002_2AB.svs', 'NA4072-02_AB.svs', 'NA_4882_02_AB.svs', 'NA5004_02_AB.svs', 'NA4144-02_AB.svs', 'NA_4883_02_AB.svs', 'NA4259-02_AB.svs', 'NA_4888_02_AB17-24.svs', 'NA4009-02_AB.svs', 'NA4918-02_AB17-24.svs', 'NA4619-02_AB.svs', 'NA4951-02_AB17-24.svs']


In [20]:
wsi_train = os.listdir(TRAIN_WSI_DIR)
wsi_val = os.listdir(VAL_WSI_DIR)

# Combined, alphabetically sorted list of the slides from both splits.
imagenames = wsi_val + wsi_train
imagenames.sort()

# 'NA5005-02_AB.svs' was digitized at 40x and needs resizing down to 20x, so it
# is moved to the end of the list, where it can be addressed as imagenames[-1:]
# while every other slide is in imagenames[:-1].
imagenames.append(imagenames.pop(imagenames.index('NA5005-02_AB.svs')))

In [21]:
%%time
# Load reference image, fit Reinhard normalizer
ref_image = Vips.Image.new_from_file(os.path.join(TRAIN_WSI_DIR, ref_imagename), level=0)

normalizer = normalize.Reinhard()
normalizer.fit(ref_image)

CPU times: user 53min 3s, sys: 41.4 s, total: 53min 44s
Wall time: 8min 6s


In [None]:
stats_dict = {}
# Process every slide except the last entry of `imagenames` (the 40x slide,
# which needs an extra resize step).
for imagename in tqdm(imagenames[:-1]):
    # `imagenames` mixes both splits: look in the training directory first and
    # fall back to the validation directory. An explicit existence check is used
    # instead of a bare `except:`, which would also swallow KeyboardInterrupt
    # and hide genuine read errors behind a second load attempt.
    wsi_path = os.path.join(TRAIN_WSI_DIR, imagename)
    if not os.path.exists(wsi_path):
        wsi_path = os.path.join(VAL_WSI_DIR, imagename)
    vips_img = Vips.Image.new_from_file(wsi_path, level=0)

    out = normalizer.transform(vips_img)
    # Copy the source file name onto the normalized image before tiling.
    # NOTE(review): presumably save_and_tile derives its output name from
    # `filename` -- confirm in utils.vips_utils.
    out.filename = vips_img.filename
    vips_utils.save_and_tile(out, SAVE_DIR)
    # NOTE(review): assumes `image_stats` describes the image just transformed
    # -- confirm in utils.normalize.
    stats_dict[imagename] = normalizer.image_stats

  9%|▉         | 3/32 [43:09<6:39:01, 825.56s/it]

In [None]:
# Resize the single 40x image down to 20x
for imagename in tqdm(imagenames[-1:]):
    vips_img = Vips.Image.new_from_file(os.path.join(TRAIN_WSI_DIR, imagename), level=0)
    vips_img = vips_img.resize(0.5)
    out = normalizer.transform(vips_img)
    out.filename = vips_img.filename
    vips_utils.save_and_tile(out, SAVE_DIR)
    stats_dict[imagename] = normalizer.image_stats

In [None]:
# Collect the per-slide statistics into a table. Built straight from the dict,
# each slide name becomes a column at this point. (pandas is imported in the
# notebook's import cell.)
stats = pd.DataFrame(stats_dict)

In [None]:
# One row per slide. Rebuilt from `stats_dict` rather than transposing `stats`
# in place, so that re-running this cell does not flip the table back.
stats = pd.DataFrame(stats_dict).transpose()

In [None]:
# Label the two columns of the per-slide table.
stats.columns = ['means', 'stds']