# Image loading and generation notebook

## Notebook setup

In [None]:
# noqa
import os
# Environment switch: true when the DATALAB_DEBUG variable is set
# (presumably only on hosted Colab/Datalab runtimes -- TODO confirm).
COLAB = 'DATALAB_DEBUG' in os.environ

if COLAB:
    # Hosted runtime: install git and clone the bootstrap helper gist into ./bstrap.
    !apt-get update
    !apt-get install git
    !git clone https://gist.github.com/oskopek/e27ca34cb2b813cae614520e8374e741 bstrap
    import bstrap.bootstrap as bootstrap
else:
    # Local runtime: if the kernel was started inside a 'notebooks' directory,
    # move one level up so that the `resources` package is importable.
    # NOTE(review): `%%pwd` is an unusual spelling for capturing a line magic
    # (`%pwd` is the documented form) -- confirm it works on the IPython version in use.
    wd = %%pwd
    if wd.endswith('notebooks'):
        print('Current directory:', wd)
        %cd ..
        %pwd
    import resources.our_colab_utils.bootstrap as bootstrap

# Run the project bootstrap with the 'master' branch and a pinned package list.
bootstrap.bootstrap(branch='master', packages='dotmap==1.2.20 keras==2.1.4 pydicom==1.0.2 Pillow==5.0.0')

if COLAB:
    # The cloned helper is only needed for the bootstrap call above.
    !rm -rf bstrap

## Actual notebook

In [None]:
# noqa
import csv
import os
from dotmap import DotMap

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
import pydicom
import skimage.transform

import resources.data.loader as loader
import resources.image_utils as imutils
import resources.synthetic_data as synth_data

# Pick up edits to imported project modules without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Plot defaults: 10x10 figures and a grayscale colormap for images.
matplotlib.rcParams['figure.figsize'] = (10, 10)
plt.rcParams['image.cmap'] = 'gray' # 'viridis', 'gray'

## Data setup

In [None]:
# Root directory of the datasets handed to the loader. The location can be
# overridden with the BREAST_PREFIX environment variable so the notebook runs
# on other machines; when unset, the original hard-coded path is used.
breast_prefix = os.path.abspath(os.environ.get('BREAST_PREFIX', '/home/oskopek/local/Breasts'))
loader.init(breast_prefix)

## Define custom conversion and plotting

In [None]:
def convert(img, img_meta):
    """Prepare a loaded image for display.

    The image is passed through `imutils.standardize` (using `img_meta`) and
    `imutils.downsample`; the result is then additionally passed through
    `imutils.normalize_gaussian`.

    Returns:
        A tuple `(downsampled, gaussian_normalized)`.
    """
    small = imutils.downsample(imutils.standardize(img, img_meta))
    return small, imutils.normalize_gaussian(small)


def show_img(img):
    """Show `img` side by side with a histogram of its flattened values.

    A 16x8 figure with two panels is created: the left panel displays the image,
    the right panel the histogram of all its values.
    """
    _, (image_ax, hist_ax) = plt.subplots(1, 2, figsize=(16, 8))
    image_ax.imshow(img)
    hist_ax.hist(np.ravel(img))
    plt.show()

## inBreast test

In [None]:
# Load the inBreast dataset and display every image of the patient(s) whose
# ID starts with `filter_id`.
images, patients = loader.load_inbreast()
filter_id = 'cc9e66c5b31baab8'

for pid, p in patients.items():
    # Patient IDs are matched by prefix here.
    if not pid.startswith(filter_id):
        continue
    print("PatientID:", pid, "#images:", len(p.image_metadata))
    for i, img_meta in enumerate(p.image_metadata.values()):
        # One summary line per image (1-based index), then the converted image.
        print(i + 1, "\t", "Laterality:", img_meta.laterality, "View:", img_meta.view, "BiRads:", img_meta.birads,
              "Cancer:", img_meta.cancer)
        img = imutils.load_image(img_meta.image_path)
        # Only the second output of convert() is displayed.
        img_small, img_small_gaussian = convert(img, img_meta)
        show_img(img_small_gaussian)


## bcdr test

In [None]:
# Load one BCDR subset and display every image of the patient whose ID equals
# `filter_id`. The trailing comment lists the other subset names used in this notebook.
images, patients = loader.load_bcdr('BCDR-D01')  # 'BCDR-D02', 'BCDR-DN01'
filter_id = 3

for pid, p in patients.items():
    # Patient IDs are compared for equality with the integer `filter_id`.
    if pid != filter_id:
        continue
    print("PatientID:", pid, "#images:", len(p.image_metadata))
    for i, img_meta in enumerate(p.image_metadata.values()):
        # One summary line per image (1-based index), then the converted image.
        print(i + 1, "\t", "Laterality:", img_meta.laterality, "View:", img_meta.view, "Age:", img_meta.age, "Cancer:",
              img_meta.cancer)
        img = imutils.load_image(img_meta.image_path)
        # Only the second output of convert() is displayed.
        img_small, img_small_gaussian = convert(img, img_meta)
        show_img(img_small_gaussian)


## Dataset summary stats

In [None]:
def print_info(patients):
    """Print summary counts for a dataset.

    Args:
        patients: Mapping whose values expose an `image_metadata` mapping; each
            of its values exposes a `cancer` attribute.

    Prints three lines: the number of patients, the total number of images over
    all patients, and the number of images whose `cancer` attribute is truthy.
    """
    print("Patients:", len(patients))

    image_total = 0
    for patient in patients.values():
        image_total += len(patient.image_metadata.values())
    print("Images:", image_total)

    cancer_total = 0
    for patient in patients.values():
        for meta in patient.image_metadata.values():
            if meta.cancer:
                cancer_total += 1
    print("Cancer:", cancer_total)


In [None]:
# Print patient/image/cancer counts for each dataset in turn. Only the second
# value returned by each loader (the patients mapping) is used here.
print("Inbreast")
_, patients_inb = loader.load_inbreast()
print_info(patients_inb)
print()

print("BCDR-D01")
_, patients_d01 = loader.load_bcdr('BCDR-D01')
print_info(patients_d01)
print()

print("BCDR-D02")
_, patients_d02 = loader.load_bcdr('BCDR-D02')
print_info(patients_d02)
print()

print("BCDR-DN01")
_, patients_dn01 = loader.load_bcdr('BCDR-DN01')
print_info(patients_dn01)
print()

# Rough storage estimate for all images at a fixed size.
# NOTE(review): 410 + 260 + 704 + 200 are presumably the per-dataset image
# counts printed above, and 8 the bytes per pixel (float64) -- confirm; they
# are hard-coded and will not follow changes in the datasets.
size = (800, 800)
print("Gigabytes for all images in total with size {}: {}".format(
    size, (410 + 260 + 704 + 200) * (size[0] * size[1]) * 8 / 1024**3))


## Convert datasets to trainable CycleGAN images

* Add labels to the BCDR images
* Method to keep only the CC-view BCDR images, resize them, and split them by label
* Add labels to the inBreast images
* Method to keep only the CC-view inBreast images, resize them, and split them by label
* Merge both datasets and write the images to two folders based on the label

In [None]:
def filter_view(images):
    """Split the CC-view images of a dataset by cancer label.

    Args:
        images: Mapping whose values expose `view` and `cancer` attributes.

    Returns:
        A tuple `(healthy, cancer)` of lists. Only images whose `view` equals
        'CC' are kept; those with a truthy `cancer` go to the second list, the
        rest to the first. Mapping iteration order is preserved.
    """
    buckets = {False: [], True: []}
    for image in images.values():
        if image.view != 'CC':
            continue
        buckets[bool(image.cancer)].append(image)
    return buckets[False], buckets[True]


# For each dataset: load the images mapping (first loader output), keep the
# CC-view images split by label, and report the counts.
print("Inbreast")
images_inb, _ = loader.load_inbreast()
inb_healthy, inb_cancer = filter_view(images_inb)
print("Healthy:", len(inb_healthy), "Cancer:", len(inb_cancer))
print()

print("BCDR-D01")
images_d01, _ = loader.load_bcdr('BCDR-D01')
d01_healthy, d01_cancer = filter_view(images_d01)
print("Healthy:", len(d01_healthy), "Cancer:", len(d01_cancer))
print()

print("BCDR-D02")
images_d02, _ = loader.load_bcdr('BCDR-D02')
d02_healthy, d02_cancer = filter_view(images_d02)
print("Healthy:", len(d02_healthy), "Cancer:", len(d02_cancer))
print()

# Merge the three datasets; `healthy` and `cancer` are the inputs of the
# transformation runs further down.
print("Overall")
healthy = inb_healthy + d01_healthy + d02_healthy
cancer = inb_cancer + d01_cancer + d02_cancer
print("Healthy:", len(healthy), "Cancer:", len(cancer))

from multiprocessing import Pool as ThreadPool
import imgaug
from imgaug import augmenters as iaa
from imgaug import parameters as iap
from itertools import repeat

# Base seed for imgaug; the worker function below re-seeds with SEED * run_id.
SEED = 42

imgaug.seed(SEED)
# Augmentation pipeline applied in order: a rotation drawn from (-4, 4),
# an independent x/y scaling drawn from (0.98, 1.13), and a contrast
# normalization with a factor drawn from (0.08, 1.2) that is not per-channel.
aug = iaa.Sequential([
    iaa.Affine(rotate=(-4, 4)),
    iaa.Affine(scale={
        "x": (0.98, 1.13),
        "y": (0.98, 1.13)
    }),
    iaa.ContrastNormalization((0.08, 1.2), per_channel=False)
])


def transform_img(img, img_meta, augment=False):
    """Turn a loaded image into a training sample scaled to [-1, 1].

    Args:
        img: Image as returned by `imutils.load_image`.
        img_meta: Metadata passed to `imutils.standardize`.
        augment: When true, the image is additionally run through the
            module-level `aug` pipeline before the final normalization.

    Returns:
        The result of `imutils.normalize(..., new_min=-1, new_max=1)` applied
        to the Gaussian-normalized image.
    """
    out = imutils.downsample(imutils.standardize(img, img_meta))

    if augment:
        # The augmenter is fed an image rescaled to the 0..255 range.
        out = imutils.normalize(imutils.normalize_gaussian(out), new_min=0, new_max=255)
        out = aug.augment_image(out)

    return imutils.normalize(imutils.normalize_gaussian(out), new_min=-1, new_max=1)


def f(inp):
    """Pool worker: load, transform and write one batch of images.

    Args:
        inp: Tuple `(lst, folder, run_id, augment)` where `lst` is a list of
            `(index, img_meta)` pairs, `folder` is the output directory,
            `run_id` numbers the pass over the dataset and `augment` is
            forwarded to `transform_img`.

    Each image is written with `ndarray.tofile` to
    `<folder>/<run_id:02>_<index:03>.dat`. Images that fail to load are
    reported and skipped so that one unreadable file does not abort the batch.
    """
    lst, folder, run_id, augment = inp
    # The seed depends only on run_id, so every batch of a given run starts
    # from the same imgaug random state.
    imgaug.seed(SEED * run_id)
    for i, img_meta in lst:
        try:
            img = imutils.load_image(img_meta.image_path)
        except Exception as exc:
            # `Exception` rather than a bare `except:` so KeyboardInterrupt and
            # SystemExit still propagate; the cause is included in the report.
            print("Failed to load image", img_meta.image_path, "-", repr(exc))
            continue
        img = transform_img(img, img_meta, augment=augment)
        fname = "{:02}_{:03}.dat".format(run_id, i)
        img.tofile(os.path.join(folder, fname))


def transform(lst, folder, run_id, augment):
    """Transform all images in `lst` in parallel and write them to `folder`.

    Args:
        lst: Sequence of image metadata objects.
        folder: Output directory passed on to the worker `f`.
        run_id: Number of this pass; used by the worker for seeding and file names.
        augment: Whether the worker should augment the images.

    The images are enumerated (so every image keeps a stable index across
    runs), cut into at most `THREADS` contiguous batches and mapped over a
    pool of `THREADS` workers. Note that `ThreadPool` is the file's alias for
    `multiprocessing.Pool`, i.e. the workers are processes.
    """
    THREADS = 8
    batch_size = len(lst) // THREADS + 1
    indexed = list(enumerate(lst))
    batches = [indexed[start:start + batch_size] for start in range(0, len(indexed), batch_size)]
    jobs = list(zip(batches, repeat(folder), repeat(run_id), repeat(augment)))
    print("Transforming ({})".format(run_id))
    pool = ThreadPool(THREADS)
    try:
        pool.map(f, jobs)
    finally:
        # Shut the workers down; previously each call left its worker
        # processes alive. close() + join() lets them exit normally.
        pool.close()
        pool.join()
    print("Transformed ({})".format(run_id))


# Output layout: <breast_prefix>/small_all_256x256/{cancer,healthy}
transformed = os.path.join(breast_prefix, "small_all_256x256")
cancer_folder = os.path.join(transformed, "cancer")
healthy_folder = os.path.join(transformed, "healthy")
# exist_ok=True replaces the separate exists() check, so re-running the cell is
# safe and there is no gap between checking and creating.
os.makedirs(healthy_folder, exist_ok=True)
os.makedirs(cancer_folder, exist_ok=True)

# Number of passes over each image list. Pass 0 writes the images without
# augmentation (augment = run_id != 0); every later pass writes an augmented copy.
N_RUNS = 10

for run_id in range(N_RUNS):
    transform(cancer, cancer_folder, run_id, run_id != 0)

for run_id in range(N_RUNS):
    transform(healthy, healthy_folder, run_id, run_id != 0)


In [None]:
import matplotlib.pyplot as plt
# Sanity check: read one written image back and display it.
# NOTE(review): np.fromfile defaults to dtype=float (float64) and the file
# stores no shape or dtype; this assumes transform_img produced a 256x256
# float64 array -- confirm against imutils.normalize / imutils.downsample.
a = np.fromfile(healthy_folder + '/00_001.dat')
a = np.reshape(a, (256, 256))
show_img(a)

## Synthetic data

In [None]:
# Draw five samples from the synthetic generator; each yields an image, a mask
# and a metadata object. For every sample, show the image with the mask added,
# then the mask alone.
data_gen = synth_data.generate_synth(size=(256, 256), max_thresh=2.5)
for i in range(5):
    img, mask, img_meta = next(data_gen)
    # Go from img to img+mask in the GAN
    show_img(img + mask)
    show_img(mask)