In [1]:
# Reload imported project modules automatically before each cell executes,
# so edits to the local packages are picked up without restarting the kernel.
%reload_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [2]:
import warnings
# NOTE(review): this silences *every* warning for the rest of the session,
# including deprecation and NumPy array-construction warnings raised by the
# cells below. Consider narrowing the filter to the specific categories that
# are known to be noise.
warnings.filterwarnings("ignore")

In [3]:
import sqlite3
import numpy as np
from SlideRunner.dataAccess.database import Database
from tqdm import tqdm
from pathlib import Path
import openslide
from random import randint
from Detection.data_loader import *
import pickle
import uuid
import json
import cv2
import tensorflow as tf
import os

In [4]:
# --- Sampling configuration -------------------------------------------------
# Edge length of the square patches; also used to normalise box coordinates.
size = 600
# Index into the slide's level list (level_dimensions / level_downsamples).
level = 1
# Number of patches sampled from every slide.
num_examples_per_image = 250

# --- Accumulators filled by the slide-loading cell ---------------------------
# files[i] is a SlideContainer, lbl_bbox[i] the matching [bboxes, labels] pair.
files, lbl_bbox = [], []

# --- Dataset location and annotation database --------------------------------
path = Path('/data/Datasets/EIPH_WSI/')

database = Database()
database.open(str(path / 'EIPH.sqlite'))

In [5]:
# Annotation class ids that are kept, mapped onto contiguous zero-based labels.
# Built once instead of on every loop iteration.
classes = {3: 0, 4: 1, 5: 2, 6: 3, 7: 4}

# Start from empty accumulators so that re-running this cell does not append
# every slide a second time.
files.clear()
lbl_bbox.clear()

getslides = """SELECT uid, filename FROM Slides"""
for currslide, filename in tqdm(database.execute(getslides).fetchall()):
    database.loadIntoMemory(currslide)

    # The file name selects which stain sub-folder the slide is read from.
    slidetype = 'Berliner Blau/' if 'erliner' in filename else 'Turnbull Blue/'
    slide_path = path / slidetype / filename

    # Only the downsample factor of the chosen level is needed here, so the
    # slide handle is closed again as soon as it has been read.
    with openslide.open_slide(str(slide_path)) as slide:
        down_factor = slide.level_downsamples[level]

    labels, bboxes = [], []
    for annotation in database.annotations.values():
        # Skip annotations without any label instead of failing on labels[0].
        if not annotation.labels:
            continue
        class_id = annotation.labels[0].classId
        if class_id not in classes:
            continue

        # Turn the annotation (centre x1/y1, radius r) into a square box and
        # scale it by the downsample factor of the chosen level.
        d = 2 * annotation.r / down_factor
        x_min = (annotation.x1 - annotation.r) / down_factor
        y_min = (annotation.y1 - annotation.r) / down_factor
        x_max = x_min + d
        y_max = y_min + d

        bboxes.append([int(x_min), int(y_min), int(x_max), int(y_max)])
        labels.append(classes[class_id])

    # Slides without any usable annotation are left out entirely.
    if len(bboxes) > 0:
        lbl_bbox.append([bboxes, labels])
        # NOTE(review): the meaning of the [[0], [1]] argument is defined by
        # SlideContainer and is not visible in this notebook - confirm.
        files.append(SlideContainer(slide_path, [[0], [1]], level, size, size))

  0%|          | 0/24 [00:00<?, ?it/s]

Loading DB into memory ...


  4%|▍         | 1/24 [00:00<00:10,  2.21it/s]

Loading DB into memory ...


  8%|▊         | 2/24 [00:01<00:20,  1.07it/s]

Loading DB into memory ...


 12%|█▎        | 3/24 [00:02<00:16,  1.24it/s]

Loading DB into memory ...


 21%|██        | 5/24 [00:03<00:11,  1.64it/s]

Loading DB into memory ...
Loading DB into memory ...
Loading DB into memory ...


 29%|██▉       | 7/24 [00:03<00:08,  1.92it/s]

Loading DB into memory ...


 38%|███▊      | 9/24 [00:04<00:06,  2.22it/s]

Loading DB into memory ...
Loading DB into memory ...


 46%|████▌     | 11/24 [00:04<00:05,  2.53it/s]

Loading DB into memory ...
Loading DB into memory ...


 58%|█████▊    | 14/24 [00:04<00:03,  2.88it/s]

Loading DB into memory ...
Loading DB into memory ...
Loading DB into memory ...
Loading DB into memory ...


 67%|██████▋   | 16/24 [00:05<00:02,  2.85it/s]

Loading DB into memory ...


 75%|███████▌  | 18/24 [00:05<00:01,  3.01it/s]

Loading DB into memory ...
Loading DB into memory ...


 79%|███████▉  | 19/24 [00:06<00:01,  3.11it/s]

Loading DB into memory ...
Loading DB into memory ...


 88%|████████▊ | 21/24 [00:06<00:00,  3.07it/s]

Loading DB into memory ...
Loading DB into memory ...


100%|██████████| 24/24 [00:07<00:00,  3.00it/s]

Loading DB into memory ...





In [6]:
# Pair every slide container with its [bboxes, labels] entry.
# The entries are ragged (one row per box, a different count per slide), so
# they are kept as plain lists: wrapping them in np.array() adds nothing for
# the lookup and raises ValueError on NumPy versions that reject ragged input.
img2bbox = dict(zip(files, lbl_bbox))


def get_y_func(o):
    """Return the ``[bboxes, labels]`` pair recorded for slide container ``o``."""
    return img2bbox[o]


# Patch width / height used when sampling patches.
w, h = size, size

# NOTE(review): the split is positional - the first four slides with
# annotations are held out for validation, the rest are used for training.
train_files = files[4:]
valid_files = files[:4]

In [7]:
def extract_image_with_boxes(file: "SlideContainer", boxes, labels, classes, num_examples_per_image,
                             width=None, height=None):
    """Sample patches from one slide together with the boxes that fall inside them.

    For every example a class is drawn uniformly from ``classes``, then one box
    of that class is drawn at random. The patch origin is placed half a patch
    width/height up-left of that box's top-left corner, so that corner sits at
    the patch centre.

    Args:
        file: object providing ``get_patch(x, y)``.
        boxes: sequence of ``[x_min, y_min, x_max, y_max]`` rows.
        labels: sequence of class labels, one per row of ``boxes``.
        classes: labels to sample from; each must occur in ``labels``.
        num_examples_per_image: number of patches to sample.
        width: patch width; defaults to the notebook-level ``w``.
        height: patch height; defaults to the notebook-level ``h``.

    Returns:
        ``(image_x, image_y)`` where ``image_x`` holds the values returned by
        ``file.get_patch`` and ``image_y`` holds ``[boxes, labels]`` pairs with
        the boxes shifted into patch coordinates.

    Raises:
        ValueError: if a sampled class has no box in ``labels``.
    """
    # Fall back to the notebook globals only when no explicit size is given.
    patch_w = w if width is None else width
    patch_h = h if height is None else height

    # Convert once; this also makes plain Python lists work, for which
    # ``labels == class_id`` would otherwise be a single scalar.
    boxes = np.asarray(boxes)
    labels = np.asarray(labels)

    image_x, image_y = [], []
    for _ in range(num_examples_per_image):
        class_id = np.random.choice(classes, 1)[0]
        candidates = boxes[labels == class_id]
        if len(candidates) == 0:
            raise ValueError("no bounding box with label {}".format(class_id))
        xmin, ymin = candidates[randint(0, len(candidates) - 1)][:2]

        # Patch origin in the coordinate system of the boxes.
        x = int(xmin - patch_w / 2)
        y = int(ymin - patch_h / 2)

        # Express every box relative to the patch origin.
        shifted = boxes.copy()
        shifted[:, [0, 2]] -= x
        shifted[:, [1, 3]] -= y

        # Keep the boxes whose centre lies strictly inside the patch.
        center_x = shifted[:, 0] + (shifted[:, 2] - shifted[:, 0]) / 2
        center_y = shifted[:, 1] + (shifted[:, 3] - shifted[:, 1]) / 2
        keep = (center_x > 0) & (center_y > 0) & (center_x < patch_w) & (center_y < patch_h)

        image_x.append(file.get_patch(x, y))
        image_y.append([shifted[keep], labels[keep]])

    return image_x, image_y

In [8]:
def image_to_feature_dict(image, y, size, image_id, image_format="png"):
    """Build a ``tf.train.Example`` for one patch and its bounding boxes.

    Args:
        image: array indexed as ``image[:, :, channel]``; the channel order is
            reversed before encoding (presumably RGB -> BGR for OpenCV - confirm
            against ``get_patch``).
        y: ``(boxes, labels)`` with boxes as ``[x_min, y_min, x_max, y_max]`` in
            patch pixel coordinates.
        size: patch edge length in pixels; used for height, width and to
            normalise the box coordinates.
        image_id: string stored as file name and source id.
        image_format: extension handed to ``cv2.imencode``; anything other than
            ``"png"`` is recorded as ``jpeg``.

    Returns:
        A ``tf.train.Example`` with image bytes, box coordinates and labels.

    Raises:
        ValueError: if OpenCV fails to encode the image.
    """
    # Imported locally because the notebook's import cell does not import
    # hashlib by name.
    import hashlib

    boxes, labels = y
    filename = image_id

    success, buffer = cv2.imencode('.{}'.format(image_format), image[:, :, [2, 1, 0]])
    if not success:
        raise ValueError("cv2.imencode failed for {} with format {}".format(filename, image_format))
    # tobytes() replaces the deprecated ndarray.tostring().
    encoded_image_data = buffer.tobytes()

    key = hashlib.sha256(encoded_image_data).hexdigest()
    image_format = b'png' if image_format == "png" else b'jpeg'

    xmins, xmaxs = [], []  # normalised left / right x coordinate, one per box
    ymins, ymaxs = [], []  # normalised top / bottom y coordinate, one per box
    classes_text = []  # class name as bytes, one per box
    classes = []  # integer class id, one per box

    for box, label in zip(boxes, labels):
        classes_text.append(str(label).encode('utf8'))
        # Stored ids are shifted by one relative to the zero-based labels.
        classes.append(int(label) + 1)

        # Clip the box to the patch before normalising to [0, 1].
        x_min = max(0, int(box[0]))
        y_min = max(0, int(box[1]))
        x_max = min(size, int(box[2]))
        y_max = min(size, int(box[3]))

        xmins.append(float(x_min / size))
        ymins.append(float(y_min / size))
        xmaxs.append(float(x_max / size))
        ymaxs.append(float(y_max / size))

    def int64_feature(values):
        return tf.train.Feature(int64_list=tf.train.Int64List(value=values))

    def bytes_feature(values):
        return tf.train.Feature(bytes_list=tf.train.BytesList(value=values))

    def float_feature(values):
        return tf.train.Feature(float_list=tf.train.FloatList(value=values))

    feature_dict = {
        'image/height': int64_feature([size]),
        'image/width': int64_feature([size]),
        'image/filename': bytes_feature([filename.encode('utf8')]),
        'image/source_id': bytes_feature([filename.encode('utf8')]),
        'image/key/sha256': bytes_feature([key.encode('utf8')]),
        'image/encoded': bytes_feature([encoded_image_data]),
        'image/format': bytes_feature([image_format]),
        'image/object/bbox/xmin': float_feature(xmins),
        'image/object/bbox/xmax': float_feature(xmaxs),
        'image/object/bbox/ymin': float_feature(ymins),
        'image/object/bbox/ymax': float_feature(ymaxs),
        'image/object/class/text': bytes_feature(classes_text),
        'image/object/class/label': int64_feature(classes),
    }

    return tf.train.Example(features=tf.train.Features(feature=feature_dict))

In [9]:
def convert_images_to_tfrecord(files, path):
    """Write one ``<slide stem>.tfrecord`` file per slide into ``path``.

    For every slide the annotations are looked up with ``get_y_func``,
    ``num_examples_per_image`` patches are sampled with
    ``extract_image_with_boxes`` and each patch is serialised through
    ``image_to_feature_dict``. Relies on the notebook-level names
    ``get_y_func``, ``num_examples_per_image`` and ``size``.

    Args:
        files: iterable of slide containers; each must expose ``file.stem``.
        path: existing output directory.
    """
    for file in tqdm(files):
        boxes, labels = get_y_func(file)
        boxes = np.array(boxes)
        labels = np.array(labels)
        # Distinct labels present on this slide; patches are sampled per class.
        classes = list(set(labels))

        x_batch, y_batch = extract_image_with_boxes(file,
                                                    boxes,
                                                    labels,
                                                    classes,
                                                    num_examples_per_image)

        filename = file.file.stem
        record_path = str(path) + "/" + filename + ".tfrecord"

        # The context manager closes the writer even if encoding a patch
        # fails half-way through, so no open handle is left behind.
        with tf.python_io.TFRecordWriter(record_path) as writer:
            for image_id, (image, y) in enumerate(zip(x_batch, y_batch)):
                tf_example = image_to_feature_dict(image, y, size, filename + "_" + str(image_id) + ".png")
                writer.write(tf_example.SerializeToString())

In [10]:
# Output folders for the generated TFRecords. They are derived from the dataset
# root `path` instead of repeating the absolute location, so moving the dataset
# only requires changing `path`. The folder name encodes patch size and level.
patch_root = path / 'RCNN-Patches' / '{0}_{1}_API'.format(size, level)
train_path = patch_root / 'train'
val_path = patch_root / 'val'

In [11]:
# Create both output folders including missing parents. exist_ok=True replaces
# the separate existence check and keeps the cell safe to re-run.
for folder in [train_path, val_path]:
    os.makedirs(folder, exist_ok=True)

In [12]:
# Sample patches from every training slide and write one .tfrecord per slide
# into train_path.
convert_images_to_tfrecord(train_files, train_path)

100%|██████████| 13/13 [04:19<00:00, 19.98s/it]


In [13]:
# Same conversion for the held-out validation slides, written into val_path.
convert_images_to_tfrecord(valid_files, val_path)

100%|██████████| 4/4 [01:22<00:00, 20.56s/it]


### Training commands


#### Model:
export CUDA_VISIBLE_DEVICES=0

python /home/c.marzahl@de.eu.local/ProgProjekte/Demo/models/research/object_detection/legacy/train.py --logtostderr --pipeline_config_path=pipeline.config --train_dir=train/baseline

#### Eval:
export CUDA_VISIBLE_DEVICES=1

python /home/c.marzahl@de.eu.local/ProgProjekte/Demo/models/research/object_detection/legacy/eval.py --logtostderr --pipeline_config_path=pipeline.config --checkpoint_dir=train/baseline --eval_dir=eval/baseline

#### Tensorboard:
tensorboard --logdir=./ --port=6007

#### Compile:
python /home/c.marzahl@de.eu.local/ProgProjekte/Demo/models/research/object_detection/export_inference_graph.py --input_type image_tensor --pipeline_config_path=pipeline.config --trained_checkpoint_prefix=model.ckpt-20000 --output_directory=inference