In [41]:
import numpy as np
import pandas as pd 
import io 
import random
import itertools
import numpy as np
import cv2
import time
import torchvision.transforms as T
import gc
from bokeh.plotting import ColumnDataSource, figure, output_notebook, show
from bokeh.models import Text, Rect, HoverTool, Range1d
import panel as pn
from fastai.vision import *
from PIL import Image as PILImage
from ipywidgets import Label, Button, FileUpload, Output, VBox, AppLayout, Layout, Dropdown
from IPython.core.display import HTML
import warnings
warnings.filterwarnings('ignore')

# When True, the processing functions print size and timing diagnostics.
VERBOSE=False

# User-facing UI strings, looked up as TEXTS[<widget or message id>][<language code>].
# Every entry provides both an "en" and a "de" variant.
TEXTS = {"plot_title" : {"en" : "Detected Gleason Patterns", "de" : "Erkannte Gleason Muster"},
         "dpd_lang" : {"en" : "Language", "de" : "Sprache"},
         "btn_doc" : {"en" : "Show Documentation", "de" : "Dokumentation Anzeigen"},
         "btn_karolinska" : {"en" : "Karolinska Sample Biopsy", "de" : "Karolinska-Biopsie Beispiel"},
         "btn_radboud" : {"en" : "Radboud Sample Biopsy", "de" : "Radboud-Biopsie Beispiel"},
         "btn_header" : {"en" : "Gleason Pattern Spotter", "de" : "Gleason Pattern Spotter"},
         "btn_upload" : {"en" : "Upload Biopsy", "de" : "Biopsie hochladen"},
         "btn_status_init" : {"en" : "", "de" : ""},
         "btn_status_progress" : {"en" : "Processing - ", "de" : "Verarbeitung läuft - "},
         "btn_status_default" : {"en" : "Please Wait", "de" : "Bitte Warten"},
         "btn_status_load" : {"en" : "Loading Image", "de" : "Bild wird geladen"},
         "btn_status_detect" : {"en" : "Detecting Gleason Patterns", "de" : "Gleason Muster werden ausgewertet"},
         "btn_status_ready" : {"en" : "Ready For User Input", "de" : "Bereit für Benutzereingabe"},
         "btn_status_error" : {"en" : "Error Loading Image", "de" : "Fehler beim Abruf des Bildes"},
        }

# English documentation page (HTML) shown when the documentation button is pressed.
HTML_EN = """<div class="jp-RenderedHTMLCommon jp-RenderedMarkdown jp-MarkdownOutput " data-mime-type="text/markdown">
<h1 id="Gleason-Pattern-Spotter">Gleason Pattern Spotter<a class="anchor-link" href="#Gleason-Pattern-Spotter">¶</a></h1><p>The Gleason Pattern Spotter is a web application that takes a prostate cancer image, divides the image into a grid of several boxes, and then detects the Gleason Pattern for each box.</p>
<p>Wait, what is a Gleason Pattern? This application is the result of my approach to the <a href="https://www.kaggle.com/c/prostate-cancer-grade-assessment">PANDA Challenge</a>. Please refer to the <a href="https://www.kaggle.com/c/prostate-cancer-grade-assessment">competition website</a> for more information.</p>
<p>Due to bugs in my code I wasn't able to submit the approach to the competition. This application is designed to inspect the image, the boxes and the detected patterns.</p>
<p><font size="3" color="red">!!! Please do not take the predicted Gleason patterns for real. The results are likely to be wrong !!!</font></p>
<p>By pressing one of the buttons "Karolinska Biopsy" or "Radboud Biopsy", a random image from one of those data providers will be taken, processed and displayed below. (Additionally you can upload your own Biopsy by using the "Upload Biopsy" Button).</p>
<p>The image and its predictions will appear below and can be interactively analyzed using the toolbar in the top right corner.</p>
<table>
<thead><tr>
<th>Icon</th>
<th>Name</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr>
<td><img src="https://docs.bokeh.org/en/latest/_images/Pan.png" alt=""></td>
<td>Pan</td>
<td>The pan tool allows the user to pan the plot by left-dragging a mouse or dragging a finger across the plot region.</td>
</tr>
<tr>
<td><img src="https://docs.bokeh.org/en/latest/_images/BoxZoom.png" alt=""></td>
<td>BoxZoom</td>
<td>The box zoom tool allows the user to define a rectangular region to zoom the plot bounds to. This is done by left-dragging a mouse, or dragging a finger across the plot area.</td>
</tr>
<tr>
<td><img src="https://docs.bokeh.org/en/latest/_images/WheelZoom.png" alt=""></td>
<td>WheelZoom</td>
<td>The wheel zoom tool will zoom the plot in and out, centered on the current mouse location. It will respect any min and max values and ranges, preventing zooming in and out beyond these values.</td>
</tr>
<tr>
<td><img src="https://docs.bokeh.org/en/latest/_images/Save.png" alt=""></td>
<td>Save</td>
<td>The save tool pops up a modal dialog that allows the user to save a PNG image of the plot.</td>
</tr>
<tr>
<td><img src="https://docs.bokeh.org/en/latest/_images/Reset.png" alt=""></td>
<td>Reset</td>
<td>The reset tool will restore the plot ranges to their original values.</td>
</tr>
</tbody>
</table>
</div>"""

# German documentation page (HTML) shown when the documentation button is pressed.
HTML_DE = """<div class="jp-RenderedHTMLCommon jp-RenderedMarkdown jp-MarkdownOutput " data-mime-type="text/markdown">
<h1 id="Gleason-Pattern-Spotter">Gleason Pattern Spotter<a class="anchor-link" href="#Gleason-Pattern-Spotter">¶</a></h1><p>Der Gleason Pattern Spotter ist eine Webanwendung, die ein Prostatakrebsbild aufnimmt, das Bild in ein Gitter aus mehreren Kästchen unterteilt und dann das Gleason-Muster für jedes Kästchen ermittelt.</p>
<p>Was ist ein Gleason-Muster? Diese Anwendung ist das Ergebnis meines Ansatzes bei der <a href="https://www.kaggle.com/c/prostate-cancer-grade-assessment">PANDA Challenge</a>. Weitere Informationen finden Sie auf der <a href="https://www.kaggle.com/c/prostate-cancer-grade-assessment">Wettbewerbs-Website</a>.</p>
<p>Aufgrund von Fehlern in meinem Code war es mir nicht möglich, den Ansatz für den Wettbewerb einzureichen. Diese Anwendung dient dazu, das Bild, die Kästchen und die erkannten Muster zu untersuchen.</p>
<p><font size="3" color="red">!!! Bitte trauen Sie den ermittelten Gleason-Mustern nicht. Die Ergebnisse werden wahrscheinlich fehlerhaft sein !!!</font></p>
<p>Durch Drücken einer der Schaltflächen "Karolinska-Biopsie" oder "Radboud-Biopsie" wird ein zufälliges Bild von einem dieser Datenlieferanten geladen, verarbeitet und unten angezeigt. (Zusätzlich können Sie Ihre eigene Biopsie hochladen, indem Sie den Button "Biopsie hochladen" betätigen).</p>
<p>Das Bild und seine Vorhersagen können interaktiv mit Hilfe der Symbolleiste in der oberen rechten Ecke des Diagramms analysiert werden.</p>
<table>
<thead><tr>
<th>Icon</th>
<th>Name</th>
<th>Beschreibung</th>
</tr>
</thead>
<tbody>
<tr>
<td><img src="https://docs.bokeh.org/en/latest/_images/Pan.png" alt=""></td>
<td>Verschieben</td>
<td>Mit dem Verschieben-Werkzeug kann der Benutzer das Bild verschieben.</td>
</tr>
<tr>
<td><img src="https://docs.bokeh.org/en/latest/_images/BoxZoom.png" alt=""></td>
<td>Box-Zoom</td>
<td>Mit dem Box-Zoom-Werkzeug kann der Benutzer einen rechteckigen Bereich definieren, auf den die Diagrammgrenzen gezoomt werden. Dies geschieht durch Ziehen mit der linken Maustaste oder durch Ziehen mit dem Finger über den Diagrammbereich.</td>
</tr>
<tr>
<td><img src="https://docs.bokeh.org/en/latest/_images/WheelZoom.png" alt=""></td>
<td>Rad-Zoom</td>
<td>Mit dem Rad-Zoom-Werkzeug wird die Darstellung ein- und ausgezoomt, zentriert auf die aktuelle Mausposition.</td>
</tr>
<tr>
<td><img src="https://docs.bokeh.org/en/latest/_images/Save.png" alt=""></td>
<td>Speichern</td>
<td>Das Speicherwerkzeug öffnet einen Dialog, der es dem Benutzer ermöglicht, ein PNG-Bild des Diagramms zu speichern.</td>
</tr>
<tr>
<td><img src="https://docs.bokeh.org/en/latest/_images/Reset.png" alt=""></td>
<td>Zurücksetzen</td>
<td>Das Rücksetzwerkzeug setzt die Diagrammbereiche auf ihre ursprünglichen Werte zurück.</td>
</tr>
</tbody>
</table>
</div>"""

# Documentation HTML per language code; the documentation button displays DOC[lang].
DOC = dict(en=HTML_EN, de=HTML_DE)

# Currently selected UI language code; reassigned when the language dropdown changes.
lang = "en"

def accuracy_multi(inp, targ, thresh=0.5, sigmoid=True):
    """Return the mean element-wise accuracy of thresholded predictions.

    `inp` and `targ` must have the same size. When `sigmoid` is true, `inp`
    is passed through a sigmoid first. An element counts as correct when
    `inp > thresh` agrees with the boolean value of `targ`.
    """
    scores = inp.sigmoid() if sigmoid else inp
    predicted_on = scores > thresh
    matches = predicted_on == targ.bool()
    return matches.float().mean()

In [42]:
# Load model and define prediction function
# `learn` is the module-level learner that predictPatch and the tooltip setup
# read (`learn.predict`, `learn.data.classes`). It is loaded from the exported
# file 'trained_model_fastai_resnet50.pkl'; the first argument is an empty
# path string (presumably resolved against the working directory - confirm).
learn = load_learner('', 'trained_model_fastai_resnet50.pkl')

def predictPatch(image, x, y, size, confidence):
    """Classify one square patch of `image` and describe it for plotting.

    Parameters
    ----------
    image : PIL image
        Image the patch is cropped from.
    x, y : int
        Left and upper pixel coordinate of the patch.
    size : int
        Edge length of the square patch in pixels.
    confidence : float
        Minimum probability the most likely class must reach for the box to
        be drawn in that class's colour; below it the box is drawn grey.

    Returns
    -------
    dict
        One entry per model class holding its probability formatted as a
        percentage, plus:
        'pred'      - ';'-joined names of all classes flagged by the model,
        'x', 'y'    - centre of the patch,
        'line_clr'  - outline colour for the box.
    """
    patch = image.crop((x, y, x+size, y+size))
    _, labels, preds = learn.predict(Image(T.ToTensor()(patch.convert('RGB'))))
    classes = learn.data.classes
    mostconfident = classes[preds.argmax()]
    pname = ';'.join(cls for cls, onehot in zip(classes, labels) if onehot == 1)
    tmp = dict(zip(classes, ["{0:.2%}".format(p) for p in preds]))
    tmp['pred'] = pname
    tmp['x'] = x+size//2
    tmp['y'] = y+size//2
    # Compare the highest *probability* with the threshold. The previous code
    # compared the argmax index, so the threshold only ever greyed out boxes
    # whose most likely class happened to be class 0.
    if float(preds.max()) < confidence:
        tmp['line_clr'] = 'grey'
    else:
        # Fall back to grey for a class without a configured colour instead of
        # failing the whole plot with a KeyError.
        tmp['line_clr'] = COLORMAP.get(mostconfident, 'grey')

    return tmp

In [43]:
# Hack to get the dynamic urls of the images
import urllib
import requests
from bs4 import BeautifulSoup

# Maps each sample image file name to the data provider it came from.
# Insertion order: all 'radboud' samples first, then all 'karolinska' samples.
IMAGE_PROVIDER_MAP = {
    filename: provider
    for provider, filenames in (
        ('radboud', ('658be675223ffa5509203009eb7bba89.png',
                     'c306e9bf52ed33801003d17ae3f60d32.png',
                     'b0ee38976e71dfbfba705974713ee612.png',
                     '8a21fc0aed2fd06a8b3fe4824c53413e.png',
                     '6a5cc0b66b8d4af9c13f1303c6de5ffb.png',
                     '58d13c5b7c1d86814e7a9451fef0d9d5.png',
                     '36eeee47a868b8ac66b6ece9e6372984.png',
                     '35f629e359399d425289242b698b3f66.png',
                     '345f3e86a3dcc26dce73ceeacc5a4a0a.png')),
        ('karolinska', ('c4b7ca39cc464a7af7f80fb9f4bc05c3.png',
                        'acc79fc48cdb1a2c730a0b2c15b0355d.png',
                        'a6a07e971c280f5cee808a92a5b06e70.png',
                        '89d137c7fb29df7596e6824181b7d5c9.png',
                        'fe1d0b156ddba991585791769dc9ede9.png',
                        '5d35e9a318e04684020a42c3a2ca19fa.png',
                        '2665dd5db932525f171f931c16112c65.png',
                        '0d7666fdc3372c68a080c612081e2b45.png',
                        'fd1deb6f87696b6372ea9c9b109b4ccf.png')),
    )
    for filename in filenames
}

# `import urllib` alone does not guarantee that the `request` submodule is
# loaded, so import it explicitly before using `urllib.request.urlopen`.
import urllib.request

# Download URLs of the sample images, grouped by data provider.
provider_urls = {'radboud': [],
                 'karolinska': []
                }

url = "https://www.kaggle.com/jackbyte/sample-gleason-biopsy-pictures"
# True when the scrape below raised or found no sample image URL at all;
# the sample buttons then show an error instead of trying to download.
hack_failed = False

try:
    r = urllib.request.urlopen(url, timeout=30).read()

    soup = BeautifulSoup(r, 'html.parser')

    # The page source is split on commas and every fragment mentioning
    # 'downloadUrl' is kept; [15:-1] drops a 15-character prefix and the last
    # character (presumably the `"downloadUrl":"` key and the closing quote -
    # verify against the current page markup).
    splits = str(soup).split(',')
    dl_urls = [s[15:-1] for s in splits if 'downloadUrl' in s]

    # Assign every download URL to the provider of the image it names.
    for img, provider in IMAGE_PROVIDER_MAP.items():
        provider_urls[provider].extend(dl_url for dl_url in dl_urls if img in dl_url)
except Exception:
    # Best effort: any failure (network, parsing) only disables the samples.
    hack_failed = True
else:
    # A page that loads but yields no URLs (e.g. changed markup) is a failure too.
    hack_failed = not any(provider_urls.values())

In [44]:
# Image and grid functions
def getSampleImageFromUrl(provider='radboud'):
    """Download and return a randomly chosen sample image of `provider`.

    The candidate URLs are taken from `provider_urls[provider]`, which is
    filled from https://www.kaggle.com/jackbyte/sample-gleason-biopsy-pictures.

    Returns
    -------
    PIL image opened from the downloaded bytes.

    Raises
    ------
    KeyError
        If `provider` is not a key of `provider_urls`.
    ValueError
        If no URL is known for `provider`.
    requests.RequestException
        If the download fails, times out or returns an HTTP error status.
    """
    candidates = provider_urls[provider]
    if not candidates:
        raise ValueError(f"no sample image URL available for provider {provider!r}")
    url = random.choice(candidates)

    # A timeout keeps the UI from hanging forever on a stalled connection, and
    # the status check turns an HTTP error page into a clear exception instead
    # of an image decoding error.
    response = requests.get(url, timeout=30)
    response.raise_for_status()

    # `content` is the fully downloaded, transfer-decoded body.
    return PILImage.open(io.BytesIO(response.content))

def calculateGridCoords(w, h, 
                        max_box_amount=100, 
                        init_pixels=256,
                        max_overlap=0.5):
    """Lay a grid of square boxes over an area of width `w` and height `h`.

    The box edge starts at `init_pixels` and is doubled until the grid holds
    at most `max_box_amount` boxes.

    Along each axis the boxes are placed edge to edge starting at 0. If a
    strip remains uncovered at the far end and that strip is larger than
    `max_overlap` times the box edge, one additional box is added flush with
    the far edge (it overlaps its neighbour). A box that is larger than the
    axis itself is placed at 0 rather than at a negative position.

    Parameters
    ----------
    w, h : int
        Width and height of the area in pixels.
    max_box_amount : int
        Upper bound for the number of boxes returned.
    init_pixels : int
        Initial edge length of a box in pixels.
    max_overlap : float
        Fraction of the box edge the uncovered remainder must exceed for the
        additional box to be added.

    Returns
    -------
    (pixels, coords)
        `pixels` is the final box edge length, `coords` the list of (x, y)
        upper-left box corners, ordered by x and then by y.
    """
    def _axis_positions(length, box):
        # Start positions of the boxes along one axis.
        positions = [i * box for i in range(length // box)]
        remainder = length % box
        if remainder != 0 and remainder / box > max_overlap:
            # Clamp at 0: once the box outgrows the axis, `length - box` is
            # negative and would place the box outside the image.
            positions.append(max(length - box, 0))
        return positions

    pixels = init_pixels
    while True:
        xes = _axis_positions(w, pixels)
        ys = _axis_positions(h, pixels)
        coords = list(itertools.product(xes, ys))

        if len(coords) <= max_box_amount:
            return pixels, coords
        pixels = pixels*2

In [45]:
# cv2 magic to keep only the boxes that are not white background
def maskoff(img, coords, box_size, thr1=200, thr2=200, 
            krnlr=3,krnlc=3, target_dim=(8192, 26368), 
            pixel_score=0.25, verbose=False):
    """Segment the foreground of `img` and keep only the boxes that lie on it.

    The image is shrunk to 256x256, Canny edges are closed and eroded to seed
    a GrabCut segmentation, and the resulting binary mask is scaled up to
    `target_dim` to test each box.

    Parameters
    ----------
    img : array accepted by cv2.resize / cv2.grabCut
        Input image (the caller in this file passes a BGR uint8 array).
    coords : iterable of (x, y)
        Upper-left corners of the candidate boxes, in `target_dim` pixels.
    box_size : int
        Edge length of the square boxes.
    thr1, thr2 : thresholds passed to cv2.Canny.
    krnlr, krnlc : rows / columns of the all-ones morphology kernel.
    target_dim : tuple
        Size passed as `dsize` to cv2.resize for the full-size mask
        (OpenCV expects (width, height) order there).
    pixel_score : float
        A box is kept when the fraction of its pixels that are set in the
        full-size mask is greater than this value.
    verbose : bool
        Print the resized shape and the GrabCut run time.

    Returns
    -------
    (resized, out_mask_small, out_mask, out_coords)
        - the 256x256 version of the image
        - the 0/1 mask matching the 256x256 image
        - the 0/1 mask resized to `target_dim`
        - the (x, y) entries of `coords` whose box passes `pixel_score`
    """
    
    resized = cv2.resize(img, (256,256), interpolation = cv2.INTER_AREA)
    
    # Edge map -> close gaps (3 iterations) -> erode once; the surviving
    # pixels (value 255) become the sure-foreground seed below.
    edges = cv2.Canny(resized, thr1,thr2)
    kernel = np.ones((krnlr,krnlc), np.uint8)
    closing = cv2.morphologyEx(edges, cv2.MORPH_CLOSE, kernel, iterations=3)
    erosion = cv2.morphologyEx(closing, cv2.MORPH_ERODE, kernel, iterations=1)

    # GrabCut mask values (OpenCV GrabCutClasses):
    #    0 - sure background      (GC_BGD)
    #    1 - sure foreground      (GC_FGD)
    #    2 - probable background  (GC_PR_BGD)
    #    3 - probable foreground  (GC_PR_FGD)
    # Everything starts as 2; only the eroded edge pixels are marked 1.

    mask = np.zeros(resized.shape[:2], np.uint8)
    mask[:] = 2
    mask[erosion == 255] = 1

    # Scratch model arrays required by cv2.grabCut.
    bgdmodel = np.zeros((1, 65), np.float64)
    fgdmodel = np.zeros((1, 65), np.float64)

    out_mask = mask.copy()
    start = time.time()
    if verbose: print('[maskoff] resized.shape', resized.shape)
    # One GrabCut iteration, initialised from the mask (no rectangle).
    out_mask, _, _ = cv2.grabCut(resized,out_mask,None,bgdmodel,fgdmodel,1,cv2.GC_INIT_WITH_MASK)
    end = time.time()
    if verbose: print('[maskoff] grabcut took', end-start)
    # Collapse to binary: values 0 and 2 become 0, everything else becomes 1.
    out_mask = np.where((out_mask==2)|(out_mask==0),0,1).astype('uint8')
    
    out_mask_small = out_mask.copy()
    
    out_mask = cv2.resize(out_mask, target_dim, interpolation = cv2.INTER_CUBIC)
    
    # list of all coordinates whose box has more than pixel_score of its pixels on the mask
    out_coords = [(x,y) for (x,y) in coords if out_mask[y:y+box_size,x:x+box_size].sum()/(box_size*box_size)>pixel_score]
    
    return resized, out_mask_small, out_mask, out_coords

In [46]:
# data viz stuff
# Hover tooltips: the combined prediction followed by one confidence row per
# class known to the model. The hard-coded tooltip list that used to precede
# this was overwritten immediately and has been removed. Field names are
# written as `@{name}` so that class names containing spaces can be referenced.
class_confidence = [(f'{c}  confidence', f'@{{{c}}}') for c in learn.data.classes]
TOOLTIPS = [("Classification", "@pred")] + class_confidence


# Outline colour of a box, keyed by the predicted class name.
COLORMAP = {'Gleason3': 'yellow',
            'Gleason4': 'salmon',
            'Gleason5': 'red',
            'benign epithelium': 'forestgreen',
            'stroma': 'forestgreen'}


def bokehplotImage(image, grayscale=False):
    """Return a bokeh figure that shows `image`.

    The image is halved repeatedly until it holds at most 2**21 pixels.
    This uses PIL's in-place `thumbnail`, so the image object passed in is
    shrunk as well, unless `grayscale` is set, in which case a converted
    copy is shrunk instead.

    Parameters
    ----------
    image : PIL image
    grayscale : bool
        Convert the image to mode "L" before plotting.
    """
    if grayscale:
        image = image.convert("L")

    # shrink the image until it is small enough to plot
    width, height = image.size
    while width * height > 2**21:
        image.thumbnail(size=[width // 2, height // 2])
        width, height = image.size

    plot = figure(sizing_mode='stretch_both',
                  title=TEXTS["plot_title"][lang],
                  output_backend="webgl")
    plot.x_range = Range1d(0, plot.plot_width)
    plot.y_range = Range1d(0, plot.plot_height)

    # reverse the row order before handing the pixels to bokeh
    # (https://github.com/bokeh/bokeh/issues/1666)
    rgba_rows = np.array(image.convert("RGBA"))[::-1]
    plot.image_rgba(image=[rgba_rows], x=0, y=0, dw=width, dh=height)

    return plot

def bokehplotPatterns(image, confidence=0.7,  filename=None, verbose=False, grayscale=False):
    """Return a bokeh figure with `image` and its classified grid boxes.

    The image is covered with a grid (calculateGridCoords), boxes that do not
    lie on the foreground are dropped (maskoff), every remaining box is
    classified (predictPatch) and drawn as a rectangle with a hover tooltip.

    Parameters
    ----------
    image : PIL image
    confidence : float
        Passed to predictPatch; boxes below it are outlined grey.
    filename : unused, kept for backward compatibility.
    verbose : bool
        Print image size, box size and timing.
    grayscale : bool
        Passed to bokehplotImage.
    """
    start = time.time()

    if verbose: print(image.size)

    # transform to cv image (maskoff uses cv2.grabCut) and flip (https://github.com/bokeh/bokeh/issues/1666)
    img_cv = np.array(image.convert('RGB'))[::-1]
    img = img_cv[:, :, ::-1].copy()

    w, h = image.size
    size, crds = calculateGridCoords(w, h, init_pixels=8)
    if verbose: print('Box size', size)
    _, _, _, coords = maskoff(img, coords=crds, box_size=size, target_dim=image.size, pixel_score=0.05)

    # `coords` come from the vertically flipped array, i.e. y counts from the
    # bottom row of the picture, which is also how the boxes are drawn. PIL
    # crops count y from the top, so the rows have to be mirrored for the
    # crop; otherwise the classified patch is the vertical mirror position of
    # the box that is displayed.
    data = []
    for x, y in coords:
        record = predictPatch(image, x, h - y - size, size, confidence)
        record['y'] = y + size//2   # keep the box centre in plot coordinates
        data.append(record)
    end = time.time()

    df = pd.DataFrame(data)
    source = ColumnDataSource(data=df) 

    rects = Rect(x='x',y='y', width=size, height=size, line_color='line_clr', fill_color=None)
    
    p = bokehplotImage(image, grayscale)
    master = p.add_glyph(source_or_glyph=source, glyph=rects)
    master_hover = HoverTool(renderers=[master],
                            tooltips=TOOLTIPS)

    p.add_tools(master_hover)
    p.x_range=(Range1d(0,p.plot_width))
    p.y_range=(Range1d(0,p.plot_height))
    # `end` used to be undefined here, so verbose=True raised a NameError.
    if verbose: print('Took', end-start, 'to process image found', len(coords), 'patches')
    
    return p

In [None]:
# GUI
pn.extension()


def _auto_layout():
    """Return a new Layout with automatic height and width."""
    return Layout(height='auto', width='auto')


# defining widgets
# language selector
dpd_lang = Dropdown(options=['en', 'de'],
                    value='en',
                    description=TEXTS["dpd_lang"][lang],
                    layout=_auto_layout())

# action buttons
btn_doc = Button(description=TEXTS["btn_doc"][lang], layout=_auto_layout())
btn_karolisnka = Button(description=TEXTS["btn_karolinska"][lang], layout=_auto_layout())
btn_radboud = Button(description=TEXTS["btn_radboud"][lang], layout=_auto_layout())
btn_upload = FileUpload(description=TEXTS["btn_upload"][lang], multiple=False, layout=_auto_layout())

# disabled buttons that are only used as header and status display
btn_header = Button(description=TEXTS["btn_header"][lang], disabled=True, layout=_auto_layout())
btn_status = Button(description=TEXTS["btn_status_init"][lang], disabled=True, layout=_auto_layout())

# output areas: `pre_output` for the plain image, `output` for the final plot
pre_output = Output(clear_output=True)
output = Output(clear_output=True)

# defining event functions
def displayWaitMessage(message=None):
    """Show a yellow "processing" status with `message` on the status button.

    When `message` is None the default wait text for the *current* language
    is used. (A default written in the signature would be evaluated once at
    definition time and therefore stay in the start-up language.)
    """
    if message is None:
        message = TEXTS["btn_status_default"][lang]
    btn_status.description = f'{TEXTS["btn_status_progress"][lang]} {message}'
    btn_status.style.button_color = 'yellow'

def displayReadyness():
    """Show the light-green "ready" status in the current language."""
    ready_text = TEXTS["btn_status_ready"][lang]
    btn_status.description = ready_text
    btn_status.style.button_color = 'lightgreen'

def outputImage(img):
    """Display `img` in two stages and update the status button.

    Stage 1 renders the plain image into `pre_output`; stage 2 renders the
    image with its classified boxes into `output` and clears `pre_output`.
    """
    # stage 1: plain image as a quick preview
    with pre_output:
        displayWaitMessage(TEXTS["btn_status_load"][lang])
        preview_started = time.time()
        if VERBOSE:
            print(img.size)
        display(pn.pane.Bokeh(bokehplotImage(img)))
        displayWaitMessage(TEXTS["btn_status_detect"][lang])
        output.clear_output()
        if VERBOSE:
            print('took', time.time()-preview_started, 'for displaying the image')

    # stage 2: image with the detected patterns
    detect_started = time.time()
    with output:
        display(pn.pane.Bokeh(bokehplotPatterns(img)))
        pre_output.clear_output()
        if VERBOSE:
            print('took', time.time()-detect_started, 'for displaying the preds')
        displayReadyness()
    
def processSampleImage(provider='karolinska'):
    """Load a random sample image of `provider` and display it with its patterns.

    The plot is reset first. If the sample URLs could not be collected
    (`hack_failed`) or the image cannot be loaded, the status button shows
    the red error text instead.
    """
    def _show_error():
        btn_status.description = TEXTS["btn_status_error"][lang]
        btn_status.style.button_color = 'red'

    emptyPlot()
    if hack_failed:
        _show_error()
        return

    start = time.time()
    try:
        img = getSampleImageFromUrl(provider)
    except (KeyError, ValueError, OSError):
        # KeyError/ValueError: unknown provider or no URL for it.
        # OSError: covers I/O failures such as download or image decoding
        # errors; report them in the UI instead of leaving it without feedback.
        _show_error()
        return
    end = time.time()
    if VERBOSE: print('took', end-start, 'for loading the image')
    outputImage(img)
    
def on_btn_karolisnka_clicked(b):
    """Click handler: process a random Karolinska sample image."""
    processSampleImage('karolinska')

def on_btn_radboud_clicked(b):
    """Click handler: process a random Radboud sample image."""
    processSampleImage('radboud')

def on_btn_doc_clicked(b):
    """Click handler: replace both output areas with the documentation page."""
    for area in (output, pre_output):
        area.clear_output()
    with output:
        page = DOC[lang]
        display(HTML(page))
        
def on_data_change(change):
    """Observer for the upload widget: display the most recently uploaded image.

    Does nothing when the widget holds no data. If the upload cannot be
    opened as an image, the status button shows the red error text.
    """
    uploads = btn_upload.data
    if not uploads:
        # Nothing to show; indexing [-1] would raise an IndexError.
        return

    emptyPlot()
    start = time.time()
    try:
        img = PILImage.open(io.BytesIO(uploads[-1]))
    except OSError:
        # PIL reports unreadable / non-image data with an OSError subclass.
        btn_status.description = TEXTS["btn_status_error"][lang]
        btn_status.style.button_color = 'red'
        btn_upload._counter = 0
        return
    end = time.time()
    if VERBOSE: print('took', end-start, 'for loading the image')
    outputImage(img)
    # reset the file counter shown on the upload button
    btn_upload._counter = 0
    
def on_lang_select(change):
    """Observer for the language dropdown: switch `lang` and relabel all widgets."""
    global lang
    lang = dpd_lang.value

    # (widget, key in TEXTS) in the order the labels are refreshed
    labelled_widgets = (
        (dpd_lang, "dpd_lang"),
        (btn_karolisnka, "btn_karolinska"),
        (btn_radboud, "btn_radboud"),
        (btn_upload, "btn_upload"),
        (btn_header, "btn_header"),
        (btn_status, "btn_status_init"),
        (btn_doc, "btn_doc"),
    )
    for widget, text_key in labelled_widgets:
        widget.description = TEXTS[text_key][lang]

# adding events
btn_doc.on_click(on_btn_doc_clicked)
btn_karolisnka.on_click(on_btn_karolisnka_clicked)
btn_radboud.on_click(on_btn_radboud_clicked)
btn_upload.observe(on_data_change, names=['data'])
# Only react to a change of the selected value. Without `names` the handler
# was registered for every trait of the dropdown and therefore also ran for
# the description change it performs itself.
dpd_lang.observe(on_lang_select, names='value')

# Layout and Style 
# top row: documentation button and language selector
applayout1 = AppLayout(left_sidebar=btn_doc,
                       right_sidebar=dpd_lang)
# bottom block: status on top, the two sample buttons, upload at the bottom
applayout2 = AppLayout(header=btn_status,
                       left_sidebar=btn_karolisnka,
                       right_sidebar=btn_radboud,
                       footer=btn_upload)
btn_karolisnka.style.button_color = 'lightsalmon'
btn_radboud.style.button_color = 'aliceblue'

def emptyPlot():
    """Clear both output areas and show an empty, half-height placeholder figure."""
    for area in (output, pre_output):
        area.clear_output()

    with output:
        placeholder = figure(sizing_mode='stretch_both',
                             tooltips=TOOLTIPS,
                             title=TEXTS["plot_title"][lang])
        placeholder.plot_height //= 2
        display(pn.pane.Bokeh(placeholder))
        
# Start with the empty placeholder figure in the output area.
emptyPlot()

# Assemble the app top to bottom: header, status, documentation/language row,
# the two output areas, then the sample/upload controls.
display(VBox([btn_header, btn_status, applayout1, pre_output,output, applayout2]))