In [None]:
%%capture
# Bring pip itself up to date first; the upgrade is issued through both the
# `pip` and the `pip3` entry points.
!pip install --upgrade pip
!pip3 install --upgrade pip
# Extra dependency for background subtraction.
# NOTE(review): the import cell below uses `rolling_ball_filter` - confirm that
# this is the package that provides that module.
!pip install opencv-rolling-ball

import sys
import sagemaker
import matplotlib.pyplot as plt
import PIL
from PIL import Image
import io
import boto3
import numpy as np
from skimage import util 
from skimage.util import img_as_ubyte
from skimage import exposure
from skimage.io import imread as pngread
from skimage.io import imsave as pngsave
import cv2
from rolling_ball_filter import rolling_ball_filter
import random
import threading
from skimage.segmentation import mark_boundaries
from skimage import color
from sagemaker import get_execution_role
from IPython.core.debugger import set_trace
from processfiles import *
# IAM role under which SageMaker runs the jobs started from this notebook.
role = get_execution_role()
print(role)

# SageMaker session and the default bucket that belongs to it.
sess = sagemaker.Session()
bucket = sess.default_bucket()

# Container image of the 'semantic-segmentation' algorithm for the session's region.
from sagemaker.amazon.amazon_estimator import get_image_uri
training_image = get_image_uri(
    sess.boto_region_name,
    'semantic-segmentation',
    repo_version="latest",
)
print(training_image)

# S3 handles used throughout the notebook; `s3meadata` is the bucket that is
# listed by the data-setup cell.
s3 = boto3.resource('s3')
s3_resource = boto3.resource('s3')
s3meadata = s3_resource.Bucket(name='meadata')

## Setup data

In [None]:
%%capture
# Run process functions (raw and filtered versions of fig8 and liorP)
def procfilepar(key):
    """Apply every dataset processing routine to one S3 object key.

    The routines are called one after another, in a fixed order, each with the
    same ``key``. Nothing is returned.
    """
    processing_steps = (
        proccessliorpreprocfiles,
        proccessliorfiles,
        proccessfigure8files,
        proccessfig8preprocfiles,
        proccessusiigacifiles,
        proccesshelafiles,
    )
    for step in processing_steps:
        step(key)
    
# Process every object of the raw-data bucket, one thread per key.
keys = [obj.key for obj in s3meadata.objects.all()]
# Thread.start() returns None, so the Thread objects themselves are kept in a
# list in order to be able to wait for them afterwards.
workers = [threading.Thread(target=procfilepar, args=(key,)) for key in keys]
for worker in workers:
    worker.start()
# Block until all processing has finished so that the following cells operate
# on complete data.
for worker in workers:
    worker.join()

### Crop dataset images around labeled areas

In [None]:
# Crop every jpg of this dataset prefix, one thread per key.
keys = [obj.key for obj in s3_resource.Bucket(name=bucket).objects.all() if ('jpg' in obj.key and prefix in obj.key)]
# Keep the Thread objects (start() returns None) so the cell can wait for them.
workers = [threading.Thread(target=performcrop, args=(key,)) for key in keys]
for worker in workers:
    worker.start()
# Do not let the next cells run before all crops are done.
for worker in workers:
    worker.join()

### Delete all files without a matching image-annotation

In [None]:
# The helper is not defined in this notebook; presumably it comes from
# `from processfiles import *` - see that module for what exactly is deleted.
removeunmatched()

### Remove samples with few segmentations

In [None]:
import pandas as pd  # explicit: the setup cell does not import pandas by name

files = []
# S3 key prefixes of the four data channels (also used by the training cells).
train_channel = prefix + '/train'
validation_channel = prefix + '/validation'
train_annotation_channel = prefix + '/train_annotation'
validation_annotation_channel = prefix + '/validation_annotation'

# Annotation masks (png) that belong to this dataset prefix.
keys = [obj.key for obj in s3_resource.Bucket(name=bucket).objects.all() if ('png' in obj.key and prefix in obj.key)]

# Per mask: number of labeled (==1) and of background (==0) pixels.
maskkeys = []  # keys whose mask could be read; parallel to segs / empties
segs = []
empties = []
for key in keys:
    masksavepath = "/tmp/"+key.split('/')[-1]
    s3.meta.client.download_file(bucket, key, masksavepath)
    mask = cv2.imread(masksavepath)
    if mask is None:
        # cv2.imread reports an unreadable file by returning None, not by raising.
        print('Could not read mask {} - skipped'.format(key))
        continue
    maskkeys.append(key)
    segs.append(np.sum(mask == 1))
    empties.append(np.sum(mask == 0))

# Labeled/background ratio in percent. A mask without any background pixel
# would divide by zero: it gets +inf when it has labeled pixels (never
# "too empty") and 0 when it has none.
segcounts = np.asarray(segs, dtype=float)
emptycounts = np.asarray(empties, dtype=float)
ratio = np.where(segcounts > 0, np.inf, 0.0)
np.divide(segcounts, emptycounts, out=ratio, where=emptycounts > 0)
ratio = ratio * 100

# Threshold = mean - std, computed over finite ratios only so that a single
# inf cannot turn the threshold into nan.
finite = ratio[np.isfinite(ratio)]
removesamples = []
if finite.size == 0:
    print('No usable masks found - nothing to remove')
else:
    thresh = np.round(np.mean(finite) - np.std(finite))
    plt.hist(finite)
    plt.show()
    df = pd.DataFrame({'key': maskkeys, 'ratio': ratio, 'empty': ratio < thresh})
    removesamples = df.loc[df['empty'], 'key'].values

# Delete each sparse mask together with its image.
s3client = boto3.client('s3')
for removeme in removesamples:
    s3client.delete_object(Bucket=bucket, Key=removeme)
    # Image key: same path without the '_annotation' folder suffix, and with
    # only the LAST 'png' (the extension) turned into 'jpg'.
    imagekey = 'jpg'.join(removeme.replace('_annotation/', '/').rsplit('png', 1))
    s3client.delete_object(Bucket=bucket, Key=imagekey)

In [None]:
import json
# Write the label map (a single "scale" entry) to train_label_map.json in the
# current working directory.
label_map = {}
label_map["scale"] = 1
with open('train_label_map.json', 'w') as lm_fname:
    lm_fname.write(json.dumps(label_map))

In [None]:
# Destination passed to the estimator as output_path: <bucket>/<prefix>/output.
_output_template = 's3://{}/{}/output'
s3_output_location = _output_template.format(bucket, prefix)
print(s3_output_location)

## Training

### Setup Model Hyperparameters

In [None]:
# Build the SageMaker estimator around the semantic-segmentation training image.
estimator_options = dict(
    train_instance_count=1,
    train_instance_type='ml.p3.16xlarge',
    train_volume_size=300,   # GB of storage attached to the training instance
    train_max_run=360000,    # upper limit on the training run time, in seconds
    output_path=s3_output_location,
    base_job_name='fresh-train-trial',
    sagemaker_session=sess,
)
ss_model = sagemaker.estimator.Estimator(training_image, role, **estimator_options)

In [None]:
# Setup hyperparameters 
import boto3
# Count the training images. Only the train channel of THIS dataset is listed
# (key prefix '<prefix>/train/'), so unrelated 'train/' folders elsewhere in
# the bucket cannot inflate num_training_samples.
s3traindata = boto3.resource('s3').Bucket(name=bucket)
trainprefix = prefix + '/train/'
numtrain = len([obj.key for obj in s3traindata.objects.filter(Prefix=trainprefix) if 'jpg' in obj.key])
if numtrain == 0:
    raise ValueError('No training images found under s3://{}/{}'.format(bucket, trainprefix))

ss_model.set_hyperparameters(backbone='resnet-101', # Encoder. Other option is 'resnet-50'.
                             algorithm='deeplab', # Decoder. Other options are 'fcn' and 'psp'.
                             use_pretrained_model='False', # 'False' = do not start from a pre-trained backbone.
                             crop_size=412, # Size of image random crop.
                             num_classes=2, # Background + cell
                             epochs=1000, # Number of epochs to run.
                             # NOTE(review): momentum is presumably only honoured by some optimizers - confirm it has any effect with 'adagrad'.
                             learning_rate=0.003037052721870563, momentum = 0.6133596510181524, weight_decay = 0.0001560844683426084,
                             optimizer='adagrad', # Other options include 'sgd', 'adam', 'rmsprop', 'nag'.
                             lr_scheduler='poly', # Other options include 'cosine' and 'step'.
                             mini_batch_size=35, # Training mini batch size.
                             validation_mini_batch_size=16, # Validation mini batch size (a larger value may be worth trying).
                             early_stopping=True, # Turn on early stopping. If OFF, other early stopping parameters are ignored.
                             early_stopping_patience=50, # Tolerate this many epochs if the mIoU doesn't increase.
                             early_stopping_min_epochs=25, # No matter what, run this many epochs.
                             num_training_samples=numtrain)

### Setup data inputs

In [None]:
# All channels are replicated in full to every training instance.
distribution = 'FullyReplicated'


def _channel_uri(channel):
    """Return the s3:// URI of a channel prefix inside the default bucket."""
    return 's3://{}/{}'.format(bucket, channel)


def _channel_input(uri, content_type):
    """Wrap an S3 prefix URI in a sagemaker s3_input of the given content type."""
    return sagemaker.session.s3_input(uri, distribution=distribution,
                                      content_type=content_type, s3_data_type='S3Prefix')


# Full S3 locations of the four channels.
s3_train_data = _channel_uri(train_channel)
s3_validation_data = _channel_uri(validation_channel)
s3_train_annotation = _channel_uri(train_annotation_channel)
s3_validation_annotation = _channel_uri(validation_annotation_channel)

# Images are JPEG, annotation masks are PNG.
train_data = _channel_input(s3_train_data, 'image/jpeg')
validation_data = _channel_input(s3_validation_data, 'image/jpeg')
train_annotation = _channel_input(s3_train_annotation, 'image/png')
validation_annotation = _channel_input(s3_validation_annotation, 'image/png')

data_channels = dict(
    train=train_data,
    validation=validation_data,
    train_annotation=train_annotation,
    validation_annotation=validation_annotation,
)

### Fit the model

In [None]:
%%capture
# Start training on `data_channels`. Log output is requested (logs=True), but
# the `%%capture` magic of this cell keeps it out of the notebook.
ss_model.fit(inputs=data_channels, logs=True)

### Deploy the model to an endpoint

In [None]:
# Host the trained model on a real-time endpoint backed by one ml.c4.xlarge
# instance; the returned predictor is used by the inference cells below and
# the endpoint is removed again in the "Delete the endpoint" section.
ss_predictor = ss_model.deploy(initial_instance_count=1, instance_type='ml.c4.xlarge')

## Inference

Load an image for segmenting

In [None]:
# CLAHE object; only referenced by the disabled preprocessing step below.
clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(64,64))
# images/liorp_181106_2_raw.jpg
filename = "/home/ec2-user/SageMaker/itzik_images_cropped/_B37-543-2_07_ver2.tif"
# filename = "/home/ec2-user/SageMaker/images/190221_LV_ver2.tif"
im = cv2.imread(filename)
if im is None:
    # cv2.imread returns None (it does not raise) when the file is missing or
    # unreadable; fail here with a clear message instead of inside cv2.resize.
    raise FileNotFoundError('Could not read image: {}'.format(filename))
# Optional preprocessing steps, currently disabled:
# selem = disk(60)
# inimage = rank.equalize(inimage, selem=selem)
# im = clahe.apply(im)
# im,_ = rolling_ball_filter(im, ball_radius = 20, spacing = 1, top=False)
im = cv2.resize(im, (1024,1024), interpolation = cv2.INTER_AREA)
# Digits of the dtype name minus one (e.g. 'uint8' -> 7); the intensities are
# stretched to [0, 2**num - 1] before the conversion to 8 bit.
num = int(''.join(filter(str.isdigit, str(im.dtype)))) - 1
im = img_as_ubyte(exposure.rescale_intensity(im, out_range=(0, 2**num - 1)))
# Swap only the extension (last 'tif' occurrence), not a 'tif' that might
# appear elsewhere in the path.
jpgname = 'jpg'.join(filename.rsplit('tif', 1))
pngsave(jpgname, im)

# Raw JPEG bytes: this is the payload sent to the endpoint.
with open(jpgname, 'rb') as image:
    img = bytearray(image.read())

fig1 = plt.figure(figsize=(10, 10))
ax1 = fig1.add_subplot(1,1,1)
ax1.imshow(Image.open(io.BytesIO(img)), interpolation='none')
plt.show()

### Run Segmentation

In [None]:
%%time 
# Send the JPEG bytes prepared above to the endpoint and request the result
# as PNG; `return_img` holds the raw response.
ss_predictor.content_type = 'image/jpeg'
ss_predictor.accept = 'image/png'
return_img = ss_predictor.predict(img)

Let us display the segmentation mask.

In [None]:
from PIL import Image
import numpy as np
import io
from scipy import ndimage as ndi
from skimage.feature import peak_local_max
from skimage.segmentation import watershed
import seaborn as sns
from skimage.color import label2rgb

num_classes = 2
# Decode the PNG response into a mask array.
mask = np.array(Image.open(io.BytesIO(return_img)))

# Split touching objects: distance transform -> local maxima as seeds -> watershed.
distance = ndi.distance_transform_edt(mask)
# peak_local_max is asked for peak coordinates (its default return) and the
# boolean seed image is built from them; the `indices=False` form used before
# is not accepted by newer scikit-image releases.
peak_coords = peak_local_max(distance, labels=mask, footprint=np.ones((3, 3)))
local_maxi = np.zeros(distance.shape, dtype=bool)
local_maxi[tuple(peak_coords.T)] = True
markers = ndi.label(local_maxi)[0]
labels = watershed(-distance, markers, mask=mask)
pngsave('/home/ec2-user/SageMaker/testresult_mask.tif', mask)

fig1 = plt.figure(figsize=(20, 20))
ax1 = fig1.add_subplot(2,2,1)
# Built-in `float` instead of the `np.float` alias, which newer NumPy no longer provides.
result = label2rgb(label = labels, image = exposure.rescale_intensity(im.astype(float), out_range=(-1, 1)))
ax1.imshow(result)
ax2 = fig1.add_subplot(2,2,2)
ax2.imshow(labels)
plt.show()
np.unique(labels)

### Delete the endpoint

In [None]:
# Remove the endpoint behind `ss_predictor`. Note that a fresh Session is
# created for the call instead of reusing `sess`.
sagemaker.Session().delete_endpoint(ss_predictor.endpoint)

# Run inference on deployed model 

### Preprocess submitted data

In [None]:
from skimage import exposure,color, img_as_int, img_as_ubyte
from skimage.io import imread as pngread
from skimage.io import imsave as pngsave
from rolling_ball_filter import rolling_ball_filter
from skimage.morphology import disk
from skimage.filters.rank import autolevel,equalize
import numpy as np
import cv2
import math
import os

def preproc(img):
    """Enhance an image before it is sent to the model.

    Applies, in this order: local auto-level with a disk of radius 60, gamma
    adjustment with gamma 2, and a bilateral filter (d=9, sigmaColor=75,
    sigmaSpace=75).

    This is best effort: if any step fails, the error is reported and the
    image is returned in whatever state it had reached (possibly unprocessed).

    Parameters
    ----------
    img : array
        Input image (presumably 2-D grayscale, as produced by
        ``createmultipleinputs`` - confirm for other callers).

    Returns
    -------
    array
        The processed image, or the partially processed / original image when
        a step failed.
    """
    selem = disk(60)
    try:
        img = autolevel(img, selem)
        img = exposure.adjust_gamma(img, 2)
        img = cv2.bilateralFilter(img, 9, 75, 75)
    except Exception as err:
        # Only real errors are caught (not KeyboardInterrupt/SystemExit), and
        # the cause is reported instead of just the shape.
        print('preproc failed for image of shape {}: {!r}'.format(getattr(img, 'shape', None), err))
    return(img)

def createmultipleinputs(inputpath):
    """Load an image and derive the five input variants used for inference.

    The image is converted to a single channel if necessary, rescaled, padded
    with zeros to a square, resized to 1024x1024 and 720x720, and both resized
    versions are additionally run through ``preproc``.

    Parameters
    ----------
    inputpath : str
        Path of the image file to read.

    Returns
    -------
    list
        ``[orig, im1024preproc, im720preproc, im1024, im720]`` where ``orig``
        is the zero-padded square image.
    """
    im = pngread(inputpath)
    if len(im.shape)==3:
        print('Images should be grayscale but had dimensions {} - automatically converted'.format(im.shape))
        im = np.sum(im,2)
    # NOTE(review): np.uint8() applied to the img_as_int result presumably wraps
    # values instead of rescaling them to 0..255 - confirm this is intended
    # (left unchanged so inference inputs stay as they were).
    im = np.uint8(img_as_int(exposure.rescale_intensity(im, out_range=(0, 2**15 - 1))))

    # Pad the shorter dimension with zeros so the image becomes square. The
    # padding is split floor/ceil so an odd difference still gives an exact square.
    height, width = im.shape
    edgediff = abs(height - width)
    padbefore = edgediff // 2
    padafter = edgediff - padbefore
    orig = im
    if height > width:
        # Taller than wide: add columns left and right.
        orig = cv2.copyMakeBorder(im, 0, 0, padbefore, padafter, cv2.BORDER_CONSTANT,value=[0,0,0])
    elif width > height:
        # Wider than tall: add rows on top and bottom. (This case was never
        # reached before because the same condition was tested twice.)
        orig = cv2.copyMakeBorder(im, padbefore, padafter, 0, 0, cv2.BORDER_CONSTANT,value=[0,0,0])

    # Resized variants of the square image.
    im1024 = cv2.resize(orig, (1024,1024), interpolation = cv2.INTER_AREA)
    im720 = cv2.resize(orig, (720,720), interpolation = cv2.INTER_AREA)
    # Preprocessed versions of both sizes.
    im1024preproc = preproc(im1024)
    im720preproc = preproc(im720)
    return([orig, im1024preproc,im720preproc, im1024, im720])

def populate_inputs(localpaths,batchid = ''):
    """Create the input variants of every image and upload them to S3.

    For each path, ``createmultipleinputs`` is called and every returned image
    is saved as ``/tmp/<batchid>/<batchid>_<name>__<label>.jpg``. The folder is
    then synced to ``submissions/<batchid>/`` in the results bucket with the
    AWS CLI.

    Parameters
    ----------
    localpaths : iterable of str
        Local image files to process.
    batchid : str, optional
        Identifier used for the local folder, the file-name prefix and the
        S3 destination folder.
    """
    import subprocess  # local import: not among the imports of this notebook

    outdir = '/tmp/{}/'.format(batchid)
    os.makedirs(outdir, exist_ok=True)
    imlabels = ['orig', 'im1024pp','im720pp','im1024','im720']
    for filepath in localpaths:
        resimages = createmultipleinputs(filepath)
        # File name up to its first dot. Taken from the base name only, so a
        # dot somewhere in the directory part cannot truncate the path.
        stem = os.path.basename(filepath).split('.')[0]
        for label, image in zip(imlabels, resimages):
            savepath = '/tmp/'+batchid+'/'+batchid+'_'+stem+'__'+label+'.jpg'
            pngsave(savepath, image)

    destination = 's3://sagemaker-eu-west-1-102554356212/submissions/{}/'.format(batchid)
    # Argument list instead of a shell string: no quoting problems with
    # unusual batch ids, and the exit status is checked.
    returncode = subprocess.call(['aws', 's3', 'sync', outdir, destination])
    if returncode != 0:
        print('aws s3 sync {} -> {} failed with exit code {}'.format(outdir, destination, returncode))
# Submit every image file of the input folder as batch 'itzik'.
inputpath = '/home/ec2-user/SageMaker/itzik_images_cropped/'
image_suffixes = ('.jpg', '.png', '.tif')
files = [
    os.path.join(inputpath, name)
    for name in os.listdir(inputpath)
    if any(suffix in name for suffix in image_suffixes)
]
populate_inputs(files, batchid='itzik')

### Run batch jobs from saved models

In [2]:
#Batch Job
import sagemaker
import boto3
from sagemaker import get_execution_role
from IPython.core.debugger import set_trace
# Execution role, session and default bucket for the batch-transform cells.
# These are the same calls as in the setup cell at the top (presumably
# repeated so that this part can be run on its own).
role = get_execution_role()
print(role)
sess = sagemaker.Session()
bucket = sess.default_bucket()
# model_id = "fresh-train-trial-2019-07-28-08-49-49-994"
# model_id = "semantic-segmentatio-190726-1931-032-e7d26e04"

def runbatch(model_id, batchid='', wait=False):
    """Start a batch-transform job for a saved model on the submitted images.

    Previous results of the model (keys containing ``results_<model_id>`` and
    ``out`` or ``masks``) are deleted first. The job reads JPEG images from
    ``submissions/`` and writes PNG masks to ``results_<model_id>/<batchid>/``.

    Parameters
    ----------
    model_id : str
        Name of the SageMaker model to run.
    batchid : str, optional
        Sub-folder of the output location.
    wait : bool, optional
        When True, block until the transform job has finished. The default
        (False) returns right after the job was started.

    Returns
    -------
    sagemaker.transformer.Transformer
        The transformer that owns the job, so callers can ``.wait()`` on it.
    """
    results_bucket = 'sagemaker-eu-west-1-102554356212'
    env = {'SAGEMAKER_MODEL_SERVER_TIMEOUT' : '3600' }

    # Remove stale results of this model. The objects are deleted from the
    # same bucket they were listed from (not from the session default bucket).
    s3results = boto3.resource('s3').Bucket(name=results_bucket)
    removesamples = [obj.key for obj in s3results.objects.all() if ("results_"+model_id in obj.key and ("out" in obj.key or "masks" in obj.key))]
    s3client = boto3.client('s3')
    for removeme in removesamples:
        s3client.delete_object(Bucket = results_bucket, Key = removeme)

    transform_job = sagemaker.transformer.Transformer(
        model_name = model_id,
        instance_count = 1,
        instance_type = 'ml.p3.2xlarge',
        strategy = 'SingleRecord',
        assemble_with = 'None',
        output_path = "s3://{}/results_{}/{}/".format(results_bucket, model_id, batchid),
        base_transform_job_name='inference-pipelines-batch',
        sagemaker_session=sess,
        accept = 'image/png',
        env = env)
    # NOTE(review): the input prefix is the whole 'submissions/' folder and
    # does not depend on batchid, while the output path does - confirm that
    # transforming every submitted batch is intended.
    transform_job.transform(data = 's3://{}/submissions/'.format(results_bucket),
                            content_type = 'image/jpeg',
                            split_type = None)
    if wait:
        transform_job.wait()
    return transform_job

# Start one batch job per saved model (default batch id).
for _model_id in ("semantic-segmentatio-190726-1931-032-e7d26e04",
                  "fresh-train-trial-2019-07-28-08-49-49-994"):
    runbatch(_model_id)
# transform_job.wait()

arn:aws:iam::102554356212:role/service-role/AmazonSageMaker-ExecutionRole-20181129T100657


### Read batch processed results and export mask back to S3

In [4]:
%%capture
# Download data from batch job
import boto3
import mxnet as mx
from PIL import Image
import numpy as np
import io
import os
from skimage.io import imread as pngread
from skimage.io import imsave as pngsave

def batch2masks(model_id, batchid = ''):
    """Download the batch-transform outputs of a model and save them as images.

    Every object of the results bucket whose key contains
    ``results_<model_id>/`` and ``out`` is downloaded, decoded with PIL and
    written to ``/home/ec2-user/SageMaker/tmp/`` under its original file name
    without the last extension.

    Parameters
    ----------
    model_id : str
        Model whose results are fetched.
    batchid : str, optional
        Currently not used by this function.
    """
    bucketname = 'sagemaker-eu-west-1-102554356212'
    outdir = '/home/ec2-user/SageMaker/tmp/'
    s3 = boto3.resource('s3')
    s3results = s3.Bucket(name=bucketname)
    keys = [obj.key for obj in s3results.objects.all()]
    os.makedirs('/tmp/results/', exist_ok=True)
    # The masks are written to `outdir`, so that folder has to exist as well.
    os.makedirs(outdir, exist_ok=True)
    for s3_object in keys:
        is_result = (not s3_object.endswith("/")
                     and "results_"+model_id+"/" in s3_object
                     and "out" in s3_object)
        if not is_result:
            continue
        s3.meta.client.download_file(bucketname, s3_object, '/tmp/tempfile.out')
        with open('/tmp/tempfile.out', 'rb') as image:
            mask = np.array(Image.open(io.BytesIO(image.read())))
        # Drop only the last extension of the object name (e.g. '.out').
        pngsave(outdir+'.'.join(s3_object.split('/')[-1].split('.')[:-1]), mask)
#     os.system("aws s3 sync '/tmp/results/{}/' 's3://sagemaker-eu-west-1-102554356212/results_{}/masks/' ".format(batchid,model_id))

# Fetch the masks of both models.
for _model_id in ("fresh-train-trial-2019-07-28-08-49-49-994",
                  "semantic-segmentatio-190726-1931-032-e7d26e04"):
    batch2masks(_model_id)

### Merge multiple masks from different models

In [None]:
from processfiles import *
from scipy import ndimage as ndi
from skimage.feature import peak_local_max
from skimage.segmentation import watershed
from skimage.color import label2rgb
import threading
s3 = boto3.resource('s3')
s3_resource = boto3.resource('s3')
s3results = s3_resource.Bucket(name='sagemaker-eu-west-1-102554356212')
keys = [obj.key for obj in s3results.objects.all()]
for s3_object in keys:
    t = threading.Thread(target = merge_masks, args=(s3_object,["fresh-train-trial-2019-07-28-08-49-49-994","semantic-segmentatio-190726-1931-032-e7d26e04"],batchid,)).start()
!aws s3 sync '/tmp/results/merge/merged/' 's3://sagemaker-eu-west-1-102554356212/results_merged/masks/'

### Merge multiple masks from different inputs (different pre-processing)

In [None]:
import pandas as pd
import boto3
from processfiles import *
from scipy import ndimage as ndi
import threading

def merge_masks_diff_inputs(groupkeys, batchid = ''):
    """Merge the masks of one group of S3 objects into a single binary mask.

    Every key is downloaded to ``/tmp/results/``, resized to 1024x1024 in
    place, and the local files are combined with ``merge_two_masks``. The
    combined mask is run through a distance-transform watershed and saved,
    binarised, to ``/tmp/results/<batchid>/inputmerged/``. The output name is
    derived from the last key of the group (text before ``'__'``, with
    ``'merged_'`` replaced by ``'inputmerged_'``).

    Parameters
    ----------
    groupkeys : iterable of str
        S3 keys (results bucket) of the masks that belong together.
    batchid : str, optional
        Sub-folder of ``/tmp/results/`` for the output.
    """
    # Create the folder the result is written to (this also creates
    # /tmp/results/<batchid>/ and /tmp/results/ used for the downloads).
    outdir = '/tmp/results/'+batchid+'/inputmerged/'
    os.makedirs(outdir, exist_ok=True)
    outpaths = []
    for s3_object in groupkeys:
        outpath = os.path.join('/tmp/results/'+s3_object.split('/')[-1])
        s3.meta.client.download_file('sagemaker-eu-west-1-102554356212', s3_object, outpath)
        pngsave(outpath,cv2.resize(pngread(outpath), (1024,1024), interpolation = cv2.INTER_AREA))
        outpaths.append(outpath)
    print(outpaths)
    if outpaths:
        binarymask = merge_two_masks(outpaths)
        distance = ndi.distance_transform_edt(binarymask)
        # Peak coordinates (default return of peak_local_max) turned into a
        # boolean seed image; `indices=False` is not accepted by newer
        # scikit-image releases.
        peak_coords = peak_local_max(distance, labels=binarymask, footprint=np.ones((3, 3)))
        local_maxi = np.zeros(distance.shape, dtype=bool)
        local_maxi[tuple(peak_coords.T)] = True
        markers = ndi.label(local_maxi)[0]
        mask = watershed(-distance, markers, mask=binarymask)
        # `s3_object` still holds the last key of the group here.
        savepath = os.path.join(outdir,s3_object.split('/')[-1].split('__')[0].replace('merged_','inputmerged_')+'.jpg')
        pngsave(savepath, np.uint8(mask>0))

batchid = 'itzik'
s3 = boto3.resource('s3')
s3_resource = boto3.resource('s3')
s3results = s3_resource.Bucket(name='sagemaker-eu-west-1-102554356212')
# Merged-mask objects of this batch. Only file names that contain '__' can be
# parsed below; any other matching key would raise an IndexError and abort
# the whole cell, so those are left out.
keys = [obj.key for obj in s3results.objects.all()
        if (not obj.key.endswith("/"))
        and ("merged" in obj.key and batchid in obj.key)
        and '__' in obj.key.split('/')[-1]]
# NOTE(review): the grouping token is the text AFTER '__' in the file name,
# while the output name inside merge_masks_diff_inputs uses the text BEFORE
# it - confirm that this groups the masks of one original image.
df = pd.DataFrame({'keys':keys,'orig_name':[k.split('/')[-1].split('__')[1].split('.jpg')[0] for k in keys]})
originals = np.unique(df['orig_name'].values)
for org in originals:
    merge_masks_diff_inputs(groupkeys = df['keys'].loc[df['orig_name']==org].values,batchid = batchid)
# !aws s3 sync '/tmp/results/merge/input_merged/' 's3://sagemaker-eu-west-1-102554356212/results_merged/input_merged_masks/'
# os.system("aws s3 sync '/tmp/results/{}/' 's3://sagemaker-eu-west-1-102554356212/results_merged/masks_{}/' ".format(batchid,batchid))

### Image Input Pipeline (for inference)

In [None]:
import random
import string
def randomString(stringLength=10):
    """Return ``stringLength`` lowercase ASCII letters, each picked at random."""
    alphabet = string.ascii_lowercase
    picked = []
    for _ in range(stringLength):
        picked.append(random.choice(alphabet))
    return ''.join(picked)

# def submitimages(input_dir_path,batchid = randomString(10)):

import time  # used only for the polling loop below

input_dir_path = "/home/ec2-user/SageMaker/itzik_images_cropped/"
# Replace '_' by '-' in the input file names. os.listdir returns bare names,
# so both ends of the rename need the directory prepended.
for filename in os.listdir(input_dir_path):
    newname = filename.replace('_','-')
    if newname != filename:
        os.rename(os.path.join(input_dir_path, filename), os.path.join(input_dir_path, newname))

s3 = boto3.resource('s3')
s3_resource = boto3.resource('s3')
s3results = s3_resource.Bucket(name='sagemaker-eu-west-1-102554356212')
#create different inputs
files = os.listdir(input_dir_path)
files = [os.path.join(input_dir_path,f) for f in files if '.jpg' in f or '.png' in f or '.tif' in f]
populate_inputs(files, batchid = batchid)
#model1 - infer mask for all inputs
runbatch("semantic-segmentatio-190726-1931-032-e7d26e04", batchid = batchid)
#model2 - infer mask for all inputs
runbatch("fresh-train-trial-2019-07-28-08-49-49-994", batchid = batchid)
# The transform jobs run asynchronously: wait until no job with the
# notebook's job-name prefix is in progress before reading results.
smclient = boto3.client('sagemaker')
while smclient.list_transform_jobs(NameContains='inference-pipelines-batch', StatusEquals='InProgress')['TransformJobSummaries']:
    time.sleep(30)
#merge masks from different models
keys = [obj.key for obj in s3results.objects.all() if batchid in obj.key]
# Keep the Thread objects (start() returns None) and wait for all of them, so
# the upload below does not race the merges.
workers = [threading.Thread(target = merge_masks, args=(key,["fresh-train-trial-2019-07-28-08-49-49-994","semantic-segmentatio-190726-1931-032-e7d26e04"],batchid)) for key in keys]
for worker in workers:
    worker.start()
for worker in workers:
    worker.join()
# The source path now has its closing quote; without it the shell command was malformed.
os.system("aws s3 sync '/tmp/results/{}/merge/merged/' 's3://sagemaker-eu-west-1-102554356212/results_merged/{}/masks/'".format(batchid,batchid))
#merge masks from different inputs
# Only file names that contain '__' can be parsed into a group name.
keys = [obj.key for obj in s3results.objects.all()
        if ('results_merged' in obj.key and 'masks' in obj.key and batchid in obj.key)
        and '__' in obj.key.split('/')[-1]]
df = pd.DataFrame({'keys':keys,'orig_name':[k.split('/')[-1].split('__')[1].split('.jpg')[0] for k in keys]})
originals = np.unique(df['orig_name'].values)
for org in originals:
    merge_masks_diff_inputs(groupkeys = df['keys'].loc[df['orig_name']==org].values,batchid = batchid)
# NOTE(review): this uploads the same merge/merged folder again, while
# merge_masks_diff_inputs writes to /tmp/results/<batchid>/inputmerged/ -
# confirm which folder and which S3 destination are intended here.
os.system("aws s3 sync '/tmp/results/{}/merge/merged/' 's3://sagemaker-eu-west-1-102554356212/results_merged/{}/masks/'".format(batchid,batchid))