In [None]:
%matplotlib inline
cur_dir = '/home/rob/Udacity/capstone/data'
import os
from collections import defaultdict
import cv2
import csv
import numpy as np
from shapely.geometry import MultiPolygon, Polygon
import shapely.wkt
import shapely.affinity
import tifffile as tiff

def mask_to_polygons(mask, epsilon=10., min_area=10.):
    """Vectorise a binary mask into a shapely MultiPolygon.

    Parameters
    ----------
    mask : array
        2-D mask; pixels equal to 1 (or True) are treated as foreground.
    epsilon : float
        Approximation accuracy handed to ``cv2.approxPolyDP``; larger values
        give coarser polygons with fewer vertices.
    min_area : float
        Outer contours and holes whose ``cv2.contourArea`` is below this
        value are discarded as artifacts.

    Returns
    -------
    A ``MultiPolygon`` (empty when no contour is found). If the polygons
    had to be repaired with ``buffer`` and the result is a single
    ``Polygon``, it is wrapped back into a ``MultiPolygon``; any other
    geometry type produced by ``buffer`` is returned unchanged.
    """
    # first, find contours with cv2: it's much faster than shapely.
    # OpenCV 3 returns (image, contours, hierarchy) whereas OpenCV 2 and 4
    # return (contours, hierarchy); the last two items are always what we need.
    contours, hierarchy = cv2.findContours(
        ((mask == 1) * 255).astype(np.uint8),
        cv2.RETR_CCOMP, cv2.CHAIN_APPROX_TC89_KCOS)[-2:]
    if not contours:
        return MultiPolygon()
    # create approximate contours to have reasonable submission size
    approx_contours = [cv2.approxPolyDP(cnt, epsilon, True)
                       for cnt in contours]
    # now messy stuff to associate parent and child contours
    cnt_children = defaultdict(list)
    child_contours = set()
    assert hierarchy.shape[0] == 1
    # http://docs.opencv.org/3.1.0/d9/d8b/tutorial_py_contours_hierarchy.html
    # each hierarchy row is (next, previous, first_child, parent)
    for idx, (_, _, _, parent_idx) in enumerate(hierarchy[0]):
        if parent_idx != -1:
            child_contours.add(idx)
            cnt_children[parent_idx].append(approx_contours[idx])
    # create actual polygons filtering by area (removes artifacts)
    all_polygons = []
    for idx, cnt in enumerate(approx_contours):
        if idx not in child_contours and cv2.contourArea(cnt) >= min_area:
            assert cnt.shape[1] == 1
            # contours are (n_points, 1, 2); drop the middle axis for shapely
            poly = Polygon(
                shell=cnt[:, 0, :],
                holes=[c[:, 0, :] for c in cnt_children.get(idx, [])
                       if cv2.contourArea(c) >= min_area])
            all_polygons.append(poly)
    # approximating polygons might have created invalid ones, fix them
    all_polygons = MultiPolygon(all_polygons)
    if not all_polygons.is_valid:
        try:
            all_polygons = all_polygons.buffer(0)
        except ValueError:
            print("standard buffer not possible. Use -1")
            try:
                # Shrink by one unit, as the message announces (the previous
                # code retried buffer(0) here, which could only fail again).
                all_polygons = all_polygons.buffer(-1)
            except ValueError:
                print("-1 buffer not possible. Use -2")
                all_polygons = all_polygons.buffer(-2)
        # Sometimes buffer() converts a simple Multipolygon to just a Polygon,
        # need to keep it a Multi throughout
        if all_polygons.geom_type == 'Polygon':
            all_polygons = MultiPolygon([all_polygons])
    return all_polygons

def get_scalers(im_size, grid_x_max=None, grid_y_min=None):
    """Return the (x, y) factors that map grid coordinates to pixel coordinates.

    Parameters
    ----------
    im_size : (height, width)
        Image size in pixels, in the order given by ``ndarray.shape[:2]``.
    grid_x_max, grid_y_min : float, optional
        Grid extents of the image. When omitted, the notebook-level globals
        ``x_max`` and ``y_min`` are used, which is how existing callers rely
        on this function.

    Returns
    -------
    (x_scaler, y_scaler) : tuple of float
        ``w * (w / (w + 1)) / grid_x_max`` and ``h * (h / (h + 1)) / grid_y_min``.
    """
    if grid_x_max is None:
        grid_x_max = x_max  # fall back to the global set by the caller
    if grid_y_min is None:
        grid_y_min = y_min  # fall back to the global set by the caller
    # im_size is (rows, cols); the result below is returned in (x, y) order.
    h, w = im_size
    h, w = float(h), float(w)
    w_ = w * (w / (w + 1))
    h_ = h * (h / (h + 1))
    return w_ / grid_x_max, h_ / grid_y_min

def show_mask(m):
    """Show mask `m` with tifffile, replicated into three identical planes and scaled by 255."""
    stacked = np.stack([m, m, m])
    tiff.imshow(255 * stacked)
    
print("started")

# Collect the image IDs that need a prediction from the sample submission.
# `cur_dir` is the data directory configured at the top of this cell.
submIM_IDs = []
with open(os.path.join(cur_dir, 'sample_submission.csv')) as csvfile:
    # Use the csv default quote character ('"'). The previous quotechar=','
    # clashed with the delimiter, which mis-declares the file's quoting and
    # is an invalid dialect on newer Python versions.
    reader = csv.reader(csvfile, delimiter=',')
    for i, row in enumerate(reader):
        # Row 0 is the header. Every 10th data row is kept, which yields each
        # image ID once -- presumably because the file holds ten rows (one per
        # class) per image; verify against the data.
        if i > 0 and i % 10 == 0:
            submIM_IDs.append(row[0])
            
# Tuning notes per class index, written as (threshold, epsilon, min_area).
# These notes do not match the active values in the table below.
#   0 buildings:      .25, 0.1, 3.
#   1 misc. manmade:  .2,  5.,  3.
#   2 street:         .35, 1.,  3.
#   3 track:          .25, 1.,  3.
#   4 trees:          .25, 1.,  3.
#   5 crops:          .25, 3.,  3.
#   6 waterway:       .25, 1.,  15.
#   7 standing water: .35, 1.,  3.
#   8 cars:           doesn't matter; probably a low threshold
#   9 large cars:     same as cars
# Previous thresholds: [.250, .15, .250, .25, .15, .20, .35, .30, .1, .1]

# Active settings, one row per class index: (threshold, epsilon, min_area).
_CLASS_PARAMS = [
    (0.25, 3., 4.0),   # 0 buildings
    (0.20, 2., 2.),    # 1 misc. manmade
    (0.25, 2., 5.),    # 2 street
    (0.25, 2., 5.),    # 3 track
    (0.25, 2., 2.),    # 4 trees
    (0.25, 4., 10.),   # 5 crops
    (0.30, 4., 10.),   # 6 waterway
    (0.30, 4., 10.),   # 7 standing water
    (0.15, 2., 1.),    # 8 cars
    (0.15, 2., 1.),    # 9 large cars
]
# Split the table into the three parallel lists indexed by class.
thresholds, epsilons, min_areas = [list(column) for column in zip(*_CLASS_PARAMS)]
            
            
direct = 'prediction_final_final_NNET/'

# Read grid_sizes.csv once instead of re-opening and re-scanning it for every
# image. Values stay as strings here so the header row needs no special
# handling; they are converted to float only for the IDs actually used.
grid_sizes = {}
with open(cur_dir + '/grid_sizes.csv') as grid_file:
    for grid_row in csv.reader(grid_file):
        if len(grid_row) == 3:
            # setdefault keeps the first occurrence of an ID, matching the
            # former "stop at the first match" lookup.
            grid_sizes.setdefault(grid_row[0], (grid_row[1], grid_row[2]))

# One [ImageId, ClassType, WKT] row per image and class.
predictions = []
counter = 0
for IM_ID in submIM_IDs:
    print(counter)
    filename = "pred_{}-0-0.npz".format(IM_ID)
    # Close the archive as soon as the array has been read into memory.
    with np.load(direct + filename) as npz:
        masks = npz['arr_0'][0]

    if IM_ID not in grid_sizes:
        raise KeyError("no grid size found for image {}".format(IM_ID))
    # get_scalers() reads these module-level names, so they must be set
    # before it is called.
    x_max, y_min = [float(value) for value in grid_sizes[IM_ID]]

    # All class channels share the same height/width, so the scalers only
    # need to be computed once per image.
    im_size = masks.shape[:2]
    x_scaler, y_scaler = get_scalers(im_size)

    for i in range(1, 11):
        POLY_TYPE = str(i)  # class types are 1-based in the submission
        index = i - 1       # channel / parameter index is 0-based
        mask = masks[:, :, index]

        threshold = thresholds[index]
        epsilon = epsilons[index]
        min_area = min_areas[index]

        pred_binary_mask = mask >= threshold

        pred_polygons = mask_to_polygons(
            pred_binary_mask, epsilon=epsilon, min_area=min_area)
        # Convert pixel coordinates back to grid coordinates.
        scaled_pred_polygons = shapely.affinity.scale(
            pred_polygons, xfact=1 / x_scaler, yfact=1 / y_scaler, origin=(0, 0, 0))
        dumped_prediction = shapely.wkt.dumps(scaled_pred_polygons)

        # An empty result may be dumped as an empty geometry collection;
        # write it as an empty multipolygon instead.
        if dumped_prediction == 'GEOMETRYCOLLECTION EMPTY':
            dumped_prediction = 'MULTIPOLYGON EMPTY'

        predictions.append([IM_ID, POLY_TYPE, dumped_prediction])

    counter += 1

print("job done")

In [None]:
# Dump the collected predictions as the submission file: a header row
# followed by one row per (image, class). The file is opened in binary
# mode, as the Python 2 csv module expects.
header = ['ImageId', 'ClassType', 'MultipolygonWKT']
with open("submissions/final_NNET_optimized.csv", "wb") as out_file:
    submission = csv.writer(out_file, delimiter=',')
    submission.writerow(header)
    submission.writerows(predictions)

