In [1]:
import os
import cv2
import rasterio
import numpy as np 
import pandas as pd 
import tifffile
import matplotlib.pyplot as plt
from skimage.measure import label, regionprops

# Data locations. `target_root` keeps its trailing slash, so the test-image
# folder is formed by plain concatenation.
target_root = "E:/睡眠分期数据/hubmap/"
img_root = "{}test".format(target_root)

# Directory listing of the test-image folder (raises if the folder is missing).
filelist = os.listdir(img_root)

def mask2rle(mask):
    '''Encode a 2-D mask as a space-delimited run-length string.

    mask: 2-D array-like; every non-zero element counts as foreground, so
          boolean, integer (0/1, 0/255, ...) and float masks all encode the
          same way.
    Returns "start length start length ..." where starts are 1-based offsets
    into the mask flattened column-first. An all-background mask gives ''.
    '''
    # Coerce to bool first: `~` on an integer array is a bitwise NOT, which
    # only behaves like a logical NOT for strict 0/1 data and fails on floats.
    flat = np.asarray(mask, dtype=bool).T.reshape(-1)  # 1-D, column-first
    # Pad with background on both sides so every run has a start and an end.
    flat = np.pad(flat, 1, mode="constant")
    starts = np.nonzero(~flat[:-1] & flat[1:])[0]  # background -> foreground
    ends = np.nonzero(flat[:-1] & ~flat[1:])[0]    # foreground -> background
    # Explicit 64-bit: offsets into very large images can exceed 32 bits,
    # and the platform default integer is not 64-bit everywhere.
    rle = np.empty(2 * starts.size, dtype=np.int64)
    rle[0::2] = starts + 1     # 1-based run starts
    rle[1::2] = ends - starts  # run lengths
    return ' '.join(map(str, rle.tolist()))

def rle_decode(mask_rle, shape=(256, 256)):
    '''
    Decode a run-length string into a binary mask.

    mask_rle: run-length string formatted as "start length start length ...",
              with 1-based starts into the column-first flattened mask.
              None or NaN (what pandas yields for an empty CSV cell) is
              treated as an empty mask.
    shape: (height, width) of the array to return
    Returns a uint8 numpy array of the given shape, 1 - mask, 0 - background
    Raises ValueError if the string holds an odd number of tokens.
    '''
    if mask_rle is None or (isinstance(mask_rle, float) and np.isnan(mask_rle)):
        tokens = []
    else:
        tokens = mask_rle.split()
    if len(tokens) % 2:
        # Without this check NumPy broadcasting can silently pair a lone
        # length with several starts and decode a wrong mask.
        raise ValueError(
            "RLE string must contain start/length pairs, got {} tokens".format(len(tokens))
        )

    # Explicit 64-bit: offsets into very large images can exceed 32 bits.
    starts = np.asarray(tokens[0::2], dtype=np.int64) - 1  # to 0-based
    lengths = np.asarray(tokens[1::2], dtype=np.int64)
    ends = starts + lengths

    img = np.zeros(shape[0] * shape[1], dtype=np.uint8)
    for lo, hi in zip(starts, ends):
        img[lo:hi] = 1
    # Runs are column-first, so rebuild the 2-D mask in Fortran order.
    return img.reshape(shape, order='F')

def step1(mask, work_size=2048, min_area=100):
    '''Erase small connected components from a 2-D mask.

    The mask is resized to work_size x work_size (cv2.resize with its default
    interpolation), connected components are labelled there with full
    connectivity, and for every component whose area in that resized image is
    at most `min_area` pixels the matching bounding box in the full-size mask
    is set to 0.

    mask: 2-D array, modified in place. Assumed to be a dtype cv2.resize
          accepts (the visible callers pass uint8) - TODO confirm for others.
    work_size: side length of the square working image (default 2048).
    min_area: largest component area, in working-image pixels, that is still
              removed (default 100).
    Returns the same `mask` object.
    '''
    height, width = mask.shape[0], mask.shape[1]
    mask_re = cv2.resize(mask, (work_size, work_size))
    label_img = label(mask_re, connectivity=mask_re.ndim)
    for region in regionprops(label_img):
        if region.area > min_area:
            continue
        # bbox is (min_row, min_col, max_row, max_col), max edges exclusive.
        r0, c0, r1, c1 = (int(v) for v in region.bbox)
        # Scale back to full resolution: floor the near edge and round the far
        # edge UP, so the cleared box covers the whole component instead of
        # possibly leaving a one-pixel strip behind.
        top = r0 * height // work_size
        left = c0 * width // work_size
        bottom = -((-r1 * height) // work_size)
        right = -((-c1 * width) // work_size)
        mask[top:bottom, left:right] = 0
    return mask

In [2]:
# Ensemble several submission CSVs by per-pixel majority vote, clean the
# result with step1, and write a single fixed submission.
csvfloder = "./submission"

# Read only *.csv files, in sorted order, so stray files in the folder are
# ignored and the run does not depend on directory listing order.
predcsvs = [
    pd.read_csv("{}/{}".format(csvfloder, fn))
    for fn in sorted(os.listdir(csvfloder))
    if fn.lower().endswith(".csv")
]
if not predcsvs:
    raise FileNotFoundError("no .csv files found in {}".format(csvfloder))

n_models = len(predcsvs)
# mean >= 0.5 over n binary masks is the same as "at least ceil(n / 2) votes".
min_votes = (n_models + 1) // 2

out_info = []
# NOTE(review): `transform` is forwarded to rasterio.open unchanged from the
# original code; presumably meant to silence the not-georeferenced warning -
# verify that it has any effect.
identity = rasterio.Affine(1, 0, 0, 0, 1, 0)
for id_ in predcsvs[0]['id']:
    print("正在处理 {}".format(id_))
    # Only the image size is needed, so close the dataset right away.
    with rasterio.open("{}/{}.tiff".format(img_root, id_), transform=identity) as dataset:
        shape = dataset.shape

    # One small-integer vote counter instead of an (n_models, H, W) float16
    # stack: same decision, a fraction of the memory.
    votes = np.zeros(shape, dtype=np.min_scalar_type(n_models))
    for pred_df in predcsvs:
        rle = pred_df.loc[pred_df['id'] == id_, 'predicted'].iloc[0]
        if pd.isna(rle):
            rle = ''  # empty prediction is read back from CSV as NaN
        votes += step1(rle_decode(rle, shape))

    masks = step1((votes >= min_votes).astype(np.uint8))
    del votes
    out_info.append({'id': id_, 'predicted': mask2rle(masks)})
    del masks

submission = pd.DataFrame(out_info, columns=['id', 'predicted'])
submission.to_csv('fixed_submission.csv', index=False)

正在处理 26dc41664


  s = DatasetReader(path, driver=driver, sharing=sharing, **kwargs)


正在处理 afa5e8098
正在处理 b2dc8411c
正在处理 b9a3865fc
正在处理 c68fe75ea


In [3]:
# Scratch check of the average-then-threshold step on a small random stack.
# Seeded so that re-running the cell reproduces the same `t`.
rng = np.random.default_rng(0)
masks = rng.standard_normal((4, 512, 512))
t_mean = np.mean(masks, axis=0, dtype=np.float16)
t = (t_mean >= 0.5).astype(np.uint8)