In [11]:
%autosave 60 

Autosaving every 60 seconds


In [12]:
#default_exp segmentation.dataset

In [13]:
import fastai; print(fastai.__version__)

1.0.58.dev0


In [14]:
#export
from fastai.vision import *

### databunch

In [20]:
#export 
class SemanticSegmentationData:
    """Creates semantic segmentation dataset from fastai datablock API.

    Parameters
    ----------
    PATH : dataset root directory (str or Path).
    IMAGES : name of the image folder, relative to `PATH`.
    MASKS : name of the mask folder, relative to `PATH`; the mask for an image
        is looked up as `<MASKS>/<image stem>.png`.
    CODES : text file (relative to `PATH`) with the class names, read with `np.loadtxt`.
    TRAIN, VALID, TEST : header-less CSV/text files (relative to `PATH`) whose first
        column lists the image file names. `VALID=None` means "split the training
        set randomly"; `TEST=None` means "no test set".
    sample_size : stored on the instance; not used by this class yet.
    bs : batch size passed to `databunch`.
    size : images and masks are squished to `(size, size)`.
    has_test_labels : stored on the instance; not used by this class yet.
    """
    def __init__(self, PATH, IMAGES, MASKS, CODES, TRAIN, VALID, TEST,
                     sample_size, bs, size, has_test_labels=True):

        # Coerce so that plain-string roots work with the `/` joins below.
        self.path = Path(PATH)
        self.sample_size, self.bs, self.size, self.has_test_labels =\
                                            sample_size, bs, size, has_test_labels
        self.codes = np.loadtxt(self.path/CODES, dtype=str)

        self.train_df = pd.read_csv(self.path/TRAIN, header=None)
        # Always define both attributes so later code can test them against None
        # instead of risking an AttributeError.
        self.valid_df = pd.read_csv(self.path/VALID, header=None) if VALID is not None else None
        self.test_df = pd.read_csv(self.path/TEST, header=None) if TEST is not None else None

        self.IMAGES, self.MASKS = IMAGES, MASKS
        self.path_img = self.path/IMAGES
        self.path_lbl = self.path/MASKS

        self.VALID, self.TEST = VALID, TEST

    def get_y_fn(self, x):
        "Return the mask path for image `x`: same stem, `.png` suffix, inside the mask folder."
        return self.path_lbl/f'{Path(x).stem}.png'

    def _train_valid_frame(self):
        "Build the frame fed to the item list; adds an `is_valid` column when a validation file was given."
        if self.valid_df is None: return self.train_df
        # concat returns a new frame, so `train_df` / `valid_df` are left untouched.
        frame = pd.concat([self.train_df, self.valid_df], ignore_index=True)
        frame.columns = ["images"]
        frame["is_valid"] = len(self.train_df)*[False] + len(self.valid_df)*[True]
        return frame

    def get_data(self):
        "Assemble and return the databunch (train/valid, plus an unlabelled test set when `TEST` was given)."
        self.train_valid_df = self._train_valid_frame()

        # The image folder comes from the IMAGES argument rather than a hard-coded name.
        il = SegmentationItemList.from_df(self.train_valid_df, self.path, folder=self.IMAGES) # get
        if self.valid_df is not None: ill = il.split_from_df("is_valid") # split
        else: ill = il.split_by_rand_pct() # split (unseeded: differs between runs)
        ll = ill.label_from_func(self.get_y_fn, classes=self.codes) # label

        data = (ll.transform(get_transforms(), size=(self.size, self.size), tfm_y=True,
                             resize_method=ResizeMethod.SQUISH)
                    .databunch(bs=self.bs))
        # add_test
        if self.test_df is not None:
            test_il = SegmentationItemList.from_df(self.test_df, self.path, folder=self.IMAGES) # get
            data.add_test(test_il, tfm_y=False)
        return data

    def __repr__(self):
        n_valid = len(self.valid_df) if self.valid_df is not None else None
        n_test = len(self.test_df) if self.test_df is not None else None
        return (f"{self.__class__.__name__}(path={str(self.path)!r}, images={str(self.IMAGES)!r}, "
                f"masks={str(self.MASKS)!r}, classes={self.codes.size}, train={len(self.train_df)}, "
                f"valid={n_valid}, test={n_test}, bs={self.bs}, size={self.size})")

    def __str__(self):
        return repr(self)

### Download CAMVID to `.fastai/data`

In [21]:
# fastai.datasets.download_data(URLs.CAMVID)

In [18]:
# Location fastai downloads CAMVID to; derived from the current user's home
# directory instead of a machine-specific absolute path.
PATH = Path.home()/".fastai"/"data"/"camvid"

In [19]:
PATH.ls()

[PosixPath('/home/turgutluk/.fastai/data/camvid/codes.txt'),
 PosixPath('/home/turgutluk/.fastai/data/camvid/images'),
 PosixPath('/home/turgutluk/.fastai/data/camvid/labels'),
 PosixPath('/home/turgutluk/.fastai/data/camvid/valid.txt')]

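### Convert dataset to the expected format

`SemanticSegmentationData` expects a root folder containing an image folder, a mask folder with one `<image stem>.png` mask per image, a codes file listing the class names, and header-less train/valid/test text files listing the image file names.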

### multiclass: CAMVID

In [10]:
# custom validation: the train/valid split comes from train.txt / valid.txt
PATH, CODES, TRAIN, VALID, TEST = Path(USERSPACE/'camvid'), "codes.txt", "train.txt", "valid.txt", "test.txt"
# The class signature is (PATH, IMAGES, MASKS, CODES, ...); the folder names must be passed too.
# NOTE(review): "images"/"labels" assumed from the CAMVID folder listing — confirm for the converted copy.
IMAGES, MASKS = "images", "labels"
ssdata = SemanticSegmentationData(PATH, IMAGES, MASKS, CODES, TRAIN, VALID, TEST, sample_size=None, bs=4, size=112)
data = ssdata.get_data()

In [11]:
data

ImageDataBunch;

Train: LabelList (600 items)
x: SegmentationItemList
Image (3, 112, 112),Image (3, 112, 112),Image (3, 112, 112),Image (3, 112, 112),Image (3, 112, 112)
y: SegmentationLabelList
ImageSegment (1, 112, 112),ImageSegment (1, 112, 112),ImageSegment (1, 112, 112),ImageSegment (1, 112, 112),ImageSegment (1, 112, 112)
Path: /trainman-mount/trainman-storage-ac168968-e641-4146-85da-cf960ab9e0bc/camvid;

Valid: LabelList (101 items)
x: SegmentationItemList
Image (3, 112, 112),Image (3, 112, 112),Image (3, 112, 112),Image (3, 112, 112),Image (3, 112, 112)
y: SegmentationLabelList
ImageSegment (1, 112, 112),ImageSegment (1, 112, 112),ImageSegment (1, 112, 112),ImageSegment (1, 112, 112),ImageSegment (1, 112, 112)
Path: /trainman-mount/trainman-storage-ac168968-e641-4146-85da-cf960ab9e0bc/camvid;

Test: LabelList (101 items)
x: SegmentationItemList
Image (3, 112, 112),Image (3, 112, 112),Image (3, 112, 112),Image (3, 112, 112),Image (3, 112, 112)
y: EmptyLabelList
,,,,
Path: /train

In [18]:
list(data.test_ds.items)

['/trainman-mount/trainman-storage-ac168968-e641-4146-85da-cf960ab9e0bc/camvid/images/0016E5_07959.png',
 '/trainman-mount/trainman-storage-ac168968-e641-4146-85da-cf960ab9e0bc/camvid/images/0016E5_07961.png',
 '/trainman-mount/trainman-storage-ac168968-e641-4146-85da-cf960ab9e0bc/camvid/images/0016E5_07963.png',
 '/trainman-mount/trainman-storage-ac168968-e641-4146-85da-cf960ab9e0bc/camvid/images/0016E5_07965.png',
 '/trainman-mount/trainman-storage-ac168968-e641-4146-85da-cf960ab9e0bc/camvid/images/0016E5_07967.png',
 '/trainman-mount/trainman-storage-ac168968-e641-4146-85da-cf960ab9e0bc/camvid/images/0016E5_07969.png',
 '/trainman-mount/trainman-storage-ac168968-e641-4146-85da-cf960ab9e0bc/camvid/images/0016E5_07971.png',
 '/trainman-mount/trainman-storage-ac168968-e641-4146-85da-cf960ab9e0bc/camvid/images/0016E5_07973.png',
 '/trainman-mount/trainman-storage-ac168968-e641-4146-85da-cf960ab9e0bc/camvid/images/0016E5_07975.png',
 '/trainman-mount/trainman-storage-ac168968-e641-4146-8

In [55]:
# random validation: VALID=None makes get_data split the training set randomly
PATH, CODES, TRAIN, VALID, TEST = Path(USERSPACE/'camvid'), "codes.txt", "train.txt", None, "test.txt"
# The class signature is (PATH, IMAGES, MASKS, CODES, ...); the folder names must be passed too.
# NOTE(review): "images"/"labels" assumed from the CAMVID folder listing — confirm for the converted copy.
IMAGES, MASKS = "images", "labels"
ssdata = SemanticSegmentationData(PATH, IMAGES, MASKS, CODES, TRAIN, VALID, TEST, sample_size=None, bs=4, size=112)

In [56]:
data = ssdata.get_data()
data

ImageDataBunch;

Train: LabelList (480 items)
x: SegmentationItemList
Image (3, 112, 112),Image (3, 112, 112),Image (3, 112, 112),Image (3, 112, 112),Image (3, 112, 112)
y: SegmentationLabelList
ImageSegment (1, 112, 112),ImageSegment (1, 112, 112),ImageSegment (1, 112, 112),ImageSegment (1, 112, 112),ImageSegment (1, 112, 112)
Path: /trainman-mount/trainman-storage-ac168968-e641-4146-85da-cf960ab9e0bc/camvid;

Valid: LabelList (120 items)
x: SegmentationItemList
Image (3, 112, 112),Image (3, 112, 112),Image (3, 112, 112),Image (3, 112, 112),Image (3, 112, 112)
y: SegmentationLabelList
ImageSegment (1, 112, 112),ImageSegment (1, 112, 112),ImageSegment (1, 112, 112),ImageSegment (1, 112, 112),ImageSegment (1, 112, 112)
Path: /trainman-mount/trainman-storage-ac168968-e641-4146-85da-cf960ab9e0bc/camvid;

Test: LabelList (101 items)
x: SegmentationItemList
Image (3, 112, 112),Image (3, 112, 112),Image (3, 112, 112),Image (3, 112, 112),Image (3, 112, 112)
y: EmptyLabelList
,,,,
Path: /train

### binary: SIIM

In [59]:
# random validation (VALID=None): SIIM ships no validation list, so the training set is split randomly
PATH, CODES, TRAIN, VALID, TEST = Path(USERSPACE/'siim'), "codes.txt", "train.txt", None, "test.txt"
# The class signature is (PATH, IMAGES, MASKS, CODES, ...); the folder names must be passed too.
# NOTE(review): "masks" matches the folder used by the mask-fixing cells below; "images" is assumed — confirm.
IMAGES, MASKS = "images", "masks"
ssdata = SemanticSegmentationData(PATH, IMAGES, MASKS, CODES, TRAIN, VALID, TEST, sample_size=None, bs=4, size=112, has_test_labels=False)
data = ssdata.get_data()

In [60]:
data

ImageDataBunch;

Train: LabelList (9638 items)
x: SegmentationItemList
Image (3, 112, 112),Image (3, 112, 112),Image (3, 112, 112),Image (3, 112, 112),Image (3, 112, 112)
y: SegmentationLabelList
ImageSegment (1, 112, 112),ImageSegment (1, 112, 112),ImageSegment (1, 112, 112),ImageSegment (1, 112, 112),ImageSegment (1, 112, 112)
Path: /trainman-mount/trainman-storage-ac168968-e641-4146-85da-cf960ab9e0bc/siim;

Valid: LabelList (2409 items)
x: SegmentationItemList
Image (3, 112, 112),Image (3, 112, 112),Image (3, 112, 112),Image (3, 112, 112),Image (3, 112, 112)
y: SegmentationLabelList
ImageSegment (1, 112, 112),ImageSegment (1, 112, 112),ImageSegment (1, 112, 112),ImageSegment (1, 112, 112),ImageSegment (1, 112, 112)
Path: /trainman-mount/trainman-storage-ac168968-e641-4146-85da-cf960ab9e0bc/siim;

Test: LabelList (3205 items)
x: SegmentationItemList
Image (3, 112, 112),Image (3, 112, 112),Image (3, 112, 112),Image (3, 112, 112),Image (3, 112, 112)
y: EmptyLabelList
,,,,
Path: /trainm

In [104]:
data.c

2

### Fix SIIM masks

In [198]:
# mask_files = get_files(PATH/"masks")

In [218]:
# for fname in mask_files:
#     mask = open_mask(fname)
#     pixel_vals = mask.data.unique()
#     if len(pixel_vals) == 1: assert pixel_vals.item() == 0
#     elif len(pixel_vals) == 2: assert torch.equal(pixel_vals, tensor([1,0]))
#     else: 
#         mask = open_mask(fname, div=True)
#         pixel_vals = mask.data.unique()
#         assert torch.equal(pixel_vals, tensor([1,0]))
#         PIL.Image.fromarray(image2np(mask.data).astype(np.uint8)).save(PATH/"masks"/f"{fname}.png")

In [219]:
# for fname in mask_files:
#     mask = open_mask(fname)
#     pixel_vals = mask.data.unique()
#     if len(pixel_vals) == 1: assert pixel_vals.item() == 0
#     elif len(pixel_vals) == 2: assert torch.equal(pixel_vals, tensor([1,0]))
#     else: assert False