In [1]:
import os
import glob
import slide_tools

In [2]:
# Collect slides from both source directories and order them by file name
# (not by full path), so slides from the two folders interleave by name.
slide_patterns = (
    "/media/hdd_big/gyn/slides/*.svs",
    "/media/hdd_big/gyn/pseudo_slides/*.svs",
)
slide_paths = [path for pattern in slide_patterns for path in glob.glob(pattern)]
slide_paths.sort(key=os.path.basename)

# All annotations live in a single directory, so a plain sort orders them by file name.
annotation_paths = glob.glob("../../hrd/data/ago-tr1/annotations/*.geojson")
annotation_paths.sort()

# NOTE(review): only the counts are compared here; slides and annotations are
# presumably matched by position downstream, which relies on both lists
# sorting into the same order - confirm the file stems actually correspond.
n_slides = len(slide_paths)
assert n_slides == len(annotation_paths)
print(f"Found {n_slides} slides")

Found 584 slides


### Load all tiles, without filtering by annotation (very fast)

In [3]:
%%time
ds = slide_tools.datasets.TileLevelDataset(
    slide_paths=slide_paths,
    annotation_paths=annotation_paths,
    simplify_tolerance=100,
)
print(f"Found {len(ds.samples)} tiles of native size")

[Plugin: cucim.kit.cuslide] Loading the dynamic library from: /home/allstar-04/anaconda3/envs/timmae/lib/python3.8/site-packages/cucim/clara/cucim.kit.cuslide@22.02.00.so
Initializing plugin: cucim.kit.cuslide (interfaces: [cucim::io::IImageFormat v0.1]) (impl: cucim.kit.cuslide)
[Plugin: cucim.kit.cumed] Loading the dynamic library from: /home/allstar-04/anaconda3/envs/timmae/lib/python3.8/site-packages/cucim/clara/cucim.kit.cumed@22.02.00.so
Initializing plugin: cucim.kit.cumed (interfaces: [cucim::io::IImageFormat v0.1]) (impl: cucim.kit.cumed)


Found 69439613 tiles of native size
CPU times: user 19 s, sys: 1.95 s, total: 20.9 s
Wall time: 21.3 s


### Load only tiles inside annotations (still fast)

In [4]:
%%time
ds.setup_regions(centroid_in_annotation=True)
ds.setup_epoch()
print(f"Found {len(ds.samples)} tiles of native size inside annotations")

Found 29300444 tiles of native size inside annotations
CPU times: user 4.23 s, sys: 279 ms, total: 4.51 s
Wall time: 4.52 s


In [5]:
%%time
region_overlap = 0.5
ds.setup_regions(centroid_in_annotation=True, region_overlap=region_overlap)
ds.setup_epoch()
print(f"Found {len(ds.samples)} tiles of native size inside annotations with overlap of {region_overlap}")

Found 117206963 tiles of native size inside annotations with overlap of 0.5
CPU times: user 11.3 s, sys: 1.14 s, total: 12.5 s
Wall time: 12.5 s


### Balancing by size 

In [6]:
%%time
ds.setup_regions(centroid_in_annotation=True)
ds.setup_epoch(
    shuffle=True,
    balance_size_by=slide_tools.objects.BalanceMode.MEDIAN,
)
print(f"Found {len(ds.samples)} tiles of native size inside annotations with overlap of {region_overlap} and balanced by size")

Found 29157952 tiles of native size inside annotations with overlap of 0.5 and balanced by size
CPU times: user 38.2 s, sys: 204 ms, total: 38.4 s
Wall time: 38.5 s


### Shuffling with `shuffle_chunk_size > 1` is somewhat slower, but saves time later when loading tiles

In [7]:
%%time
ds.setup_epoch(
    shuffle=True,
    balance_size_by=slide_tools.objects.BalanceMode.MEDIAN,
    shuffle_chunk_size=9,
)
print(f"Found {len(ds.samples)} tiles of native size")

Found 29157952 tiles of native size
CPU times: user 1min 16s, sys: 587 ms, total: 1min 16s
Wall time: 1min 16s


### Some documentation

In [8]:
# IPython help lookup: shows the init signature and docstrings of TileLevelDataset.
slide_tools.datasets.TileLevelDataset?

Init signature: slide_tools.datasets.TileLevelDataset(*args, **kwds)
Docstring:
An abstract class representing a :class:`Dataset`.

All datasets that represent a map from keys to data samples should subclass
it. All subclasses should overwrite :meth:`__getitem__`, supporting fetching a
data sample for a given key. Subclasses could also optionally overwrite
:meth:`__len__`, which is expected to return the size of the dataset by many
:class:`~torch.utils.data.Sampler` implementations and the default options
of :class:`~torch.utils.data.DataLoader`.

.. note::
  :class:`~torch.utils.data.DataLoader` by default constructs a index
  sampler that yields integral indices.  To make it work with a map-style
  dataset with non-integral indices/keys, a custom sampler must be provided.
Init docstring:
Map-style dataset 

In [9]:
# IPython help lookup: shows the signature of ds.setup_regions.
ds.setup_regions?

Signature:
ds.setup_regions(
    size: Union[Sequence[int], NoneType] = None,
    unit: slide_tools.objects.constants.SizeUnit = <SizeUnit.PIXEL: 'pixel'>,
    level: int = 0,
    centroid_in_annotation: bool = False,
    annotation_align: bool = False,
[0;34m[0m    [0mregion_overlap[0m[0;34m:[0m [0mfloat[0m [0;34m=[0m [0

In [10]:
# IPython help lookup: shows the signature and docstring of ds.setup_epoch.
ds.setup_epoch?

Signature:
ds.setup_epoch(
    balance_size_by: Union[slide_tools.objects.constants.BalanceMode, int, NoneType] = None,
    balance_label_key: Union[str, NoneType] = None,
    balance_label_bins: int = 10,
    shuffle: bool = False,
    shuffle_chunk_size: int = 1,
)
Docstring:
Populate .samples 