In [81]:
import glob
import os
import shutil
import warnings

import numpy as np
import torchgeo.datasets
import rasterio.windows
from torchgeo.datasets import Potsdam2D

In [14]:
# Dataset root folder. Set the POTSDAM_ROOT environment variable to point the
# notebook at another location; when it is unset, the original hard-coded
# path is used, so existing behaviour is unchanged.
root_path = os.environ.get("POTSDAM_ROOT", r"C:\Users\Dimit\Downloads\Potsdam")

# Working folder for the image tiles (sub-folder of the dataset root).
image_dirname = "image"
image_dirpath = os.path.join(root_path, image_dirname)

# Working folder for the label (mask) tiles (sub-folder of the dataset root).
mask_dirname = "label"
mask_dirpath = os.path.join(root_path, mask_dirname)

In [88]:
# Make sure both working folders exist; a folder that is already present is
# left untouched.
for dirpath in (image_dirpath, mask_dirpath):
    os.makedirs(dirpath, exist_ok=True)

In [89]:
# Copy the downloaded tiles into the working folders so that the label
# conversion further down rewrites the copies rather than the originals.
#
# The source lists deliberately get their own names. Binding them to
# `image_paths` / `mask_paths` would leave those names pointing at the
# original (unsorted) files, and running the conversion loop right after this
# cell would then overwrite the downloaded labels in place.
image_src_paths = glob.glob(os.path.join(root_path, "2_Ortho_RGB", "*.tif"))
for path in image_src_paths:
    # copy2 also carries over file metadata such as timestamps.
    shutil.copy2(src=path, dst=image_dirpath)

mask_src_paths = glob.glob(os.path.join(root_path, "5_Labels_all", "*.tif"))
for path in mask_src_paths:
    shutil.copy2(src=path, dst=mask_dirpath)

In [69]:
# List the tiles in the working folders. Both lists are sorted because the
# conversion loop pairs image and label tiles by position.
image_paths = sorted(glob.glob(os.path.join(image_dirpath, "*.tif")))
mask_paths = sorted(glob.glob(os.path.join(mask_dirpath, "*.tif")))

In [90]:
# Convert every RGB-coded label tile into a single-band class-index raster and
# write it back over the same file, using the profile of the image tile at the
# same list position.
#
# Tiles are paired purely by position. If the two lists differ in length, zip
# would silently drop the surplus and the remaining pairs may be misaligned,
# so fail before any file is rewritten.
if len(image_paths) != len(mask_paths):
    raise ValueError(
        f"Found {len(image_paths)} image tiles but {len(mask_paths)} label tiles; "
        "cannot pair them by position."
    )

for i, (image_path, mask_path) in enumerate(zip(image_paths, mask_paths)):
    print(f"{i}/{len(mask_paths)}")

    # Borrow the image tile's profile as the template for the output file.
    image_src: rasterio.io.DatasetReader
    with rasterio.open(image_path) as image_src:
        mask_dst_profile = image_src.profile

    # Silence only rasterio's NotGeoreferencedWarning while reading the label.
    with warnings.catch_warnings(
            action="ignore",
            category=rasterio.errors.NotGeoreferencedWarning
    ):
        mask_src: rasterio.io.DatasetReader
        with rasterio.open(mask_path) as mask_src:
            mask_src_data = mask_src.read()  # band axis first

    # The label file is overwritten in place, so this cell is only safe to run
    # once per file. A re-run would feed already-converted single-band data to
    # rgb_to_mask and replace the stored class indices. Skip anything that is
    # not a 3-band raster instead.
    if mask_src_data.shape[0] != 3:
        print(
            f"Skipping {os.path.basename(mask_path)}: "
            f"expected 3 bands, found {mask_src_data.shape[0]}"
        )
        continue

    # rgb_to_mask is given the colour axis last, hence the moveaxis.
    mask_dst_data = torchgeo.datasets.utils.rgb_to_mask(
        np.moveaxis(mask_src_data, source=0, destination=-1),
        colors=Potsdam2D.colormap
    )

    # The output holds one band of class indices instead of three colour bands.
    mask_dst_profile.update(count=1)
    mask_dst: rasterio.io.DatasetWriter
    with rasterio.open(mask_path, mode="w", **mask_dst_profile) as mask_dst:
        mask_dst.write(mask_dst_data, indexes=1)

0/38
1/38
2/38
3/38
4/38
5/38
6/38
7/38
8/38
9/38
10/38
11/38
12/38
13/38
14/38
15/38
16/38
17/38
18/38
19/38
20/38
21/38
22/38
23/38
24/38
25/38
26/38
27/38
28/38
29/38
30/38
31/38
32/38
33/38
34/38
35/38
36/38
37/38


In [100]:
# Does the most recently read label raster contain at least one pixel whose
# three bands are exactly (255, 255, 0)?
np.any(np.all(np.moveaxis(mask_src_data, 0, -1) == (255, 255, 0), axis=-1))

False

In [85]:
# Shape of the class-index mask produced by the last iteration of the
# conversion loop.
mask_dst_data.shape

(6000, 6000)

In [34]:
# Display the list of label-tile paths for a visual check.
mask_paths

['C:\\Users\\Dimit\\Downloads\\Potsdam\\label\\top_potsdam_2_10_label.tif',
 'C:\\Users\\Dimit\\Downloads\\Potsdam\\label\\top_potsdam_2_11_label.tif',
 'C:\\Users\\Dimit\\Downloads\\Potsdam\\label\\top_potsdam_2_12_label.tif',
 'C:\\Users\\Dimit\\Downloads\\Potsdam\\label\\top_potsdam_2_13_label.tif',
 'C:\\Users\\Dimit\\Downloads\\Potsdam\\label\\top_potsdam_2_14_label.tif',
 'C:\\Users\\Dimit\\Downloads\\Potsdam\\label\\top_potsdam_3_10_label.tif',
 'C:\\Users\\Dimit\\Downloads\\Potsdam\\label\\top_potsdam_3_11_label.tif',
 'C:\\Users\\Dimit\\Downloads\\Potsdam\\label\\top_potsdam_3_12_label.tif',
 'C:\\Users\\Dimit\\Downloads\\Potsdam\\label\\top_potsdam_3_13_label.tif',
 'C:\\Users\\Dimit\\Downloads\\Potsdam\\label\\top_potsdam_3_14_label.tif',
 'C:\\Users\\Dimit\\Downloads\\Potsdam\\label\\top_potsdam_4_10_label.tif',
 'C:\\Users\\Dimit\\Downloads\\Potsdam\\label\\top_potsdam_4_11_label.tif',
 'C:\\Users\\Dimit\\Downloads\\Potsdam\\label\\top_potsdam_4_12_label.tif',
 'C:\\Users\

In [27]:
# `profile` is not assigned in any visible cell, so on a fresh kernel this
# cell raised NameError. Read a profile explicitly from the first image tile.
# NOTE(review): assumes the value originally inspected here was an image
# tile's rasterio profile (the saved output shows a 3-band uint8 GTiff) —
# confirm which tile was intended.
with rasterio.open(image_paths[0]) as profile_src:
    profile = profile_src.profile
profile

{'driver': 'GTiff', 'dtype': 'uint8', 'nodata': None, 'width': 6000, 'height': 6000, 'count': 3, 'crs': CRS.from_epsg(32633), 'transform': Affine(0.05, 0.0, 366676.5,
       0.0, -0.05, 5807062.6), 'blockxsize': 608, 'blockysize': 608, 'tiled': True, 'compress': 'packbits', 'interleave': 'pixel'}

In [16]:
# Collect the file names of label tiles whose first band is entirely zero.
bad_tiles = set()
for i, src_path in enumerate(mask_paths):
    print(f"{i}/{len(mask_paths)}")
    with rasterio.open(src_path) as src:
        src_data = src.read(1)  # first band only
    if src_data.any():
        # At least one non-zero value: this tile is fine.
        continue
    bad_tiles.add(os.path.basename(src_path))