In [1]:
from pathlib import Path

import numpy as np
import matplotlib.pyplot as plt
from torchvision.transforms.functional import pil_to_tensor, to_pil_image
from tqdm import tqdm
from sklearn.model_selection import train_test_split

from src import ROOT_DIR
from src.datasets import MixedDatasetPreprocessing, BildacherBackgroundDataset, OUT_IMAGE_RES, TRAIN_VAL_SPLIT, SEED, DATA_DIR
from src.utils import (
    is_bbox_outside_crop,
    get_patch_label,
    plot_tensor,
    unpack_yolo_label,
    yolo_bbox_relative_to_absolute_coords,
    )

%load_ext autoreload
%autoreload 2

  from .autonotebook import tqdm as notebook_tqdm


## Mixed dataset

In [None]:
# Load the mixed dataset with center cropping turned off (center_crop=False).
dataset = MixedDatasetPreprocessing(center_crop=False)

In [None]:
# Display the `transforms` attribute of the dataset's transform object.
dataset.transforms.transforms

In [None]:
# Each item unpacks into (image, label, patch); look at the sample at index 1.
image, label, patch = dataset[1]
image.shape, label, patch

### Check patches

In [None]:
# Show the label of the sample together with its shape.
label, label.shape 

In [None]:
# Unpack the label entry at index 1 into its YOLO bbox values.
# x, y, w, h are treated as fractions of the image size by the cells below
# (they are multiplied by W / H to obtain pixels).
x, y, w, h = unpack_yolo_label(label[1])
x, y, w, h

In [None]:
# Plot the full image, then keep its spatial size for the pixel conversions below
# (axis 1 is used as the height, axis 2 as the width).
plot_tensor(image)
H, W = image.shape[1], image.shape[2]

In [None]:
# Convert the relative bbox size to pixels: the height scales with H, the width with W.
patch_height, patch_width = round(h * H), round(w * W)
patch_height, patch_width

In [None]:
# Cut the bbox out of the image by hand: centre in pixels, then half the patch
# size to each side along both spatial axes.
x_center, y_center = round(x * W), round(y * H)
half_height, half_width = patch_height // 2, patch_width // 2
# NOTE(review): with an odd patch_height / patch_width this window is one pixel
# smaller than the patch (2 * (n // 2) == n - 1) -- confirm this is acceptable.
row_window = slice(y_center - half_height, y_center + half_height)
col_window = slice(x_center - half_width, x_center + half_width)
_patch = image[:, row_window, col_window]

In [None]:
# Shape and plot of the patch cropped by hand from the label.
print(_patch.shape)
plot_tensor(_patch)

In [None]:
# Shape (after conversion to a tensor) and display of the patch returned by the dataset,
# for comparison with the hand-cropped `_patch` above.
print(pil_to_tensor(patch).shape)
patch

**The patch extracted from the label matches the stored patch! (before applying CenterCrop)**

### Discard labels, patches and images when any part of the BBox is outside CenterCrop

In [2]:
# Rebuild the dataset with center cropping enabled; this replaces the `dataset`
# created earlier with center_crop=False.
dataset = MixedDatasetPreprocessing(center_crop=True)

In [None]:
# True when the image, label and patch lists all have the same length.
len(dataset.images) == len(dataset.labels) == len(dataset.patches)

In [None]:
# Number of indices in `patches_idxs_to_discard`
# (presumably the samples whose bbox falls outside the CenterCrop -- see the section title).
len(dataset.patches_idxs_to_discard)

In [None]:
# Number of entries in `dataset.images`.
len(dataset.images)

In [None]:
# Fetch the sample at index 1 from the center-cropped dataset and show its label.
image, label, patch = dataset[1]
label

In [None]:
# ## Sanity check (disabled): print the matched label's absolute h, w next to the patch's height, width -> seems to work
# for i in range(len(dataset)):
#     image, label, patch = dataset[i]
#     patch_height, patch_width = pil_to_tensor(patch).shape[1:]
#     H, W = image.shape[1], image.shape[2]
#     label_match = get_patch_label(image, label, patch)
#     x, y, w, h = unpack_yolo_label(label_match)
#     x, y, w, h = yolo_bbox_relative_to_absolute_coords(x, y, w, h, W, H)
#     print(h, w)
#     print(patch_height, patch_width)    

In [None]:
# patches_to_discard = []
# labels = []
# for i in tqdm(range(len(dataset))):
#     image, label, patch = dataset[i]
    
#     # An image can contain more than one bbox, so we need to find the one shown in the patch.
#     # Find the closest match by patch size:
#     patch_height, patch_width = pil_to_tensor(patch).shape[1:]
#     H, W = image.shape[1], image.shape[2]
#     label_match = get_patch_label(image, label, patch)
#     labels.append(label_match)
    
#     ## BBox coordinates
#     x, y, w, h = unpack_yolo_label(label_match)
#     x, y, w, h = yolo_bbox_relative_to_absolute_coords(x, y, w, h, W, H)
#     if is_bbox_outside_crop(x, y, w, h, W, H, OUT_IMAGE_RES):
#         patches_to_discard.append(i)
        
# patches_to_discard

### Make splits

In [3]:
# Split the dataset indices into train / val subsets; SEED keeps the split reproducible.
indices = np.arange(len(dataset))
train, val = train_test_split(
    indices,
    train_size=TRAIN_VAL_SPLIT,
    random_state=SEED,
)

# Sanity check: train + val + discarded samples should add up to the size of the
# unfiltered dataset.
n_discarded = len(dataset.patches_idxs_to_discard)
print(len(train) + len(val) + n_discarded)
train, val

584


(array([218, 113, 140, 122, 162, 216,  38,  24, 185, 137,  29,  19,  69,
        205,  60,  68,   5, 139,  56, 156, 176, 163,  65, 112, 186, 155,
         31,  12,  35,  28,  42,  84, 104, 150,  51, 124, 177,  76,  41,
        135,  78, 178,  26, 146, 173, 206,   0,   2,  77,  46, 153, 118,
         90,  85, 147,  98,  36, 125,  61,  22, 144, 117,  33,  11, 215,
        183,   6,  27, 136, 212, 203,   4,  32, 119, 159, 126, 158, 114,
         10,  62, 164, 194, 181, 128, 198, 167,  70, 170,  64,  44, 143,
         40, 123,  23, 165, 161,  81,  39, 184,  47,  94, 168,  43, 145,
        152,   3, 105,  53, 133, 199, 171, 204,  49,  80,  34,   7, 110,
         91,  83, 195, 200,  89,   8,  13,  59, 214, 131,  17, 166,  72,
        192, 134, 182, 208,  63,  54, 107,  50, 174, 217, 169,  58,  48,
         88,  21,  57, 160, 213, 187, 191, 129,  37, 157, 207,   1,  52,
        149, 130, 151, 103,  99, 116,  87, 202,  74, 210, 121, 220,  20,
        188,  71, 106,  14,  92, 179, 102]),
 array

In [4]:
# Create the output folder of every split under <DATA_DIR.parent>/postprocessed.
# A plain loop is used because mkdir() is called only for its side effect; a list
# comprehension would build (and display) a throwaway list of None values.
postprocessed_dir = DATA_DIR.parent / 'postprocessed'
for split in ("train", "val", "free_patches"):
    (postprocessed_dir / split).mkdir(parents=True, exist_ok=True)

In [12]:
# Write every sample of the train / val splits to disk as
#   <DATA_DIR.parent>/postprocessed/{train,val}/{images,labels,patches}/<original file name>
# The file names come from dataset.images, dataset.labels and dataset.patches.
train_folder = DATA_DIR.parent / 'postprocessed' / 'train'
val_folder = DATA_DIR.parent / 'postprocessed' / 'val'

for folder in (train_folder, val_folder):
    for subfolder in ('images', 'labels', 'patches'):
        (folder / subfolder).mkdir(parents=True, exist_ok=True)

# Resolve each index to its destination once, so the loop does a dict lookup instead of
# scanning the `train` / `val` arrays on every iteration. `train` is applied last so it
# takes precedence on any overlap, like the original `if ... elif ...` did.
destination = {int(idx): val_folder for idx in val}
destination.update({int(idx): train_folder for idx in train})

for i in range(len(dataset)):
    folder = destination.get(i)
    if folder is None:
        # Index belongs to neither split: nothing to export.
        continue

    image, label, patch = dataset[i]
    to_pil_image(image).save(folder / "images" / dataset.images[i].name)

    # np.savetxt opens (and closes) the file itself; wrapping it in a separate
    # open(label_path, 'wb') only created a second, unused handle on the same path.
    # NOTE(review): np.savetxt writes a 1-D array as one value per line -- confirm that
    # `label` is always 2-D (one row per bbox).
    label_path = folder / "labels" / dataset.labels[i].name
    np.savetxt(label_path, label, delimiter=' ', newline='\n')

    patch.save(folder / "patches" / dataset.patches[i].name)

In [13]:
# Collect the entries of `dataset._patches` whose indices are listed in
# `dataset.patches_idxs_to_discard` (the future "free patches").
# list.index() looks up a single *value* and raised ValueError when handed the whole list
# of indices; the indices have to be applied one at a time instead.
# NOTE(review): assumes `dataset._patches` is the unfiltered patch list that
# `patches_idxs_to_discard` indexes into -- confirm in MixedDatasetPreprocessing.
free_patches = [dataset._patches[idx] for idx in dataset.patches_idxs_to_discard]
len(free_patches), free_patches[:5]

# TODO: save each entry to DATA_DIR.parent / 'postprocessed' / 'free_patches'
#patch.save(DATA_DIR.parent / 'postprocessed' / 'free_patches' / f"{str(patch.name)}")

In [None]:
# Save to disk

## Bildacher backgrounds dataset

In [None]:
# Load the Bildacher background dataset and look at its first item.
# Note: this rebinds `image`, which earlier cells used for the mixed-dataset sample.
bildacher = BildacherBackgroundDataset()
image = bildacher[0]
print(image.shape)
plot_tensor(image)