In [16]:
from pathlib import Path

import numpy as np
import matplotlib.pyplot as plt
from torchvision.transforms.functional import pil_to_tensor
from tqdm import tqdm
from sklearn.model_selection import train_test_split

from src import ROOT_DIR
from src.datasets import MixedDatasetPreprocessing, BildacherBackgroundDataset, OUT_IMAGE_RES, TRAIN_VAL_SPLIT, SEED, DATA_DIR
from src.utils import (
    is_bbox_outside_crop,
    get_patch_label,
    plot_tensor,
    unpack_yolo_label,
    yolo_bbox_relative_to_absolute_coords,
    )

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Mixed dataset

In [None]:
# Instantiate the mixed dataset with center_crop=False; the center_crop=True variant is built further down.
dataset = MixedDatasetPreprocessing(center_crop=False)

In [None]:
# Inspect the list of transforms attached to the dataset.
dataset.transforms.transforms

In [None]:
# Fetch sample 1 as (image, label, patch) and show the image shape, the label and the patch.
image, label, patch = dataset[1]
image.shape, label, patch

### Check patches

In [None]:
# Show the full label of the sample together with its shape.
label, label.shape

In [None]:
# Unpack the box fields of the second label row (index 1) into x, y, w, h.
# NOTE(review): presumably YOLO-relative centre/size values in [0, 1] -- confirm in unpack_yolo_label.
x, y, w, h = unpack_yolo_label(label[1])
x, y, w, h

In [None]:
# Display the image, then record its height and width (axes 1 and 2 of the tensor).
plot_tensor(image)
H, W = image.shape[1:3]

In [None]:
# Scale the relative box size (w, h) to whole pixels on the W x H image.
patch_height, patch_width = round(h * H), round(w * W)
patch_height, patch_width

In [None]:
# Convert the relative box centre to pixel coordinates.
x_center = round(x * W)
y_center = round(y * H)

# Anchor the crop at its top-left corner and extend it by the full patch size.
# Slicing `center - size//2 : center + size//2` yields 2*(size//2) pixels, i.e. one
# row/column too few whenever the size is odd.
_top = y_center - patch_height // 2
_left = x_center - patch_width // 2

# Clamp the start at 0: a negative start index would wrap around to the far side of
# the tensor instead of clipping at the image border. An end index past the border
# is clipped by slicing itself.
_patch = image[:, max(_top, 0) : _top + patch_height, max(_left, 0) : _left + patch_width]

In [None]:
# Print the shape of the patch cropped from the image tensor and plot it.
print(_patch.shape)
plot_tensor(_patch)

In [None]:
# For comparison: print the tensor shape of the patch returned by the dataset and display it.
print(pil_to_tensor(patch).shape)
patch

**The patch extracted from the label matches the stored patch (before applying CenterCrop).**

### Discard labels, patches and images when any part of the BBox is outside CenterCrop

In [10]:
# Rebuild the dataset with center_crop=True. NOTE: this rebinds `dataset` from the earlier center_crop=False instance.
dataset = MixedDatasetPreprocessing(center_crop=True)

In [11]:
# Sanity check: images, labels and patches must all have the same number of entries.
len(dataset.images) == len(dataset.labels) == len(dataset.patches)

True

In [12]:
# Number of entries in the dataset's patches_idxs_to_discard collection.
len(dataset.patches_idxs_to_discard)

363

In [13]:
# Number of images held by the center_crop=True dataset.
len(dataset.images)

221

In [14]:
# Fetch sample 1 again, this time from the center_crop=True dataset, and show its label.
image, label, patch = dataset[1]
label

array([[0.       , 0.9036328, 0.9363663, 0.1510937, 0.1277362],
       [0.       , 0.884458 , 0.5364212, 0.0780175, 0.1018095],
       [0.       , 0.7046386, 0.2098388, 0.0486718, 0.1118827],
       [0.       , 0.6617773, 0.3876007, 0.042207 , 0.0393333],
       [0.       , 0.3827685, 0.3029743, 0.0924121, 0.0902564]])

In [None]:
# NOTE: disabled cell, kept for reference only.
# For every sample it prints the absolute size (h, w) of the label returned by get_patch_label
# next to the size of the stored patch, so that the two can be compared by eye.
# ## Test -> seems to work
# for i in range(len(dataset)):
#     image, label, patch = dataset[i]
#     patch_height, patch_width = pil_to_tensor(patch).shape[1:]
#     H, W = image.shape[1], image.shape[2]
#     label_match = get_patch_label(image, label, patch)
#     x, y, w, h = unpack_yolo_label(label_match)
#     x, y, w, h = yolo_bbox_relative_to_absolute_coords(x, y, w, h, W, H)
#     print(h, w)
#     print(patch_height, patch_width)

In [None]:
# NOTE: disabled cell, kept for reference only.
# It collects the indices of the samples whose matched bbox is flagged by is_bbox_outside_crop
# for OUT_IMAGE_RES.
# NOTE(review): presumably superseded by dataset.patches_idxs_to_discard -- confirm before deleting.
# patches_to_discard = []
# labels = []
# for i in tqdm(range(len(dataset))):
#     image, label, patch = dataset[i]

#     # There is more than one bbox per image; we need to find the one that appears in the patch.
#     # Finding the closest match by patch size:
#     patch_height, patch_width = pil_to_tensor(patch).shape[1:]
#     H, W = image.shape[1], image.shape[2]
#     label_match = get_patch_label(image, label, patch)
#     labels.append(label_match)

#     ## BBox coordinates
#     x, y, w, h = unpack_yolo_label(label_match)
#     x, y, w, h = yolo_bbox_relative_to_absolute_coords(x, y, w, h, W, H)
#     if is_bbox_outside_crop(x, y, w, h, W, H, OUT_IMAGE_RES):
#         patches_to_discard.append(i)

# patches_to_discard

### Make splits

In [None]:
# Split the sample indices into train and validation subsets; SEED keeps the split reproducible.
indices = np.arange(len(dataset))
train, val = train_test_split(
    indices,
    train_size=TRAIN_VAL_SPLIT,
    random_state=SEED,
)

# Total of train samples, validation samples and discarded patch indices.
print(sum(map(len, (train, val, dataset.patches_idxs_to_discard))))
train, val

In [21]:
# Create one output folder per split under <parent of DATA_DIR>/postprocessed.
# A plain loop is used instead of a list comprehension because mkdir() is called only
# for its side effect; this also keeps a meaningless [None, None, None] out of the output.
postprocessed_dir = DATA_DIR.parent / 'postprocessed'
for split in ("train", "val", "free_patches"):
    # parents/exist_ok make the cell safe to re-run.
    (postprocessed_dir / split).mkdir(parents=True, exist_ok=True)

In [27]:
# Source folders: images and YOLO labels live under the dataset directory, patches under DATA_DIR.
images_dir = dataset.dataset_dir / 'images'
labels_dir = dataset.dataset_dir / 'yolo'
patches_dir = DATA_DIR / 'patches'

# Count the entries in the patches folder.
# The original filter was left unfinished (`... if ])`), which is a SyntaxError.
# NOTE(review): counting regular files only is an assumption -- confirm the intended condition.
len([path for path in patches_dir.iterdir() if path.is_file()])

# TODO: Get paths for the train and val sets from self.images, self.labels and self.patches.
# Get paths for free_patches from the patches to discard and a self._patches attribute.

3271

## Bildacher backgrounds dataset

In [None]:
# Instantiate the Bildacher background dataset and look at its first item.
bildacher = BildacherBackgroundDataset()
# NOTE: this rebinds `image`, which earlier cells used for the mixed-dataset sample.
image = bildacher[0]
print(image.shape)
plot_tensor(image)