In [7]:
import numpy as np
from patchify import patchify
import tifffile as tiff
import os
from PIL import Image
from math import floor
import random
import shutil
import os
from torchvision import transforms

In [11]:
def make_patches(
    dataset_dir,
    save_dir,
    patch_size=224,
):
    original_sample_names = os.listdir(dataset_dir)

    for name in original_sample_names:
        sample_path = os.path.join(dataset_dir, name)
        image = tiff.imread(sample_path, key=0)
        print(image.shape)
        h, w, c = image.shape

        if h >= patch_size and w >= patch_size:
            h_new = floor(h / patch_size) * patch_size
            w_new = floor(w / patch_size) * patch_size
            image = image[:h_new, :w_new, :]
            print(image.shape)

            patches_imgs = patchify(image, (224, 224, 3), step=224)
            print("N patches ", patches_imgs.shape)

            for i in range(len(patches_imgs)):
                for j in range(len(patches_imgs[i])):
                    im = Image.fromarray(patches_imgs[i][j][0])
                    save_path = os.path.join(save_dir, f"{name.split('.')[0]}_{i}_{j}.jpeg")
                    im.save(save_path)
        else:
            print("Skipping image: patch size too big!")

    return os.listdir(save_dir)

### Divide dataset images into batches

**Carefull of a bias in the data**: crow traps are generally placed in the center of the image! After generating patches, the crow trpas will (almost) always placed at the top left corner of the patch.

In [13]:
pathes_names = make_patches(
    dataset_dir="Datasets/backup/Crow_traps_Subset",
    save_dir="Datasets/Crow_traps_splits/background",
    patch_size=224,
)

(1016, 860, 4)
(896, 672, 4)
N patches  (4, 3, 1, 224, 224, 3)
(1016, 860, 4)
(896, 672, 4)
N patches  (4, 3, 1, 224, 224, 3)
(1016, 860, 4)
(896, 672, 4)
N patches  (4, 3, 1, 224, 224, 3)
(1016, 860, 4)
(896, 672, 4)
N patches  (4, 3, 1, 224, 224, 3)
(1016, 860, 4)
(896, 672, 4)
N patches  (4, 3, 1, 224, 224, 3)
(1016, 860, 4)
(896, 672, 4)
N patches  (4, 3, 1, 224, 224, 3)
(1016, 860, 4)
(896, 672, 4)
N patches  (4, 3, 1, 224, 224, 3)
(1016, 860, 4)
(896, 672, 4)
N patches  (4, 3, 1, 224, 224, 3)
(1016, 860, 4)
(896, 672, 4)
N patches  (4, 3, 1, 224, 224, 3)
(1016, 860, 4)
(896, 672, 4)
N patches  (4, 3, 1, 224, 224, 3)
(1016, 860, 4)
(896, 672, 4)
N patches  (4, 3, 1, 224, 224, 3)
(508, 430, 4)
(448, 224, 4)
N patches  (2, 1, 1, 224, 224, 3)
(1016, 860, 4)
(896, 672, 4)
N patches  (4, 3, 1, 224, 224, 3)
(1016, 860, 4)
(896, 672, 4)
N patches  (4, 3, 1, 224, 224, 3)
(1016, 860, 4)
(896, 672, 4)
N patches  (4, 3, 1, 224, 224, 3)
(1016, 860, 4)
(896, 672, 4)
N patches  (4, 3, 1, 224, 2

In [14]:
print("Total N pathces: ", len(pathes_names))

Total N pathces:  526


Finally, manually select patches that contain crow traps and move it to the "crow_trap" folder

In [22]:
'''
Script for renaming and repositioning files
'''

class_dir = "Datasets/Crow_classify/background_tiff"
output_dir = "Datasets/Crow_classify/background"

samples = os.listdir(class_dir)

for sample in samples:
    dot_splits = sample.split(".")
    name = dot_splits[0]
    under_splits = dot_splits[1].split("_")
    position = under_splits[1]+"_"+under_splits[2]
    file_name = name+"_"+position

    sample_path = os.path.join(class_dir, sample)
    save_path = os.path.join(output_dir, file_name+".jpeg")

    shutil.copyfile(sample_path, save_path)

### Split data into train-test

In [6]:
import numpy as np
from sklearn.model_selection import train_test_split


def make_split(class_name, dataset_dir="Datasets/Crow_classify", split_p=0.2):
    class_dir = os.path.join(dataset_dir, class_name)
    samples_list = os.listdir(class_dir)

    print("Sample list: ", len(samples_list))

    samples_train, samples_test = train_test_split(samples_list, test_size=split_p, random_state=42)

    print(len(samples_train), len(samples_test))

    train_dir = os.path.join(dataset_dir, "train", class_name)
    test_dir = os.path.join(dataset_dir, "test", class_name)

    if not os.path.exists(train_dir):
        os.makedirs(train_dir)

    if not os.path.exists(test_dir):
        os.makedirs(test_dir)

    for sample in samples_test:
        sample_path = os.path.join(class_dir, sample)
        save_path = os.path.join(test_dir, sample)

        shutil.copyfile(sample_path, save_path)

    for sample in samples_train:
        sample_path = os.path.join(class_dir, sample)
        save_path = os.path.join(train_dir, sample)

        shutil.copyfile(sample_path, save_path)

make_split("crow_trap", dataset_dir="Datasets/Crow_classify", split_p=0.2)
make_split("background", dataset_dir="Datasets/Crow_classify", split_p=0.2)

Sample list:  45
36 9
Sample list:  481
384 97


### Data augmentation

The train split can be used to generate augmented data and reduce imbalance in the dataset.

- **crow_trap generation**: traps are manually masked out and then placed into background images
- **general augmentation**: background/crow_trap images can be transformed and distorted 

##### Crow trap generation

In [8]:
transform_p = 0.5
back_transforms = transforms.Compose(
    [
        transforms.RandomHorizontalFlip(transform_p),
        transforms.RandomVerticalFlip(transform_p),
        transforms.ColorJitter(
            brightness=0.5, contrast=0.15, saturation=0.3, hue=0.3
        ),
    ]
)


test_transforms = transforms.Compose(
    [
        transforms.RandomHorizontalFlip(transform_p),
        transforms.RandomVerticalFlip(transform_p),
        transforms.RandomRotation((0, 360)),
    ]
)

trap_dir = "Datasets/backup/Only_traps"
back_dir = "Datasets/Crow_classify/train/background"
save_dir = "Datasets/Crow_classify/generated_traps"

# Proportion of background images to augment with crow traps
split_p = 0.8

_, samples_back = train_test_split(os.listdir(back_dir), test_size=split_p, random_state=42)
samples_trap = os.listdir(trap_dir)

for sample in samples_back:
    back = Image.open(os.path.join(back_dir, sample))

    sample_trap = random.choice(samples_trap)
    trap = Image.open(os.path.join(trap_dir, sample_trap))

    x = random.randint(24, 200)
    y = random.randint(24, 200)

    trap = test_transforms(trap)

    back.paste(trap, (x, y), trap)
    print(sample.split(".")[0] + "_" + sample_trap)
    back = back_transforms(back)
    back.save(os.path.join(save_dir, sample.split(".")[0] + "_" + sample_trap))

103138_0_0_14.png
103638_2_1_3.png
101339_0_2_13.png
99911_3_1_1.png
102563_1_0_15.png
104201_1_1_5.png
103303_0_0_2.png
100631_1_2_11.png
100631_0_1_18.png
101548_3_0_16.png
101679_2_0_17.png
102521_1_1_17.png
102523_1_0_18.png
101164_0_0_3.png
103849_1_0_11.png
101442_0_1_18.png
101559_1_1_3.png
101550_3_0_2.png
102523_3_2_4.png
101550_1_2_11.png
103638_2_0_16.png
101559_2_0_1.png
103303_1_2_4.png
103439_1_1_17.png
104268_2_0_3.png
101125_1_1_17.png
102520_3_1_5.png
101559_0_1_7.png
101520_2_0_17.png
101517_1_0_13.png
102522_2_0_1.png
99495_3_2_16.png
104272_2_0_17.png
102521_0_1_8.png
101343_1_0_5.png
102598_0_2_2.png
99911_2_2_1.png
101569_1_0_9.png
101679_3_0_16.png
101569_2_0_17.png
104268_3_1_6.png
102522_1_2_4.png
101546_0_0_14.png
101569_0_2_17.png
99911_0_0_1.png
102237_2_2_3.png
101559_2_2_3.png
102522_1_1_11.png
100615_2_2_4.png
102601_2_2_13.png
100459_0_2_7.png
101520_0_2_13.png
104272_3_1_1.png
101125_1_2_6.png
101123_1_2_16.png
100631_3_1_10.png
101343_1_2_8.png
104270_

##### General augmentation

In [9]:
from torchvision.transforms.autoaugment import AutoAugmentPolicy

transform_p = 0.8

def apply_general_augmentation(samples_dir, save_dir, p_to_generate = 0.65, auto_augment_policy = AutoAugmentPolicy.SVHN, replace=True):


    transform = transforms.Compose(
        [
            transforms.RandomHorizontalFlip(0.5),
            transforms.RandomVerticalFlip(0.5),
            transforms.AutoAugment(auto_augment_policy),
            transforms.ColorJitter(
                brightness=0.5, contrast=0.15, saturation=0.3, hue=0.3
            ),
        ]
    )


    samples_list = random.choices(
        os.listdir(samples_dir), k=int(len(os.listdir(samples_dir)) * p_to_generate)
    )

    for sample in samples_list:

        sample_img = Image.open(os.path.join(samples_dir, sample))
        sample_img = transform(sample_img)
        
        sample_name = sample.split(".")[0] + "_aug" + ".jpeg"
        if replace:
            sample_name = sample

        print(sample_name)
        sample_img.save(os.path.join(save_dir, sample_name))

apply_general_augmentation("Datasets/Crow_classify/train/background", "Datasets/Crow_classify/train/background", p_to_generate = 0.2, replace=False)
apply_general_augmentation("Datasets/Crow_classify/train/crow_trap", "Datasets/Crow_classify/train/crow_trap", p_to_generate = 0.2, replace=False)

# apply_general_augmentation("Datasets/Crow_classify/train/background", "Datasets/Crow_classify/train/background", p_to_generate = 0.2, replace=True)
# apply_general_augmentation("Datasets/Crow_classify/train/crow_trap", "Datasets/Crow_classify/train/crow_trap", p_to_generate = 0.2, replace=True)

101343_2_0_aug.jpeg
103710_1_0_aug.jpeg
103138_1_0_aug.jpeg
104357_2_2_aug.jpeg
101520_3_0_aug.jpeg
102523_3_1_aug.jpeg
104008_1_2_aug.jpeg
101164_0_0_aug.jpeg
102237_1_0_aug.jpeg
103710_0_1_aug.jpeg
101559_2_0_aug.jpeg
99911_3_0_aug.jpeg
103138_3_2_aug.jpeg
101679_3_1_aug.jpeg
104268_1_2_aug.jpeg
103439_2_2_aug.jpeg
99495_1_2_aug.jpeg
104269_1_0_aug.jpeg
102237_2_2_aug.jpeg
101509_0_2_aug.jpeg
100615_1_1_aug.jpeg
101550_0_2_aug.jpeg
103303_2_0_aug.jpeg
99911_1_1_aug.jpeg
101343_0_0_aug.jpeg
101548_1_2_aug.jpeg
104270_0_0_aug.jpeg
104268_0_2_aug.jpeg
103303_0_2_aug.jpeg
101509_3_0_aug.jpeg
104269_1_2_aug.jpeg
101442_2_2_aug.jpeg
100980_1_0_aug.jpeg
102522_2_2_aug.jpeg
102521_1_0_aug.jpeg
101679_0_2_aug.jpeg
104268_3_1_aug.jpeg
101442_2_2_aug.jpeg
102520_1_2_aug.jpeg
101517_0_1_aug.jpeg
104270_0_0_aug.jpeg
100631_3_0_aug.jpeg
104272_2_0_aug.jpeg
102520_1_0_aug.jpeg
103439_0_1_aug.jpeg
101125_0_2_aug.jpeg
102521_3_0_aug.jpeg
100459_0_0_aug.jpeg
100631_3_0_aug.jpeg
104357_3_2_aug.jpeg
103

### Split data into train-val

In [10]:
make_split("crow_trap", dataset_dir="Datasets/Crow_classify/train", split_p=.15)
make_split("background", dataset_dir="Datasets/Crow_classify/train", split_p=.15)

Sample list:  406
345 61
Sample list:  453
385 68


### Convert to greyscale

In [12]:
def to_grey(samples_dir, save_dir):

    samples_list = os.listdir(samples_dir)

    for sample in samples_list:

        sample_img = Image.open(os.path.join(samples_dir, sample))
        to_greyscale = transforms.Grayscale()
        sample_img = to_greyscale(sample_img)
        
        sample_img.save(os.path.join(save_dir, sample))

to_grey("Datasets/Crow_classify_grey/train/background", "Datasets/Crow_classify_grey/train/background")
to_grey("Datasets/Crow_classify_grey/train/crow_trap", "Datasets/Crow_classify_grey/train/crow_trap")

to_grey("Datasets/Crow_classify_grey/val/background", "Datasets/Crow_classify_grey/val/background")
to_grey("Datasets/Crow_classify_grey/val/crow_trap", "Datasets/Crow_classify_grey/val/crow_trap")

to_grey("Datasets/Crow_evaluate_classify_grey/val/background", "Datasets/Crow_evaluate_classify_grey/val/background")
to_grey("Datasets/Crow_evaluate_classify_grey/val/crow_trap", "Datasets/Crow_evaluate_classify_grey/val/crow_trap")