In [1]:
import os
import re

from PIL import Image
from natsort import natsorted
from torchvision import models
from sklearn.model_selection import train_test_split
import shutil

In [2]:
BASE_FOLDER = os.getcwd()


def _list_files(folder):
    """Return the names of the regular files directly inside `folder`, naturally sorted."""
    return natsorted([name for name in os.listdir(folder) if os.path.isfile(os.path.join(folder, name))])


image_folder = os.path.join(BASE_FOLDER, "data", "images")
img_filenames = _list_files(image_folder)

label_folder = os.path.join(BASE_FOLDER, "data", "labels")
label_filenames = _list_files(label_folder)

# Images and labels are matched purely by their position in the two sorted
# listings. zip() would silently drop the surplus entries, and a missing or
# stray file would shift every following pair, so fail loudly instead.
if len(img_filenames) != len(label_filenames):
    raise ValueError(
        f"Found {len(img_filenames)} image file(s) in {image_folder} but "
        f"{len(label_filenames)} label file(s) in {label_folder}; "
        "they must match one-to-one."
    )

# One (image file name, label file name) tuple per sample.
data = list(zip(img_filenames, label_filenames))

# 70/30 split; the fixed random_state keeps the split reproducible across runs.
train_dataset, test_dataset = train_test_split(data, test_size=0.3, random_state=1)


In [4]:
import Augmentor

# Total number of augmented images to generate across both splits.
SAMPLES = 2000


def _reset_split_folder(split):
    """Delete data/<split> if it exists and recreate it with empty images/ and labels/ subfolders."""
    split_root = os.path.join(BASE_FOLDER, "data", split)
    if os.path.isdir(split_root):
        shutil.rmtree(split_root)
    os.makedirs(os.path.join(split_root, "images"))
    os.makedirs(os.path.join(split_root, "labels"))


def _copy_split(pairs, split):
    """Copy every (image, label) pair into data/<split>, renamed to <i>.png / <i>.txt.

    `i` counts up from 0 in the order of `pairs`. Returns the number of pairs
    copied, i.e. the next free index in that split.
    """
    count = 0
    for img_file, label_file in pairs:
        shutil.copyfile(
            os.path.join(BASE_FOLDER, "data", "images", img_file),
            os.path.join(BASE_FOLDER, "data", split, "images", f"{count}.png"),
        )
        shutil.copyfile(
            os.path.join(BASE_FOLDER, "data", "labels", label_file),
            os.path.join(BASE_FOLDER, "data", split, "labels", f"{count}.txt"),
        )
        count += 1
    return count


def _augment_in_place(split, n_samples):
    """Generate `n_samples` augmented images from data/<split>/images into that same folder.

    Both splits share this single pipeline definition so their augmentation
    settings cannot drift apart. Returns the Augmentor pipeline that was run.
    """
    images_dir = os.path.join(BASE_FOLDER, "data", split, "images")
    # Source and output directory are the same: samples land next to the originals.
    pipeline = Augmentor.Pipeline(images_dir, images_dir)
    # NOTE(review): rotation, flipping and distortion change image geometry.
    # The label files are later copied unchanged, which is only valid if they
    # are image-level labels rather than coordinates - confirm.
    pipeline.rotate_random_90(probability=0.75)
    # pipeline.zoom(probability=0.75, min_factor=1.1, max_factor=1.6)  # currently disabled
    pipeline.flip_random(probability=0.75)
    pipeline.random_contrast(probability=0.75, min_factor=0.5, max_factor=1.5)  # Adjust contrast randomly
    pipeline.random_brightness(probability=0.75, min_factor=0.5, max_factor=1.5)  # Adjust brightness randomly
    pipeline.random_distortion(probability=0.75, grid_width=4, grid_height=4, magnitude=10)  # Apply random distortion
    pipeline.random_color(probability=0.75, min_factor=0.5, max_factor=1.5)  # Randomly change color balance
    pipeline.sample(n_samples)
    return pipeline


# Start from clean output folders so files from a previous run cannot leak in.
for split in ("test", "train"):
    _reset_split_folder(split)

# train_idx / test_idx hold the next free sample index of each split; the
# renaming step further down continues counting from them.
train_idx = _copy_split(train_dataset, "train")
test_idx = _copy_split(test_dataset, "test")

# Augmented images are divided 30/70 between test and train.
p = _augment_in_place("test", int(SAMPLES * 0.3))
p = _augment_in_place("train", int(SAMPLES * 0.7))

def extract_number(s):
    """Return the first run of digits found in `s` as a string, or None if `s` contains no digit."""
    found = re.search(r'\d+', s)
    return found.group() if found else None

def _adopt_augmented_samples(split, next_idx):
    """Rename the augmented images of data/<split> to numeric names and give each one a label file.

    Only files whose name starts with "images_original_" are touched (the
    prefix expected on Augmentor-generated samples). The first number in such
    a name is taken to be the stem of the source image it was generated from,
    and that source image lives in this same split folder. The label is
    therefore copied from data/<split>/labels/<n>.txt. Reading
    data/labels/<n>.txt instead would attach the label of an unrelated
    original image, because the split folders were renumbered after shuffling.

    Returns the next free index after all augmented files have been renamed.
    """
    images_dir = os.path.join(BASE_FOLDER, "data", split, "images")
    labels_dir = os.path.join(BASE_FOLDER, "data", split, "labels")
    # os.listdir() returns a snapshot, so renaming while iterating is safe.
    for filename in os.listdir(images_dir):
        if not filename.startswith("images_original_"):
            continue
        n = extract_number(filename)
        os.rename(os.path.join(images_dir, filename), os.path.join(images_dir, f"{next_idx}.png"))
        shutil.copyfile(os.path.join(labels_dir, f"{n}.txt"), os.path.join(labels_dir, f"{next_idx}.txt"))
        next_idx += 1
    return next_idx


train_idx = _adopt_augmented_samples("train", train_idx)
test_idx = _adopt_augmented_samples("test", test_idx)


Initialised with 113 image(s) found.
Output directory set to c:\Users\fdavi\Desktop\LymphomAug\data\test\images.

Processing <PIL.Image.Image image mode=RGB size=312x417 at 0x1F584A1DFA0>: 100%|██████████| 600/600 [00:02<00:00, 238.33 Samples/s]


Initialised with 261 image(s) found.
Output directory set to c:\Users\fdavi\Desktop\LymphomAug\data\train\images.

Processing <PIL.Image.Image image mode=RGB size=312x417 at 0x1F584AEDE20>: 100%|██████████| 1400/1400 [00:05<00:00, 242.13 Samples/s]
