In [35]:
from csv import reader
import os
import more_itertools as mit

In [36]:
# Cumulative row boundaries (exclusive upper bounds), not per-split sizes:
# each constant is the row index at which the corresponding split ends.
TRAIN_SIZE = 1_000           # train split ends at row 1000
VAL_SIZE = 100 + TRAIN_SIZE  # val split ends 100 rows later
TEST_SIZE = 100 + VAL_SIZE   # test split ends 100 rows after that

In [37]:
# Directory names (under ./data) that receive the images of each split.
train, val, test = "aug_train", "aug_val", "aug_test"

# Create the output directories; exist_ok makes the cell safe to re-run.
for _split_dir in (train, val, test):
    os.makedirs(f"./data/{_split_dir}", exist_ok=True)

In [38]:
def recalculate_offsets_to_center(x, y, w, h):
    """Shift a box offset by half of its width/height.

    All four arguments are parsed with ``int()``. The result is
    ``x + w/2`` and ``y + h/2`` (true division, so the values are floats),
    each converted to its string representation.

    Presumably ``x``/``y`` are the top-left corner of a bounding box, which
    would make the result its center -- confirm against the label format.

    Returns:
        tuple[str, str]: ``(cx, cy)`` as strings, e.g. ``("12.0", "23.0")``.

    Raises:
        ValueError: if an argument cannot be parsed by ``int()``.
    """
    left, width = int(x), int(w)
    top, height = int(y), int(h)
    center_x = left + width / 2
    center_y = top + height / 2
    return f"{center_x}", f"{center_y}"

def fix_positions(positions_joined):
    """Rewrite a flat sequence of box values, four values per box.

    The input is consumed in chunks of four via ``mit.chunked``. For every
    chunk the first two values are replaced by the result of
    ``recalculate_offsets_to_center`` and the third and fourth values are
    passed through unchanged.

    Args:
        positions_joined: flat iterable of box values (x, y, w, h repeated).

    Returns:
        list: flat list ``[cx, cy, w, h, cx, cy, w, h, ...]``.
    """
    return [
        value
        for box in mit.chunked(positions_joined, n=4)
        for value in (*recalculate_offsets_to_center(*box), box[2], box[3])
    ]

def prep_log_line(line, new_dir):
    """Build the relocated path and rewritten CSV row for one label record.

    Args:
        line: one parsed CSV row; element 0 is the image path, the remaining
            elements are box values handed to ``fix_positions``.
        new_dir: directory name substituted for every ``augmented_test``
            component of the image path.

    Returns:
        tuple: ``(old_path, new_path, new_line)`` where ``new_line`` is the
        comma-joined row (new path followed by the fixed positions)
        terminated by a newline.
    """
    old_path = line[0]

    # Swap only whole path components equal to "augmented_test".
    new_path = '/'.join(
        new_dir if segment == "augmented_test" else segment
        for segment in old_path.split('/')
    )

    row_fields = [new_path, *fix_positions(line[1:])]
    return old_path, new_path, ','.join(row_fields) + '\n'


# Per-split results: (old_path, new_path) pairs used later to copy images,
# and the rewritten CSV rows used later to write the label files.
train_set = []
val_set = []
test_set = []

train_lines = []
val_lines = []
test_lines = []

# Rows are assigned by their 0-based position after the header, using the
# cumulative boundaries as half-open ranges:
#   [0, TRAIN_SIZE) -> train, [TRAIN_SIZE, VAL_SIZE) -> val,
#   [VAL_SIZE, TEST_SIZE) -> test. Everything from TEST_SIZE on is ignored.
# NOTE(review): the split is purely positional; if several consecutive rows
# are augmentations of the same source image, rows around a boundary may end
# up in different splits -- confirm this is acceptable.
# newline='' is what the csv module expects for files handed to csv.reader.
with open('./data/labels_test.csv', 'r', newline='') as read_object:
    csv_reader = reader(read_object)
    next(csv_reader, None)  # skip header; the default avoids StopIteration on an empty file
    for i, line in enumerate(csv_reader):
        if i >= TEST_SIZE:
            # Stop at the first unused row instead of reading one extra.
            break
        if i < TRAIN_SIZE:
            split_dir, split_pairs, split_lines = train, train_set, train_lines
        elif i < VAL_SIZE:
            split_dir, split_pairs, split_lines = val, val_set, val_lines
        else:
            split_dir, split_pairs, split_lines = test, test_set, test_lines

        old_path, new_path, new_line = prep_log_line(line, split_dir)
        split_pairs.append((old_path, new_path))
        split_lines.append(new_line)

In [39]:
# Preview the first two (old_path, new_path) pairs of each split.
tuple(split_pairs[:2] for split_pairs in (train_set, val_set, test_set))

([('./data/augmented_test/1_1.png', './data/aug_train/1_1.png'),
  ('./data/augmented_test/1_2.png', './data/aug_train/1_2.png')],
 [('./data/augmented_test/1060_1.png', './data/aug_val/1060_1.png'),
  ('./data/augmented_test/1060_2.png', './data/aug_val/1060_2.png')],
 [('./data/augmented_test/10618_1.png', './data/aug_test/10618_1.png'),
  ('./data/augmented_test/10618_2.png', './data/aug_test/10618_2.png')])

In [40]:
from PIL import Image

# Re-save every selected image at its new location, split by split
# (train, then val, then test -- same order as before).
for split_pairs in (train_set, val_set, test_set):
    for old, new in split_pairs:
        # The context manager closes the source image promptly instead of
        # leaving its file handle to the garbage collector.
        with Image.open(old) as img:
            img.save(new)

In [None]:
# Header row shared by all three label files: image name followed by five
# (x, y, w, h) groups.
LABELS_HEADER = "imagename,x1,y1,w1,h1,x2,y2,w2,h2,x3,y3,w3,h3,x4,y4,w4,h4,x5,y5,w5,h5\n"

# Write one label file per split; every entry in *_lines already ends
# with a newline, so the rows are written as-is.
for labels_path, label_lines in (
    ("train_labels.csv", train_lines),
    ("val_labels.csv", val_lines),
    ("test_labels.csv", test_lines),
):
    with open(labels_path, "w") as data_pairs:
        data_pairs.write(LABELS_HEADER)
        data_pairs.writelines(label_lines)