In [1]:
import glob
import os
import pickle
import xml.etree.ElementTree as ET
from os import listdir, getcwd
from os.path import join
from PIL import Image
import pandas as pd

In [2]:
import sys
sys.path.append("../")

from src.constants import *
from src.training_utils.dataset import *

In [3]:
# Directory holding the image tiles that get converted.
# Both paths are later joined with file names via plain `+`, so the trailing "/" is required.
BASE_DIR_IMG = f"{PATH}/data/tiles/image_slices/"
# Directory holding one XML annotation per tile (same base name, ".xml" extension).
BASE_DIR_XML = f"{PATH}/data/tiles/xml_slices/"

In [4]:
# Train/test split tables. They are read relative to PATH (like BASE_DIR_IMG / BASE_DIR_XML)
# instead of a machine-specific absolute path, so the notebook runs on any checkout.
# The cells below read the columns image_path, xmin, xmax, ymin and ymax from them.
train_df = pd.read_csv(f"{PATH}/data/tiles/train_cardinalidades_2023_fixed.csv")
test_df = pd.read_csv(f"{PATH}/data/tiles/test_cardinalidades_2023_fixed.csv")

In [5]:
# NOTE(review): the mapping loaded from CLASSES_CSV is overwritten by the literal dict
# right below, so only the hard-coded ids are ever used — confirm whether the call is
# still needed.
le_dict = get_encoder_dict(CLASSES_CSV)

# Class name -> 1-based class id (convert_annotation subtracts 1 to get 0-based YOLO ids).
le_dict = {'muchos_opcional': 2,
           'muchos_obligatorio': 1,
           'uno_opcional': 3,
           'uno_obligatorio': 4}

In [6]:
# Annotation class names kept during conversion; objects with any other name are skipped.
CLASSES = ['muchos_opcional','muchos_obligatorio','uno_opcional','uno_obligatorio']
# Directories whose images are converted by the processing loop.
SLICES_DIR = [BASE_DIR_IMG]

In [7]:
def get_images_in_dir(dir_path):
    """Return the paths of all .png and .jpg files directly inside ``dir_path``.

    The directory and the glob pattern are combined with ``os.path.join``, so the
    result is the same whether or not ``dir_path`` ends with a path separator
    (plain string concatenation silently matched nothing without it).

    Parameters
    ----------
    dir_path : str
        Directory to scan (not recursive).

    Returns
    -------
    list of str
        All .png matches followed by all .jpg matches; empty if nothing matches.
    """
    image_list = []
    for pattern in ('*.png', '*.jpg'):
        image_list.extend(glob.glob(os.path.join(dir_path, pattern)))
    return image_list

In [8]:
def convert(size, box):
    """Turn a pixel box into a centre/size box scaled by the image dimensions.

    Parameters
    ----------
    size : sequence
        ``(image_width, image_height)``.
    box : sequence
        ``(xmin, xmax, ymin, ymax)`` in pixels.

    Returns
    -------
    tuple
        ``(x_center, y_center, width, height)``, each multiplied by the reciprocal
        of the matching image dimension.
    """
    inv_width = 1./(size[0])
    inv_height = 1./(size[1])

    # The centre is shifted by one pixel before scaling
    # (presumably to account for 1-based pixel coordinates — TODO confirm).
    center_x = (box[0] + box[1])/2.0 - 1
    center_y = (box[2] + box[3])/2.0 - 1
    box_width = box[1] - box[0]
    box_height = box[3] - box[2]

    return (center_x*inv_width,
            center_y*inv_height,
            box_width*inv_width,
            box_height*inv_height)

In [9]:
def pad_coords(b, w, h):
    """Clamp a box to the image bounds.

    The lower-bound checks previously compared xmin/ymin against the image width
    and height, which reset them to 0 for practically every box; they are now
    clamped only when negative. The input sequence is no longer modified.

    Parameters
    ----------
    b : sequence
        ``(xmin, xmax, ymin, ymax)`` in pixels.
    w, h : number
        Image width and height in pixels.

    Returns
    -------
    tuple
        ``(xmin, xmax, ymin, ymax)`` with xmin/ymin raised to 0 when negative and
        xmax/ymax lowered to ``w``/``h`` when they exceed the image.
    """
    xmin, xmax, ymin, ymax = b
    if xmax > w:
        xmax = w
    if ymax > h:
        ymax = h
    if xmin < 0:
        xmin = 0
    if ymin < 0:
        ymin = 0

    return (xmin, xmax, ymin, ymax)

In [10]:
def convert_annotation(dir_path, output_path, image_path):
    '''
    Write the YOLO label file for one image from its XML annotation.

    dir_path -> image_slices/ directory (accepted for the caller's signature; not read here)
    output_path -> yolo_cardinalidades/train/ or yolo_cardinalidades/test/ (joined to the
                   file name with plain `+`, so it must end with a path separator)
    image_path -> .png/.jpg image whose annotation is converted

    The annotation is read from BASE_DIR_XML + <image base name> + '.xml' and the
    labels are written to output_path + <image base name> + '.txt', one line per
    kept object: "<class_id> <x> <y> <w> <h>".
    '''
    basename = os.path.basename(image_path)
    basename_no_ext = os.path.splitext(basename)[0]  # Image name without its extension

    # Context managers make sure the XML file and the image are closed again.
    with open(BASE_DIR_XML + basename_no_ext + '.xml') as in_file:
        root = ET.parse(in_file).getroot()

    with Image.open(image_path) as img:
        w, h = img.size

    # Boxes recorded for this image in the train/test tables. They do not depend
    # on the object being processed, so they are looked up once per image.
    coord_cols = ['xmin', 'xmax', 'ymin', 'ymax']
    train_boxes = train_df[train_df['image_path'].str.contains(basename_no_ext)][coord_cols].values
    test_boxes = test_df[test_df['image_path'].str.contains(basename_no_ext)][coord_cols].values

    with open(output_path + basename_no_ext + '.txt', 'w+') as out_file:
        for obj in root.iter('object'):
            cls = obj.find('name').text
            if cls not in CLASSES:
                continue
            cls_id = le_dict[cls] - 1  # So that class ids start at 0
            xmlbox = obj.find('bndbox')
            b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text),
                 float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text))
            # Skip boxes that appear in neither table.
            # NOTE(review): `b in <ndarray>` is evaluated by NumPy as an element-wise
            # comparison followed by any(), so this also passes when a single
            # coordinate matches rather than the whole row — confirm this is intended.
            if b not in train_boxes and b not in test_boxes:
                continue
            b = pad_coords(b=list(b), w=w, h=h)
            bb = convert((w, h), b)
            out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')
# Current working directory; not referenced by any of the cells visible in this section.
cwd = getcwd()

In [11]:
# Convert every tile's XML annotation into a YOLO label file. A tile goes to
# train/ when its path is listed in train_df and to test/ otherwise.
train_image_paths = set(train_df['image_path'])  # built once instead of once per image
output_path = f"{PATH}/data/tiles/yolo_cardinalidades/"
os.makedirs(output_path, exist_ok=True)

for dir_path in SLICES_DIR:
    image_paths = get_images_in_dir(dir_path)

    for image_path in image_paths:
        current_folder = "train/" if image_path in train_image_paths else "test/"
        out_path = output_path + current_folder
        os.makedirs(out_path, exist_ok=True)

        convert_annotation(dir_path, out_path, image_path)

    print("Finished processing: " + dir_path)

Finished processing: /home/nacho/TFI-Cazcarra/data/tiles/image_slices/


## Copy files to train/ and test/

In [12]:
import shutil

# Collect the tiles from every configured slices directory explicitly, rather than
# relying on the `dir_path` loop variable left behind by the conversion cell.
all_images = [
    img_path
    for slices_dir in SLICES_DIR
    for img_path in get_images_in_dir(slices_dir)
]

In [13]:
# Copy each tile into <split>/images/, where the split is "train" when the tile's
# path is listed in train_df and "test" otherwise.
train_image_paths = set(train_df['image_path'])  # built once instead of once per image

for img in all_images:
    current_folder = "train" if img in train_image_paths else "test"
    # Built from PATH (like the other data directories) instead of an absolute path.
    images_dir = f"{PATH}/data/tiles/yolo_cardinalidades/{current_folder}/images/"
    os.makedirs(images_dir, exist_ok=True)
    dst = images_dir + os.path.basename(img)

    print(img, dst)
    shutil.copyfile(img, dst)

/home/nacho/TFI-Cazcarra/data/tiles/image_slices/000413.png /home/nacho/TFI-Cazcarra/data/tiles/yolo_cardinalidades/train/images/000413.png
/home/nacho/TFI-Cazcarra/data/tiles/image_slices/000005.png /home/nacho/TFI-Cazcarra/data/tiles/yolo_cardinalidades/test/images/000005.png
/home/nacho/TFI-Cazcarra/data/tiles/image_slices/000403.png /home/nacho/TFI-Cazcarra/data/tiles/yolo_cardinalidades/test/images/000403.png
/home/nacho/TFI-Cazcarra/data/tiles/image_slices/000094.png /home/nacho/TFI-Cazcarra/data/tiles/yolo_cardinalidades/train/images/000094.png
/home/nacho/TFI-Cazcarra/data/tiles/image_slices/000273.png /home/nacho/TFI-Cazcarra/data/tiles/yolo_cardinalidades/train/images/000273.png
/home/nacho/TFI-Cazcarra/data/tiles/image_slices/000429.png /home/nacho/TFI-Cazcarra/data/tiles/yolo_cardinalidades/train/images/000429.png
/home/nacho/TFI-Cazcarra/data/tiles/image_slices/000341.png /home/nacho/TFI-Cazcarra/data/tiles/yolo_cardinalidades/train/images/000341.png
/home/nacho/TFI-Cazcar

/home/nacho/TFI-Cazcarra/data/tiles/image_slices/000439.jpg /home/nacho/TFI-Cazcarra/data/tiles/yolo_cardinalidades/train/images/000439.jpg
/home/nacho/TFI-Cazcarra/data/tiles/image_slices/000319.jpg /home/nacho/TFI-Cazcarra/data/tiles/yolo_cardinalidades/train/images/000319.jpg
/home/nacho/TFI-Cazcarra/data/tiles/image_slices/000303.jpg /home/nacho/TFI-Cazcarra/data/tiles/yolo_cardinalidades/test/images/000303.jpg
/home/nacho/TFI-Cazcarra/data/tiles/image_slices/000321.jpg /home/nacho/TFI-Cazcarra/data/tiles/yolo_cardinalidades/train/images/000321.jpg
/home/nacho/TFI-Cazcarra/data/tiles/image_slices/000291.jpg /home/nacho/TFI-Cazcarra/data/tiles/yolo_cardinalidades/train/images/000291.jpg
/home/nacho/TFI-Cazcarra/data/tiles/image_slices/000317.jpg /home/nacho/TFI-Cazcarra/data/tiles/yolo_cardinalidades/train/images/000317.jpg
/home/nacho/TFI-Cazcarra/data/tiles/image_slices/000318.jpg /home/nacho/TFI-Cazcarra/data/tiles/yolo_cardinalidades/train/images/000318.jpg
/home/nacho/TFI-Cazca

In [14]:
# Move the generated .txt label files of each split into its labels/ sub-directory.
for current_dir in ["train", "test"]:
    # Built from PATH (like the other data directories) instead of an absolute path.
    split_dir = f"{PATH}/data/tiles/yolo_cardinalidades/{current_dir}/"
    labels_dir = split_dir + "labels/"
    os.makedirs(labels_dir, exist_ok=True)

    for label_file in glob.glob(split_dir + "*.txt"):
        dst = labels_dir + os.path.basename(label_file)

        print(label_file, dst)
        shutil.move(label_file, dst)

/home/nacho/TFI-Cazcarra/data/tiles/yolo_cardinalidades/train/000020.txt /home/nacho/TFI-Cazcarra/data/tiles/yolo_cardinalidades/train/labels/000020.txt
/home/nacho/TFI-Cazcarra/data/tiles/yolo_cardinalidades/train/000258.txt /home/nacho/TFI-Cazcarra/data/tiles/yolo_cardinalidades/train/labels/000258.txt
/home/nacho/TFI-Cazcarra/data/tiles/yolo_cardinalidades/train/000333.txt /home/nacho/TFI-Cazcarra/data/tiles/yolo_cardinalidades/train/labels/000333.txt
/home/nacho/TFI-Cazcarra/data/tiles/yolo_cardinalidades/train/000197.txt /home/nacho/TFI-Cazcarra/data/tiles/yolo_cardinalidades/train/labels/000197.txt
/home/nacho/TFI-Cazcarra/data/tiles/yolo_cardinalidades/train/000381.txt /home/nacho/TFI-Cazcarra/data/tiles/yolo_cardinalidades/train/labels/000381.txt
/home/nacho/TFI-Cazcarra/data/tiles/yolo_cardinalidades/train/000436.txt /home/nacho/TFI-Cazcarra/data/tiles/yolo_cardinalidades/train/labels/000436.txt
/home/nacho/TFI-Cazcarra/data/tiles/yolo_cardinalidades/train/000424.txt /home/nac