In [53]:
####################################################################################################################################################
## LIBRARIES:
import glob
import os
import pandas as pd
import numpy as np
import cv2
import shutil
####################################################################################################################################################
## FUNCTIONS:
def visualization(img_path, ann_df):
    """Show an image with its quadrilateral annotations drawn on top.

    Each row of ``ann_df`` is drawn as a closed green polygon and its four
    corners are labelled "1" to "4" in the order they are stored, so the
    corner orientation can be checked visually.

    Args:
        img_path: Path of the image file to load with ``cv2.imread``.
        ann_df: DataFrame with the columns ``x1, y1, x2, y2, x3, y3, x4, y4``
            (one polygon per row).

    Raises:
        FileNotFoundError: If the image cannot be loaded from ``img_path``.
    """
    # The image is loaded (cv2.imread returns None instead of raising on failure):
    imagen = cv2.imread(img_path)
    if imagen is None:
        raise FileNotFoundError("The image \"{}\" could not be loaded.".format(img_path))

    # Each annotation is run through and its vertices are extracted to draw it:
    for _, row in ann_df.iterrows():
        vert = np.array([[row["x1"], row["y1"]],
                         [row["x2"], row["y2"]],
                         [row["x3"], row["y3"]],
                         [row["x4"], row["y4"]]], dtype=np.int32).reshape((-1, 1, 2))
        cv2.polylines(imagen, [vert], True, (0, 255, 0), 2)

        # Corner labels follow the stored order: 1 = upper-left, 2 = upper-right,
        # 3 = lower-right, 4 = lower-left (for annotations in ICDAR15 order).
        for corner_idx in range(4):
            # "vert" has shape (4, 1, 2); the coordinates are cast to plain ints
            # because cv2.putText expects a tuple of Python integers.
            origin = (int(vert[corner_idx, 0, 0]), int(vert[corner_idx, 0, 1]))
            cv2.putText(imagen, str(corner_idx + 1), origin,
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 1)

    # Show the image with the polygons (blocks until a key is pressed):
    cv2.imshow('Imagen con Polígono', imagen)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
####################################################################################################################################################
## MAIN:
# Determine the working directory and set the path of the final folders.
# os.path.join is used instead of a hard-coded backslash so the paths are
# built with the separator of the platform the script runs on:
work_path = os.getcwd()
ICDAR15_im_path = os.path.join(work_path, "Scene_Text_Datasets", "Training_Images")
ICDAR15_ann_path = os.path.join(work_path, "Scene_Text_Datasets", "Training_Annotations")

# Set the path of images and annotations to be modified:
ICDAR13_im_path = os.path.join(work_path, "Challenge2_Training_Task12_Images")
ICDAR13_ann_path = os.path.join(work_path, "Challenge2_Training_Task1_GT")


def _img_number(img_file):
    """Return the trailing number of an image path such as ".../img_123.jpg"."""
    # basename -> "img_123.jpg"; splitext -> "img_123"; split("_")[-1] -> "123".
    stem = os.path.splitext(os.path.basename(img_file))[0]
    return int(stem.split("_")[-1])


# Each of the image type files within the ICDAR15 folder is browsed and collected:
all_ICDAR15_imgs = []
for ext in ["jpeg", "jpg", "png"]:
    all_ICDAR15_imgs.extend(glob.glob(os.path.join(ICDAR15_im_path, "*.{}".format(ext))))

# Determine which ICDAR15 file has the highest numbering (the last one) and extract the number.
# "default=None" avoids a ValueError when the ICDAR15 folder holds no images yet:
all_ICDAR15_img = max(all_ICDAR15_imgs, key=_img_number, default=None)

# NOTE : The file name is taken with "os.path.basename(...)", i.e.: "img_xxx.ext". The extension is
# removed to get: "img_xxx". Finally, the name is divided by the underscore and the last element of
# the list (-1) is chosen, which is the number. A "+1" is added to the maximum number so that it can
# be used for the first new file. If there are no ICDAR15 images, the numbering starts at 1.
last_num = 1 if all_ICDAR15_img is None else _img_number(all_ICDAR15_img) + 1

# Each of the image type files within the ICDAR13 folder is browsed and collected:
all_ICDAR13_imgs = []
for ext in ["jpeg", "jpg", "png"]:
    all_ICDAR13_imgs.extend(glob.glob(os.path.join(ICDAR13_im_path, "*.{}".format(ext))))

# Each ICDAR13 image is converted to the ICDAR15 annotation layout and moved,
# together with its TXT file, into the ICDAR15 folders:
for img in all_ICDAR13_imgs:

    # The file name, its stem and its extension are taken with os.path so that this
    # does not depend on the path separator being a backslash:
    img_name = os.path.basename(img)
    img_stem, img_ext = os.path.splitext(img_name)

    # The TXT attached to the selected ICDAR13 image is "gt_<image stem>.txt"; the image
    # is skipped if that file does not exist:
    txt_path = os.path.join(ICDAR13_ann_path, "gt_" + img_stem + ".txt")
    if not os.path.exists(txt_path):
        print("The image \"{}\" has no annotations attached and is discarded.".format(img_name))
        continue #Go to the next image

    # The annotation file is loaded as space-separated "left top right bottom text" rows
    # (NOTE(review): assumes every GT line follows that layout - confirm against the dataset):
    txt_df = pd.read_csv(txt_path, delimiter=" ", names=["left", "top", "right", "bottom", "text"])

    # The axis-aligned box is expanded into four corners in clockwise order starting at the
    # upper-left one. Commas inside the text are replaced by spaces because the comma is the
    # field separator of the file written below:
    new_txt_df = pd.DataFrame({
        "x1": txt_df["left"],    #Upper-left corner
        "y1": txt_df["top"],     #Upper-left corner
        "x2": txt_df["right"],   #Upper-right corner
        "y2": txt_df["top"],     #Upper-right corner
        "x3": txt_df["right"],   #Lower-right corner
        "y3": txt_df["bottom"],  #Lower-right corner
        "x4": txt_df["left"],    #Lower-left corner
        "y4": txt_df["bottom"],  #Lower-left corner
        "text": txt_df["text"].astype("str").apply(lambda x: x.replace(",", " ")),
    })

    # The converted annotations are saved back over the original TXT (comma-separated, no header):
    new_txt_df.to_csv(txt_path, sep=",", index=False, header=False)
    #-> visualization(img, new_txt_df) #Can be visualised if desired to check corner orientation

    # The extension of the ICDAR13 image to be moved (without the leading dot):
    ext = img_ext[1:]

    # The new ICDAR13 image/TXT destination path with renaming is generated:
    new_img_ICDAR15_path = os.path.join(ICDAR15_im_path, "img_{}.{}".format(last_num, ext))
    new_ann_ICDAR15_path = os.path.join(ICDAR15_ann_path, "gt_img_{}.txt".format(last_num))

    # The image and its TXT are moved and the numbering is updated:
    shutil.move(img, new_img_ICDAR15_path)
    shutil.move(txt_path, new_ann_ICDAR15_path)
    print("The image \"{}\" has been passed with successfully.".format(img_name))
    last_num += 1
####################################################################################################################################################

La imagen "102.jpg" se ha pasado con exito.
La imagen "103.jpg" se ha pasado con exito.
La imagen "104.jpg" se ha pasado con exito.
La imagen "105.jpg" se ha pasado con exito.
La imagen "106.jpg" se ha pasado con exito.
La imagen "107.jpg" se ha pasado con exito.
La imagen "108.jpg" se ha pasado con exito.
La imagen "109.jpg" se ha pasado con exito.
La imagen "110.jpg" se ha pasado con exito.
La imagen "111.jpg" se ha pasado con exito.
La imagen "112.jpg" se ha pasado con exito.
La imagen "113.jpg" se ha pasado con exito.
La imagen "114.jpg" se ha pasado con exito.
La imagen "115.jpg" se ha pasado con exito.
La imagen "116.jpg" se ha pasado con exito.
La imagen "117.jpg" se ha pasado con exito.
La imagen "118.jpg" se ha pasado con exito.
La imagen "119.jpg" se ha pasado con exito.
La imagen "120.jpg" se ha pasado con exito.
La imagen "121.jpg" se ha pasado con exito.
La imagen "122.jpg" se ha pasado con exito.
La imagen "123.jpg" se ha pasado con exito.
La imagen "124.jpg" se ha pasado