In [115]:
import pandas as pd
import numpy as np
import cv2, os
import glob
import matplotlib.pyplot as plt
import pathlib
import shutil
from sklearn.model_selection import train_test_split
import pathlib

In [96]:
def normalize_keypoints(keypoints, width, height):
    """Scale keypoints so each coordinate is a fraction of the image size.

    Args:
        keypoints: Iterable of ``(x, y)`` pairs.
        width: Divisor applied to every x coordinate.
        height: Divisor applied to every y coordinate.

    Returns:
        A list of ``[x / width, y / height]`` lists, in input order.
    """
    return [[px / width, py / height] for px, py in keypoints]


def xyxy_to_xywh_normalized(xyxy_bbox, image_width, image_height):
    """Convert a corner-format box into a normalized center-format box.

    Args:
        xyxy_bbox: Four values ``(x_min, y_min, x_max, y_max)``.
        image_width: Divisor for the x center and the box width.
        image_height: Divisor for the y center and the box height.

    Returns:
        Tuple ``(x_center, y_center, width, height)``, each divided by the
        matching image dimension.
    """
    left, top, right, bottom = xyxy_bbox
    box_w = right - left
    box_h = bottom - top

    # The center is the min corner plus half of the extent.
    center_x = left + box_w / 2
    center_y = top + box_h / 2

    return (
        center_x / image_width,
        center_y / image_height,
        box_w / image_width,
        box_h / image_height,
    )


def keypoints_to_bbox(keypoints):
    """Return the axis-aligned box that encloses a set of points.

    Args:
        keypoints: Collection of points; index 0 of each point is read as x
            and index 1 as y.

    Returns:
        Tuple ``(x_min, y_min, x_max, y_max)``.
    """
    xs = [pt[0] for pt in keypoints]
    ys = [pt[1] for pt in keypoints]
    return min(xs), min(ys), max(xs), max(ys)


def show_polygons(dir_image, url, points):
    """Draw a closed polygon on an image and display it with matplotlib.

    Args:
        dir_image: Directory that contains the image file.
        url: Path or URL of the image; only its basename is used to locate
            the file inside ``dir_image``.
        points: Polygon vertices as ``(x, y)`` pairs.

    Raises:
        FileNotFoundError: If OpenCV cannot read the image file.
    """
    image_path = os.path.join(dir_image, os.path.basename(url))
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise FileNotFoundError(f"Could not read image: {image_path}")

    # Vertices as int32, reshaped to (N, 1, 2) before being handed to polylines.
    pts = np.array(points, np.int32).reshape((-1, 1, 2))
    image = cv2.polylines(image, [pts], True, (0, 255, 255), 5)

    # OpenCV loads images as BGR while matplotlib interprets arrays as RGB;
    # convert so the displayed colors are not channel-swapped.
    plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    plt.show()


def show_bounding_box(dir_image, url, bboxes):
    """Draw one rectangle on an image and display it with matplotlib.

    Args:
        dir_image: Directory that contains the image file.
        url: Path or URL of the image; only its basename is used to locate
            the file inside ``dir_image``.
        bboxes: Four values ``(x_min, y_min, x_max, y_max)``; each is
            truncated to ``int`` before drawing.

    Raises:
        FileNotFoundError: If OpenCV cannot read the image file.
    """
    image_path = os.path.join(dir_image, os.path.basename(url))
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise FileNotFoundError(f"Could not read image: {image_path}")

    start_pt = (int(bboxes[0]), int(bboxes[1]))
    end_pt = (int(bboxes[2]), int(bboxes[3]))

    print(start_pt)
    print(end_pt)

    image = cv2.rectangle(image, start_pt, end_pt, (0, 255, 0), 2)

    # OpenCV loads images as BGR while matplotlib interprets arrays as RGB;
    # convert so the displayed colors are not channel-swapped.
    plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    plt.show()


def move_file(source_folder, destination_folder, file_name):
    """Move ``file_name`` from ``source_folder`` into ``destination_folder``.

    The destination folder is created when it does not exist. Problems are
    reported with ``print`` rather than raised, so a batch of moves keeps
    going after a single failure.

    Args:
        source_folder: Folder that currently holds the file.
        destination_folder: Folder the file should end up in.
        file_name: Name of the file inside ``source_folder``.

    Returns:
        ``True`` if the file was moved, ``False`` if the source file was
        missing or the move failed.
    """
    # Check if the source file exists
    source_path = os.path.join(source_folder, file_name)
    if not os.path.exists(source_path):
        print(f"Source: {source_path}, file '{file_name}' not found in '{source_folder}'.")
        return False

    # Construct the destination path
    destination_path = os.path.join(destination_folder, file_name)

    try:
        # exist_ok=True already tolerates an existing folder, so no separate
        # existence check is needed. Creating it inside the try reports
        # folder-creation errors the same way as move errors.
        os.makedirs(destination_folder, exist_ok=True)
        shutil.move(source_path, destination_path)
    except OSError as e:  # shutil.Error is a subclass of OSError
        print(f"Failed to move the file: {e}")
        return False

    print(f"File '{file_name}' moved from '{source_folder}' to '{destination_folder}'.")
    return True



In [None]:
# For every sub-folder of `root`: sort loose *.json / *.jpg files into
# label_json/ and images/, then write one label line per annotation file into
# label_txt/ (class id, normalized xywh box, four normalized keypoints).
root = "/home/fit/optivisionlab/scan-bill/bill_printed"
dires = os.listdir(root)
for folder_name in dires:
    folder = os.path.join(root, folder_name)
    if not os.path.isdir(folder):
        # os.listdir also returns plain files; sub-folders cannot be created under them.
        continue
    print(folder_name)

    label_txt_dir = os.path.join(folder, "label_txt")
    label_json_dir = os.path.join(folder, "label_json")
    images_dir = os.path.join(folder, "images")
    for target_dir in (label_txt_dir, label_json_dir, images_dir):
        os.makedirs(target_dir, exist_ok=True)

    # Move with glob + shutil instead of a shell `mv`: no dependency on the
    # shell, and paths containing spaces or shell metacharacters work.
    # Passing the full destination file path keeps `mv`'s overwrite behavior.
    for pattern, target_dir in (("*.json", label_json_dir), ("*.jpg", images_dir)):
        for src_path in glob.glob(os.path.join(folder, pattern)):
            shutil.move(src_path, os.path.join(target_dir, os.path.basename(src_path)))

    files_json = glob.glob(os.path.join(label_json_dir, "*"))
    for json_path in files_json:
        print("file >>> ", json_path)
        # Positional unpack: relies on the annotation JSON having exactly seven
        # top-level entries in this order (presumably a LabelMe-style export —
        # TODO confirm against the annotation tool).
        _, _, label, file_name, _, imageHeight, imageWidth = pd.read_json(json_path, orient='index').values

        # Only the first annotated shape of each file is used.
        points = np.array(label[0][0]['points'])
        keyptsn = normalize_keypoints(keypoints=points, width=imageWidth[0], height=imageHeight[0])
        boxes = keypoints_to_bbox(keypoints=points)
        xywhn = xyxy_to_xywh_normalized(xyxy_bbox=boxes, image_width=imageWidth[0], image_height=imageHeight[0])

        # Class id 0, the box, then exactly the first four keypoints; a shape
        # with fewer than four points raises IndexError here.
        line = (0, *xywhn, *keyptsn[0], *keyptsn[1], *keyptsn[2], *keyptsn[3])
        label_path = os.path.join(label_txt_dir, pathlib.Path(file_name[0]).stem + '.txt')
        with open(label_path, 'w') as f:
            f.write(('%g ' * len(line)).rstrip() % line + '\n')

In [None]:
# Split each sub-folder's images (and their label files) 70/30 into
# images/{train,val} and labels/{train,val}. Folders with 10 or fewer
# images are left untouched.
root = "/home/fit/optivisionlab/scan-bill/bill_printed"
dires = os.listdir(root)
for folder_name in dires:
    folder = os.path.join(root, folder_name)
    # glob's result order depends on the file system; sort so that the fixed
    # random_state below always produces the same split for the same files.
    files = sorted(glob.glob(os.path.join(folder, "images/*.jpg")))
    print(folder_name, len(files))
    if len(files) <= 10:
        continue

    print("process >>>>>> ", folder_name, len(files))
    train, val = train_test_split(files, test_size=0.3, random_state=1000, shuffle=True)
    print(len(train), len(val))

    splits = (("train", train), ("val", val))

    # Create every target folder up front, before any file is moved.
    for split_name, _ in splits:
        os.makedirs(os.path.join(folder, f"images/{split_name}"), exist_ok=True)
    for split_name, _ in splits:
        os.makedirs(os.path.join(folder, f"labels/{split_name}"), exist_ok=True)

    for split_name, split_files in splits:
        for image_path in split_files:
            image_name = os.path.basename(image_path)
            move_file(
                source_folder=os.path.join(folder, "images"),
                destination_folder=os.path.join(folder, f"images/{split_name}"),
                file_name=image_name,
            )
            # The label file shares the image's stem and lives in label_txt/.
            move_file(
                source_folder=os.path.join(folder, "label_txt"),
                destination_folder=os.path.join(folder, f"labels/{split_name}"),
                file_name=pathlib.Path(image_name).stem + '.txt',
            )
