In [None]:
import pandas as pd
import shutil
import os
import numpy as np
import xml.etree.ElementTree as ET

In [None]:
# Text index with one image path per line (presumably the files available on
# scad — confirm against whoever generated the index).
images_on_scad_path = "/scratch/gpfs/RUSTOW/deskewing_datasets/images/jts_images/indexes/all_images_on_scad"
with open(images_on_scad_path, "r") as file:
    images_on_scad = file.read().split("\n")

# Map "path without extension" (forward slashes) -> extension found in the
# index, so a converted image can be traced back to its original file name.
images_on_scad_extension_map = {}
for image in images_on_scad:
    # Skip blank lines (e.g. the empty string left by a trailing newline);
    # otherwise they add a meaningless "" -> "" entry to the map.
    if not image.strip():
        continue
    # NOTE: the key is everything before the FIRST dot. This must stay in sync
    # with convert_full_image_path_to_scad_path, which derives its lookup key
    # the same way.
    path_without_extension = image.split(".")[0]
    path_without_extension = path_without_extension.replace("\\", "/")
    extension = image.split(".")[-1]
    images_on_scad_extension_map[path_without_extension] = extension

In [None]:
# CSV with the ground-truth skew angles (read with pd.read_csv below).
ground_truth_data_path = (
    "/scratch/gpfs/RUSTOW/deskewing_datasets/images/jts_images"
    "/skew_information/ground_truth_image_data.csv"
)
# Text file whose lines have the form "<source> -> <destination>".
images_map = (
    "/scratch/gpfs/RUSTOW/deskewing_datasets/images/jts_images"
    "/small_images_flat/file_mapping"
)

In [None]:
# Load the ground-truth table and derive `normalized_path`, the key used to
# join against `source_no_prefix` from the file mapping.
ground_truth_df = pd.read_csv(ground_truth_data_path)
# regex=False makes both replacements literal on every pandas version: before
# pandas 2.0 the default was regex=True, where the "." in ".tif" matches any
# character (so e.g. "motif" would also be rewritten).
ground_truth_df["normalized_path"] = ground_truth_df["file_path"].str.replace("\\", "/", regex=False)
# Drop the first three characters (presumably the "Z:/" drive prefix that
# convert_full_image_path_to_scad_path adds back — confirm against the CSV).
ground_truth_df["normalized_path"] = ground_truth_df["normalized_path"].str[3:]
ground_truth_df["normalized_path"] = ground_truth_df["normalized_path"].str.replace(".tif", ".jpg", regex=False)

In [None]:
# Parse the mapping file: each row is a single string "<source> -> <destination>".
images_map_df = pd.read_csv(images_map, header=None)
images_map_df.columns = ["mapping"]
# Literal backslash replacement; regex=False keeps this independent of the
# pandas version's default for `regex`.
images_map_df["mapping"] = images_map_df["mapping"].str.replace("\\", "/", regex=False)
images_map_df[["source", "destination"]] = images_map_df["mapping"].str.split(" -> ", expand=True)
images_map_df = images_map_df.drop(columns=["mapping"])

# Strip the small_images root so `source_no_prefix` is comparable with
# ground_truth_df["normalized_path"]. The slice assumes every source starts
# with this prefix; it is not checked here.
prefix = "/scratch/gpfs/RUSTOW/deskewing_datasets/images/jts_images/small_images/"
images_map_df["source_no_prefix"] = images_map_df["source"].str[len(prefix) :]

In [None]:
# Lookup from flattened file location (destination) back to its original source path.
# If a destination appears more than once, the last row wins.
images_map_dict = dict(zip(images_map_df["destination"], images_map_df["source"]))

In [None]:
# Display the ground-truth table (including the derived normalized_path column) for inspection.
ground_truth_df

In [None]:
# Join ground-truth rows to the file mapping on the relative image path,
# keep only the columns needed downstream, and give them their final names.
keep = ["file_path", "source", "destination", "angle"]
rename = {"file_path": "scad_path", "source": "della_source", "destination": "flat_file_location"}
merged_df = (
    ground_truth_df
    .merge(images_map_df, left_on="normalized_path", right_on="source_no_prefix")
    [keep]
    .rename(columns=rename)
)

In [None]:
def compute_angle(x1, y1, x2, y2):
    """Return the negated angle, in degrees, of the vector (x1 - x2, y2 - y1).

    The value is ``-degrees(atan2(y2 - y1, x1 - x2))``. Note the mixed
    ordering of the operands: the horizontal delta is taken from point 2 to
    point 1, the vertical delta from point 1 to point 2.
    """
    horizontal_delta = x1 - x2
    vertical_delta = y2 - y1
    angle_in_radians = np.arctan2(vertical_delta, horizontal_delta)
    return -np.degrees(angle_in_radians)

In [None]:
def get_full_image_path(name):
    """Join ``name`` onto the small_images_flat directory and return the result."""
    flat_images_dir = "/scratch/gpfs/RUSTOW/deskewing_datasets/images/jts_images/small_images_flat/"
    return os.path.join(flat_images_dir, name)

In [None]:
def convert_full_image_path_to_scad_path(path):
    """Translate a path under the small_images root into a ``Z:\\``-style path.

    The small_images root is sliced off the front of ``path`` (it is not
    checked that ``path`` actually starts with it), everything from the first
    dot onwards is dropped, and the extension recorded in
    ``images_on_scad_extension_map`` for that stem is appended. The result is
    prefixed with ``Z:/`` and all forward slashes become backslashes.

    Raises:
        KeyError: if the stem has no entry in ``images_on_scad_extension_map``.
    """
    della_root = "/scratch/gpfs/RUSTOW/deskewing_datasets/images/jts_images/small_images/"
    relative_stem = path[len(della_root):].split(".")[0]
    scad_extension = images_on_scad_extension_map[relative_stem]
    forward_slash_path = "Z:/" + relative_stem + "." + scad_extension
    return forward_slash_path.replace("/", "\\")

In [None]:
def parse_xml_and_calculate_angles(file_path):
    """Extract one annotation record per qualifying polyline in an XML file.

    Every ``<image>`` element in the document is visited, and each of its
    direct ``<polyline>`` children is inspected. A polyline contributes a
    record only when its ``points`` attribute holds exactly two ``x,y`` pairs
    (separated by ``;``) and the first point's x is strictly greater than the
    second's; all other polylines are silently ignored.

    Returns:
        A list of dicts with keys ``scad_path``, ``della_source``,
        ``flat_file_location`` and ``angle``.

    Raises:
        KeyError: if a qualifying image's flat location is missing from
            ``images_map_dict``.
    """
    records = []
    for image_node in ET.parse(file_path).getroot().iter("image"):
        flat_name = image_node.get("name")
        for polyline_node in image_node.findall("polyline"):
            vertices = polyline_node.get("points").split(";")
            if len(vertices) != 2:
                continue
            first_vertex, second_vertex = vertices
            x1, y1 = (float(value) for value in first_vertex.split(","))
            x2, y2 = (float(value) for value in second_vertex.split(","))
            # Written as `not x1 > x2` (rather than `x1 <= x2`) so NaN
            # coordinates are skipped as well.
            if not x1 > x2:
                continue
            skew_angle = compute_angle(x1, y1, x2, y2)
            flat_location = get_full_image_path(flat_name)
            # The lookup happens only for qualifying polylines, so images
            # without one never need a mapping entry.
            della_source = images_map_dict[flat_location]
            records.append(
                {
                    "scad_path": convert_full_image_path_to_scad_path(della_source),
                    "della_source": della_source,
                    "flat_file_location": flat_location,
                    "angle": skew_angle,
                }
            )
    return records

In [None]:
# XML annotation files to parse.
files = [
    "/scratch/gpfs/RUSTOW/deskewing_datasets/images/jts_images/skew_information/1",
    "/scratch/gpfs/RUSTOW/deskewing_datasets/images/jts_images/skew_information/2",
    "/scratch/gpfs/RUSTOW/deskewing_datasets/images/jts_images/skew_information/3",
]
# Flatten the per-file record lists into one list, preserving file order.
xml_annotations = [
    record
    for xml_path in files
    for record in parse_xml_and_calculate_angles(xml_path)
]

xml_annotations_df = pd.DataFrame(xml_annotations)
xml_annotations_df

In [None]:
# Append the XML-derived annotations to the CSV-derived ones.
# NOTE: merged_df is rebound to the combined frame, so re-running this cell
# without re-running the merge cell appends the XML rows a second time.
merged_df = pd.concat(
    objs=[merged_df, xml_annotations_df],
    ignore_index=True,
)
merged_df

In [None]:
# Display the combined annotation table again before it is written to CSV.
merged_df

In [None]:
# Write the combined annotations to CSV without the index column.
merged_df.to_csv(
    path_or_buf="/scratch/gpfs/RUSTOW/deskewing_datasets/images/jts_images/skew_information/small_images_flat_image_annotations.csv",
    index=False,
)

In [None]:
# Move every annotated image out of the flat folder into the "processed" folder.
destination_folder = "/scratch/gpfs/RUSTOW/deskewing_datasets/images/jts_images/small_images_flat_processed"
# shutil.move does not create missing parent directories, so make sure the
# target folder exists before moving anything into it.
os.makedirs(destination_folder, exist_ok=True)
count = 0
for image_source in merged_df["flat_file_location"]:
    image_name = os.path.basename(image_source)
    image_destination_path = os.path.join(destination_folder, image_name)
    # Sources that no longer exist (already moved on a previous run, or listed
    # more than once in merged_df) are skipped, which keeps the cell re-runnable.
    if os.path.exists(image_source):
        shutil.move(image_source, image_destination_path)
        count += 1

print(f"moved {count} images")

In [None]:
from SkewNet.utils.image_utils import rotate_image
import cv2
import matplotlib.pyplot as plt

# Visual spot-check: pick one random annotation with a non-zero angle, load
# the image from the processed folder, rotate it by the annotated angle and
# show the result.
non_zero_angles = merged_df[merged_df["angle"] != 0]
non_zero_angles = non_zero_angles.reset_index(drop=True)
if non_zero_angles.empty:
    # np.random.randint(0, 0) would raise a much less helpful ValueError.
    raise ValueError("no annotations with a non-zero angle to preview")
i = np.random.randint(0, len(non_zero_angles))
image_path = non_zero_angles.loc[i, "flat_file_location"]
# Rewrite the flat-folder path to point at the processed folder.
image_path = image_path.replace("small_images_flat", "small_images_flat_processed")
print(image_path)
image = cv2.imread(image_path)
if image is None:
    # cv2.imread signals an unreadable or missing file by returning None
    # instead of raising; fail here with the offending path rather than
    # inside cvtColor.
    raise FileNotFoundError(f"cv2.imread could not read image: {image_path}")
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
angle = non_zero_angles.loc[i, "angle"]
# rotate_image is called with the angle converted to radians.
radians = np.radians(angle)
image = rotate_image(image, radians)
plt.imshow(image)
plt.show()