In [13]:
import os
import xml.etree.ElementTree as ET

xml_dir = "data/train/train"

# Create a list to store unique object names
object_names = []

# Iterate through XML files in the directory
for file_name in os.listdir(xml_dir):
    if file_name.endswith(".xml"):
        xml_path = os.path.join(xml_dir, file_name)

        # Parse the XML file
        tree = ET.parse(xml_path)
        root = tree.getroot()

        # Extract object names from the XML file
        for obj in root.findall("object"):
            name = obj.find("name").text
            if name in object_names:
                continue
            else:
                object_names.append(name)

# Print the unique object names
print("Unique Object Names:")
for name in object_names:
    print(name)

Unique Object Names:
table


In [4]:
import os
import cv2
import xml.etree.ElementTree as ET

op_img_dir      ="selected_data/images"

op_ano_dir      ="selected_data/annotation/"
desired_size    = 1024 
output_img      = "selected_data/processed_images/"
output_ano      = "selected_data/processed_anotations/"

# iterate through the input directory
for image_file in os.listdir(op_img_dir):
    if image_file.endswith(".jpg"):
        
        #saving the path to image
        image_path = os.path.join(op_img_dir, image_file)
        
        # reading the images
        img = cv2.imread(image_path)

        # resize the image while preserving aspect ratio and adding padding
        height, width   =   img.shape[:2]
        max_dim         =   max(height, width)
        ratio           =   int(desired_size) / max_dim
        new_size        =   tuple([int(x * ratio) for x in (width, height)])
        resized_img     =   cv2.resize(img, (new_size[0], new_size[1]))

        # adding pad to the image to get consistent shape
        pad_w           =   desired_size - new_size[0]
        pad_h           =   desired_size - new_size[1]
        top, bottom     =   pad_h // 2, pad_h - (pad_h // 2)
        left, right     =   pad_w // 2, pad_w - (pad_w // 2)
        padded_img      =   cv2.copyMakeBorder(resized_img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=[0, 0, 0])

        # save the padded image
        output_image_path = os.path.join(output_img,  image_file)
        cv2.imwrite(output_image_path, padded_img)

        # update the corresponding annotation XML file
        annotation_file = image_file.replace(".jpg", ".xml")
        annotation_path = os.path.join(op_ano_dir, annotation_file)
        output_annotation_path = os.path.join(output_ano, annotation_file)

        if os.path.exists(annotation_path):
            tree = ET.parse(annotation_path)
            root = tree.getroot()

            # Update the size element
            size_elem = root.find("size")
            size_elem.find("width").text = str(desired_size)
            size_elem.find("height").text = str(desired_size)

            # Update the coordinates of the objects in the annotation
            for obj_elem in root.iter("object"):
                bbox = obj_elem.find("bndbox")
                xmin = float(bbox.find("xmin").text)
                ymin = float(bbox.find("ymin").text)
                xmax = float(bbox.find("xmax").text)
                ymax = float(bbox.find("ymax").text)

                # Adjust the coordinates based on the resizing and padding
                xmin = float((xmin * ratio) + left)
                ymin = float((ymin * ratio) + top)
                xmax = float((xmax * ratio) + left)
                ymax = float((ymax * ratio) + top)

                # Update the bounding box coordinates
                bbox.find("xmin").text = str(xmin)
                bbox.find("ymin").text = str(ymin)
                bbox.find("xmax").text = str(xmax)
                bbox.find("ymax").text = str(ymax)

            # Save the updated annotation XML file
            tree.write(output_annotation_path)
        else:
            print(f"Annotation file not found for image: {image_file}")
    print(f"Processed: {image_file} ")

print("Resizing and annotation update complete.")

Processed: 0101_003.png 
Processed: 0110_099.png 
Processed: 0113_013.png 
Processed: 0140_007.png 
Processed: 0146_281.png 
Processed: 0147_090.png 
Processed: 0147_125.png 
Processed: 0147_256.png 
Processed: 0148_271.png 
Processed: 0148_479.png 
Processed: 0151_180.png 
Processed: 0151_208.png 
Processed: 0154_080.png 
Processed: 0154_474.png 
Processed: 0155_081.png 
Processed: 0199_384.png 
Processed: 0203_075.png 
Processed: 0203_207.png 
Processed: 0206_007.png 
Processed: 0206_048.png 
Processed: 0207_025.png 
Processed: 0209_207.png 
Processed: 0210_111.png 
Processed: 0212_175.png 
Processed: 0219_043.png 
Processed: 0220_030.png 
Processed: 0223_017.png 
Processed: 0626_005.png 
Processed: 0651_008.png 
Processed: 0651_013.png 
Processed: 0667_005.png 
Processed: 0672_278.png 
Processed: 0672_356.png 
Processed: 0685_048.png 
Processed: 0717_023.png 
Processed: 0725_026.png 
Processed: 0725_043.png 
Processed: 0765_005.png 
Processed: 10.1.1.1.2006_3.jpg 
Annotation file no

In [6]:
import cv2
import xml.etree.ElementTree as ET

def visualize_annotations(image_path, annotation_path):
    # Load image
    image = cv2.imread(image_path)

    # Parse XML annotations
    tree = ET.parse(annotation_path)
    root = tree.getroot()

    # Define colors for each category
    colors = {
        
    'table': (255, 0, 0),                     # Blue
    'table column header': (0, 255, 0),        # Green
    'table row': (0, 0, 255),                  # Red
    'table spanning cell': (255, 255, 0),      # Cyan
    'table projected row header': (255, 0, 255),  # Magenta
    'table column': (0, 255, 255)  # Add the color for 'table column'


    }

    # Iterate over objects in the XML
    for obj in root.findall('.//object'):
        # Get object category and bounding box coordinates
        category = obj.find('name').text
        xmin = int(float(obj.find('bndbox/xmin').text))
        ymin = int(float(obj.find('bndbox/ymin').text))
        xmax = int(float(obj.find('bndbox/xmax').text))
        ymax = int(float(obj.find('bndbox/ymax').text))

        # Draw bounding box on the image with the corresponding color
        cv2.rectangle(image, (xmin, ymin), (xmax, ymax), colors[category], 2)
        cv2.putText(image, category, (xmin, ymin - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, colors[category], 2)

    # Display the annotated image
    cv2.imshow('Annotated Image', image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

# Example usage
image_path = 'selected_data/processed_images/PMC1064074_table_0.jpg'
annotation_path = 'selected_data/processed_anotations/PMC1064074_table_0.xml'
visualize_annotations(image_path, annotation_path)


In [6]:
gg="table"
hh="table column header"
ii="table row"
jj="table projected row header"
kk="table spanning cell"
ll="table column"

import os
import glob
import xml.etree.ElementTree as ET
from PIL import Image ,ImageDraw
import numpy as np

xml_folder="selected_data/processed_anotations/"
png_ano="selected_data/png_ano"

# Create the output PNG folder if it doesn't exist
if not os.path.exists(png_ano):
    os.makedirs(png_ano)

# Get a list of all XML files in the XML folder
xml_files = glob.glob(os.path.join(xml_folder, "*.xml"))

for xml_file in xml_files:
    # Load the XML file
    tree = ET.parse(xml_file)
    root = tree.getroot()

    # Get the image filename from the XML file
    image_filename = root.find("filename").text

    # Create an empty image with the same size
    width, height = 512, 512
    image = np.full((height, width), 0, dtype=np.uint8)  # Initialize with 0 (representing background)

    # Iterate over all object annotations
    for obj in root.findall("object"):
        # Get the object label
        label = obj.find("name").text
        
        # Get the bounding box coordinates
        bbox = obj.find("bndbox")
        xmin = int(float(bbox.find("xmin").text))
        ymin = int(float(bbox.find("ymin").text))
        xmax = int(float(bbox.find("xmax").text))
        ymax = int(float(bbox.find("ymax").text))
        print(xmin, ymin, xmax, ymax, xml_file)

        # Set the region inside the bounding box as 1
        if label==gg:        
            image[ymin:ymax, xmin:xmax] = 40
            
        
    # Save the image in PNG format
    temp=os.path.join(png_ano,image_filename[:-4] + ".png")
    cv2.imencode('.png', image)[1].tofile(temp)
    

563 648 770 714 selected_data/processed_anotations\10.1.1.1.2006_3.xml
229 186 789 741 selected_data/processed_anotations\10.1.1.1.2013_63.xml
229 186 789 319 selected_data/processed_anotations\10.1.1.1.2013_64.xml
229 420 790 632 selected_data/processed_anotations\10.1.1.1.2013_64.xml
181 256 485 668 selected_data/processed_anotations\10.1.1.1.2014_4.xml
193 199 469 343 selected_data/processed_anotations\10.1.1.1.2014_6.xml
197 67 485 205 selected_data/processed_anotations\10.1.1.1.2018_4.xml
541 67 821 220 selected_data/processed_anotations\10.1.1.1.2018_8.xml
607 843 755 920 selected_data/processed_anotations\10.1.1.1.2018_8.xml
292 483 730 827 selected_data/processed_anotations\10.1.1.1.2019_2.xml
292 623 730 733 selected_data/processed_anotations\10.1.1.1.2019_3.xml
351 336 670 480 selected_data/processed_anotations\10.1.1.1.2023_31.xml
251 548 770 692 selected_data/processed_anotations\10.1.1.1.2023_31.xml
382 759 639 889 selected_data/processed_anotations\10.1.1.1.2023_31.xml
19

In [12]:
gg="table"
hh="table column header"
ii="table row"
jj="table projected row header"
kk="table spanning cell"
ll="table column"

import os
import cv2
import xml.etree.ElementTree as ET
import numpy as np
png_ano="selected_data/png_ano"
xml_files="selected_data/processed_anotations/"

for xml in os.listdir(xml_files):
    # Load the XML file
    xml_file=os.path.join(xml_files,xml)
    print(xml_file)
    tree = ET.parse(xml_file)
    root = tree.getroot()

    # Get the image filename from the XML file
    image_filename = root.find("filename").text

    # Create an empty image with the same size
    width, height = 512, 512
    print(xml_file)
    img_name = xml.replace(".xml", ".png")
    temp=os.path.join(png_ano,img_name)
    print(temp)
    # Load the grayscale image
    y = cv2.imread(temp)
    
    # Convert the image to grayscale
    m = cv2.cvtColor(y, cv2.COLOR_BGR2GRAY)
    image = np.array(m)

    # Iterate over all object annotations
    for obj in root.findall("object"):
        # Get the object label
        label = obj.find("name").text
        
        # Get the bounding box coordinates
        bbox = obj.find("bndbox")
        xmin = int(float(bbox.find("xmin").text))
        ymin = int(float(bbox.find("ymin").text))
        xmax = int(float(bbox.find("xmax").text))
        ymax = int(float(bbox.find("ymax").text))
        print(xmin, ymin, xmax, ymax, xml_file)

        # Set the region inside the bounding box as 1
        if label==hh:        
            image[ymin:ymax, xmin:xmax] = 80
            
        
    # Save the image in PNG format
    temp=os.path.join(png_ano,image_filename[:-4] + ".png")
    cv2.imencode('.png', image)[1].tofile(temp)

selected_data/processed_anotations/PMC1064074_table_0.xml
selected_data/processed_anotations/PMC1064074_table_0.xml
selected_data/png_ano\PMC1064074_table_0.png
29 226 481 284 selected_data/processed_anotations/PMC1064074_table_0.xml
29 226 481 239 selected_data/processed_anotations/PMC1064074_table_0.xml
29 226 481 239 selected_data/processed_anotations/PMC1064074_table_0.xml
29 239 481 255 selected_data/processed_anotations/PMC1064074_table_0.xml
29 255 481 270 selected_data/processed_anotations/PMC1064074_table_0.xml
29 270 481 284 selected_data/processed_anotations/PMC1064074_table_0.xml
29 226 109 284 selected_data/processed_anotations/PMC1064074_table_0.xml
109 226 202 284 selected_data/processed_anotations/PMC1064074_table_0.xml
202 226 304 284 selected_data/processed_anotations/PMC1064074_table_0.xml
304 226 404 284 selected_data/processed_anotations/PMC1064074_table_0.xml
404 226 481 284 selected_data/processed_anotations/PMC1064074_table_0.xml
selected_data/processed_anotatio

In [10]:
gg="table"
hh="table column header"
ii="table row"
jj="table projected row header"
kk="table spanning cell"
ll="table column"

import os
import cv2
import xml.etree.ElementTree as ET
import numpy as np
png_ano="selected_data/png_ano"
xml_files="selected_data/processed_anotations/"

for xml in os.listdir(xml_files):
    # Load the XML file
    xml_file=os.path.join(xml_files,xml)
    print(xml_file)
    tree = ET.parse(xml_file)
    root = tree.getroot()

    # Get the image filename from the XML file
    image_filename = root.find("filename").text

    # Create an empty image with the same size
    width, height = 512, 512
    print(xml_file)
    img_name = xml.replace(".xml", ".png")
    temp=os.path.join(png_ano,img_name)
    print(temp)
    # Load the grayscale image
    y = cv2.imread(temp)
    
    # Convert the image to grayscale
    m = cv2.cvtColor(y, cv2.COLOR_BGR2GRAY)
    image = np.array(m)

    # Iterate over all object annotations
    for obj in root.findall("object"):
        # Get the object label
        label = obj.find("name").text
        
        # Get the bounding box coordinates
        bbox = obj.find("bndbox")
        xmin = int(float(bbox.find("xmin").text))
        ymin = int(float(bbox.find("ymin").text))
        xmax = int(float(bbox.find("xmax").text))
        ymax = int(float(bbox.find("ymax").text))
        print(xmin, ymin, xmax, ymax, xml_file)

        # Set the region inside the bounding box as 1
        if label==ii:        
            image[ymin:ymax, xmin:xmax] = 120
            
        
    # Save the image in PNG format
    temp=os.path.join(png_ano,image_filename[:-4] + ".png")
    cv2.imencode('.png', image)[1].tofile(temp)

selected_data/processed_anotations/PMC1064074_table_0.xml
selected_data/processed_anotations/PMC1064074_table_0.xml
selected_data/png_ano\PMC1064074_table_0.png
29 226 481 284 selected_data/processed_anotations/PMC1064074_table_0.xml
29 226 481 239 selected_data/processed_anotations/PMC1064074_table_0.xml
29 226 481 239 selected_data/processed_anotations/PMC1064074_table_0.xml
29 239 481 255 selected_data/processed_anotations/PMC1064074_table_0.xml
29 255 481 270 selected_data/processed_anotations/PMC1064074_table_0.xml
29 270 481 284 selected_data/processed_anotations/PMC1064074_table_0.xml
29 226 109 284 selected_data/processed_anotations/PMC1064074_table_0.xml
109 226 202 284 selected_data/processed_anotations/PMC1064074_table_0.xml
202 226 304 284 selected_data/processed_anotations/PMC1064074_table_0.xml
304 226 404 284 selected_data/processed_anotations/PMC1064074_table_0.xml
404 226 481 284 selected_data/processed_anotations/PMC1064074_table_0.xml
selected_data/processed_anotatio