In [None]:
# Import Python Standard Library dependencies
import datetime
from functools import partial
from glob import glob
import json
import math
import multiprocessing
import os
from pathlib import Path
import random
from typing import Any, Dict, Optional
from pathlib import Path
import shutil

# Import utility functions
from cjm_psl_utils.core import download_file, file_extract, get_source_code
from cjm_pil_utils.core import resize_img, get_img_files, stack_imgs
from cjm_pytorch_utils.core import pil_to_tensor, tensor_to_pil, get_torch_device, set_seed, denorm_img_tensor, move_data_to_device
from cjm_pandas_utils.core import markdown_to_pandas, convert_to_numeric, convert_to_string
from cjm_torchvision_tfms.core import ResizeMax, PadSquare, CustomRandomIoUCrop

# Import the distinctipy module
from distinctipy import distinctipy

# Import matplotlib for creating plots
import matplotlib.pyplot as plt

# Import numpy
import numpy as np

# Import the pandas package
import pandas as pd

# Set options for Pandas DataFrame display
pd.set_option('max_colwidth', None)  # Do not truncate the contents of cells in the DataFrame
pd.set_option('display.max_rows', None)  # Display all rows in the DataFrame
pd.set_option('display.max_columns', None)  # Display all columns in the DataFrame

# Import PIL for image manipulation
from PIL import Image, ImageDraw

# Import PyTorch dependencies
import torch
from torch.amp import autocast
from torch.cuda.amp import GradScaler
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
#from torchtnt.utils import get_module_summary
import torchvision
torchvision.disable_beta_transforms_warning()
from torchvision.tv_tensors import BoundingBoxes, Mask
from torchvision.utils import draw_bounding_boxes, draw_segmentation_masks
import torchvision.transforms.v2  as transforms
from torchvision.transforms.v2 import functional as TF
from torchsummary import summary

# Import Mask R-CNN
from torchvision.models.detection import maskrcnn_resnet50_fpn_v2, MaskRCNN
from torchvision.models.detection import MaskRCNN_ResNet50_FPN_V2_Weights
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor

# Import tqdm for progress bar
from tqdm.auto import tqdm

In [None]:
# Root of the project workspace. The trailing slash is kept so that any code
# concatenating directly onto this constant keeps working.
WORKSPACE_PATH = 'C:/Users/Anwender/Downloads/LectorAI_SOSE24/dokumentenklassifikation/workspace/'

# Strip the trailing slash before joining, otherwise every derived path
# contains a doubled separator ("workspace//mix_up_data/...").
_MIX_UP_ROOT = WORKSPACE_PATH.rstrip('/') + '/mix_up_data'
IMAGES_ALL = _MIX_UP_ROOT + '/Bilder'
ANNOTATION_PATH_ALL = _MIX_UP_ROOT + '/xml'
ANNOTATION_PATH_ALL_JSON = _MIX_UP_ROOT + '/all_json'

# Single sample (annotation + image) used by the one-off conversion cells.
# Derived from the folder constants so the root is only spelled out once.
xml_file_path = ANNOTATION_PATH_ALL + '/AG_016.xml'
image_file_path = IMAGES_ALL + '/AG_016.jpeg'

In [None]:
import xml.etree.ElementTree as ET
import json
import base64
import pandas as pd
from tqdm import tqdm
from PIL import Image


def xml_to_json(xml_file_path, image_file_path, target_height=1024, target_width=1024):
    """
    Convert one bounding-box XML annotation file into a JSON-serialisable dictionary.

    The XML is expected to contain a ``<size>`` element with ``<width>`` and
    ``<height>``, and zero or more ``<object>`` elements, each with a ``<name>``
    and a ``<bndbox>`` holding ``xmin``/``ymin``/``xmax``/``ymax`` (this looks
    like the Pascal VOC layout). The resulting dictionary appears to follow the
    LabelMe JSON layout; every box becomes a four-point polygon shape.

    Box coordinates are rescaled from the size declared in the XML to
    ``target_width`` x ``target_height``. Note that ``imageData`` holds the
    original image file bytes unchanged (base64-encoded): only the coordinates
    and the reported ``imageHeight``/``imageWidth`` reflect the target size.

    Parameters:
    - xml_file_path (str): Path of the XML annotation file.
    - image_file_path (str): Path of the image file; stored as ``imagePath`` and
                             embedded base64-encoded as ``imageData``.
    - target_height (int): Height the coordinates are rescaled to.
    - target_width (int): Width the coordinates are rescaled to.

    Returns:
    - dict: The annotation with keys ``version``, ``flags``, ``shapes``,
            ``imagePath``, ``imageData``, ``imageHeight`` and ``imageWidth``.

    Raises:
    - ValueError: If ``<size>``/``<width>``/``<height>`` is missing, or if the
                  declared size is not positive while there are boxes to rescale.
    """
    # Parse XML data from file
    root = ET.parse(xml_file_path).getroot()

    # Read and encode image data
    with open(image_file_path, "rb") as image_file:
        image_data = base64.b64encode(image_file.read()).decode('utf-8')

    # Get image dimensions from XML, failing with a message that names the file
    # instead of an opaque AttributeError on a missing element
    size = root.find('size')
    width_node = size.find('width') if size is not None else None
    height_node = size.find('height') if size is not None else None
    if width_node is None or height_node is None:
        raise ValueError(f"{xml_file_path}: missing <size>/<width>/<height> element")
    original_width = float(width_node.text)
    original_height = float(height_node.text)

    # A non-positive size only matters when there is something to rescale;
    # without this check the division below raises ZeroDivisionError
    objects = root.findall('object')
    if objects and (original_width <= 0 or original_height <= 0):
        raise ValueError(
            f"{xml_file_path}: declared image size {original_width}x{original_height} "
            "is not positive, cannot rescale bounding boxes"
        )

    # Initialize the JSON structure
    json_data = {
        "version": "5.4.1",
        "flags": {},
        "shapes": [],
        "imagePath": image_file_path,
        "imageData": image_data,
        "imageHeight": target_height,
        "imageWidth": target_width
    }

    # Iterate through each object in the XML
    for obj in objects:
        label = obj.find('name').text
        bndbox = obj.find('bndbox')
        xmin = float(bndbox.find('xmin').text)
        ymin = float(bndbox.find('ymin').text)
        xmax = float(bndbox.find('xmax').text)
        ymax = float(bndbox.find('ymax').text)

        # Rescale the box corners from the original size to the target size.
        # Corner order: (xmin, ymin) -> (xmin, ymax) -> (xmax, ymax) -> (xmax, ymin)
        scaled_points = [
            [xmin * target_width / original_width, ymin * target_height / original_height],
            [xmin * target_width / original_width, ymax * target_height / original_height],
            [xmax * target_width / original_width, ymax * target_height / original_height],
            [xmax * target_width / original_width, ymin * target_height / original_height]
        ]

        # Add the shape entry for this box
        json_data["shapes"].append({
            "label": label,
            "points": scaled_points,
            "group_id": None,
            "description": "",
            "shape_type": "polygon",
            "flags": {},
            "mask": None
        })

    return json_data
    





In [None]:
def find_image_file(image_base_name, images_folder_path):
    """
    Locate the image that belongs to an annotation by trying known extensions.

    Parameters:
    - image_base_name (str): File name of the image without its extension.
    - images_folder_path (str): Folder in which to look for the image.

    Returns:
    - str or None: Path of the first candidate that exists on disk, tried in the
                   order .jpg, .jpeg, .png, .JPG, .JPEG, .PNG; None if none exists.
    """
    candidates = (
        os.path.join(images_folder_path, image_base_name + suffix)
        for suffix in ('.jpg', '.jpeg', '.png', '.JPG', '.JPEG', '.PNG')
    )
    # next() stops at the first existing candidate; None is the "not found" result
    return next((path for path in candidates if os.path.exists(path)), None)

def process_xml_folder(xml_folder_path, images_folder_path, json_output_folder, target_height=1024, target_width=1024):
    """
    Convert every XML annotation in a folder to a JSON file.

    For each ``*.xml`` file the matching image (same base name) is looked up with
    ``find_image_file``; the pair is converted with ``xml_to_json`` and written
    to ``json_output_folder`` as ``<base name>.json``. XML files without a
    matching image are reported and skipped.

    Parameters:
    - xml_folder_path (str): Folder containing the XML annotation files.
    - images_folder_path (str): Folder containing the corresponding images.
    - json_output_folder (str): Folder the JSON files are written to; created
                                if it does not exist.
    - target_height (int): Forwarded to ``xml_to_json``.
    - target_width (int): Forwarded to ``xml_to_json``.

    Returns:
    - None
    """
    os.makedirs(json_output_folder, exist_ok=True)

    # Sorted so the processing order (and therefore the log) is the same on
    # every run; os.listdir returns entries in arbitrary order
    for xml_file_name in sorted(os.listdir(xml_folder_path)):
        if not xml_file_name.endswith('.xml'):
            continue

        # Construct full XML file path
        xml_file_path = os.path.join(xml_folder_path, xml_file_name)
        image_base_name = os.path.splitext(xml_file_name)[0]

        image_file_path = find_image_file(image_base_name, images_folder_path)
        if image_file_path is None:
            print(f"Error: No image file found for {xml_file_name}")
            continue

        # Convert XML to JSON
        json_data = xml_to_json(xml_file_path, image_file_path, target_height, target_width)

        # Determine JSON output path. Built from the base name so only the
        # extension changes; str.replace('.xml', '.json') would also rewrite
        # any other '.xml' occurrence inside the file name.
        json_file_name = image_base_name + '.json'
        json_output_path = os.path.join(json_output_folder, json_file_name)

        # Write JSON data to file
        with open(json_output_path, 'w') as json_file:
            json.dump(json_data, json_file, indent=4)

        print(f"Processed {xml_file_name} and saved to {json_output_path}")


In [None]:
# Batch-convert the whole dataset: every XML annotation in the annotation folder
# is paired with its image from the images folder and written as JSON to the
# output folder.
xml_folder_path = ANNOTATION_PATH_ALL
images_folder_path = IMAGES_ALL
json_output_folder = ANNOTATION_PATH_ALL_JSON
process_xml_folder(xml_folder_path, images_folder_path, json_output_folder)

In [None]:
# Convert the single sample annotation to the JSON structure
json_output = xml_to_json(xml_file_path, image_file_path)

# Preview the result without the base64 image payload, which would otherwise
# flood the cell output with the entire encoded image. `json_output` itself is
# left untouched so the cells that write it to disk still get the full data.
json_preview = {
    **json_output,
    "imageData": f"<{len(json_output['imageData'])} base64 characters omitted>",
}
print(json.dumps(json_preview, indent=2))


In [None]:
# Destination file for the single converted sample.
# NOTE(review): `json_output_path` names a file here, while a later cell rebinds
# the same name to the containing folder — mind this when re-running cells
# out of order.
json_output_path = "C:/Users/Anwender/Downloads/LectorAI_SOSE24/dokumentenklassifikation/MASKRCNN/MASKRCNN/test_folder/images/AG_016.json"
# Write JSON output to a file with pretty formatting
with open(json_output_path, 'w') as json_file:
    json.dump(json_output, json_file, indent=4)

In [None]:
# Folder holding the JSON annotation files to load
json_output_path = "C:/Users/Anwender/Downloads/LectorAI_SOSE24/dokumentenklassifikation/MASKRCNN/MASKRCNN/test_folder/images"
json_output_path_f = Path(json_output_path)
annotation_file_path = list(json_output_path_f.glob('*.json'))
# Read each JSON file into a Pandas DataFrame. The frames are materialised in a
# list rather than a generator: a generator is exhausted after its first use, so
# re-running the concatenation cell on its own would fail with nothing left to
# concatenate.
cls_dataframes = [pd.read_json(f, orient='index').transpose() for f in tqdm(annotation_file_path)]


In [None]:
# Concatenate the per-file DataFrames into a single DataFrame.
# ignore_index=False keeps each frame's own index labels; a later cell replaces
# them with the image file name.
annotation_df = pd.concat(cls_dataframes, ignore_index=False)


In [None]:
# Preview the annotations. The base64 'imageData' column is left out of the
# preview: with column truncation disabled it would render the entire encoded
# image for every row. `annotation_df` itself is not modified.
annotation_df.drop(columns=['imageData'], errors='ignore').head()

In [None]:
# Key for each row: the image file name without directory and extension
image_stems = annotation_df['imagePath'].apply(
    lambda path: os.path.splitext(os.path.basename(path))[0]
)

# Attach the keys as an 'index' column and promote that column to the row index
annotation_df = annotation_df.assign(index=image_stems).set_index('index')

# Show the distinct image paths as a sanity check
print(annotation_df['imagePath'].unique())


In [None]:
# Collect the image files stored in the same folder as the JSON annotations.
# NOTE(review): this rebinds `json_output_path_f`, which previously held the
# folder Path, to whatever get_img_files returns (presumably a list of image
# file Paths, since `.stem` is used below) — confirm.
json_output_path_f = get_img_files(json_output_path)

# Map each file stem (file name without extension) to its file path
img_dict = {file.stem : file for file in json_output_path_f}

In [None]:
# Inspect the stem -> image file mapping
img_dict

In [None]:
# Inspect the image path recorded for each annotation row
annotation_df['imagePath']

In [None]:
# Flatten the annotations: explode each row's list of shapes into one row per
# shape, then expand every shape entry into its own columns (e.g. 'label').
shapes_df = annotation_df['shapes'].explode().to_frame().shapes.apply(pd.Series)

In [None]:
# Get a list of unique labels in the 'shapes_df' DataFrame
class_names = shapes_df['label'].unique().tolist()

# Display labels using a Pandas DataFrame
pd.DataFrame(class_names)

In [None]:
# Prepend a `background` class to the list of class names.
# Any existing 'background' entry is filtered out first, so re-running this cell
# does not stack up duplicate entries and shift the class indices.
class_names = ['background'] + [name for name in class_names if name != 'background']

# Display labels using a Pandas DataFrame
pd.DataFrame(class_names)

In [None]:
# Generate a list of colors with a length equal to the number of labels
# NOTE(review): no seed/rng is passed here, so the palette presumably differs
# between runs — confirm if reproducible colors matter.
colors = distinctipy.get_colors(len(class_names))

# Make a copy of the color map in integer format
# (each float channel is multiplied by 255 and truncated by int())
int_colors = [tuple(int(c*255) for c in color) for color in colors]

# Generate a color swatch to visualize the color map
distinctipy.color_swatch(colors)

In [None]:
# Set the name of the font file (used later when drawing box labels)
font_file = 'KFOlCnqEu92Fr1MmEU9vAw.ttf'

# Download the font file
# (second argument is presumably the destination directory, here the current
# working directory — confirm against download_file)
download_file(f"https://fonts.gstatic.com/s/roboto/v30/{font_file}", "./")

In [None]:
# Get the file ID of the sample image (index 3, i.e. the fourth entry of img_dict)
# NOTE(review): raises IndexError when img_dict holds fewer than four images.
file_id = list(img_dict.keys())[3]

# Open the associated image file as a RGB image
sample_img = Image.open(img_dict[file_id]).convert('RGB')

# Print the dimensions of the image
print(f"Image Dims: {sample_img.size}")

# Show the image
sample_img

In [None]:
# Get the row from the 'annotation_df' DataFrame corresponding to the 'file_id'
# and show it as a frame (assuming 'file_id' occurs only once in the index)

annotation_df.loc[file_id].to_frame()

In [None]:
def create_polygon_mask(image_size, vertices):
    """
    Render a filled polygon as a single-channel mask image.

    Parameters:
    - image_size (tuple): Size of the mask as (width, height).
    - vertices (list): The polygon's corner points as (x, y) tuples, listed in
                       clockwise or counter-clockwise order.

    Returns:
    - PIL.Image.Image: An 'L'-mode image that is 255 inside the polygon and 0
                       everywhere else.
    """
    # Start from an all-black canvas of the requested size
    canvas = Image.new('L', image_size, 0)

    # Fill the polygon's interior with white (255)
    pen = ImageDraw.Draw(canvas, 'L')
    pen.polygon(vertices, fill=255)

    return canvas

In [None]:
# Pre-configure draw_bounding_boxes with this notebook's box style: outline only
# (no fill), line width 2, and labels rendered with `font_file` at size 25
draw_bboxes = partial(draw_bounding_boxes, fill=False, width=2, font=font_file, font_size=25)

In [None]:
# Annotation entries (one per shape) belonging to the sample image
sample_shapes = annotation_df.loc[file_id]['shapes']

# Class label of each shape
labels = [entry['label'] for entry in sample_shapes]
# Polygon vertices of each shape
shape_points = [entry['points'] for entry in sample_shapes]
# PIL expects every vertex as an (x, y) tuple
xy_coords = [list(map(tuple, polygon)) for polygon in shape_points]
# Rasterise each polygon into its own mask image, sized like the sample image
mask_imgs = [create_polygon_mask(sample_img.size, polygon) for polygon in xy_coords]
# Turn the mask images into boolean tensors and join them into one tensor
masks = torch.concat([
    Mask(transforms.PILToTensor()(img), dtype=torch.bool)
    for img in mask_imgs
])
# Derive the bounding boxes from the masks
bboxes = torchvision.ops.masks_to_boxes(masks)

# One color per shape, looked up through the label's position in class_names
label_colors = [int_colors[class_names.index(label)] for label in labels]

# Overlay the segmentation masks on the sample image
annotated_tensor = draw_segmentation_masks(
    image=transforms.PILToTensor()(sample_img),
    masks=masks,
    alpha=0.3,
    colors=label_colors,
)

# Add the labels and bounding boxes on top of the masks
annotated_tensor = draw_bboxes(
    image=annotated_tensor,
    boxes=bboxes,
    labels=labels,
    colors=label_colors,
)

tensor_to_pil(annotated_tensor)