## Here we try to extract the MGRS (Military Grid Reference System) Coordinates from the svg element.

We assume that the coordinates are not stored as text but are drawn as path elements, i.e. each label is rendered as a complex polygon. We therefore treat every SVG path element with a very long `d` attribute as a coordinate candidate (the code below uses a length between 2000 and 6000 characters). After finding a candidate we also try to reconstruct its position in the original SVG. To do so we look at the `transform` attributes of the parent elements of the candidate path element.

Finally, we render the isolated SVG path element to a small PNG and run OCR on it to obtain the coordinate as machine-readable text.

In [24]:
import re
from pathlib import Path

import cairosvg
import pytesseract
from bs4 import BeautifulSoup
from PIL import Image

In [None]:
# Matches one SVG transform function call, e.g. "matrix(1,0,0,-1,0,842)"
# -> ("matrix", "1,0,0,-1,0,842").
_TRANSFORM_FUNCTION_RE = re.compile(r"([A-Za-z]+)\s*\(([^)]*)\)")
# Matches one number inside a transform argument list; SVG allows commas
# and/or whitespace as separators, so numbers are extracted instead of split.
_SVG_NUMBER_RE = re.compile(r"[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?")


def _apply_svg_transform(point, transform_value):
    """Map ``point`` through the value of one SVG ``transform`` attribute.

    Handles ``translate``, ``scale`` and ``matrix``. Any other function
    (``rotate``, ``skewX``, ``skewY``) or a malformed argument list leaves the
    point unchanged.
    """
    x, y = point
    # In a transform list the rightmost function is applied to the point first.
    for name, raw_args in reversed(_TRANSFORM_FUNCTION_RE.findall(transform_value)):
        args = [float(number) for number in _SVG_NUMBER_RE.findall(raw_args)]
        if name == "translate" and len(args) in (1, 2):
            # translate(tx) is shorthand for translate(tx, 0)
            x += args[0]
            if len(args) == 2:
                y += args[1]
        elif name == "scale" and len(args) in (1, 2):
            # scale(s) is shorthand for scale(s, s)
            x *= args[0]
            y *= args[1] if len(args) == 2 else args[0]
        elif name == "matrix" and len(args) == 6:
            a, b, c, d, e, f = args
            # Full affine map: x' = a*x + c*y + e, y' = b*x + d*y + f
            x, y = a * x + c * y + e, b * x + d * y + f
    return (x, y)


def find_mgrs_coordinates(pdf, min_length=2000, max_length=6000):
    """Collect path elements that are candidates for drawn MGRS labels.

    A ``path`` element is a candidate when the length of its ``d`` attribute
    lies in ``[min_length, max_length]``. For each candidate the local origin
    (0, 0) is mapped through the ``transform`` attributes of all ancestor
    elements, innermost first, to estimate where the path starts in the
    document. The path's own ``transform`` attribute is not considered.

    Debug information about every candidate and its ancestors is printed.

    Args:
        pdf: Path to the SVG file to analyse (the name is historical).
        min_length: Smallest ``d`` length that counts as a candidate.
        max_length: Largest ``d`` length that counts as a candidate.

    Returns:
        A list with one dict per candidate, with the keys
        ``"path"`` (the parsed path element),
        ``"coordinates"`` (``(x, y)`` floats; ``(0.0, 0.0)`` when no ancestor
        carries a transform),
        ``"svg_drawing"`` (a standalone 80x20 SVG document containing only the
        path, flipped vertically and shifted for rendering) and
        ``"file_id"`` (``"<file stem>_num_<n>"`` with ``n`` counting from 1).
    """
    # Open and read the SVG file
    with open(pdf, 'r', encoding="utf-8") as file:
        svg_content = file.read()

    # Path.stem also works when a directory in the path contains a dot
    # (e.g. "./data/x.svg"), where splitting on "." yields an empty id.
    file_id = Path(pdf).stem

    # Parse the SVG content using BeautifulSoup with lxml's XML parser
    soup = BeautifulSoup(svg_content, 'xml')

    svg_elements = [
        path
        for path in soup.find_all('path')
        if min_length <= len(path.get('d', '')) <= max_length
    ]

    # military grid reference system
    mgrs_coordinates = []

    # count_coordinate numbers the candidates found in the file, starting at 1
    for count_coordinate, path in enumerate(svg_elements, start=1):
        print(path)
        print("Length of 'd' attribute:", len(path['d']))

        # Start at the path's local origin; starting from an empty tuple made
        # the first 1-value or 6-value transform fail with an IndexError.
        start_coordinates_path_element = (0.0, 0.0)

        # Walk up the tree and apply each ancestor's transform in turn
        for parent in path.parents:
            print("Parent Element:", parent.name)
            transform_value = parent.attrs.get("transform")
            if transform_value is None:
                continue

            print(f"transform: {transform_value}")
            start_coordinates_path_element = _apply_svg_transform(
                start_coordinates_path_element, transform_value
            )
            print("neu berechnete Koordinaten: ")
            print(start_coordinates_path_element)

        print("Endgültige Startkoordinatne für path Element: ")
        print(start_coordinates_path_element)

        # Now rewrite the serialized path so it can be rendered on its own for
        # OCR: add a flip/shift transform, drop the "svg:" namespace prefix and
        # make sure the tag opens with exactly one "<" (an unprefixed tag would
        # otherwise become "<<path").
        flipped_tag = str(path).split("/>")[0] + ' transform="scale(1, -1) translate(0, -10)"' + "/>"
        svg_drawing = "<" + flipped_tag.split("svg:")[-1].lstrip("<")
        svg_drawing = '<svg xmlns="http://www.w3.org/2000/svg" width="80" height="20">' + svg_drawing + '</svg>'

        name = file_id + "_num_" + str(count_coordinate)

        mgrs_coordinates.append({"path" : path, "coordinates": start_coordinates_path_element, "svg_drawing": svg_drawing, "file_id": name})
    return mgrs_coordinates


In [25]:
# Run the extraction on one example sheet; the function prints debug output
# for every candidate path and returns one record per candidate.
coordinates = find_mgrs_coordinates("data/2384-I.svg")

<svg:path d="m 0,0 h 1.44 c 0.48,-0.48 0.48,-0.96 0.96,-0.96 0,-0.48 0.48,-0.48 0.48,-0.48 0.48,0 0.96,0 1.44,0.48 0,0.48 0,0.48 0,1.44 0,0.48 0,0.48 0,0.96 -0.48,0.48 -0.96,0.48 -0.96,0.48 -0.48,0 -0.96,0 -0.96,0 v 1.44 c 0.48,0 0.96,0 1.44,0.48 0,0 0.48,0.48 0.48,0.96 0,0 -0.48,0.48 -0.48,0.48 0,0.48 -0.48,0.48 -0.96,0.48 0,0 -0.48,0 -0.48,-0.48 C 1.92,5.28 1.92,4.8 1.92,4.32 L 0,4.8 c 0,0.48 0.48,0.96 0.48,1.44 0.48,0.48 0.48,0.48 0.96,0.96 0.48,0 0.96,0 1.44,0 0.96,0 1.92,0 2.4,-0.96 C 5.76,5.76 6.24,5.28 6.24,4.8 6.24,3.84 5.76,2.88 4.8,2.4 5.28,2.4 5.76,2.4 6.24,1.92 6.24,1.44 6.72,0.96 6.72,0 6.72,-0.48 6.24,-1.44 5.76,-1.92 4.8,-2.88 4.32,-2.88 3.36,-2.88 c -0.96,0 -1.92,0 -2.4,0.48 C 0.48,-1.92 0,-0.96 0,0 Z M 7.68,0 H 9.6 c 0,-0.48 0,-0.96 0.48,-0.96 0,-0.48 0.48,-0.48 0.96,-0.48 0.48,0 0.48,0 0.96,0.48 0.48,0.48 0.48,0.48 0.48,1.44 0,0.48 0,0.48 -0.48,0.96 -0.48,0.48 -0.48,0.48 -0.96,0.48 0,0 -0.48,0 -0.96,0 l 0.48,1.44 c 0.48,0 0.96,0 0.96,0.48 0.48,0 0.48,0.48 0.48,0.96 0,

In [27]:
# Inspect the record of the first candidate path.
coordinates[0]

{'path': <svg:path d="m 0,0 h 1.44 c 0.48,-0.48 0.48,-0.96 0.96,-0.96 0,-0.48 0.48,-0.48 0.48,-0.48 0.48,0 0.96,0 1.44,0.48 0,0.48 0,0.48 0,1.44 0,0.48 0,0.48 0,0.96 -0.48,0.48 -0.96,0.48 -0.96,0.48 -0.48,0 -0.96,0 -0.96,0 v 1.44 c 0.48,0 0.96,0 1.44,0.48 0,0 0.48,0.48 0.48,0.96 0,0 -0.48,0.48 -0.48,0.48 0,0.48 -0.48,0.48 -0.96,0.48 0,0 -0.48,0 -0.48,-0.48 C 1.92,5.28 1.92,4.8 1.92,4.32 L 0,4.8 c 0,0.48 0.48,0.96 0.48,1.44 0.48,0.48 0.48,0.48 0.96,0.96 0.48,0 0.96,0 1.44,0 0.96,0 1.92,0 2.4,-0.96 C 5.76,5.76 6.24,5.28 6.24,4.8 6.24,3.84 5.76,2.88 4.8,2.4 5.28,2.4 5.76,2.4 6.24,1.92 6.24,1.44 6.72,0.96 6.72,0 6.72,-0.48 6.24,-1.44 5.76,-1.92 4.8,-2.88 4.32,-2.88 3.36,-2.88 c -0.96,0 -1.92,0 -2.4,0.48 C 0.48,-1.92 0,-0.96 0,0 Z M 7.68,0 H 9.6 c 0,-0.48 0,-0.96 0.48,-0.96 0,-0.48 0.48,-0.48 0.96,-0.48 0.48,0 0.48,0 0.96,0.48 0.48,0.48 0.48,0.48 0.48,1.44 0,0.48 0,0.48 -0.48,0.96 -0.48,0.48 -0.48,0.48 -0.96,0.48 0,0 -0.48,0 -0.96,0 l 0.48,1.44 c 0.48,0 0.96,0 0.96,0.48 0.48,0 0.48,0.48 0.4

# Now we create a picture of the svg path element to prepare for OCR

In [37]:
# 1. we create an svg image from the coordinate
# 2. convert the svg to png
# 3. ocr on the png
# 4. return coordinates as text

def ocr_image(svg_path, file_id):
    """Render SVG markup to a PNG and return the text Tesseract reads from it.

    Writes ``data/ocr_images/<file_id>.svg``, converts it to
    ``data/ocr_images/<file_id>.png`` and runs OCR on the PNG. The output
    directory is created if it does not exist yet.

    Args:
        svg_path: The SVG document as a string (markup, not a file path).
        file_id: Base name used for the generated .svg and .png files.

    Returns:
        The recognised text with surrounding whitespace stripped.
    """
    output_dir = "data/ocr_images"
    # Without this the first write fails when the folder has not been created.
    Path(output_dir).mkdir(parents=True, exist_ok=True)

    svg_file = f"{output_dir}/{file_id}.svg"
    png_file = f"{output_dir}/{file_id}.png"

    # Write the SVG markup; explicit UTF-8 keeps the file independent of the
    # platform's default encoding.
    with open(svg_file, 'w', encoding="utf-8") as file:
        file.write(svg_path)

    # Convert SVG to PNG
    cairosvg.svg2png(url=svg_file, write_to=png_file)

    # Use Tesseract to do OCR on the image; the context manager closes the
    # underlying file handle afterwards.
    with Image.open(png_file) as img:
        text = pytesseract.image_to_string(img)
    return text.strip()
    

In [38]:
# OCR the second candidate (index 1) using its standalone SVG markup and file id.
ocr_image(coordinates[1]["svg_drawing"], coordinates[1]["file_id"])

33TWK8769



'33TWK8769'

In [46]:
import mgrs

def convert_mgrs_to_latlon(mgrs_str):
    """Convert an MGRS reference string via ``mgrs.MGRS.toLatLon``.

    The string is UTF-8 encoded to bytes before it is handed to ``toLatLon``;
    whatever that call returns is passed back unchanged.
    """
    converter = mgrs.MGRS()
    encoded_reference = mgrs_str.encode('utf-8')
    return converter.toLatLon(encoded_reference)

# Example: convert one MGRS reference and print the first two components
# of the result, separated by ", "
mgrs_coordinate = "34TBP8932"
latlon = convert_mgrs_to_latlon(mgrs_coordinate)
print(latlon[0], latlon[1], sep=", ")

43.61108109713452, 18.38529256651145


## find all used colors

In [39]:
from bs4 import BeautifulSoup


def find_all_used_colors(pdf):
    """Return the set of fill colors used by ``path`` elements in an SVG file.

    For every path the ``fill`` declaration in the ``style`` attribute is
    used; when the style has no ``fill`` declaration, the ``fill``
    presentation attribute is used instead. Paths with neither contribute
    nothing.

    Args:
        pdf: Path to the SVG file to analyse (the name is historical).

    Returns:
        A set of color value strings exactly as written in the file
        (whitespace stripped, no case normalisation).
    """
    # Open and read the SVG file
    with open(pdf, 'r', encoding="utf-8") as file:
        svg_content = file.read()

    soup = BeautifulSoup(svg_content, 'xml')  # Use 'xml' parser for parsing SVG

    unique_colors = set()  # Set to store unique colors

    for path in soup.find_all('path'):
        color = None

        # Parse "prop: value" declarations; partition keeps values that
        # themselves contain ':' intact, and stripping the property name
        # tolerates whitespace around it.
        for declaration in (path.get('style') or '').split(';'):
            prop, separator, value = declaration.partition(':')
            if separator and prop.strip() == 'fill':
                color = value.strip()  # a later declaration overrides an earlier one

        # Fall back to the presentation attribute when style sets no fill
        if not color:
            color = (path.get('fill') or '').strip()

        if color:
            unique_colors.add(color)

    # Output the unique colors
    return unique_colors


## find yellow cross element

In [26]:

# NOTE(review): svg_content is not defined at notebook scope in the cells
# shown here (it only exists as a local inside the functions) - it must be
# loaded before this cell runs; confirm where it comes from.
soup = BeautifulSoup(svg_content, 'xml')

target_color = '#ffff00'
paths_with_target_color = []

for path in soup.find_all('path'):
    # Check the fill attribute directly
    if path.get('fill') == target_color:
        paths_with_target_color.append(path)
        continue

    # Check within the style attribute. Declarations are split with
    # partition so that an entry without ':' or with several ':' cannot
    # raise, and property names are stripped so "; fill:..." still matches.
    style_fill = None
    for declaration in (path.get('style') or '').split(';'):
        prop, separator, value = declaration.partition(':')
        if separator and prop.strip() == 'fill':
            style_fill = value.strip()  # a later declaration overrides an earlier one

    if style_fill == target_color:
        paths_with_target_color.append(path)

# Output the paths or manipulate them as needed
for path in paths_with_target_color:
    print(path)

<svg:path d="m 0,0 -5.76,-0.96 1.44,-2.4 -2.88,0.96 -1.92,-1.44 v 1.92 h -1.92 L -9.6,0 -12,1.44 -9.12,1.92 -8.64,3.84 -7.68,2.4 -4.32,5.76 -5.76,1.92 Z" id="path168" style="fill:#ffff00;fill-opacity:1;fill-rule:nonzero;stroke:none"/>
<svg:path d="m 0,0 -5.76,-0.96 1.44,-2.4 -2.88,0.96 -1.92,-1.44 v 1.92 h -1.92 L -9.6,0 -12,1.44 -9.12,1.92 -8.64,3.84 -7.68,2.4 -4.32,5.76 -5.76,1.92 Z" id="path180" style="fill:#ffff00;fill-opacity:1;fill-rule:nonzero;stroke:none"/>
<svg:path d="m 0,0 -5.76,-0.96 1.44,-2.4 -2.88,0.96 -1.92,-1.44 v 1.92 h -1.92 L -9.6,0 -12,1.44 -9.12,1.92 -8.64,3.84 -7.68,2.4 -4.32,5.76 -5.76,1.92 Z" id="path192" style="fill:#ffff00;fill-opacity:1;fill-rule:nonzero;stroke:none"/>
<svg:path d="m 0,0 -5.76,-0.96 1.44,-2.4 -2.88,0.96 -1.92,-1.44 v 1.92 h -1.92 L -9.6,0 -12,1.44 -9.12,1.92 -8.64,3.84 -7.68,2.4 -4.32,5.76 -5.76,1.92 Z" id="path204" style="fill:#ffff00;fill-opacity:1;fill-rule:nonzero;stroke:none"/>
<svg:path d="m 0,0 -5.76,-0.96 1.44,-2.4 -2.88,0.96 -1.92,-1