## Here we try to extract the MGRS (Military Grid Reference System) Coordinates from the svg element.

We assume that the coordinates are not stored as text but are drawn as path elements, i.e. each label is rendered as a set of complex polygons. We therefore treat SVG path elements with a very long `d` attribute (between 2000 and 6000 characters) as coordinate candidates. Once we have found them, we also try to reconstruct the position of each element in the original SVG. To do that, we look at the SVG transformations on the parent elements of the path element that might contain the coordinates.

Finally, we render a small PNG from the isolated SVG path element and run OCR on it to obtain the text of the path element in machine-readable form.

In [52]:
from bs4 import BeautifulSoup

# Load the raw SVG markup from disk
with open('2782_3.svg', 'r') as svg_file:
    svg_content = svg_file.read()

# Build the element tree with BeautifulSoup in XML mode
soup = BeautifulSoup(svg_content, 'xml')

# Length window (in characters) a `d` attribute must fall into to be kept
min_length = 2000
max_length = 6000

# Keep every <path> whose `d` attribute length lies inside the window;
# a missing `d` attribute counts as length 0
svg_elements = [
    candidate
    for candidate in soup.find_all('path')
    if min_length <= len(candidate.get('d', '')) <= max_length
]


import math
import re

# Matches one SVG transform function, e.g. "translate(16.8)" or "matrix(1,0,0,-1,0,2784)".
_TRANSFORM_FUNCTION = re.compile(r"([A-Za-z]+)\s*\(([^)]*)\)")
# Matches one numeric argument; arguments may be separated by commas and/or whitespace.
_TRANSFORM_NUMBER = re.compile(r"[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?")


def _transform_matrix(name, args):
    """Return the affine coefficients (a, b, c, d, e, f) of one SVG transform function.

    The coefficients map a point as x' = a*x + c*y + e and y' = b*x + d*y + f.

    Args:
        name: Transform function name as written in the attribute.
        args: Its numeric arguments as floats.

    Raises:
        ValueError: If the function name or its number of arguments is not supported.
    """
    count = len(args)
    if name == "matrix" and count == 6:
        return tuple(args)
    if name == "translate" and count in (1, 2):
        # A missing second argument means "no shift in y".
        return (1.0, 0.0, 0.0, 1.0, args[0], args[1] if count == 2 else 0.0)
    if name == "scale" and count in (1, 2):
        # A missing second argument means uniform scaling.
        return (args[0], 0.0, 0.0, args[1] if count == 2 else args[0], 0.0, 0.0)
    if name == "rotate" and count in (1, 3):
        angle = math.radians(args[0])
        cos_a, sin_a = math.cos(angle), math.sin(angle)
        # Optional rotation centre; rotating about (cx, cy) adds a translation part.
        cx, cy = (args[1], args[2]) if count == 3 else (0.0, 0.0)
        return (
            cos_a,
            sin_a,
            -sin_a,
            cos_a,
            cx - cos_a * cx + sin_a * cy,
            cy - sin_a * cx - cos_a * cy,
        )
    if name == "skewX" and count == 1:
        return (1.0, 0.0, math.tan(math.radians(args[0])), 1.0, 0.0, 0.0)
    if name == "skewY" and count == 1:
        return (1.0, math.tan(math.radians(args[0])), 0.0, 1.0, 0.0, 0.0)
    raise ValueError(f"unsupported SVG transform: {name}({', '.join(map(str, args))})")


def _apply_transform_attribute(point, value):
    """Map *point* through every function listed in one `transform` attribute value."""
    x, y = point
    # In a transform list the rightmost function acts on the point first.
    for name, raw_args in reversed(_TRANSFORM_FUNCTION.findall(value)):
        args = [float(number) for number in _TRANSFORM_NUMBER.findall(raw_args)]
        a, b, c, d, e, f = _transform_matrix(name, args)
        x, y = a * x + c * y + e, b * x + d * y + f
    return (x, y)


# military grid reference system: one dict per candidate path with the keys
# "path" (the element), "coordinates" (position after all ancestor transforms)
# and "svg_drawing" (standalone SVG markup used as OCR input)
mgrs_coordinates = []


for path in svg_elements:
    print(path)
    print("Length of 'd' attribute:", len(path['d']))

    # Position of the path's local origin. Starting from (0, 0) means a path
    # without any transformed ancestor is reported at (0.0, 0.0).
    # NOTE(review): this assumes the path data itself starts at its local
    # origin (as in "m 0,0"); a different initial moveto is not added here.
    start_coordinates_path_element = (0.0, 0.0)

    # Walk from the direct parent up to the document root, so the innermost
    # transform is applied first and the outermost one last.
    parent = path.parent
    while parent is not None:
        print("Parent Element:", parent.name)
        for attribute, value in parent.attrs.items():
            if attribute != "transform":
                continue

            print(f"{attribute}: {value}")
            start_coordinates_path_element = _apply_transform_attribute(
                start_coordinates_path_element, value
            )

            print("neu berechnete Koordinaten: ")
            print(start_coordinates_path_element)

        parent = parent.parent
    print("Endgültige Startkoordinatne für path Element: ")
    print(start_coordinates_path_element)

    # Now wrap the isolated path in a small standalone SVG that can be
    # rendered to an image for OCR. The added transform flips the glyphs
    # upright and moves them into the visible area.
    element_markup = str(path).split("/>")[0] + ' transform="scale(1, -1) translate(0, -10)"' + "/>"
    # Drop a namespace prefix such as "svg:" from the tag name if there is one;
    # markup without a prefix is left unchanged.
    element_markup = re.sub(r"^<[^\s:>]+:", "<", element_markup)
    svg_drawing = '<svg xmlns="http://www.w3.org/2000/svg" width="80" height="20">' + element_markup + '</svg>'

    mgrs_coordinates.append({"path" : path, "coordinates": start_coordinates_path_element, "svg_drawing": svg_drawing})


<svg:path d="m 0,0 h 0.96 c 0.48,-0.96 0.48,-1.44 0.96,-1.44 0.48,-0.48 0.48,-0.48 0.96,-0.48 0.96,0 1.44,0 1.92,0.48 0,0.48 0.48,0.96 0.48,1.44 0,0.48 -0.48,0.96 -0.48,1.44 -0.48,0.48 -0.96,0.48 -1.44,0.48 -0.48,0 -0.48,0 -0.96,0 v 0.96 c 0,0 0,0 0.48,0 0.48,0 0.96,0.48 1.44,0.48 0,0.48 0.48,0.96 0.48,1.44 0,0.48 0,0.96 -0.48,0.96 C 3.84,6.24 3.36,6.24 2.88,6.24 2.4,6.24 2.4,6.24 1.92,5.76 1.44,5.76 1.44,5.28 1.44,4.32 L 0,4.8 C 0,5.28 0.48,6.24 0.96,6.72 1.44,7.2 2.4,7.2 2.88,7.2 c 0.48,0 1.44,0 1.92,-0.48 0,0 0.48,-0.48 0.96,-0.96 0,0 0,-0.48 0,-0.96 0,-0.48 0,-0.96 0,-1.44 C 5.28,3.36 5.28,2.88 4.8,2.4 5.28,2.4 5.76,2.4 5.76,1.92 6.24,1.44 6.72,0.96 6.72,0 6.72,-0.96 6.24,-1.44 5.76,-1.92 4.8,-2.88 3.84,-2.88 2.88,-2.88 2.4,-2.88 1.44,-2.88 0.96,-2.4 0.48,-1.92 0,-0.96 0,0 Z m 11.52,-2.88 v 2.4 H 7.2 V 0.48 L 12,7.2 h 0.96 V 0.48 h 1.44 v -0.96 h -1.44 v -2.4 z m 0,3.36 v 4.8 L 8.64,0.48 Z m 7.2,-3.36 V 6.24 H 15.36 V 7.2 h 8.16 V 6.24 h -3.36 v -9.12 z m 5.76,0 V 7.2 h 3.84 c 0.96

# Now we create a picture of the svg path element to prepare for OCR

In [55]:
# Store the standalone SVG of the first candidate in test.svg
with open("test.svg", 'w') as svg_out:
    first_candidate = mgrs_coordinates[0]
    svg_out.write(first_candidate["svg_drawing"])

import cairosvg

# Rasterise test.svg into test.png
cairosvg.svg2png(url='test.svg', write_to='test.png')

# Test OCR with Tesseract

In [56]:
import pytesseract
from PIL import Image

def apply_ocr(image_path):
    """Run Tesseract OCR on an image file and return the recognised text.

    Args:
        image_path: Location of the image, passed unchanged to ``Image.open``.

    Returns:
        The result of ``pytesseract.image_to_string`` for that image, unmodified.
    """
    # The context manager closes the image (and its underlying file) as soon
    # as OCR is done instead of leaving that to garbage collection.
    with Image.open(image_path) as img:
        return pytesseract.image_to_string(img)

# OCR the PNG rendered from the isolated path element and show the raw text
extracted_text = apply_ocr(image_path='test.png')
print(extracted_text)

34TBP6534



## Understand svg path structure with its parent elements

So this is my svg: 
```
<svg xmlns="http://www.w3.org/2000/svg" width = "2016" height= "2784" viewBox= "0 0 2016 2784">
  <g transform= "matrix(1.3333333,0,0,-1.3333333,0,2784)">
    <g
       id="g60002"
       transform="translate(16.8)"><g
         id="g60004"
         transform="translate(985.92,1121.28)">
<path d="m 0,0 h 0.96 c 0.48,-0.96 0.48,-1.44 0.96,-1.44 0.48,-0.48 0.48,-0.48 0.96,-0.48 0.96,0 1.44,0 1.92,0.48 0,0.48 0.48,0.96 0.48,1.44 0,0.48 -0.48,0.96 -0.48,1.44 -0.48,0.48 -0.96,0.48 -1.44,0.48 -0.48,0 -0.48,0 -0.96,0 v 0.96 c 0,0 0,0 0.48,0 0.48,0 0.96,0.48 1.44,0.48 0,0.48 0.48,0.96 0.48,1.44 0,0.48 0,0.96 -0.48,0.96 C 3.84,6.24 3.36,6.24 2.88,6.24 2.4,6.24 2.4,6.24 1.92,5.76 1.44,5.76 1.44,5.28 1.44,4.32 L 0,4.8 C 0,5.28 0.48,6.24 0.96,6.72 1.44,7.2 2.4,7.2 2.88,7.2z" id="path60006" style="fill:#000000;fill-opacity:1;fill-rule:nonzero;stroke:none"/></g></g></g>
</svg>
```
Now out of this information can you calculate me the coordinates at which point in the plane the path element starts?

To find the coordinates at which the ```<path>``` element starts within your SVG, we need to understand and apply the transformations that have been defined in your SVG markup. The transformations are hierarchical and apply from outer to inner groups, affecting all contained elements.

Here's the breakdown of the transformations applied:
Initial SVG Setup

    SVG Dimensions: Width = 2016, Height = 2784
    ViewBox: "0 0 2016 2784"

Group Transformations

    First Group (<g>) Transformation: matrix(1.3333333,0,0,-1.3333333,0,2784)
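        This scales the x-coordinates by a factor of 1.3333333 (an enlargement of about 33.33%) and the y-coordinates by the same magnitude, but flips the y-axis (since the y scale factor is negative) and then shifts the flipped y-coordinates by 2784 units, so that y = 0 of the inner coordinate system lands at the bottom edge of the 2784-unit-high canvas.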

    Second Group (<g id="g60002">) Transformation: translate(16.8)
        This translates everything within this group by 16.8 units to the right along the x-axis.

    Third Group (<g id="g60004">) Transformation: translate(985.92, 1121.28)
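        This translates everything within this group by 985.92 units along the x-axis and 1121.28 units along the y-axis. Because the outermost group flips the y-axis, this positive y offset moves the content up on screen, not down.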

Path Element

    Path Start: m 0,0
        This command starts the path at coordinate (0,0) relative to its immediate positioning context, which is affected by all the above transformations.

Calculating Global Position

To calculate the effective starting position of the path, we need to consider all the transformations. We'll start from the local coordinate (0,0) defined by the <path> element and transform it step-by-step through the transformations up to the global SVG coordinates:

    Apply the translations from the groups: Starting from (0,0), translate by the amounts specified:
        From the innermost group: (0,0) + (985.92, 1121.28) = (985.92, 1121.28)
        From the second group: (985.92, 1121.28) + (16.8, 0) = (1002.72, 1121.28)

    Apply the matrix transformation:
        Scale x by 1.3333333: $1002.72 \times 1.3333333$
        Scale y by -1.3333333 and then translate by 2784: $1121.28 \times (-1.3333333) + 2784$

We'll calculate these final global coordinates.

The effective global coordinates at which the `<path>` element starts in your SVG are approximately $(1336.96, 1288.96)$, since $1002.72 \times 1.3333333 \approx 1336.96$ and $1121.28 \times (-1.3333333) + 2784 \approx 1288.96$. These coordinates are in the SVG's coordinate system, after applying all the specified transformations.

## find all used colors

In [25]:
from bs4 import BeautifulSoup

# Open and read the SVG file
with open('2681_2.svg', 'r') as file:
    svg_content = file.read()

soup = BeautifulSoup(svg_content, 'xml')  # Use 'xml' parser for parsing SVG


unique_colors = set()  # Set to store unique fill values (colors or 'none')

for path in soup.find_all('path'):
    # A fill can be given directly as a `fill` attribute ...
    fill_attribute = path.get('fill')
    if fill_attribute is not None and fill_attribute.strip():
        unique_colors.add(fill_attribute.strip())

    # ... or as a `fill` declaration inside the `style` attribute.
    for declaration in (path.get('style') or '').split(';'):
        # partition() splits at the first ':' only, so values that contain a
        # ':' themselves stay intact; the property name must be exactly
        # 'fill', which excludes e.g. 'fill-opacity' and 'fill-rule'.
        name, separator, value = declaration.partition(':')
        if separator and name.strip() == 'fill':
            unique_colors.add(value.strip())
            break  # only the first fill declaration of a path is recorded

# Output the unique colors
print(unique_colors)

{'#00ffff', '#00ff00', 'none', '#ff0000', '#000000', '#ffffff', '#ffff00'}


## find yellow cross element

In [26]:

soup = BeautifulSoup(svg_content, 'xml')

# Fill value to search for, compared as an exact string
target_color = '#ffff00'
paths_with_target_color = []

for path in soup.find_all('path'):
    # Check the fill attribute directly
    if path.get('fill') == target_color:
        paths_with_target_color.append(path)
        continue

    # Otherwise check within the style attribute
    style = path.get('style')
    if not style:
        continue

    # Parse "name:value" declarations. partition() splits at the first ':'
    # only and pieces without a ':' (empty or whitespace-only) are skipped,
    # so neither of them can raise. Names are stripped so that
    # "stroke:none; fill:#ffff00" is recognised despite the space.
    styles = {}
    for item in style.split(';'):
        name, separator, value = item.partition(':')
        if separator:
            styles[name.strip()] = value.strip()

    if styles.get('fill') == target_color:
        paths_with_target_color.append(path)

# Output the paths or manipulate them as needed
for path in paths_with_target_color:
    print(path)

<svg:path d="m 0,0 -5.76,-0.96 1.44,-2.4 -2.88,0.96 -1.92,-1.44 v 1.92 h -1.92 L -9.6,0 -12,1.44 -9.12,1.92 -8.64,3.84 -7.68,2.4 -4.32,5.76 -5.76,1.92 Z" id="path168" style="fill:#ffff00;fill-opacity:1;fill-rule:nonzero;stroke:none"/>
<svg:path d="m 0,0 -5.76,-0.96 1.44,-2.4 -2.88,0.96 -1.92,-1.44 v 1.92 h -1.92 L -9.6,0 -12,1.44 -9.12,1.92 -8.64,3.84 -7.68,2.4 -4.32,5.76 -5.76,1.92 Z" id="path180" style="fill:#ffff00;fill-opacity:1;fill-rule:nonzero;stroke:none"/>
<svg:path d="m 0,0 -5.76,-0.96 1.44,-2.4 -2.88,0.96 -1.92,-1.44 v 1.92 h -1.92 L -9.6,0 -12,1.44 -9.12,1.92 -8.64,3.84 -7.68,2.4 -4.32,5.76 -5.76,1.92 Z" id="path192" style="fill:#ffff00;fill-opacity:1;fill-rule:nonzero;stroke:none"/>
<svg:path d="m 0,0 -5.76,-0.96 1.44,-2.4 -2.88,0.96 -1.92,-1.44 v 1.92 h -1.92 L -9.6,0 -12,1.44 -9.12,1.92 -8.64,3.84 -7.68,2.4 -4.32,5.76 -5.76,1.92 Z" id="path204" style="fill:#ffff00;fill-opacity:1;fill-rule:nonzero;stroke:none"/>
<svg:path d="m 0,0 -5.76,-0.96 1.44,-2.4 -2.88,0.96 -1.92,-1