In [14]:
from lxml import etree
import cv2
import numpy as np

In [15]:
# Parse the alignment XML for this fragment and keep a handle on its root element.
xml_tree = etree.parse("105-Fg003-R-C01-R01-D03012012-T135030-LR924_012_F.xml_algn.xml")
root = xml_tree.getroot()

# Empty placeholder list; none of the visible cells fills or reads it.
alto_elements = list()

In [16]:
# Strip the namespace URI from every element name so the plain, unqualified
# paths used later in findall() match.
# iter() replaces the deprecated getiterator() and walks the same nodes.
for elem in root.iter():
    # Comments and processing instructions have a non-string .tag, and
    # etree.QName() raises ValueError on them, so only real elements are renamed.
    if isinstance(elem.tag, str):
        elem.tag = etree.QName(elem).localname

In [17]:
def xml_to_coordinates(t):
    """Convert a polygon ``POINTS`` attribute string into an array of vertices.

    The text is read as a flat sequence of integers that are paired up, in
    order, as ``(x, y)``. Values may be separated by any run of whitespace
    and/or commas, so both ``"x1 y1 x2 y2"`` and ``"x1,y1 x2,y2"`` are accepted.

    Args:
        t: String holding the integer coordinates.

    Returns:
        Integer ``numpy.ndarray`` of shape ``(n, 2)``, one row per vertex
        (shape ``(0, 2)`` when ``t`` contains no values). A trailing value
        without a partner is ignored.

    Raises:
        ValueError: If a token is not a valid integer literal.
    """
    # split() with no argument collapses leading, trailing and repeated
    # whitespace; split(' ') produced empty tokens there and int('') failed.
    tokens = t.replace(',', ' ').split()
    points = [(int(x), int(y)) for x, y in zip(tokens[0::2], tokens[1::2])]
    # reshape keeps the (n, 2) layout even when there are no points at all.
    return np.array(points, dtype=int).reshape(-1, 2)

In [18]:
def centroid_from_coordinates(coords):
    """Return the integer centre ``(cx, cy)`` of a four-vertex polygon.

    ``cx`` is the midpoint between the x values of vertices 0 and 2, and
    ``cy`` is the midpoint between the y values of vertices 1 and 3; both are
    truncated with ``int()``. The vertices are expected in counter-clockwise
    order.

    Args:
        coords: Indexable sequence of at least four ``(x, y)`` vertices.

    Returns:
        Tuple ``(cx, cy)`` of Python ints.
    """
    first_x, third_x = coords[0][0], coords[2][0]
    second_y, fourth_y = coords[1][1], coords[3][1]

    center_x = int(first_x + (third_x - first_x) / 2)
    center_y = int(second_y + (fourth_y - second_y) / 2)
    return (center_x, center_y)

In [19]:
def inner_polygon_by_threshold(coords,thresh):
    """Build a four-vertex polygon offset from ``coords`` by a fixed margin.

    Each of the first four vertices is moved by ``thresh`` on both axes:
    vertex 0 by (+, +), vertex 1 by (+, -), vertex 2 by (-, -) and
    vertex 3 by (-, +).

    Args:
        coords: Indexable sequence of at least four ``(x, y)`` vertices.
        thresh: Offset applied on both axes.

    Returns:
        ``numpy.ndarray`` with the four shifted vertices, one per row.
    """
    first, second, third, fourth = coords[0], coords[1], coords[2], coords[3]

    shifted = [
        [first[0] + thresh, first[1] + thresh],
        [second[0] + thresh, second[1] - thresh],
        [third[0] - thresh, third[1] - thresh],
        [fourth[0] - thresh, fourth[1] + thresh],
    ]
    return np.array(shifted)

In [20]:
def inner_polygon_by_percentage(coords,percent):
    """Build a four-vertex polygon offset from ``coords`` by relative margins.

    The vertical margin is ``int(|y1 - y0| * percent / 200)`` and the
    horizontal margin is ``int(|x2 - x1| * percent / 200)``, i.e. half of
    ``percent`` percent of each extent on every side. Vertices are then
    moved by (+mx, +my), (+mx, -my), (-mx, -my) and (-mx, +my) in that order.

    Args:
        coords: Indexable sequence of at least four ``(x, y)`` vertices.
        percent: Percentage of each extent to remove in total.

    Returns:
        ``numpy.ndarray`` with the four shifted vertices, one per row.
    """
    first, second, third, fourth = coords[0], coords[1], coords[2], coords[3]

    # Extent along y comes from vertices 0 -> 1, extent along x from 1 -> 2.
    height = abs(second[1] - first[1])
    margin_y = int(height * percent / 200)

    width = abs(third[0] - second[0])
    margin_x = int(width * percent / 200)

    shifted = [
        [first[0] + margin_x, first[1] + margin_y],
        [second[0] + margin_x, second[1] - margin_y],
        [third[0] - margin_x, third[1] - margin_y],
        [fourth[0] - margin_x, fourth[1] + margin_y],
    ]
    return np.array(shifted)

In [21]:
def create_circular_mask(h, w, center, radius):
    """Return a boolean ``(h, w)`` mask that is True inside a disc.

    Args:
        h: Number of rows of the mask.
        w: Number of columns of the mask.
        center: ``(x, y)`` position of the disc centre (column, row).
        radius: Disc radius; pixels at exactly this distance are included.

    Returns:
        Boolean ``numpy.ndarray`` of shape ``(h, w)``.
    """
    # Open grids: rows has shape (h, 1), cols has shape (1, w); they broadcast.
    rows, cols = np.ogrid[:h, :w]
    dx = cols - center[0]
    dy = rows - center[1]
    return np.sqrt(dx**2 + dy**2) <= radius

In [22]:
# Load the fragment photograph; flag 1 requests a 3-channel colour image.
fragment_image = cv2.imread("105-Fg003-R-C01-R01-D03012012-T135030-LR924_012_F.jpg", 1)
# cv2.imread reports failure by returning None instead of raising, which would
# otherwise surface as an unrelated AttributeError on .shape below.
if fragment_image is None:
    raise OSError("Image is missing or unreadable: 105-Fg003-R-C01-R01-D03012012-T135030-LR924_012_F.jpg")
h,w,c= fragment_image.shape
# Single-channel masks with the same height and width as the image. uint8 is
# the 8-bit type image encoders expect; the masks only ever hold 0 or 255.
char_mask = np.zeros([h,w], dtype=np.uint8)
line_mask = np.zeros([h,w], dtype=np.uint8)

In [23]:
# Draw a filled disc of radius 7 on char_mask at the centre of the polygon of
# every glyph whose CONTENT is not a single space.
for line in root.findall("Layout/Page/PrintSpace/TextBlock/TextLine"):
    for glyph in line.findall("String/Glyph"):
        # Glyphs that only hold a space have no character to mark.
        if glyph.get("CONTENT") == ' ':
            continue
        for polygon in glyph.findall("Shape/Polygon"):
            coords = xml_to_coordinates(polygon.get("POINTS"))
            centroid = centroid_from_coordinates(coords)
            # Thickness -1 asks OpenCV for a filled circle.
            cv2.circle(char_mask, centroid, 7, (255, 255, 255), -1)


  

In [24]:
# Save the loaded fragment image as PNG. No visible cell draws on
# fragment_image, so this is the input image re-encoded.
cv2.imwrite('fragment.png',fragment_image)

True

In [25]:
# Save the character mask (filled discs drawn at the glyph centres) as PNG.
cv2.imwrite('char_mask.png',char_mask)

True

In [None]:
# Save the line mask as PNG.
# NOTE(review): line_mask is allocated as zeros and no visible cell draws on
# it, so this writes an all-zero image — confirm whether that is intended.
cv2.imwrite('line_mask.png',line_mask)