In [60]:
import io
from google.cloud import vision
from google.cloud.vision import types

from enum import Enum
from PIL import Image, ImageDraw
# import json, jsonpickle
from pprint import pprint

In [2]:
# Load ./convert/7.012noteslindrew-0.png as raw bytes and wrap them in a
# `types.Image` message; `image` is what the detection cells send to the client.
with io.open("./convert/7.012noteslindrew-0.png", 'rb') as image_file:
    image = types.Image(content = image_file.read())

In [3]:
# Vision API client shared by every detection call in this notebook.
# NOTE(review): presumably picks up credentials from the environment — confirm.
client = vision.ImageAnnotatorClient()



In [4]:
# Run document text detection on `image`; later cells read
# `response.full_text_annotation` from the result.
response = client.document_text_detection(image=image)

In [5]:
# Write the string form of the full text annotation to response.txt.
with open("response.txt", "w") as f:
    f.write(str(response.full_text_annotation))

In [6]:
class FeatureType(Enum):
    """Levels of the OCR layout hierarchy whose bounding boxes can be requested.

    Passed to `get_document_bounds` to choose which level is collected.
    """
    PAGE = 1
    BLOCK = 2
    PARA = 3    # paragraph
    WORD = 4
    SYMBOL = 5

In [18]:
def draw_boxes(image, bounds, color, thick = False):
    """Outline every bounding polygon in `bounds` on `image`.

    Args:
        image: PIL image to draw on; it is modified in place.
        bounds: iterable of objects exposing `.vertices`, each vertex having
            `.x` and `.y`.
        color: outline colour handed to the PIL drawing calls.
        thick: when true, trace all vertices as a closed polyline of width 9;
            otherwise draw an unfilled polygon through the first four vertices.

    Returns:
        The same `image` object that was passed in.
    """
    canvas = ImageDraw.Draw(image)

    for box in bounds:
        corners = box.vertices

        if thick:
            # Repeat the first corner at the end so the polyline closes.
            outline = [(corner.x, corner.y) for corner in corners]
            outline.append(outline[0])
            canvas.line(outline, fill=color, width=9)
            continue

        # Flat [x0, y0, x1, y1, ...] list built from exactly four corners.
        flat = []
        for index in range(4):
            corner = corners[index]
            flat.append(corner.x)
            flat.append(corner.y)
        canvas.polygon(flat, None, color)

    return image

In [8]:
def _page_bounding_box(page):
    """Build a rectangle covering the whole page from its width and height."""
    width, height = page.width, page.height
    return types.BoundingPoly(vertices=[
        types.Vertex(x=0, y=0),
        types.Vertex(x=width, y=0),
        types.Vertex(x=width, y=height),
        types.Vertex(x=0, y=height),
    ])


def get_document_bounds(response, feature):
    """Collect the bounding boxes of one layout level from an OCR response.

    Args:
        response: detection response exposing `full_text_annotation.pages`.
        feature: the `FeatureType` member selecting which level to collect.

    Returns:
        A list of bounding polygons in document order. For
        `FeatureType.PAGE` there is one rectangle per page, spanning
        (0, 0) to (page.width, page.height). Earlier code appended the last
        block's box here and failed on a page without blocks.
    """
    bounds = []
    document = response.full_text_annotation

    for page in document.pages:
        if feature == FeatureType.PAGE:
            # Pages are handled first so an empty page still yields a box.
            bounds.append(_page_bounding_box(page))
            continue

        for block in page.blocks:
            if feature == FeatureType.BLOCK:
                bounds.append(block.bounding_box)
                continue

            for paragraph in block.paragraphs:
                if feature == FeatureType.PARA:
                    bounds.append(paragraph.bounding_box)
                    continue

                for word in paragraph.words:
                    if feature == FeatureType.WORD:
                        bounds.append(word.bounding_box)
                    elif feature == FeatureType.SYMBOL:
                        bounds.extend(
                            symbol.bounding_box for symbol in word.symbols)

    # The list `bounds` contains the coordinates of the bounding boxes.
    return bounds

In [26]:
def render_doc_text(reponse, filein, fileout):
    """Draw page, block, paragraph and word boxes on an image.

    Args:
        reponse: detection response whose boxes are drawn. The misspelled
            parameter name is kept so existing callers keep working; the body
            now uses this argument instead of the global `response`.
        filein: path of the image to open and annotate.
        fileout: path to save the annotated image to. A falsy value
            (`0`, `None`, `""`) displays the image with `image.show()` instead.
    """
    image = Image.open(filein)

    # (feature, outline colour, thick outline?) in drawing order.
    layers = (
        (FeatureType.PAGE, 'blue', True),
        (FeatureType.BLOCK, 'green', True),
        (FeatureType.PARA, 'red', False),
        (FeatureType.WORD, 'yellow', False),
    )
    for feature, color, thick in layers:
        bounds = get_document_bounds(reponse, feature)
        draw_boxes(image, bounds, color, thick)

    # Truthiness replaces `is not 0`, an identity test on an int literal.
    if fileout:
        image.save(fileout)
    else:
        image.show()

In [20]:
# Reload page 0 and run document text detection again, rebinding the
# module-level names `image` and `response`.
with io.open("./convert/7.012noteslindrew-0.png", 'rb') as image_file:
    image = types.Image(content = image_file.read())
response = client.document_text_detection(image=image)

In [29]:
def get_response(filein):
    """Run document text detection on the image file at `filein`.

    The file is read as bytes, wrapped in a `types.Image`, and sent through
    the module-level `client`; the client's response is returned as is.
    """
    with io.open(filein, 'rb') as handle:
        payload = handle.read()
    return client.document_text_detection(image=types.Image(content=payload))

In [28]:
# Render the layout boxes for page 0 and save the result as tmp0.png.
render_doc_text(response, "./convert/7.012noteslindrew-0.png", "tmp0.png")

In [30]:
# Run document text detection on page 3.
response3 = get_response("./convert/7.012noteslindrew-3.png")

In [31]:
# Render page 3 with tmp3.png as the output path.
render_doc_text(response3, "./convert/7.012noteslindrew-3.png", "tmp3.png")

In [211]:
# Run detection on pages 0-9 (one API request per page) and keep every
# response in `response_list`; each page is also rendered to tmp<i>.png.
response_list = []
for i in range(10):
    filein = "./convert/7.012noteslindrew-{}.png".format(i)
    response_list.append(get_response(filein))
    render_doc_text(response_list[-1], filein, "tmp{}.png".format(i))

In [52]:
# Write the string forms of `response` and `response3`, both the whole
# response and its full_text_annotation alone, to text files.
with open("response.txt", 'w') as f:
    f.write(str(response))
with open("response_fta.txt", 'w') as f:
    f.write(str(response.full_text_annotation))
# with open("response_fta.json", 'w') as f:
#     f.write(jsonpickle.decode(response.full_text_annotation))
with open("response3.txt", 'w') as f:
    f.write(str(response3))
with open("response3_fta.txt", 'w') as f:
    f.write(str(response3.full_text_annotation))

In [215]:
# Characters that need escaping before OCR text is written into LaTeX.
special = "&%$#_{}~^\\"

# Default escape: prefix the character with a backslash (e.g. "&" -> "\&").
special_map = {s: "\\" + s for s in special}

# These three have no backslash-prefixed form that prints the character, so
# they map to named text commands. The trailing "{}" terminates the command
# name: without it a following letter fuses into an undefined control
# sequence and a following space is swallowed.
special_map["~"] = "\\textasciitilde{}"
special_map["^"] = "\\textasciicircum{}"
special_map["\\"] = "\\textbackslash{}"

# Square brackets are wrapped in a group.
# NOTE(review): presumably so "[" after \item or \\ is not read as an optional argument — confirm.
special_map["["] = "{[}"
special_map["]"] = "{]}"

In [137]:
def parse_sym(s):
    """Return the `special_map` replacement for `s`, or `s` itself if none exists."""
    return special_map.get(s, s)

In [207]:
def parse_detected_break(text, detected_break, in_itemize = False):
    """Attach the separator implied by `detected_break` to `text`.

    Args:
        text: the text produced so far for a symbol, word, paragraph or block.
        detected_break: object with a numeric `type` and a boolean `is_prefix`.
            NOTE(review): the codes presumably follow the Vision API BreakType
            enum (1 SPACE, 2 SURE_SPACE, 3 EOL_SURE_SPACE, 5 LINE_BREAK) — confirm.
        in_itemize: when true, line-ending breaks become a bare newline
            instead of a LaTeX line break.

    Returns:
        `text` with the separator prepended when `is_prefix` is set, appended
        otherwise. Codes other than 1, 2, 3 and 5 add nothing.
    """
    code = detected_break.type

    line_end = "\n" if in_itemize else "\\\\\n"
    separators = {
        1: " ",
        2: "    ",
        3: line_end,
        5: line_end,
    }
    separator = separators.get(code, "") if code else ""

    if detected_break.is_prefix:
        return separator + text
    return text + separator
    

In [None]:
def avg_sym_width(block):
    """Return the mean symbol width in `block`.

    A symbol's width is the x distance between vertices 0 and 1 of its
    bounding box. Returns 0.0 when the block contains no symbols. The earlier
    version collected the widths but never returned anything.
    """
    widths = [
        sym.bounding_box.vertices[1].x - sym.bounding_box.vertices[0].x
        for paragraph in block.paragraphs
        for word in paragraph.words
        for sym in word.symbols
    ]
    if not widths:
        return 0.0
    return sum(widths) / len(widths)
                

In [214]:
def extract_block(block):
    """Convert one OCR block into a LaTeX fragment.

    Symbols are escaped with `parse_sym`, joined using the separators from
    `parse_detected_break`, and bullet-like symbols are turned into
    `itemize` entries. An open list is closed at the end of its paragraph;
    back-to-back lists are then stitched together.

    Args:
        block: object exposing `paragraphs` -> `words` -> `symbols`, where
            symbols, words, paragraphs and the block itself each carry
            `property.detected_break`, and symbols carry `text`.

    Returns:
        The LaTeX text for the block.
    """
    b = ""

    in_itemize = False

    for paragraph in block.paragraphs:
        p = ""
        for word in paragraph.words:
            w = ""
            for symbol in word.symbols:
                detected_break = symbol.property.detected_break
                text = parse_sym(symbol.text)

                # NOTE(review): "1." / "|." at the start of a paragraph or
                # line look like OCR misreads of a bullet — confirm.
                if text == "." and (p == "1" or p == "|"):
                    p = ""
                    text = "•"
                elif p.endswith("\n1"):
                    # The current symbol is replaced by a bullet and the
                    # stray "1" is removed from the paragraph text.
                    text = "•"
                    p = p[:-1]

                # Exact match only: the old substring test `text in "•-"`
                # also matched an empty symbol and produced a spurious \item.
                if text in ("•", "-"):
                    text = ""
                    if not in_itemize:
                        text += "\\begin{itemize}"
                        in_itemize = True
                    text += "\\item "
                w += parse_detected_break(text, detected_break, in_itemize)

            p += parse_detected_break(w, word.property.detected_break, in_itemize)

        # A list never spans paragraphs: close it here.
        if in_itemize:
            p += "\\end{itemize}\n"
            in_itemize = False

        b += parse_detected_break(p, paragraph.property.detected_break, in_itemize)

    text = parse_detected_break(b, block.property.detected_break)
    # Drop a LaTeX line break that sits directly before a list start.
    text = text.replace("\\\\\\begin{itemize}", "\\begin{itemize}")
    # Merge a list that closes and immediately reopens into one list.
    text = text.replace("\\end{itemize}\n\\begin{itemize}", "")
    return text

In [142]:
# with open("take3.tex", 'w') as f:
    
#     for page in response.full_text_annotation.pages:
#         for block in page.blocks:
#             f.write(extract_block(block))

In [209]:
# with open("take5.tex", 'w') as f:
    
#     for page in response.full_text_annotation.pages:
#         for block in page.blocks:
#             text = extract_block(block)
        
#             f.write(text)

\begin{itemize}\item  Office hours, recitations start next week
\item  Fill out the mock submission survey!

here
\begin{itemize}\item  Office hours, recitations start next week
\item  Fill out the mock submission survey!
\item  MITX?

here
You Do need to show up.\\

here
What is this class?\\

here
What is this class?\\
\begin{itemize}\item  Medicine rapidly developing in the present

here


In [221]:
# Read the template before opening the output, so a failure here cannot
# leave an empty take6.tex behind, and close the handle deterministically.
# NOTE(review): format.tex presumably holds the LaTeX preamble up to \begin{document} — confirm.
with open("format.tex", "r") as fmt_file:
    fmt = fmt_file.read()

# Convert every block of every page of every response, in order.
pieces = []
for response in response_list:
    for page in response.full_text_annotation.pages:
        for block in page.blocks:
            pieces.append(extract_block(block))
al = "".join(pieces)

text = fmt + al + "\\end{document}"

with open("take6.tex", 'w') as f:
    f.write(text)

In [222]:
# NOTE: this binds a second name to the same list object; no copy is made,
# so changes made through `edited_responses` are visible in `response_list`.
edited_responses = response_list

In [225]:
# Full text annotation of the first response, used by the merge probe below.
test = edited_responses[0].full_text_annotation

In [229]:
# Dry run of the block-merge heuristic: print the index of every block whose
# vertical extent overlaps the next block's by at least 10% of the shorter
# of the two heights. Nothing is modified.
for page in test.pages:
    i = 0
    while i < len(page.blocks) - 1:
        cur = page.blocks[i].bounding_box
        nex = page.blocks[i + 1].bounding_box

        # [top, bottom] taken from vertices 0 and 2 of each box.
        y1 = [cur.vertices[0].y, cur.vertices[2].y]
        y2 = [nex.vertices[0].y, nex.vertices[2].y]

        # Length of the shared y interval (0 when the boxes do not overlap).
        overlap = max(0, min(y1[1], y2[1]) - max(y1[0], y2[0]))

        # Compare against the shorter of BOTH heights; the earlier code
        # passed y1's height twice.
        if overlap >= 0.1 * min(y1[1] - y1[0], y2[1] - y2[0]):
            print(i, "merge!")

        i += 1

1 merge!
3 merge!
4 merge!
5 merge!
10 merge!


In [240]:
def _overlaps_vertically(cur, nex, min_overlap):
    """Tell whether two bounding boxes share enough of their vertical extent."""
    top1, bottom1 = cur.vertices[0].y, cur.vertices[2].y
    top2, bottom2 = nex.vertices[0].y, nex.vertices[2].y

    # Length of the shared y interval (0 when the boxes do not overlap).
    shared = max(0, min(bottom1, bottom2) - max(top1, top2))

    # Threshold is relative to the shorter of BOTH boxes; the earlier code
    # used the first box's height twice.
    return shared >= min_overlap * min(bottom1 - top1, bottom2 - top2)


def preprocess(response, min_overlap=0.1):
    """Merge consecutive blocks that sit on the same horizontal band.

    For each page, a block is merged with the block after it when their
    vertical extents (vertices 0 and 2 of the bounding boxes) overlap by at
    least `min_overlap` times the shorter height. Merging moves the second
    block's paragraphs onto the first and removes the second block; the
    merged block is then compared with its new neighbour.

    The merged block keeps its original bounding box, so later comparisons
    use the first block's extent only.

    Args:
        response: detection response; it is modified in place.
        min_overlap: required overlap as a fraction of the shorter height.

    Returns:
        The same `response` object.
    """
    for page in response.full_text_annotation.pages:
        i = 0
        while i < len(page.blocks) - 1:
            cur = page.blocks[i]
            nex = page.blocks[i + 1]

            if _overlaps_vertically(cur.bounding_box, nex.bounding_box, min_overlap):
                for paragraph in nex.paragraphs:
                    cur.paragraphs.append(paragraph)
                page.blocks.pop(i + 1)
                # Stay on the same index to test against the next block.
            else:
                i += 1

    return response

In [241]:
# Read the template before opening the output, so a failure here cannot
# leave an empty take7.tex behind, and close the handle deterministically.
# NOTE(review): format.tex presumably holds the LaTeX preamble up to \begin{document} — confirm.
with open("format.tex", "r") as fmt_file:
    fmt = fmt_file.read()

# Same as the take6 export, except each response first goes through
# `preprocess`, which returns the very object it was given after changing
# it in place, so the entries of `response_list` are modified.
pieces = []
for response in response_list:
    response = preprocess(response)
    for page in response.full_text_annotation.pages:
        for block in page.blocks:
            pieces.append(extract_block(block))
al = "".join(pieces)

text = fmt + al + "\\end{document}"

with open("take7.tex", 'w') as f:
    f.write(text)

In [258]:
# Load ./crop.jpg as a Vision image, rebinding the module-level `image`.
# NOTE(review): the execution counts show this cell (258) last ran after the
# detection cell below it (257) — confirm which image `cropresponse` came from.
with io.open("./crop.jpg", 'rb') as image_file:
    image = types.Image(content = image_file.read())

In [257]:
# Document text detection on whatever `image` currently holds.
cropresponse = client.document_text_detection(image=image)

In [254]:
# Render ./crop.jpg with cropout.jpg as the output path.
render_doc_text(cropresponse, "./crop.jpg", "cropout.jpg")

In [251]:
# Print the string form of the crop response.
print(str(cropresponse))




In [255]:
# Print the crop response; the captured output below is cut off part-way.
print(cropresponse)

text_annotations {
  locale: "und"
  description: "+\n+\n\345\215\201\344\270\200\n"
  bounding_poly {
    vertices {
      x: 127
      y: 357
    }
    vertices {
      x: 380
      y: 357
    }
    vertices {
      x: 380
      y: 672
    }
    vertices {
      x: 127
      y: 672
    }
  }
}
text_annotations {
  description: "+"
  bounding_poly {
    vertices {
      x: 325
      y: 357
    }
    vertices {
      x: 356
      y: 357
    }
    vertices {
      x: 356
      y: 465
    }
    vertices {
      x: 325
      y: 465
    }
  }
}
text_annotations {
  description: "+"
  bounding_poly {
    vertices {
      x: 340
      y: 471
    }
    vertices {
      x: 366
      y: 469
    }
    vertices {
      x: 372
      y: 561
    }
    vertices {
      x: 346
      y: 563
    }
  }
}
text_annotations {
  description: "\345\215\201\344\270\200"
  bounding_poly {
    vertices {
      x: 127
      y: 591
    }
    vertices {
      x: 380
      y: 592
    }
    vertices {
      x: 380
  

In [259]:
# Uses `text_detection` rather than `document_text_detection` on the current
# `image`, and rebinds the module-level `response` used by earlier cells.
response = client.text_detection(image=image)

In [261]:
# Print the text_detection response; the captured output below is cut off part-way.
print(response)

text_annotations {
  locale: "und"
  description: "+\nD\n+\n"
  bounding_poly {
    vertices {
      x: 153
      y: 370
    }
    vertices {
      x: 416
      y: 370
    }
    vertices {
      x: 416
      y: 1945
    }
    vertices {
      x: 153
      y: 1945
    }
  }
}
text_annotations {
  description: "+"
  bounding_poly {
    vertices {
      x: 306
      y: 370
    }
    vertices {
      x: 389
      y: 371
    }
    vertices {
      x: 389
      y: 442
    }
    vertices {
      x: 306
      y: 441
    }
  }
}
text_annotations {
  description: "D"
  bounding_poly {
    vertices {
      x: 341
      y: 1845
    }
    vertices {
      x: 416
      y: 1846
    }
    vertices {
      x: 414
      y: 1945
    }
    vertices {
      x: 339
      y: 1944
    }
  }
}
text_annotations {
  description: "+"
  bounding_poly {
    vertices {
      x: 237
      y: 608
    }
    vertices {
      x: 237
      y: 666
    }
    vertices {
      x: 153
      y: 665
    }
    vertices {
      x:

In [262]:
# Render ./crop.jpg again; this writes to the same cropout.jpg path as the
# earlier crop render cell.
render_doc_text(response, "./crop.jpg", "cropout.jpg")