This notebook handles the first step of the process: creating the code from the images.

In [8]:
from azure.cognitiveservices.vision.computervision import ComputerVisionClient
from azure.cognitiveservices.vision.computervision.models import OperationStatusCodes
from azure.cognitiveservices.vision.computervision.models import VisualFeatureTypes
from msrest.authentication import CognitiveServicesCredentials

from array import array
import os
from PIL import Image
import sys
import time
from glob import glob
import secrets
import re

In [3]:
# Azure Computer Vision client configuration and authentication.
# NOTE(review): `secrets.SUB_KEY` implies `secrets` resolves to a project-local
# module exposing SUB_KEY rather than the standard-library `secrets` - confirm.
endpoint = "https://test-image-rec.cognitiveservices.azure.com/"
subscription_key = secrets.SUB_KEY
computervision_client = ComputerVisionClient(
    endpoint,
    CognitiveServicesCredentials(subscription_key),
)

# Base URL that image filenames are appended to when building request URLs
GH_BASE = 'https://raw.githubusercontent.com/GitToby/college-code-reincarnation/master/meta_work/img/'

In [4]:
# Collect the bare filename of every entry in the local img/ folder.
# os.path.basename drops the directory part regardless of which path separator
# glob returns, whereas stripping the literal "img/" prefix leaves
# backslash-separated paths (as produced on Windows) untouched.
images = [os.path.basename(path) for path in glob("img/*")]

# Remote URL for each image: the GitHub base URL followed by the filename
image_urls = [GH_BASE + im for im in images]

print(len(images), "Images")

37 Images


In [5]:
def make_request(img_url: str, print_res: bool = False) -> list:
    """Run the Computer Vision read operation on an image URL and return its lines.

    Args:
        img_url: URL of the image to submit to ``computervision_client.read``.
        print_res: When True, print the ``text`` of every recognised line.

    Returns:
        A list with the line objects of every page in the read result, in page
        order (each line exposes a ``.text`` attribute). The list is empty when
        the service reports no pages or no lines.

    Raises:
        RuntimeError: If the operation ends in any status other than succeeded.
    """
    # Submit the image; the raw response is requested so its headers can be read
    read_response = computervision_client.read(img_url, raw=True)

    # The operation ID is the last path segment of the Operation-Location header
    operation_location = read_response.headers["Operation-Location"]
    operation_id = operation_location.split("/")[-1]

    # Poll once per second until the remote side leaves the pending states
    while True:
        read_result = computervision_client.get_read_result(operation_id)
        if read_result.status not in ['notStarted', 'running']:
            break
        time.sleep(1)

    # Fail loudly instead of implicitly returning None, which callers would
    # only trip over later when iterating the result
    if read_result.status != OperationStatusCodes.succeeded:
        raise RuntimeError(
            f"Read operation for {img_url} ended with status {read_result.status!r}"
        )

    # Gather lines from every page rather than returning after the first one
    lines = []
    for page in read_result.analyze_result.read_results:
        lines.extend(page.lines)

    if print_res:
        for line in lines:
            print(line.text)

    return lines

In [6]:
def pipe_image_to_txt(image_file_path: str) -> tuple:
    """Read the text of one hosted image and save it as a .txt file under out/.

    Args:
        image_file_path: Image filename; it is appended to ``GH_BASE`` to form
            the URL passed to ``make_request``.

    Returns:
        A ``(image_file_path, out_file_name)`` tuple, where ``out_file_name`` is
        the name of the text file written inside ``out/``.

    Raises:
        RuntimeError: If ``make_request`` returns no result for the image.
    """
    # form known url
    image_url = GH_BASE + image_file_path

    # Make request and get data
    res = make_request(image_url)
    if res is None:
        # Explicit error instead of a TypeError from iterating None below
        raise RuntimeError(f"No read result returned for {image_url}")

    # Join resulting data to a string, one recognised line per text line
    out_text = "\n".join(t.text for t in res)

    # Swap whatever extension the image has for .txt; replacing the literal
    # ".jpg" would leave other image types (.png, .JPG, .jpeg) unchanged
    stem, _ext = os.path.splitext(image_file_path)
    out_file_name = stem + ".txt"

    # dump into an out file, creating the output folder on first use
    os.makedirs("out", exist_ok=True)
    with open(f"out/{out_file_name}", "w") as f:
        f.write(out_text)

    # util result
    return image_file_path, out_file_name


In [42]:
# Process every image in sorted filename order and report the
# image -> text-file mapping for each one
ordered_images = sorted(images)
for idx, image_name in enumerate(ordered_images):
    source_name, text_name = pipe_image_to_txt(image_name)
    print(idx, "|", source_name, "->", text_name)

0 | SessionTimetable2.jpg -> SessionTimetable2.txt
1 | add new member 1.jpg -> add new member 1.txt
2 | addNewMember2.jpg -> addNewMember2.txt
3 | addNewMember3.jpg -> addNewMember3.txt
4 | addNewMember4.jpg -> addNewMember4.txt
5 | addNewMember5.jpg -> addNewMember5.txt
6 | addRemoveMembers1.jpg -> addRemoveMembers1.txt
7 | addRemoveMembers2.jpg -> addRemoveMembers2.txt
8 | addRemoveMembers3.jpg -> addRemoveMembers3.txt
9 | addRemoveMembers4.jpg -> addRemoveMembers4.txt
10 | editMemberInfo1.jpg -> editMemberInfo1.txt
11 | editMemberInfo2.jpg -> editMemberInfo2.txt
12 | editMemberInfo3.jpg -> editMemberInfo3.txt
13 | editMemberInfo5.jpg -> editMemberInfo5.txt
14 | editMemberInfo6.jpg -> editMemberInfo6.txt
15 | mainMenu1.jpg -> mainMenu1.txt
16 | mainmenu2.jpg -> mainmenu2.txt
17 | memberPage1.jpg -> memberPage1.txt
18 | memberPage2.jpg -> memberPage2.txt
19 | memberPage3.jpg -> memberPage3.txt
20 | memberPage4.jpg -> memberPage4.txt
21 | memberTypes1.jpg -> memberTypes1.txt
22 | membe

In [9]:
def split_class_and_page(txt_path: str) -> tuple:
    """Split a text file path into its class name and trailing page number.

    The directory and extension are dropped, then a trailing run of digits
    (optionally preceded by whitespace) is taken as the page number.

    Args:
        txt_path: Path such as ``out/addNewMember2.txt``.

    Returns:
        ``(class_name, page_number)``. When the name has no trailing digits, or
        consists of digits only, the whole name is returned with page number 0.
    """
    stem = os.path.splitext(os.path.basename(txt_path))[0]
    match = re.match(r"^(.*?)\s*([0-9]+)$", stem)
    if match is None or not match.group(1):
        return stem, 0
    return match.group(1), int(match.group(2))


files = glob("out/*.txt")
f_map = dict()

# Group all files by their 'className'. Working on the parsed filename avoids
# the crash on names without a letters-then-digits run (e.g. "add new member 1")
# and keeps multi-digit page numbers out of the class name.
for txt_path in files:
    f_key, _page = split_class_and_page(txt_path)
    f_map.setdefault(f_key, []).append(txt_path)

# dump them into code files
for class_name, txt_files in f_map.items():

    # order pages numerically so that page 10 follows page 9 rather than page 1
    ordered_files = sorted(txt_files, key=lambda p: (split_class_and_page(p)[1], p))

    # extract and append to content, making sure each page ends with a newline
    # so the last line of one page is not fused with the first line of the next
    pages = []
    for txt_f in ordered_files:
        with open(txt_f, "r") as src:
            text = src.read()
        if text and not text.endswith("\n"):
            text += "\n"
        pages.append(text)
    content = "".join(pages)

    # write this out to a file, creating the output folder on first use
    os.makedirs("code", exist_ok=True)
    with open(f"code/{class_name}.vb", "w") as dst:
        dst.write(content)
