In [None]:
import base64
import numpy as np
import cv2
import os
import json
import requests
import mimetypes
import datetime
import traceback
import uuid

In [None]:
def rgb_to_grayscale(rgb_image):
    """
    Collapse a cv2 RGB image into a grayscale image.
    :param rgb_image: image array handed to cv2.cvtColor with the RGB -> GRAY conversion code
    :return: the grayscale image produced by cv2.cvtColor
    """
    return cv2.cvtColor(rgb_image, cv2.COLOR_RGB2GRAY)

def convert_bytestring_to_cv2(bytestring):
    """
    Decodes a base64-encoded image file into a cv2 image.
    :param bytestring: base64-encoded content of an image file
    :return: np array as returned by cv2.imdecode
    """
    # Step 1: undo the base64 transport encoding to get the raw file bytes.
    raw_bytes = base64.b64decode(bytestring)
    # Step 2: expose the bytes as a flat uint8 buffer, the input format imdecode expects.
    encoded_pixels = np.frombuffer(raw_bytes, dtype=np.uint8)
    # Step 3: let OpenCV parse the image container (flags=1 is cv2.IMREAD_COLOR).
    return cv2.imdecode(encoded_pixels, flags=1)

def send_request_to_preprocessor(resource, content=None, type="post", base_url="http://localhost:6101"):
    """
    Sends request to a preprocessor resource and returns the response json. If return status code is not 200/201,
    will return dictionary with key "ERROR" (see send_request_to).
    :param resource: the REST resource to be called, e.g. /drawing/get/1 (include leading /)
    :param content: the payload of the request, e.g. json data for saving a drawing
    :param type: post, get, or delete
    :param base_url: scheme, host and port of the preprocessor service; change to your port/url
    :return: json response from endpoint
    """
    # The scheme is mandatory: without "http://" requests cannot pick a transport adapter and raises
    # instead of sending anything. A trailing slash on base_url is dropped so that it does not double
    # up with the leading slash of the resource.
    url = base_url.rstrip("/") + resource
    return send_request_to(url, content, type)


def send_request_to(url, content, type="post"):
    """
    Issues a single HTTP request and translates the outcome into a plain Python value.
    :param url: url to send content to
    :param content: json payload for get/post requests (not sent for delete)
    :param type: post, get, or delete
    :return: parsed json body for a successful get/post, True for a successful delete, otherwise a dictionary with
             key "ERROR" (unknown request type, timeout, or any status code other than 200/201)
    """
    # Unknown verbs are rejected before any network traffic happens.
    if type not in ("get", "post", "delete"):
        return {"ERROR": "invalid request type"}

    try:
        # timeout of 100 seconds on every verb
        if type == "delete":
            response = requests.delete(url, timeout=100)
        elif type == "get":
            response = requests.get(url, json=content, timeout=100)
        else:
            response = requests.post(url, json=content, timeout=100)
    except requests.exceptions.Timeout:
        # Only timeouts are mapped to an error dict; other request errors propagate to the caller.
        return {"ERROR": "timed out"}

    if response.status_code not in (200, 201):
        return {"ERROR": str(response.content)}
    if type == "delete":
        return True
    return response.json()

def get_vis_img(img, ocr_classes, ocr_bbs, ocr_text):
    """
    Returns a copy of img with a half-transparent, class-coloured box drawn over every OCR bounding box.
    :param img: image array to annotate (left unmodified)
    :param ocr_classes: one [classification, is_text] pair per box
    :param ocr_bbs: one box per entry, indexed as [x, y, width, height]
    :param ocr_text: one text entry per box; only used for the strict length check of the zip
    :return: annotated copy of img
    """
    draw_img = img.copy()

    named_classes = ["material","general_tolerance","surface","gdt","thread","iso"]

    palette = [(237, 5, 233),  # pink ->  material
               (247, 92, 2),  # orange -> general tol
               (4, 79, 9), # dark green -> surface
               (0, 56, 240),  # blue -> gdt
               (2, 217, 250),  # cyan -> thread
               (0, 168, 120),  # iso
               (2, 250, 2),  #  green -> measure
               (250, 2, 2)]  # red -> text
    measure_color, text_color = palette[-2], palette[-1]

    # All boxes are first painted opaquely onto a black canvas of the same shape as img.
    overlay = np.zeros_like(img)

    # strict=True raises ValueError if the three lists differ in length.
    for bb, _text, (classification, is_text) in zip(ocr_bbs, ocr_text, ocr_classes, strict=True):
        if classification in named_classes:
            fill = palette[named_classes.index(classification)]
        elif is_text:
            fill = text_color
        else:
            fill = measure_color

        top_left = (bb[0], bb[1])
        bottom_right = (bb[0] + bb[2], bb[1] + bb[3])
        overlay = cv2.rectangle(overlay, top_left, bottom_right, fill, cv2.FILLED)

    alpha = 0.5
    blended = cv2.addWeighted(img, alpha, overlay, 1 - alpha, 0)
    # NOTE(review): the mask is element-wise, i.e. per channel. Where a box colour has a 0 component
    # (e.g. the blue gdt colour) that channel keeps the original pixel value instead of the blend.
    painted = overlay.astype(bool)
    draw_img[painted] = blended[painted]

    return draw_img


def get_rec_images(img, bbs):
    """
    Cuts one sub-image out of img for every bounding box.
    :param img: 2D-indexable image array (rows first, then columns)
    :param bbs: boxes indexed as [x, y, width, height]
    :return: list of crops, in the same order as bbs
    """
    def _crop(bb):
        left, top = bb[0], bb[1]
        width, height = bb[2], bb[3]
        # Rows are addressed by y, columns by x.
        return img[top:top + height, left:left + width]

    return [_crop(bb) for bb in bbs]

In [None]:
def get_smallest_tolerance(list_of_tols):
    """
    Reduces a list of general tolerances to the single lowest-ranked dimensional class and the single
    lowest-ranked geometric class found in it.

    Ranking follows the position in the class lists below (earlier = smaller); presumably the ISO 2768
    ordering from fine to coarse - TODO confirm. The trailing "-" in each list means "no class" and never wins.

    The two classes of an entry are evaluated independently: an unknown or missing geometric class no longer
    discards a valid dimensional class of the same entry (and vice versa), and entries with fewer than two
    elements are tolerated instead of raising IndexError.

    :param list_of_tols: iterable of entries whose element 0 is the dimensional class and element 1 the
                         geometric class (e.g. ("m", "k") or the string "mk")
    :return: dict with "char1" (dimensional class) and "char2" (geometric class); "" where none was found
    """
    tol_classes = ["f", "m", "c", "v", "-"]
    gdt_tol_classes = ["h", "k", "l", "-"]

    def _rank(classes, entry, position):
        """Return the index of entry[position] within classes, or None if it is missing or unknown."""
        if len(entry) <= position:
            return None
        try:
            return classes.index(entry[position])
        except ValueError:
            return None

    # Start at the "-" sentinel, i.e. "nothing found yet".
    no_tol_class = len(tol_classes) - 1
    no_gdt_class = len(gdt_tol_classes) - 1
    smallest_tol_class = no_tol_class
    smallest_gdt_class = no_gdt_class

    for tolerance in list_of_tols:
        tol_class_id = _rank(tol_classes, tolerance, 0)
        if tol_class_id is not None and tol_class_id < smallest_tol_class:
            smallest_tol_class = tol_class_id

        gdt_class_id = _rank(gdt_tol_classes, tolerance, 1)
        if gdt_class_id is not None and gdt_class_id < smallest_gdt_class:
            smallest_gdt_class = gdt_class_id

    return {
        "char1": tol_classes[smallest_tol_class] if smallest_tol_class < no_tol_class else "",
        "char2": gdt_tol_classes[smallest_gdt_class] if smallest_gdt_class < no_gdt_class else "",
    }


def convert_outer_measures(outer_measures):
    """
    Extracts the first element of every outer measure and pads the result to at least three values.

    Missing values are filled with the largest extracted value. Lists that already hold three or more
    values are returned unpadded.

    :param outer_measures: sequence of indexable entries; only element 0 of each entry is used
    :return: list of the extracted values, at least three long
    :raises ValueError: if outer_measures is empty (there is no maximum to pad with)
    """
    values = [entry[0] for entry in outer_measures]

    shortfall = 3 - len(values)
    if shortfall > 0:
        # The maximum does not change while padding with itself, so it is computed once.
        largest = max(values)
        values.extend([largest] * shortfall)

    return values


def convert_preprocessor_response_to_dict(preprocessor_response):
    """
    Flattens the 'drawing_data' part of a preprocessor response into the dict layout used for evaluation.
    :param preprocessor_response: response dict containing a 'drawing_data' entry
    :return: dict with material, general_tolerances (reduced via get_smallest_tolerance), name (always "NONE"),
             surfaces, gdts, threads and outer_measures (normalised via convert_outer_measures)
    :raises KeyError: if 'drawing_data' or one of the expected fields is missing
    """
    source = preprocessor_response['drawing_data']

    # Filled key by key in the output order; fields are read in the same order as they are written.
    converted = {"material": source["material"]}
    converted["general_tolerances"] = get_smallest_tolerance(source["general_tolerances"])
    converted["name"] = "NONE"
    for passthrough_key in ("surfaces", "gdts", "threads"):
        converted[passthrough_key] = source[passthrough_key]
    converted["outer_measures"] = convert_outer_measures(source["outer_dimensions"])
    return converted

def get_preprocessor_response(filename, pdf_base_dir="./data/pdfs"):
    """
    Sends one file to the preprocessor's /image_to_vector resource and returns the converted result.

    The file is read in binary mode, base64-encoded and posted together with its name and guessed MIME type.

    :param filename: name of the file, relative to pdf_base_dir
    :param pdf_base_dir: directory the file is read from; defaults to the previously hard-coded "./data/pdfs"
    :return: dict as built by convert_preprocessor_response_to_dict
    :raises OSError: if the file cannot be opened or read
    :raises KeyError: if the response lacks 'drawing_data' (e.g. when the request helper returned an
                      {"ERROR": ...} dict)
    """
    file_path = os.path.join(pdf_base_dir, filename)
    with open(file_path, "rb") as f:
        content_string = base64.b64encode(f.read()).decode("utf-8")

    file_data = {
        "file_name": filename,
        "file_content": content_string,
        # guess_type returns (type, encoding); the type is None when the extension is unknown.
        "file_type": mimetypes.guess_type(filename)[0],
    }
    response_data = send_request_to_preprocessor(resource="/image_to_vector", content=file_data, type="post")
    return convert_preprocessor_response_to_dict(response_data)


In [None]:
# Load the ground-truth labels.
# NOTE(review): `data` is never read in this cell - presumably the per-file ground truth below is meant
# to be looked up in it; confirm against the labels.json schema.
with open("./labels.json", "r") as f:
    data = json.load(f)

# One {"ground_truth", "preprocessor_reponse"} entry per successfully processed file.
results = []

# Wall-clock start of the whole run (naive local time).
start = datetime.datetime.now()

# os.listdir yields plain file-name strings. get_preprocessor_response reads the file from ./data/pdfs,
# not from the directory listed here.
for img_data in os.listdir("./path_to_your_dataset"):

    # TODO: add handling of your dataset
    # NOTE(review): as written, img_data is a str, so img_data["data"] below raises TypeError. Every
    # iteration therefore ends in the except branch and `results` stays empty until this is adapted.
    try:
        print(img_data)
        response_data = get_preprocessor_response(img_data)
        result = {
            "ground_truth": img_data["data"],
            # Key spelling ("reponse") is relied upon by the later post-processing cell.
            "preprocessor_reponse": response_data
        }
        results.append(result)
    except Exception as e:
        # Best effort: report the failure and carry on with the next file.
        traceback.print_exc()
        continue

# Wall-clock end of the run; failed files are included in the measured time.
end = datetime.datetime.now()

# Result container that the later cells post-process and write to disk.
output = {
    "prompt": "None",
    "model": "peprocessor_25_09_11",
    "time_spent": (end - start).total_seconds(),
    "results": results
}

In [None]:
import regex as re
def convert_pp_gdt(pp_gdt):
    """
    GDTs that are returned from the preprocessor are strings like 'ￌ 0.02 B'. Converts such a string into a dict
    with the appropriate name and runout values.

    Only calls available in both the stdlib `re` and the third-party `regex` module are used on `re`.

    :param pp_gdt: GDT string as returned by the preprocessor
    :return: {'name': <characteristic name>, 'runout': <first number in the string as float>}; name is "" and
             runout 0.0 when no known symbol is present, and runout is 0.0 when a symbol is present but the
             string contains no number
    """

    compact = pp_gdt.replace(" ", "")

    symbols = {
        "⌾": "Concentricity",
        "◯": "Circularity",
        "◠": "Profile of a Line",
        "⌓": "Profile of a Surface",
        "ￌ": "Perpendicularity",
        "↗": "Circular Runout",
        "⌰": "Total Runout",
        "=": "Symmetry",
        "//": "Parallelism",
        "▱": "Flatness",
        "∠": "Angularity",
        "⌖": "Position",
        "-": "Straightness"
    }
    # Symbols are tried in the order listed above; the first one contained in the string wins.
    name = next((label for symbol, label in symbols.items() if symbol in compact), "")
    if not name:
        return {'name': name, 'runout': 0.0}

    # The symbol is left in the string: none of the symbols contains a digit, so the search is unaffected.
    match = re.search(r"\d+\.?\d*", compact)
    # A symbol without any number would otherwise fail on a None match.
    runout = float(match.group(0)) if match else 0.0

    return {'name': name, 'runout': runout}

In [None]:
# Replace the raw GDT strings of every result with {'name', 'runout'} dicts, in place.
# NOTE: not idempotent - convert_pp_gdt expects strings, so running this cell a second time on the
# already converted dicts fails.
results = output["results"]
for datum in results:
    pp_response = datum["preprocessor_reponse"]
    pp_response["gdts"] = [convert_pp_gdt(gdt) for gdt in pp_response["gdts"]]

In [None]:
# Write the run to the working directory; the random UUID keeps separate runs from overwriting each other.
output_name = f"preprocessor___{uuid.uuid4()}.json"
with open(output_name, "w") as f:
    json.dump(output, f)