# Tesseract to Google Vision Endpoints

### Stakeholders would like a product that eventually no longer utilizes Google Vision for text image processing.
This notebook documents where endpoints in the StorySquadApp need to be replaced with our own models. As more work is done on the model and the preprocessing steps, more concrete code can be added here and then modularized into the codebase.

In `app/api/submission.py`, the `submission_text` function calls the Google Vision API to transcribe the text.

In [None]:
### Google Vision Endpoint at app/api/submission.py
async def submission_text(sub: Submission):
    """Transcribe every page of a submission and score the combined text.

    For each page in ``sub.Pages`` the file is fetched from its ``"URL"``,
    its SHA-512 digest is compared against the ``"Checksum"`` supplied with
    the link, and the file content is passed to ``vision.transcribe`` (the
    Google Vision call this notebook aims to replace). The per-page
    transcriptions are concatenated, one page per line, and scored with
    ``squad_score``.

    Arguments:
    ---
    `sub`: Submission - Submission object **see `help(Submission)` for more info**
    Returns:
    ---
    A `JSONResponse` with status 200 and content
    ```
    {"SubmissionID": int, "IsFlagged": boolean,"LowConfidence": boolean, "Complexity": int}
    ```
    where `IsFlagged` is true if ANY page was flagged and `LowConfidence` is
    true if any page reported a low-confidence flag. On the first checksum
    mismatch processing stops and a status 422 response is returned with
    ```
    {"ERROR": "BAD CHECKSUM", "file": <the page entry>}
    ```
    """
    page_texts = []
    confidence_flags = []
    moderation_flags = []
    # unpack links for files in submission object
    for page_num in sub.Pages:
        page = sub.Pages[page_num]
        # fetch file from s3 bucket
        # NOTE(review): `get` looks like a blocking HTTP call inside an async
        # handler and the response status is not checked - confirm intended.
        r = get(page["URL"])
        # a fresh sha512 object per file so digests never mix between pages
        digest = sha512()
        digest.update(r.content)
        # Explicit comparison instead of `assert`: assertions are removed
        # when Python runs with -O, which would silently skip validation.
        if digest.hexdigest() != page["Checksum"]:
            # return some useful information about the error including what
            # caused it and the file affected
            return JSONResponse(
                status_code=422,
                content={"ERROR": "BAD CHECKSUM", "file": page},
            )
        # unpack response from GoogleAPI
        conf_flag, flagged, trans = await vision.transcribe(r.content)
        # keep each page's text terminated by a newline
        page_texts.append(trans + "\n")
        # remember both flags for every page so no page's result is lost
        confidence_flags.append(conf_flag)
        moderation_flags.append(flagged)
    transcriptions = "".join(page_texts)
    # score the transcription using SquadScore algorithm
    score = await squad_score(transcriptions, scaler)

    # return the complexity score to the web team with the SubmissionID
    return JSONResponse(
        status_code=200,
        content={
            "SubmissionID": sub.SubmissionID,
            # flagged if any page was flagged; False for a submission with
            # no pages (previously only the last page counted, and an empty
            # submission raised NameError)
            "IsFlagged": any(moderation_flags),
            "LowConfidence": True in confidence_flags,
            "Complexity": score,
        },
    )

From app/utils/img_processing/confidence_flag.py


In [None]:
def image_confidence(image_path, *, credentials_path=None, threshold=0.85):
    """
    Detects text in an image with Google Vision document text detection and
    averages the confidence reported for each character (symbol). Returns
    True if that average is below `threshold`, otherwise False.

        Input:
            image_path: Path to the local file where the image is stored.
                One image per call: run function on each image in a
                submission
            credentials_path: Optional path to the Google service-account
                JSON key. When omitted, the original hard-coded developer
                path is used so existing callers keep working.
            threshold: Confidence cut-off for the page; defaults to 0.85
        Output: Boolean; True if confidence level for page is less than
                `threshold`, False if it is `threshold` or greater.
                If no symbols are detected, the string "No Text Detected"
                is returned instead of a boolean (note that this string is
                truthy but is not equal to True).
    """

    # If image_path is local
    with open(image_path, "rb") as image_file:
        content = image_file.read()
    image = vision.types.Image(content=content)

    # Language hint passed to the API: English handwriting
    language = vision.types.ImageContext(language_hints=["en-t-i0-handwrit"])

    if credentials_path is None:
        # Historical default, kept only for backward compatibility; it
        # points at one developer's machine, so pass credentials_path
        # explicitly anywhere else.
        credentials_path = "/Users/stevenchase/Desktop/Steven/Computer_Science/Lambda/labs/story_sqaud/Story Squad-6122da7459cf.json"

    # Connect to Google API client
    creds = service_account.Credentials.from_service_account_file(
        credentials_path
    )
    client = vision.ImageAnnotatorClient(credentials=creds)
    response = client.document_text_detection(
        image=image, image_context=language
    )

    # List of confidence levels of each character
    symbol_confidences = []

    for page in response.full_text_annotation.pages:
        for block in page.blocks:
            for paragraph in block.paragraphs:
                for word in paragraph.words:
                    for symbol in word.symbols:
                        symbol_confidences.append(symbol.confidence)

    # If there is no text on the page there is nothing to average
    if not symbol_confidences:
        return "No Text Detected"

    # Calculate the overall confidence for the page
    page_confidence = sum(symbol_confidences) / len(symbol_confidences)

    # Flag: True when under the threshold, False when at or over it
    return page_confidence < threshold
