In [None]:
import boto3
import json
import boto3
import re
import csv
import sagemaker
from sagemaker import get_execution_role
from sagemaker.s3 import S3Uploader, S3Downloader
import uuid
import time
import io
from io import BytesIO
import sys
from pprint import pprint

from IPython.display import Image, display
from PIL import Image as PImage, ImageDraw

In [None]:
!pip install amazon-textract-response-parser

In [None]:
# IAM role attached to this notebook; looked up once here.
role = get_execution_role()
#print("RoleArn: {}".format(role))

# SageMaker session and its default bucket; `bucket` and `prefix` together
# form the S3 destination used when results are uploaded later on.
sess = sagemaker.Session()
bucket = sess.default_bucket()
prefix = 'claims-process-textract'

# Valid Document

In [None]:
# Document
# Local path of the sample form used for the valid-document walkthrough.
documentName = "validmedicalform.png"

# Render the image inline so the form can be inspected before extraction.
display(Image(filename=documentName))

In [None]:
# process using image bytes
def calltextract(documentName, region_name='us-east-1'):
    """Run Amazon Textract form analysis on a local image file.

    Args:
        documentName: Path of the image file to analyze.
        region_name: AWS region of the Textract endpoint to call. Defaults to
            'us-east-1', the region this function used to be hard-coded to.

    Returns:
        The response of ``analyze_document`` called with the ``FORMS``
        feature type.
    """
    client = boto3.client(
        service_name='textract',
        region_name=region_name,
        # Same regional endpoint pattern that was previously hard-coded for us-east-1.
        endpoint_url='https://textract.{}.amazonaws.com'.format(region_name),
    )

    # Read the whole file up front so the handle is closed before the API call.
    with open(documentName, 'rb') as file:
        image_bytes = file.read()
    print('Image loaded', documentName)

    # The image is sent inline as bytes rather than referenced from S3.
    return client.analyze_document(
        Document={'Bytes': image_bytes},
        FeatureTypes=['FORMS'],
    )

In [None]:
# Run Textract on the sample form and dump the raw response for inspection.
response= calltextract(documentName)
print(response)

In [None]:
#Extract key values
# Iterate over elements in the document
from trp import Document
def getformkeyvalue(response):
    """Collect the form fields of a Textract response into a plain dict.

    Args:
        response: Textract response, parsed here with ``trp.Document``.

    Returns:
        A dict mapping each field's key text to its value text. Fields that
        are missing a key or a value are left out; if the same key text
        occurs more than once, the last occurrence wins.
    """
    parsed = Document(response)
    pairs = {}
    for pg in parsed.pages:
        for fld in pg.form.fields:
            # Keep only complete key/value pairs.
            if fld is not None and fld.key is not None and fld.value is not None:
                pairs[fld.key.text] = fld.value.text
    return pairs

In [None]:
# Flatten the Textract response into a {key text: value text} dict.
get_form_keys = getformkeyvalue(response)
print(get_form_keys)

# Check for validation using business rules
Check that the claim ID is 12 characters long and that the ZIP code contains only digits.

In [None]:
def validate(body):
    """Apply the claim business rules to the extracted form fields.

    Args:
        body: JSON text (or an already-parsed dict) mapping form keys to
            values. The rules read the 'ZIP CODE' and 'ID NUMBER' entries.

    Returns:
        A ``(is_valid, claim_id, reason)`` tuple. ``claim_id`` is the raw
        'ID NUMBER' value (``None`` when the field is absent) and ``reason``
        is "Ok" or the message of the first rule that failed.
    """
    if isinstance(body, dict):
        json_data = body
    else:
        try:
            # Parse the text as-is first: blindly swapping quotes corrupts
            # valid JSON whose values contain an apostrophe (e.g. "O'Brien").
            json_data = json.loads(body)
        except ValueError:
            # Fall back for Python-style, single-quoted dict strings.
            json_data = json.loads(body.replace("'", "\""))
    print(json_data)

    # Named so that the built-ins `zip` and `id` are not shadowed.
    zip_code = json_data.get('ZIP CODE')
    claim_id = json_data.get('ID NUMBER')

    # A missing field fails the corresponding rule instead of raising KeyError.
    if zip_code is None or not str(zip_code).strip().isdigit():
        return False, claim_id, "Zip code invalid"
    # Only the length of the ID is checked, not its character set.
    if claim_id is None or len(str(claim_id).strip()) != 12:
        return False, claim_id, "Invalid claim Id"
    return True, claim_id, "Ok"

In [None]:
 # Validate 
# Serialize the extracted key/value pairs and run them through the business rules.
textract_json= json.dumps(get_form_keys,indent=2)
res, formid, result = validate(textract_json)
print(result)
print(formid)

# Send the valid medical intake form to Amazon Comprehend Medical to gain insights

In [None]:
# Amazon Comprehend Medical client; no region is passed, so the default boto3 configuration applies.
comprehend = boto3.client(service_name='comprehendmedical')


In [None]:
# Detect medical entities
# The serialized form fields are sent as the text to analyze.
cm_json_data = comprehend.detect_entities_v2(Text=textract_json)

# Print one summary per detected entity, each followed by a blank separator.
print("\nMedical Entities\n========")
for entity in cm_json_data["Entities"]:
    print(
        "- {}".format(entity["Text"]),
        "   Type: {}".format(entity["Type"]),
        "   Category: {}".format(entity["Category"]),
        sep="\n",
    )
    # The traits list is only shown when it is non-empty.
    if entity["Traits"]:
        print("   Traits:")
        for trait in entity["Traits"]:
            print("    - {}".format(trait["Name"]))
    print("\n")

Write the detected entities to a CSV file and upload it to Amazon S3.

In [None]:

def printtocsv(cm_json_data, formid):
    """Write the detected medical entities to a CSV file and upload it to S3.

    One row per entity is written to a local 'cmresult.csv' (truncated on
    every call), which is then uploaded under ``s3://<bucket>/<prefix>``
    using the notebook-level ``bucket`` and ``prefix`` variables. Because the
    file name is fixed, each call replaces the previously uploaded result.

    Args:
        cm_json_data: Response dict with an 'Entities' list whose items have
            'Category', 'Type' and 'Text' keys.
        formid: Claim/form identifier written to the 'ID' column of each row.

    Returns:
        The value returned by ``S3Uploader.upload``.
    """
    entities = cm_json_data['Entities']
    TEMP_FILE = 'cmresult.csv'
    # newline='' is what the csv module expects; without it, platforms that
    # translate line endings end up with blank rows between records.
    with open(TEMP_FILE, 'w', newline='') as csvfile:  # 'w' will truncate the file
        filewriter = csv.writer(csvfile, delimiter=',',
                                quotechar='|', quoting=csv.QUOTE_MINIMAL)
        filewriter.writerow(['ID', 'Category', 'Type', 'Text'])
        for entity in entities:
            filewriter.writerow([formid, entity['Category'], entity['Type'], entity['Text']])

    uploaded = S3Uploader.upload(TEMP_FILE, 's3://{}/{}'.format(bucket, prefix))
    # Report what was actually uploaded; the old message named a
    # 'procedureresult/<formid>.csv' file that was never written.
    print("successfully uploaded:" + str(uploaded))
    return uploaded

In [None]:
# Persist the entities detected for the valid form, tagged with its claim ID.
printtocsv(cm_json_data,formid)

# Invalid Claim

In [None]:
# Local path of the sample form used for the invalid-claim walkthrough.
InvalidDocument = "invalidmedicalform.png"

# Render the image inline so the form can be inspected before extraction.
display(Image(filename=InvalidDocument))

In [None]:
# Re-run Textract on the second form; this overwrites the earlier `response`.
response = calltextract(InvalidDocument)

In [None]:
# Flatten the new Textract response; this overwrites the earlier `get_form_keys`.
get_form_keys = getformkeyvalue(response)
print(get_form_keys)

In [None]:
 #In Validate 
# Serialize the extracted fields and apply the business rules; `res` holds the
# pass/fail flag and `result` the reason that is sent in the notification below.
textract_json= json.dumps(get_form_keys,indent=2)
res, formid, result = validate(textract_json)
print(result)
print(formid)
print(res)

# Notify stakeholders that the claim is invalid

In [None]:
# Amazon SNS client; no region is passed, so the default boto3 configuration applies.
sns = boto3.client('sns')

# Go to https://console.aws.amazon.com/sns/v3/home?region=us-east-1#/homepage and create a topic as described in the book

In [None]:
# Placeholder: replace with the ARN of the SNS topic you created before publishing.
topicARN="<enter topic arn>"

In [None]:
# Notification text: the extracted fields as JSON immediately followed by the
# validation failure reason (no separator is inserted between the two parts).
snsbody = "Content:" + str(textract_json) + "Reason:" + str(result)
print(snsbody)

In [None]:
# Publish the notification; a failure is reported instead of aborting the notebook.
try:
    response = sns.publish(
        TargetArn=topicARN,
        Message=snsbody,
    )
    print(response)
except Exception as e:
    # Python 3 exceptions have no `.message` attribute; printing the exception
    # itself avoids an AttributeError that would mask the real failure.
    print("Failed while publishing the notification")
    print(e)


# Check your email for notification

# Clean up
Delete the topic you created from the Amazon SNS console: https://console.aws.amazon.com/sns/v3/home?region=us-east-1#/topic/

Delete the Amazon S3 bucket and the files in it: https://docs.aws.amazon.com/AmazonS3/latest/userguide/delete-bucket.html