In [1]:
!pip3 install sqlalchemy psycopg2
!pip3 install -qU boto3

In [2]:
# %load_ext jupyter_black

# Imports

In [3]:
from sqlalchemy import create_engine, Column, Integer, String
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
import sqlalchemy

In [4]:
# for connecting with Bedrock, use Boto3
import boto3, time, json
from botocore.config import Config
from datetime import datetime

# Bedrock may take a while to respond to large requests, so allow three minutes
# (180 s) for both connecting and reading instead of the boto3 defaults.
my_config = Config(connect_timeout=180, read_timeout=180)

# "bedrock" client: used below to list the available foundation models.
bedrock_service = boto3.client("bedrock", config=my_config)
# "bedrock-runtime" client: used below for the Converse (inference) calls.
bedrock = boto3.client("bedrock-runtime", config=my_config)

In [5]:
# Sanity check: confirm the Bedrock client works and that a Claude 3 model is listed.
models = bedrock_service.list_foundation_models()

# Match against each model's own ID instead of the string form of the whole
# response, so text in unrelated fields cannot produce a false positive.
claude3_model_ids = [
    summary["modelId"]
    for summary in models["modelSummaries"]
    if "anthropic.claude-3" in summary["modelId"]
]

if claude3_model_ids:
    print("Claud-v3 found!")
else:
    print("Error, no model found.")

Claud-v3 found!


# Datastore (Ground Truth)

In [6]:
# Declarative base class that the ORM model below inherits from.
Base = sqlalchemy.orm.declarative_base()
# SQLite engine for the ground-truth datastore, pointing at the file "database.db".
engine = create_engine("sqlite:///database.db")

In [7]:
class User(Base):
    """ORM model for one row of the ``users`` table (the ground-truth records).

    Every column, including the three date columns, is declared as ``String``;
    ``IDNumber`` is the primary key.
    """

    __tablename__ = "users"

    # Primary key: the ID number of the document.
    IDNumber = Column(String, primary_key=True)

    # Holder's name and address.
    FirstName = Column(String)
    MiddleName = Column(String)
    LastName = Column(String)
    Address = Column(String)

    # Dates are stored as text (the mock data below uses YYYY-MM-DD).
    DOB = Column(String)
    IssueDate = Column(String)
    ExpiryDate = Column(String)

    def __repr__(self):
        """Return a one-line, human-readable summary of the record."""
        full_name = f"{self.FirstName} {self.MiddleName} {self.LastName}"
        return (
            f"User(name='{full_name}', "
            f"IDNumber='{self.IDNumber}', "
            f"Address='{self.Address}', "
            f"DOB='{self.DOB}', "
            f"IssueDate='{self.IssueDate}', "
            f"ExpiryDate='{self.ExpiryDate}')"
        )

In [8]:
# Session factory bound to the SQLite engine.
Session = sessionmaker(bind=engine)
# Create the tables declared on Base (the `users` table) in the database.
Base.metadata.create_all(engine)
# The single session object used by the rest of the notebook.
session = Session()

In [9]:
# Mock ground-truth records, one dict per person with the same keys as the
# User columns. Each row tuple below is zipped with the field names; the
# single-element `for field_names in [...]` clause only binds the names inside
# the comprehension so that no extra module-level variable is created.
data = [
    dict(zip(field_names, row))
    for field_names in [
        (
            "IDNumber",
            "FirstName",
            "MiddleName",
            "LastName",
            "Address",
            "DOB",
            "IssueDate",
            "ExpiryDate",
        )
    ]
    for row in [
        ("A1234567", "John", "Michael", "Doe", "123 Main St, Anytown USA", "1990-05-15", "2015-06-01", "2025-05-31"),
        ("B7654321", "Emily", "Jane", "Smith", "456 Oak Rd, Someville CA", "1985-11-22", "2018-12-01", "2028-11-30"),
        ("C2468013", "Michael", "David", "Johnson", "789 Elm St, Othercity TX", "1992-03-08", "2017-04-01", "2027-03-31"),
        ("D5679012", "Sarah", "Elizabeth", "Williams", "321 Pine Ave, Somewhere NJ", "1988-09-20", "2016-10-01", "2026-09-30"),
        ("E8901234", "David", "Robert", "Brown", "654 Maple Ln, Anothertown CA", "1995-07-03", "2019-08-01", "2029-07-31"),
        ("F3456789", "Jessica", "Marie", "Davis", "987 Cedar Rd, Yetanotherplace NY", "1982-12-28", "2015-01-01", "2025-12-31"),
        ("G7890123", "Christopher", "James", "Miller", "246 Birch St, Placeville IL", "1991-04-11", "2018-05-01", "2028-04-30"),
        ("H2345678", "Amanda", "Nicole", "Wilson", "369 Spruce Ave, Towncity TX", "1987-08-25", "2017-09-01", "2027-08-31"),
        ("I6789012", "Matthew", "Thomas", "Moore", "159 Oak Blvd, Anotherplace CA", "1993-02-14", "2019-03-01", "2029-02-28"),
        ("J0123456", "Ashley", "Elizabeth", "Taylor", "753 Maple St, Citytown NJ", "1989-10-05", "2016-11-01", "2026-10-31"),
        ("K5678901", "Joshua", "Andrew", "Anderson", "246 Cedar Rd, Anotherburg IL", "1994-06-18", "2018-07-01", "2028-06-30"),
        ("L2345678", "Stephanie", "Michelle", "Thomas", "159 Spruce Ln, Placetown TX", "1986-01-31", "2017-02-01", "2027-01-31"),
        ("M7890123", "Daniel", "Robert", "Jackson", "753 Birch Ave, Anotherspot CA", "1992-09-12", "2019-10-01", "2029-09-30"),
        ("N3456789", "Brittany", "Nicole", "White", "369 Oak St, Towncity NJ", "1990-07-27", "2016-08-01", "2026-07-31"),
        ("O8901234", "Joseph", "Michael", "Harris", "246 Maple Rd, Anotherburg IL", "1988-11-04", "2018-12-01", "2028-11-30"),
        ("P5678901", "Samantha", "Elizabeth", "Martin", "159 Cedar Ave, Placetown TX", "1993-05-19", "2017-06-01", "2027-05-31"),
        ("Q2345678", "Andrew", "David", "Thompson", "753 Spruce Blvd, Anotherspot CA", "1991-03-02", "2019-04-01", "2029-03-31"),
        ("R7890123", "Melissa", "Marie", "Garcia", "369 Birch St, Towncity NJ", "1987-12-16", "2016-01-01", "2026-12-31"),
        ("S3456789", "Kevin", "Thomas", "Martinez", "246 Oak Ln, Anotherburg IL", "1994-08-29", "2018-09-01", "2028-08-31"),
        # The next two rows differ only in IDNumber.
        ("T8901434", "Lauren", "Michelle", "Robinson", "159 Maple Rd, Placetown TX", "1989-06-10", "2017-07-01", "2027-06-30"),
        ("T8901234", "Lauren", "Michelle", "Robinson", "159 Maple Rd, Placetown TX", "1989-06-10", "2017-07-01", "2027-06-30"),
        ("123456789", "Marie", "", "Michelle", "2345 Anywhere street, Albany NY", "1990-10-31", "2022-03-07", "2029-10-31"),
        ("123456788", "Murphy", "Brenna Anna", "C", "111 Anywhere street city, DL 67890-101", "1970-01-01", "2012-08-20", "2020-08-20"),
    ]
]

In [10]:
# Insert every mock record from `data` into the users table.
# session.merge() is used instead of session.add() so the cell is idempotent:
# re-running it, or running the notebook when database.db already holds these
# rows, updates the row with the matching primary key instead of failing on a
# duplicate IDNumber.
for record in data:
    # The dict keys match the User column names, so they can be passed as keywords.
    session.merge(User(**record))

# Commit the changes to the database
session.commit()

In [11]:
# Read every row of the users table back and print each one (formatted by User.__repr__).
users = session.query(User).all()
for user in users:
    print(user)

User(name='John Michael Doe', IDNumber='A1234567', Address='123 Main St, Anytown USA', DOB='1990-05-15', IssueDate='2015-06-01', ExpiryDate='2025-05-31')
User(name='Emily Jane Smith', IDNumber='B7654321', Address='456 Oak Rd, Someville CA', DOB='1985-11-22', IssueDate='2018-12-01', ExpiryDate='2028-11-30')
User(name='Michael David Johnson', IDNumber='C2468013', Address='789 Elm St, Othercity TX', DOB='1992-03-08', IssueDate='2017-04-01', ExpiryDate='2027-03-31')
User(name='Sarah Elizabeth Williams', IDNumber='D5679012', Address='321 Pine Ave, Somewhere NJ', DOB='1988-09-20', IssueDate='2016-10-01', ExpiryDate='2026-09-30')
User(name='David Robert Brown', IDNumber='E8901234', Address='654 Maple Ln, Anothertown CA', DOB='1995-07-03', IssueDate='2019-08-01', ExpiryDate='2029-07-31')
User(name='Jessica Marie Davis', IDNumber='F3456789', Address='987 Cedar Rd, Yetanotherplace NY', DOB='1982-12-28', IssueDate='2015-01-01', ExpiryDate='2025-12-31')
User(name='Christopher James Miller', IDNumb

In [12]:
# Fetch one record by its ID number; the bare name on the last line makes the
# notebook display it.
user = session.query(User).filter(User.IDNumber == "T8901234").first()
user

User(name='Lauren Michelle Robinson', IDNumber='T8901234', Address='159 Maple Rd, Placetown TX', DOB='1989-06-10', IssueDate='2017-07-01', ExpiryDate='2027-06-30')

# Evaluation

In [13]:
# System prompt for the evaluation calls: defines the four categories (A-D) and
# asks for the answer as a <REASON> block followed by a <SCORE> tag.
evaluation_system_prompt = """You are an expert in evaluating documents for potential fraud. Your task is to compare a provided document against a ground truth reference and categorize the document based on its accuracy and completeness.

Please categorize the provided document into one of the following categories:

- Category 'A': The provided document is accurate and complete, with no significant differences from the ground truth.
- Category 'B': The provided document contains minor inaccuracies or missing details compared to the ground truth, but these discrepancies do not indicate fraud.
- Category 'C': The provided document has significant inaccuracies or missing information compared to the ground truth, suggesting potential fraud, but further review is required.
- Category 'D': The provided document is clearly fraudulent, with substantial differences from the ground truth.

Your output should be structured as follows:

<REASON>
For each relevant entity or field, highlight the key similarities and differences between the ground truth and the provided document. Focus on factual accuracy, completeness, and relevance.
</REASON>

<SCORE>Based on the analysis in the REASON section, assign one of the categories (A, B, C, or D).</SCORE>

Please note that your task is solely to categorize the provided document based on its accuracy and completeness compared to the ground truth. You are not responsible for generating or validating the content itself."""

In [14]:
# User-prompt template for one comparison. The {{TRUTH}} and {{PREDICTION}}
# placeholders are filled in with str.replace() where the prompt is used.
evaluation_prompt = """
Task: Determine if a document is fraudulent or not based on the provided data points.

Instructions:
1. Compare the following fields between the ground truth and the document provided:
   - IDNumber
   - DOB (Date of Birth)
   - IssueDate
   - ExpiryDate
   
   These fields must be an exact match for the document to be considered non-fraudulent.

2. For the following fields, minor differences are allowed:
   - FirstName
   - MiddleName
   - LastName
   - Address

Categorize the provided document into 'A', 'B', 'C', or 'D'. 

<ground_truth>{{TRUTH}}</ground_truth>

<document_provided>{{PREDICTION}}</document_provided>

"""

In [15]:
# Few-shot examples sent ahead of the evaluation prompt: seven worked comparisons,
# each with a ground truth, a provided document, the field-by-field reasoning and
# the resulting category (A, B, C, then four D cases for the exact-match fields).
# Every opening tag must match its closing tag so the examples are well-formed.
evaluation_few_shot_learning = """
Consider the following examples: 

<example_1>

    <ground_truth>
    {
        "IDNumber": "T8901434",
        "FirstName": "Lauren",
        "MiddleName": "Michelle",
        "LastName": "Robinson",
        "Address": "159 Maple Rd, Placetown TX",
        "DOB": "1989-06-10",
        "IssueDate": "2017-07-01",
        "ExpiryDate": "2027-06-30",
    }
    </ground_truth>

    <document_provided>
    {
        "IDNumber": "T8901434",
        "FirstName": "Lauren",
        "MiddleName": "Michelle",
        "LastName": "Robinson",
        "Address": "159 Maple Rd, Placetown TX",
        "DOB": "06/10/1989",
        "IssueDate": "07/01/2017",
        "ExpiryDate": "06/30/2027",
    }
    </document_provided>

    <REASON>
        IDNumber is exactly the same
        FirstName is exactly the same
        MiddleName is exactly the same
        LastName is exactly the same
        Address is exactly the same
        DOB is in different format but are exactly same with MM/DD/YYYY
        IssueDate is in different format but are exactly same with MM/DD/YYYY
        ExpiryDate is in different format but are exactly same with MM/DD/YYYY
    </REASON>
    
    <SCORE>A</SCORE>

</example_1>


<example_2>

    <ground_truth>    
    {
        "IDNumber": "M7890123",
        "FirstName": "Daniel",
        "MiddleName": "Robert",
        "LastName": "Jackson",
        "Address": "753 Birch Ave, Anotherspot CA",
        "DOB": "1992-09-12",
        "IssueDate": "2019-10-01",
        "ExpiryDate": "2029-09-30",
    }
    </ground_truth>

    <document_provided>
    {
        "IDNumber": "M7890123",
        "FirstName": "Daniel",
        "MiddleName": "Robert",
        "LastName": "Jackson",
        "Address": "753 Birch Avenue, Anotherspot California",
        "DOB": "1992-09-12",
        "IssueDate": "2019-10-01",
        "ExpiryDate": "2029-09-30",
    }
    </document_provided>

    <REASON>
        IDNumber is exactly the same
        FirstName is exactly the same
        MiddleName is exactly the same
        LastName is exactly the same
        Address has two differences: 
            1. Ave in Ground Truth Address is spelled out as Avenue in provided document
            2. CA in Ground Truth Address is spelled out as California in provided document 
        DOB is exactly the same
        IssueDate is exactly the same
        ExpiryDate is exactly the same
    </REASON>
    
    <SCORE>B</SCORE>


</example_2>


<example_3>

    <ground_truth>
    {
        "IDNumber": "N3456789",
        "FirstName": "Brittany",
        "MiddleName": "Nicole",
        "LastName": "White",
        "Address": "369 Oak St, Towncity NJ",
        "DOB": "1990-07-27",
        "IssueDate": "2016-08-01",
        "ExpiryDate": "2026-07-31",
    }
    </ground_truth>

    <document_provided>
    {
        "IDNumber": "N3456789",
        "FirstName": "Brittany",
        "MiddleName": "",
        "LastName": "White",
        "Address": "Oak St, Towncity NJ",
        "DOB": "1990-07-27",
        "IssueDate": "2016-08-01",
        "ExpiryDate": "2026-07-31",
    }
    </document_provided>

    <REASON>
        IDNumber is exactly the same
        FirstName is exactly the same
        MiddleName is missing
        LastName is exactly the same
        Address is missing house number
        DOB is exactly the same
        IssueDate is exactly the same
        ExpiryDate is exactly the same
        
        Provided document might be fraudulent but requires further review. 
    </REASON>
    
    <SCORE>C</SCORE>

</example_3>


<example_4>

    <ground_truth>
    {
        "IDNumber": "O8901234",
        "FirstName": "Joseph",
        "MiddleName": "Michael",
        "LastName": "Harris",
        "Address": "246 Maple Rd, Anotherburg IL",
        "DOB": "1988-11-04",
        "IssueDate": "2018-12-01",
        "ExpiryDate": "2028-11-30",
    }
    </ground_truth>

    <document_provided>
    {
        "IDNumber": "O8901534",
        "FirstName": "Joseph",
        "MiddleName": "Michael",
        "LastName": "Harris",
        "Address": "246 Maple Rd, Anotherburg IL",
        "DOB": "1988-11-04",
        "IssueDate": "2018-12-01",
        "ExpiryDate": "2028-11-30",
    }
    </document_provided>

    <REASON>
        IDNumber is not exactly the same
        FirstName is exactly the same
        MiddleName is exactly the same
        LastName is exactly the same
        Address is exactly the same
        DOB is exactly the same
        IssueDate is exactly the same
        ExpiryDate is exactly the same
    </REASON>
    
    <SCORE>D</SCORE>


</example_4>

<example_5>

    <ground_truth>
    {
        "IDNumber": "O8901234",
        "FirstName": "Joseph",
        "MiddleName": "Michael",
        "LastName": "Harris",
        "Address": "246 Maple Rd, Anotherburg IL",
        "DOB": "1988-11-04",
        "IssueDate": "2018-12-01",
        "ExpiryDate": "2028-11-30",
    }
    </ground_truth>

    <document_provided>
    {
        "IDNumber": "O8901234",
        "FirstName": "Joseph",
        "MiddleName": "Michael",
        "LastName": "Harris",
        "Address": "246 Maple Rd, Anotherburg IL",
        "DOB": "1988-11-05",
        "IssueDate": "2018-12-01",
        "ExpiryDate": "2028-11-30",
    }
    </document_provided>


    <REASON>
        IDNumber is exactly the same
        FirstName is exactly the same
        MiddleName is exactly the same
        LastName is exactly the same
        Address is exactly the same
        DOB is not exactly the same
        IssueDate is exactly the same
        ExpiryDate is exactly the same
    </REASON>
    
    <SCORE>D</SCORE>


</example_5>

<example_6>

    <ground_truth>
    {
        "IDNumber": "O8901234",
        "FirstName": "Joseph",
        "MiddleName": "Michael",
        "LastName": "Harris",
        "Address": "246 Maple Rd, Anotherburg IL",
        "DOB": "1988-11-04",
        "IssueDate": "2018-12-01",
        "ExpiryDate": "2028-11-30",
    }
    </ground_truth>

    <document_provided>
    {
        "IDNumber": "O8901234",
        "FirstName": "Joseph",
        "MiddleName": "Michael",
        "LastName": "Harris",
        "Address": "246 Maple Rd, Anotherburg IL",
        "DOB": "1988-11-04",
        "IssueDate": "2018-12-03",
        "ExpiryDate": "2028-11-30",
    }
    </document_provided>

    <REASON>
        IDNumber is exactly the same
        FirstName is exactly the same
        MiddleName is exactly the same
        LastName is exactly the same
        Address is exactly the same
        DOB is exactly the same
        IssueDate is not exactly the same
        ExpiryDate is exactly the same
    </REASON>
    
    <SCORE>D</SCORE>


</example_6>


<example_7>

    <ground_truth>
    {
        "IDNumber": "O8901234",
        "FirstName": "Joseph",
        "MiddleName": "Michael",
        "LastName": "Harris",
        "Address": "246 Maple Rd, Anotherburg IL",
        "DOB": "1988-11-04",
        "IssueDate": "2018-12-01",
        "ExpiryDate": "2028-11-30",
    }
    </ground_truth>

    <document_provided>
    {
        "IDNumber": "O8901234",
        "FirstName": "Joseph",
        "MiddleName": "Michael",
        "LastName": "Harris",
        "Address": "246 Maple Rd, Anotherburg IL",
        "DOB": "1988-11-04",
        "IssueDate": "2018-12-01",
        "ExpiryDate": "2028-10-30",
    }
    </document_provided>


    <REASON>
        IDNumber is exactly the same
        FirstName is exactly the same
        MiddleName is exactly the same
        LastName is exactly the same
        Address is exactly the same
        DOB is exactly the same
        IssueDate is exactly the same
        ExpiryDate is not exactly the same
    </REASON>
    
    <SCORE>D</SCORE>


</example_7>
"""

In [16]:
# Fetch the stored record that the sample predictions below are compared against.
output = session.query(User).filter_by(IDNumber="J0123456").first()

# Copy the record's columns into a plain dict, in column order.
ground_truth = {
    field: getattr(output, field)
    for field in (
        "IDNumber",
        "FirstName",
        "MiddleName",
        "LastName",
        "Address",
        "DOB",
        "IssueDate",
        "ExpiryDate",
    )
}

In [17]:
ground_truth

{'IDNumber': 'J0123456',
 'FirstName': 'Ashley',
 'MiddleName': 'Elizabeth',
 'LastName': 'Taylor',
 'Address': '753 Maple St, Citytown NJ',
 'DOB': '1989-10-05',
 'IssueDate': '2016-11-01',
 'ExpiryDate': '2026-10-31'}

In [18]:
# Six sample documents to evaluate. Each one is the matching document with the
# listed fields overridden, so the difference under test is visible at a glance.
# The single-element `for matching_document in [...]` clause only binds the
# baseline inside the comprehension (no extra module-level variable).
predictions = [
    {**matching_document, **altered_fields}
    for matching_document in [
        {
            "IDNumber": "J0123456",
            "FirstName": "Ashley",
            "MiddleName": "Elizabeth",
            "LastName": "Taylor",
            "Address": "753 Maple St, Citytown NJ",
            "DOB": "1989-10-05",
            "IssueDate": "2016-11-01",
            "ExpiryDate": "2026-10-31",
        }
    ]
    for altered_fields in [
        {},  # 1: no change
        {"LastName": ""},  # 2: last name left empty
        {"Address": "753 Maple Street, Citytown NJ"},  # 3: "St" spelled out
        {"IDNumber": "M0123456"},  # 4: different ID number
        {"IssueDate": "2017-11-01"},  # 5: different issue date
        {"ExpiryDate": "2028-10-28"},  # 6: different expiry date
    ]
]

In [19]:
# Score every sample document against the ground truth and print the model's verdict.
for idx, prediction in enumerate(predictions):
    # Fill the {{TRUTH}} / {{PREDICTION}} placeholders of the template.
    filled_prompt = evaluation_prompt.replace("{{TRUTH}}", str(ground_truth))
    filled_prompt = filled_prompt.replace("{{PREDICTION}}", str(prediction))

    # One user turn: the few-shot examples first, then the actual comparison.
    messages = [
        {
            "role": "user",
            "content": [
                {"text": evaluation_few_shot_learning},
                {"text": filled_prompt},
            ],
        }
    ]

    response = bedrock.converse(
        modelId="anthropic.claude-3-sonnet-20240229-v1:0",
        system=[{"text": evaluation_system_prompt}],
        messages=messages,
        inferenceConfig={"maxTokens": 2000, "temperature": 0.0, "topP": 0.5},
        additionalModelRequestFields={"top_k": 250},
    )
    # Text of the first content block of the reply.
    verdict = response["output"]["message"]["content"][0]["text"]

    print(f"-------------------------{idx + 1}-------------------------")
    print(
        f"Ground truth:\n{ground_truth}",
        f"\n\nPrediction:\n{prediction}",
        "\n\n" + verdict + "\n\n",
    )

-------------------------1-------------------------
Ground truth:
{'IDNumber': 'J0123456', 'FirstName': 'Ashley', 'MiddleName': 'Elizabeth', 'LastName': 'Taylor', 'Address': '753 Maple St, Citytown NJ', 'DOB': '1989-10-05', 'IssueDate': '2016-11-01', 'ExpiryDate': '2026-10-31'} 

Prediction:
{'IDNumber': 'J0123456', 'FirstName': 'Ashley', 'MiddleName': 'Elizabeth', 'LastName': 'Taylor', 'Address': '753 Maple St, Citytown NJ', 'DOB': '1989-10-05', 'IssueDate': '2016-11-01', 'ExpiryDate': '2026-10-31'} 

<REASON>
IDNumber is exactly the same
FirstName is exactly the same
MiddleName is exactly the same
LastName is exactly the same
Address is exactly the same
DOB is exactly the same
IssueDate is exactly the same
ExpiryDate is exactly the same
</REASON>

<SCORE>A</SCORE>

The provided document matches the ground truth exactly for all fields, including the critical fields like IDNumber, DOB, IssueDate, and ExpiryDate. There are no discrepancies or missing information. Therefore, this documen

# Tool use

In [20]:
class ToolsList:
    """Implementations of the tools that the model can request by name.

    Each method name matches a tool name declared in the tool configuration;
    the tool-use input is passed to the method as keyword arguments.
    """

    def get_similarity_score(self, ground_truth, prediction):
        """Ask the model to categorize `prediction` against `ground_truth`.

        Args:
            ground_truth: Reference record; inserted into the prompt via str().
            prediction: Record to evaluate; inserted into the prompt via str().

        Returns:
            The text of the first content block of the model's reply
            (the <REASON>/<SCORE> answer requested by the system prompt).
        """
        messages = [
            {
                "role": "user",
                "content": [
                    {
                        "text": evaluation_few_shot_learning,
                    },
                    {
                        "text": evaluation_prompt.replace(
                            "{{TRUTH}}", str(ground_truth)
                        ).replace("{{PREDICTION}}", str(prediction)),
                    },
                ],
            }
        ]

        response = bedrock.converse(
            modelId="anthropic.claude-3-sonnet-20240229-v1:0",
            system=[{"text": evaluation_system_prompt}],
            messages=messages,
            inferenceConfig={"maxTokens": 2000, "temperature": 0.0, "topP": 0.5},
            additionalModelRequestFields={"top_k": 250},
        )

        return response["output"]["message"]["content"][0]["text"]

    def get_ground_truth(self, IDNumber):
        """Look up the stored record for `IDNumber`.

        Args:
            IDNumber: Primary key of the record in the users table.

        Returns:
            A dict with the record's eight fields, or an empty dict when no
            record has that ID number.
        """
        output = session.query(User).filter_by(IDNumber=IDNumber).first()
        if output is None:
            # .first() returns None when nothing matches. Return a falsy value
            # instead of failing with AttributeError, so the caller can treat
            # the lookup as unsuccessful.
            return {}
        return {
            "IDNumber": output.IDNumber,
            "FirstName": output.FirstName,
            "MiddleName": output.MiddleName,
            "LastName": output.LastName,
            "Address": output.Address,
            "DOB": output.DOB,
            "IssueDate": output.IssueDate,
            "ExpiryDate": output.ExpiryDate,
        }

    def extract_info(
        self,
        IDNumber,
        FirstName,
        MiddleName,
        LastName,
        Address,
        DOB,
        IssueDate,
        ExpiryDate,
    ):
        """Return the extracted fields as a dict.

        The values are whatever the caller passes in (the model supplies them
        as the tool input); this method only packages them under the same keys
        used by the ground-truth records.
        """
        return {
            "IDNumber": IDNumber,
            "FirstName": FirstName,
            "MiddleName": MiddleName,
            "LastName": LastName,
            "Address": Address,
            "DOB": DOB,
            "IssueDate": IssueDate,
            "ExpiryDate": ExpiryDate,
        }

## Tools

In [21]:
# Tool configuration passed to the Converse API. Each toolSpec name matches a
# method of ToolsList, and its inputSchema describes that method's keyword arguments.
toolConfig = {
    "tools": [],
    "toolChoice": {
        # "auto": the model decides whether to call a tool. The Converse API
        # also accepts "any" or a specific {"tool": {"name": ...}} here.
        "auto": {},
    },
}

toolConfig["tools"].append(
    {
        "toolSpec": {
            "name": "get_similarity_score",
            "description": "Get similarity score between ground truth and prediction.",
            "inputSchema": {
                "json": {
                    "type": "object",
                    "properties": {
                        "ground_truth": {
                            "type": "object",
                            "description": "Ground truth to calculate similarity score.",
                        },
                        "prediction": {
                            "type": "object",
                            "description": "Prediction to calculate similarity score.",
                        },
                    },
                    "required": ["ground_truth", "prediction"],
                }
            },
        }
    }
)

toolConfig["tools"].append(
    {
        "toolSpec": {
            "name": "get_ground_truth",
            "description": "Get ground truth.",
            "inputSchema": {
                "json": {
                    "type": "object",
                    "properties": {
                        # IDNumber is the string primary key used for the lookup,
                        # so it is declared as a string, not an object.
                        "IDNumber": {
                            "type": "string",
                            "description": "The ID Number of the document to fetch the ground truth for",
                        }
                    },
                    "required": ["IDNumber"],
                }
            },
        }
    }
)

toolConfig["tools"].append(
    {
        "toolSpec": {
            "name": "extract_info",
            "description": "Extract the following details from the Driver's License provided",
            "inputSchema": {
                "json": {
                    "type": "object",
                    "properties": {
                        "IDNumber": {
                            "type": "string",
                            "description": "The ID Number of the document",
                        },
                        "FirstName": {
                            "type": "string",
                            "description": "The first name of the individual",
                        },
                        "MiddleName": {
                            "type": "string",
                            "description": "The middle name of the individual",
                        },
                        "LastName": {
                            "type": "string",
                            "description": "The last name of the individual",
                        },
                        "Address": {
                            "type": "string",
                            "description": "The Address of the individual",
                        },
                        "DOB": {
                            "type": "string",
                            "description": "The date of birth of the individual",
                        },
                        "IssueDate": {
                            "type": "string",
                            "description": "The issue date of the document",
                        },
                        "ExpiryDate": {
                            "type": "string",
                            "description": "The expiry date of the document",
                        },
                    },
                    "required": [
                        "IDNumber",
                        "FirstName",
                        "MiddleName",
                        "LastName",
                        "Address",
                        "DOB",
                        "IssueDate",
                        "ExpiryDate",
                    ],
                }
            },
        }
    }
)

## Utilities

In [22]:
# Function for calling the Bedrock Converse API...
def converse_with_tools(messages, toolConfig, system=""):
    """Send one Converse request with tool definitions attached.

    Args:
        messages: Conversation so far, in Converse message format.
        toolConfig: Tool configuration (tool specs and tool choice).
        system: Optional system prompt. Either a plain string, which is wrapped
            into a single text block, or a list of system content blocks,
            which is passed through unchanged. Omitted from the request when empty.

    Returns:
        The response returned by ``bedrock.converse``.
    """
    request = {
        "modelId": "anthropic.claude-3-sonnet-20240229-v1:0",
        "messages": messages,
        "toolConfig": toolConfig,
        "inferenceConfig": {"maxTokens": 2000, "temperature": 0.0, "topP": 0.5},
        "additionalModelRequestFields": {"top_k": 250},
    }
    if system:
        # The API takes the system prompt as a list of content blocks, so a
        # bare string has to be wrapped before it is sent.
        request["system"] = [{"text": system}] if isinstance(system, str) else system
    return bedrock.converse(**request)

In [23]:
def converse(prompt, system, toolConfig, image_byte=None, image_type=None):
    """Send one user turn to the model and run the tool it requests, if any.

    Args:
        prompt: Either a string, or a ready-made list of Converse content
            blocks (e.g. ``[{"text": ...}, ...]``) that is sent as-is.
        system: System prompt forwarded to ``converse_with_tools``; a falsy
            value means no system prompt is sent.
        toolConfig: Tool configuration forwarded to ``converse_with_tools``.
        image_byte: Raw image bytes. Used only when ``image_type`` is also
            given; the image block is then placed before the prompt text.
        image_type: Image format for the image block (e.g. ``"png"``).

    Returns:
        A ``(result, status)`` tuple:

        * the reply contains a ``toolUse`` block and the tool returned a
          truthy value -> ``(tool_result, True)``
        * the tool returned a falsy value, or the requested name is not a
          public callable on ``ToolsList`` -> ``("", False)``
        * no tool was requested -> ``(first text block of the reply, True)``,
          or ``("", False)`` when the reply has no text block

        Only the first ``toolUse`` block in the reply is acted on.
    """
    if image_byte is not None and image_type is not None:
        content = [
            {"image": {"format": image_type, "source": {"bytes": image_byte}}},
            {"text": prompt},
        ]
    elif isinstance(prompt, str):
        # isinstance (not an exact type check) so str subclasses are wrapped too.
        content = [{"text": prompt}]
    else:
        content = prompt
    messages = [{"role": "user", "content": content}]

    output = converse_with_tools(
        messages=messages, system=system, toolConfig=toolConfig
    )
    print(
        f"\n{datetime.now().strftime('%H:%M:%S')} - Output so far:\n{json.dumps(output['output'], indent=2, ensure_ascii=False)}"
    )

    reply_blocks = output["output"]["message"]["content"]
    function_calling = next(
        (block["toolUse"] for block in reply_blocks if "toolUse" in block),
        None,
    )

    if function_calling:
        tool_name = function_calling["name"]
        tool_args = function_calling["input"] or {}

        print(
            f"\n{datetime.now().strftime('%H:%M:%S')} - Running ({tool_name}) tool..."
        )

        # The tool name is chosen by the model, so dispatch only to public
        # callables; private/dunder attributes and unknown names are rejected
        # instead of being invoked or raising AttributeError.
        tool = None
        if not str(tool_name).startswith("_"):
            tool = getattr(ToolsList(), tool_name, None)
        if not callable(tool):
            print(
                f"\n{datetime.now().strftime('%H:%M:%S')} - Unknown tool ({tool_name}) requested."
            )
            return "", False

        # A falsy tool result is normalised to "" and reported as a failure.
        tool_response = tool(**tool_args) or ""
        return tool_response, bool(tool_response)

    # No tool call: hand back the first text block, wherever it sits.
    reply_text = next(
        (block["text"] for block in reply_blocks if "text" in block), None
    )
    if reply_text is None:
        return "", False
    return reply_text, True

# Run

In [24]:
# Read the sample license image as raw bytes; converse() places them in the
# image block of the request together with the declared format.
with open("DriverLicense1.png", "rb") as image:
    f = image.read()
    image_byte = bytearray(f)
    image_type = "png"

# Ask the model to call the extract_info tool on the image. `prediction` is the
# value returned by the tool; `status` is False when the tool returned nothing.
prediction, status = converse(
    prompt="Use the extract_info tool based on the image given",
    system=None,
    image_byte=image_byte,
    image_type=image_type,
    toolConfig=toolConfig,
)


01:30:29 - Output so far:
{
  "message": {
    "role": "assistant",
    "content": [
      {
        "text": "Okay, let's extract the relevant information from the driver's license image using the extract_info tool:"
      },
      {
        "toolUse": {
          "toolUseId": "tooluse_Jz25dc31QLybdMzL_U3_Iw",
          "name": "extract_info",
          "input": {
            "IDNumber": "123 456 789",
            "FirstName": "MICHELLE",
            "MiddleName": "MARIE",
            "LastName": "MOTORIST",
            "Address": "2345 ANYWHERE STREET ALBANY, NY 12222",
            "DOB": "10/31/1990",
            "IssueDate": "03/07/2022",
            "ExpiryDate": "10/31/2029"
          }
        }
      }
    ]
  }
}

01:30:29 - Running (extract_info) tool...


In [25]:
# Display the result of the extraction call made in the previous cell.
prediction

{'IDNumber': '123 456 789',
 'FirstName': 'MICHELLE',
 'MiddleName': 'MARIE',
 'LastName': 'MOTORIST',
 'Address': '2345 ANYWHERE STREET ALBANY, NY 12222',
 'DOB': '10/31/1990',
 'IssueDate': '03/07/2022',
 'ExpiryDate': '10/31/2029'}

In [26]:
# Look up the stored record for the extracted ID. Spaces are removed from the
# ID before it is placed in the prompt.
# NOTE(review): workflow() additionally strips "-" from the ID; an ID extracted
# with hyphens would be sent here unchanged - consider aligning the two.
ground_truth, status = converse(
    prompt=f"Get ground truth for IDNumber={prediction['IDNumber'].replace(' ', '')}",
    system=None,
    toolConfig=toolConfig,
)


01:30:35 - Output so far:
{
  "message": {
    "role": "assistant",
    "content": [
      {
        "text": "Here is how we can get the ground truth for IDNumber 123456789:"
      },
      {
        "toolUse": {
          "toolUseId": "tooluse_Jrnhk3UUTRKqm6YaQHufDg",
          "name": "get_ground_truth",
          "input": {
            "IDNumber": "123456789"
          }
        }
      }
    ]
  }
}

01:30:35 - Running (get_ground_truth) tool...


In [27]:
# Display the result of the ground-truth lookup made in the previous cell.
ground_truth

{'IDNumber': '123456789',
 'FirstName': 'Marie',
 'MiddleName': '',
 'LastName': 'Michelle',
 'Address': '2345 Anywhere street, Albany NY',
 'DOB': '1990-10-31',
 'IssueDate': '2022-03-07',
 'ExpiryDate': '2029-10-31'}

In [28]:
# Ask the model to compare the prediction with the ground truth. The user turn
# holds two text blocks: the few-shot examples, then the evaluation prompt with
# its {{TRUTH}} and {{PREDICTION}} placeholders replaced by str() of each value.
score, status = converse(
    prompt=[
        {
            "text": evaluation_few_shot_learning,
        },
        {
            "text": evaluation_prompt.replace("{{TRUTH}}", str(ground_truth)).replace(
                "{{PREDICTION}}", str(prediction)
            ),
        },
    ],
    system=[{"text": evaluation_system_prompt}],
    toolConfig=toolConfig,
)


01:30:45 - Output so far:
{
  "message": {
    "role": "assistant",
    "content": [
      {
        "text": "<REASON>\nIDNumber: The IDNumber in the provided document is formatted differently (with spaces) compared to the ground truth, but the digits are the same.\n\nFirstName: The provided document has the first and middle names swapped compared to the ground truth.\n\nMiddleName: The provided document has the first and middle names swapped compared to the ground truth.\n\nLastName: The LastName in the provided document is different from the ground truth.\n\nAddress: The provided document has the address in a different format (all uppercase, with ZIP code) compared to the ground truth, but the street and city details match.\n\nDOB: The DOB in the provided document is formatted differently (MM/DD/YYYY) compared to the ground truth, but the date is the same.\n\nIssueDate: The IssueDate in the provided document is formatted differently (MM/DD/YYYY) compared to the ground truth, but the

In [29]:
# print() is used so the newlines in the evaluation text are rendered.
print(score)

<REASON>
IDNumber: The IDNumber in the provided document is formatted differently (with spaces) compared to the ground truth, but the digits are the same.

FirstName: The provided document has the first and middle names swapped compared to the ground truth.

MiddleName: The provided document has the first and middle names swapped compared to the ground truth.

LastName: The LastName in the provided document is different from the ground truth.

Address: The provided document has the address in a different format (all uppercase, with ZIP code) compared to the ground truth, but the street and city details match.

DOB: The DOB in the provided document is formatted differently (MM/DD/YYYY) compared to the ground truth, but the date is the same.

IssueDate: The IssueDate in the provided document is formatted differently (MM/DD/YYYY) compared to the ground truth, but the date is the same.

ExpiryDate: The ExpiryDate in the provided document is formatted differently (MM/DD/YYYY) compared to th

# Workflow Orchestration

In [30]:
def workflow(image_byte, image_type):
    """Run extraction, ground-truth lookup and evaluation for one ID image.

    Args:
        image_byte: Raw bytes of the document image, forwarded to ``converse``.
        image_type: Image format forwarded to ``converse`` (e.g. ``"png"``).

    Returns:
        The result of the final evaluation ``converse`` call, or the string
        ``"Requires human review!"`` when the extraction or the ground-truth
        lookup did not produce usable data.
    """
    needs_review = "Requires human review!"

    prediction, status = converse(
        prompt="Use the extract_info tool based on the image given",
        system=None,
        image_byte=image_byte,
        image_type=image_type,
        toolConfig=toolConfig,
    )
    print(prediction)

    # Without a successful extraction that carries an ID number there is
    # nothing to look up, so stop here instead of relying on an exception.
    if (
        not status
        or not isinstance(prediction, dict)
        or not prediction.get("IDNumber")
    ):
        return needs_review

    try:
        # Normalise the ID: the extracted value may contain spaces or hyphens.
        id_number = str(prediction["IDNumber"]).replace(" ", "").replace("-", "")
        ground_truth, status = converse(
            prompt=f"Get ground truth for IDNumber={id_number}",
            system=None,
            toolConfig=toolConfig,
        )
        print(ground_truth)
    except Exception as ex:
        # Deliberately broad: any lookup failure falls back to human review,
        # but the cause is reported rather than silently discarded.
        print(f"Ground truth lookup failed: {ex!r}")
        status = False

    if not status:
        return needs_review

    evaluation_text = evaluation_prompt.replace(
        "{{TRUTH}}", str(ground_truth)
    ).replace("{{PREDICTION}}", str(prediction))
    score, status = converse(
        prompt=[
            {"text": evaluation_few_shot_learning},
            {"text": evaluation_text},
        ],
        system=[{"text": evaluation_system_prompt}],
        toolConfig=toolConfig,
    )
    return score

In [31]:
# Run the full workflow() pipeline on another sample image and print its
# result: either the evaluation text or "Requires human review!".
with open("DriverLicense3.PNG", "rb") as image:
    f = image.read()
    image_byte = bytearray(f)
    image_type = "png"
    score = workflow(image_byte, image_type)
    print(score)



01:30:49 - Output so far:
{
  "message": {
    "role": "assistant",
    "content": [
      {
        "toolUse": {
          "toolUseId": "tooluse_3g0wAZjWSlCRbtyyQajwlg",
          "name": "extract_info",
          "input": {
            "IDNumber": "123-45-789",
            "FirstName": "BRENNA",
            "MiddleName": "ANNA",
            "LastName": "MURPHY",
            "Address": "111 ANYWHERE STREET, NY, 67890-101",
            "DOB": "01/01/1970",
            "IssueDate": "08/20/2012",
            "ExpiryDate": "08/20/2020"
          }
        }
      }
    ]
  }
}

01:30:49 - Running (extract_info) tool...
{'IDNumber': '123-45-789', 'FirstName': 'BRENNA', 'MiddleName': 'ANNA', 'LastName': 'MURPHY', 'Address': '111 ANYWHERE STREET, NY, 67890-101', 'DOB': '01/01/1970', 'IssueDate': '08/20/2012', 'ExpiryDate': '08/20/2020'}

01:30:52 - Output so far:
{
  "message": {
    "role": "assistant",
    "content": [
      {
        "text": "Here is how we can get the ground truth for I