In [None]:
import boto3
import json
import numpy as np


def respond(err, res=None):
    """Build the JSON-encoded response envelope returned by every handler.

    Args:
        err: An exception describing the failure, or a falsy value (e.g.
            ``None``) on success.
        res: JSON-serialisable payload; only used when ``err`` is falsy.

    Returns:
        str: A JSON string encoding a dict with ``statusCode`` ("400" on
        error, "200" on success), ``body`` (the error text, or ``res``
        serialised with ``json.dumps``) and ``headers``.
    """
    if err:
        status_code = "400"
        # Python 3 exceptions have no ``.message`` attribute, so reading it
        # unconditionally raised AttributeError on the error path. Honour a
        # ``message`` attribute when one exists, otherwise use str(err).
        body = getattr(err, "message", None)
        if body is None:
            body = str(err)
    else:
        status_code = "200"
        body = json.dumps(res)

    ret = {
        "statusCode": status_code,
        "body": body,
        "headers": {
            "Content-Type": "application/json",
        },
    }
    return json.dumps(ret)


"""
input:
{
  "inputTexts": ["Hello World!", "Hello Nordea!"],
  "operation": "GetEmbeddings"
}

output:
{
    "embeddings": [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]],
    "operation": "GetEmbeddings"
}
"""

# Bedrock runtime client, created once when the module is loaded and shared
# by every handler below.
client = boto3.client("bedrock-runtime")
# Values passed as the `accept` / `contentType` arguments of each
# client.invoke_model call.
accept = "application/json"
contentType = "application/json"

# Maps an operation name to the Bedrock model id used to serve it.
# "GetSorted" has no entry: processSort never looks up a model.
operation2ModelId = {
    "GetEmbeddings": "amazon.titan-embed-text-v1",
    "GetEmbeddingsAndSimilarity": "amazon.titan-embed-text-v1",
    "GetDifference": "meta.llama3-70b-instruct-v1:0",
}


def processEmbedding(event):
    """Return one embedding per input text for the requested operation.

    Args:
        event: Mapping with an ``operation`` key (looked up in
            ``operation2ModelId``) and an ``inputTexts`` iterable of texts.

    Returns:
        The value of ``respond(None, ...)`` wrapping a dict with the
        ``embeddings`` list and the echoed ``operation``.
    """
    op_name = event["operation"]
    model_id = operation2ModelId[op_name]

    vectors = []
    for text in event["inputTexts"]:
        if len(text) == 0:
            # Empty texts are not sent to the model; they get an empty embedding.
            vectors.append([])
        else:
            # The model is invoked once per non-empty text.
            payload = json.dumps({"inputText": text})
            raw = client.invoke_model(
                body=payload,
                modelId=model_id,
                accept=accept,
                contentType=contentType,
            )
            parsed = json.loads(raw.get("body").read())
            vectors.append(parsed["embedding"])

    return respond(None, {"embeddings": vectors, "operation": op_name})

def str2List(string, seperate_char="@"):
    """Split ``string`` on every occurrence of ``seperate_char``.

    Args:
        string: The text to split.
        seperate_char: The delimiter; defaults to "@".

    Returns:
        list: The pieces between delimiters, in order.
    """
    pieces = string.split(seperate_char)
    return pieces
def processEmbeddingAndSimilarity(event):
    """Embed every input text and compute pairwise cosine similarity.

    Args:
        event: Mapping with an ``operation`` key (looked up in
            ``operation2ModelId``) and an ``inputTexts`` iterable of texts.

    Returns:
        The value of ``respond(None, ...)`` wrapping a dict with:
        ``embeddings`` (one list per text, ``[]`` for an empty text),
        ``similarity`` (square matrix as nested lists, 1 on the diagonal),
        ``operation`` (echoed) and ``distance`` (``1 - similarity``).
    """
    operation = event["operation"]
    modelId = operation2ModelId[operation]

    # The model is invoked once per non-empty text.
    inputTexts = event["inputTexts"]
    embeddings = []
    for inputText in inputTexts:
        if len(inputText) == 0:
            # Empty texts are not sent to the model; they get an empty embedding.
            embeddings.append([])
            continue
        body = json.dumps({"inputText": inputText})

        response = client.invoke_model(
            body=body, modelId=modelId, accept=accept, contentType=contentType
        )
        response_body = json.loads(response.get("body").read())

        embeddings.append(response_body["embedding"])

    textNum = len(embeddings)

    # Compute each norm once instead of once per pair. Empty embeddings get a
    # norm of 0 so they fall into the zero-denominator branch below.
    norms = [
        float(np.linalg.norm(embedding)) if len(embedding) > 0 else 0.0
        for embedding in embeddings
    ]

    # np.eye gives 1 on the diagonal and 0 elsewhere; only the upper triangle
    # is computed and then mirrored.
    similarity = np.eye(textNum)
    for i in range(textNum):
        for j in range(i + 1, textNum):
            denominator = norms[i] * norms[j]
            if denominator == 0:
                # Empty or all-zero embedding: keep similarity at 0 rather
                # than dividing by zero, which would yield NaN and make the
                # serialised response invalid JSON.
                continue
            similarity[i][j] = np.dot(embeddings[i], embeddings[j]) / denominator
            similarity[j][i] = similarity[i][j]

    distance = 1 - similarity

    response_dict = {
        "embeddings": embeddings,
        "similarity": similarity.tolist(),
        "operation": operation,
        "distance": distance.tolist(),
    }

    response = respond(None, response_dict)
    return response

def processDifference(event):
    """Ask the configured model to describe how two article versions differ.

    Args:
        event: Mapping with ``operation`` (looked up in ``operation2ModelId``),
            ``inputTexts`` (exactly two entries: OLD then NEW) and
            ``articleNumber`` (inserted into the prompt).

    Returns:
        The value of ``respond(None, ...)`` wrapping a dict made of
        ``operation`` followed by every key of the model's decoded response.

    Raises:
        AssertionError: If ``inputTexts`` does not contain exactly two items.
    """
    op_name = event["operation"]
    model_id = operation2ModelId[op_name]

    # Exactly one OLD and one NEW text are required to fill the prompt.
    inputTexts = event["inputTexts"]
    assert len(inputTexts) == 2, "Only support 2 input texts for GetDifference operation"
    articleNumber = event["articleNumber"]

    prompt = f"""
PROMPT FOR LLAMA 3 INSTRUCT 70 B

You are a legal expert specializing in the financial services sector. I will provide you with two versions (OLD/NEW) of an article from within EU regulation, along with a heading (ARTICLE NUMBER). Compare the two versions and comment on the differences between them. Specifically. mention important deletions and additions in your response.  I will provide some response templates below, then give you the REAL DATA to work with.

Typical EXAMPLE RESPONSE:

Article <ARTICLE NUMBER> has been amended as follows:

- <change 1, implication>
- <change 2, implication>
...


If OLD is empty, then ARTICLE NUMBER is a newly introduced article. On the other hand, if NEW is empty, then ARTICLE NUMBER has been struck down. In both these cases, respond as:

Article <ARTICLE NUMBER>, dealing with <Topic> has been newly introduced/struck down.

 <Brief Summary of implications>


If the OLD and NEW texts are completely identical, just respond as:

Article <ARTICLE NUMBER> is UNCHANGED.


REAL DATA

ARTICLE NUMBER: "{articleNumber}"

OLD: "{inputTexts[0]}"

NEW: "{inputTexts[1]}"
"""

    # Generation settings sent with the prompt; no maximum generation length
    # is set.
    request_payload = {
        "prompt": prompt,
        "temperature": 0.5,
        "top_p": 0.9,
    }
    encoded_payload = json.dumps(request_payload)

    # The full prompt is written to stdout before the model is called.
    print(prompt)

    raw = client.invoke_model(
        body=encoded_payload,
        modelId=model_id,
        accept=accept,
        contentType=contentType,
    )
    model_output = json.loads(raw.get("body").read())

    # Model keys are spread after "operation", so a same-named key in the
    # model output would take precedence.
    return respond(None, {"operation": op_name, **model_output})

'''
params = {
  "inputTitles": "22@12",
  "inputTexts": inputTextsStr,
  "distances": "0.1@0.2@0.3@0.4@0.5",
  "operation": "GetSorted"
}
'''

def processSort(event):
    """Reorder texts, titles and distances by descending distance.

    Args:
        event: Mapping with ``operation``, ``inputTexts``, ``distances`` and
            ``inputTitles``; the three sequences must have equal length.

    Returns:
        The value of ``respond(None, ...)`` wrapping a dict with
        ``operation``, ``sortedInputTexts``, ``sortedTitles`` and
        ``sortedDistances``.

    Raises:
        AssertionError: If the three sequences differ in length.
    """
    operation = event["operation"]

    texts = event["inputTexts"]
    dists = event["distances"]
    headings = event["inputTitles"]

    # Log what was received before validating it.
    print("inputTexts", texts, len(texts))
    print("distances", dists, len(dists))
    print("titles", headings, len(headings))
    assert len(texts) == len(dists), "Length of input texts and distances should be the same"
    assert len(texts) == len(headings), "Length of input texts and titles should be the same"

    # argsort is ascending; reversing it puts the largest distance first.
    order = np.argsort(dists)[::-1]

    response_dict = {
        "operation": operation,
        "sortedInputTexts": [texts[k] for k in order],
        "sortedTitles": [headings[k] for k in order],
        "sortedDistances": [dists[k] for k in order],
    }
    return respond(None, response_dict)

def lambda_handler(event, context):
    """Entry point: normalise the incoming event and dispatch on ``operation``.

    The event is either a dict that already carries ``operation`` (used
    as-is), or a dict whose ``queryStringParameters`` hold the request as
    strings. In the second case the list-valued parameters are "@"-joined
    strings and are split here (``distances`` is also converted to floats).

    Args:
        event: The invocation payload, as described above.
        context: Lambda context object; not used.

    Returns:
        The response produced by the selected ``process*`` function, or the
        "Invalid operation" error response when ``operation`` is missing or
        unknown.
    """
    print("Received event: " + json.dumps(event, indent=2))

    if "operation" not in event:
        # queryStringParameters may be absent or null (a request with no query
        # string); treat both as "no parameters" instead of crashing with
        # KeyError / TypeError.
        event = event.get("queryStringParameters") or {}
        if "inputTexts" in event:
            event["inputTexts"] = str2List(event["inputTexts"])
        if "distances" in event:
            event["distances"] = [
                float(distance) for distance in str2List(event["distances"])
            ]
        if "inputTitles" in event:
            event["inputTitles"] = str2List(event["inputTitles"])

    # .get so that a missing operation reaches the invalid-operation branch
    # below rather than raising KeyError.
    operation = event.get("operation")

    if operation == "GetEmbeddings":
        return processEmbedding(event)
    elif operation == "GetEmbeddingsAndSimilarity":
        return processEmbeddingAndSimilarity(event)
    elif operation == "GetDifference":
        return processDifference(event)
    elif operation == "GetSorted":
        return processSort(event)
    else:
        return respond(Exception("Invalid operation"), None)
    

In [15]:
import requests
import json

# Smoke test for the "GetEmbeddingsAndSimilarity" operation.
# NOTE(review): the recorded output of this cell is HTTP 500; the cause is
# not visible from the notebook.

# The URL of the API endpoint
url = 'https://fdbhhaqp4i.execute-api.us-west-2.amazonaws.com/NordXDev/'

# The handler splits the inputTexts query parameter on "@", so the texts are
# joined with the same character here.
seperate_char = "@"

# The last text is deliberately empty to exercise the handler's empty-input branch.
inputTexts = ["Hello World!", "Hello Nordea!", ""]
inputTextsStr = seperate_char.join(inputTexts)

# Query-string parameters read by the handler: the joined texts and the
# operation to run.
params = {
  "inputTexts": inputTextsStr,
  "operation": "GetEmbeddingsAndSimilarity"
}

# Making a GET request
response = requests.get(url, params=params)

print(response)
# Check if the request was successful
if response.status_code == 200:
    # The handler's payload is carried in the "body" field of the JSON response.
    data = response.json()['body']
    print(data)
else:
    print("Failed to retrieve data:", response.status_code)

<Response [500]>
Failed to retrieve data: 500


In [14]:
import requests
import json

# Smoke test for the "GetDifference" operation.

# The URL of the API endpoint
url = 'https://fdbhhaqp4i.execute-api.us-west-2.amazonaws.com/NordXDev/'

# The handler splits the inputTexts query parameter on "@", so the texts are
# joined with the same character here.
seperate_char = "@"

# GetDifference requires exactly two texts: the OLD version, then the NEW one.
inputTexts = ["Hello World!", "Hello Nordea!"]
inputTextsStr = seperate_char.join(inputTexts)

# Query-string parameters read by the handler: the joined texts, the article
# number inserted into the prompt, and the operation to run.
params = {
  "inputTexts": inputTextsStr,
  "articleNumber": "22",
  "operation": "GetDifference"
}

# Making a GET request
response = requests.get(url, params=params)

print(response)
# Check if the request was successful
if response.status_code == 200:
    # The handler's payload is carried in the "body" field of the JSON response.
    data = response.json()['body']
    print(data)
else:
    print("Failed to retrieve data:", response.status_code)

<Response [200]>
{"operation": "GetDifference", "generation": "```\nArticle 22 has been amended as follows:\n\n- The phrase \"World\" has been replaced with \"Nordea\", which may indicate a shift in focus towards the Nordea banking group.", "prompt_token_count": 247, "generation_token_count": 40, "stop_reason": "stop"}


In [11]:
import requests
import json

# Smoke test for the "GetSorted" operation.

# The URL of the API endpoint
url = 'https://fdbhhaqp4i.execute-api.us-west-2.amazonaws.com/NordXDev/'

# The handler splits inputTexts, inputTitles and distances on "@", so all
# three are sent as "@"-joined strings.
seperate_char = "@"

inputTexts = ["Hello World!", "Hello Nordea!", "Hello Amazon!"]
inputTextsStr = seperate_char.join(inputTexts)

# Query-string parameters read by the handler. Titles, texts and distances
# must contain the same number of "@"-separated items; the handler converts
# the distances to floats and returns everything ordered by descending distance.
params = {
  "inputTitles": "22@12@55",
  "inputTexts": inputTextsStr,
  "distances": "0.1@0.3@0.2",
  "operation": "GetSorted"
}

# Making a GET request
response = requests.get(url, params=params)

print(response)
# Check if the request was successful
if response.status_code == 200:
    # The handler's payload is carried in the "body" field of the JSON response.
    data = response.json()['body']
    print(data)
else:
    print("Failed to retrieve data:", response.status_code)

<Response [200]>
{"operation": "GetSorted", "sortedInputTexts": ["Hello Nordea!", "Hello Amazon!", "Hello World!"], "sortedTitles": ["12", "55", "22"], "sortedDistances": [0.3, 0.2, 0.1]}
