In [3]:
# AWS region passed to the request signer in the cells below.
region = "us-west-2"
# Base URL that the index and search request URLs are built from.
host = "https://b19fa6b2rsx72rc9osh8.us-west-2.aoss.amazonaws.com"

In [4]:
# Knowledge base: a database that contains information about a product,
# a service, a topic, or a knowledge domain.

import boto3
import json
import requests
from requests_aws4auth import AWS4Auth

def get_embedding(bedrock, text, model_id='amazon.titan-embed-text-v1'):
    """Return the embedding that a Bedrock model produces for ``text``.

    Args:
        bedrock: Client exposing ``invoke_model(body=..., modelId=...,
            accept=..., contentType=...)``; the notebook passes a boto3
            ``bedrock-runtime`` client.
        text: Text sent to the model as the ``inputText`` field.
        model_id: Identifier of the model to invoke. Defaults to the model
            id this helper previously hard-coded.

    Returns:
        The value stored under ``'embedding'`` in the model's JSON response.

    Raises:
        KeyError: If the response has no ``'body'`` entry or the decoded
            body has no ``'embedding'`` entry.
    """
    # Named ``payload`` rather than ``input`` so the builtin is not shadowed.
    payload = json.dumps({'inputText': text})
    response = bedrock.invoke_model(
        body=payload,
        modelId=model_id,
        accept='application/json',
        contentType='application/json',
    )
    # Index directly: a missing body should fail with a clear KeyError
    # instead of an AttributeError on None.
    response_body = json.loads(response['body'].read())
    return response_body['embedding']
    
# Indexing script: embed each sample passage and post it to the index.
service = 'aoss'
credentials = boto3.Session().get_credentials()
awsauth = AWS4Auth(credentials.access_key, credentials.secret_key, region, service, session_token=credentials.token)
index = 'demo-index'
datatype = '_doc'
url = host + '/' + index + '/' + datatype
headers = {'Content-Type': 'application/json'}
bedrock = boto3.client(service_name='bedrock-runtime')
# A list rather than a set: passages are indexed in the order written here
# on every run, and a repeated passage would not be silently dropped.
dataset = [
    'The theory of general relativity says that the observed gravitational effect between masses results from their warping of spacetime.',
    'Quantum mechanics allows the calculation of properties and behaviour of physical systems. It is typically applied to microscopic systems: molecules, atoms and sub-atomic particles.',
    'Wavelet theory is essentially the continuous-time theory that corresponds to dyadic subband transforms — i.e., those where the L (LL) subband is recursively split over and over.',
    'Every particle attracts every other particle in the universe with a force that is proportional to the product of their masses and inversely proportional to the square of the distance between their centers.',
    'The electromagnetic spectrum is the range of frequencies (the spectrum) of electromagnetic radiation and their respective wavelengths and photon energies.',
]
# NOTE(review): every run of this cell posts all passages again; presumably
# that creates duplicate documents in the index — confirm before re-running.
for entry in dataset:
    embedding = get_embedding(bedrock, entry)
    document = {
        'embedding': embedding,
        'content': entry
    }
    response = requests.post(url, auth=awsauth, json=document, headers=headers)
    # Stop on an HTTP error status instead of continuing silently.
    response.raise_for_status()

In [6]:
# Perform a Search

def get_embedding(bedrock, text, model_id='amazon.titan-embed-text-v1'):
    """Return the embedding that a Bedrock model produces for ``text``.

    NOTE(review): a helper with this same name is also defined in the
    indexing cell earlier in the notebook; consider keeping one definition.

    Args:
        bedrock: Client exposing ``invoke_model(body=..., modelId=...,
            accept=..., contentType=...)``; the notebook passes a boto3
            ``bedrock-runtime`` client.
        text: Text sent to the model as the ``inputText`` field.
        model_id: Identifier of the model to invoke. Defaults to the model
            id this helper previously hard-coded.

    Returns:
        The value stored under ``'embedding'`` in the model's JSON response.

    Raises:
        KeyError: If the response has no ``'body'`` entry or the decoded
            body has no ``'embedding'`` entry.
    """
    # Named ``request_body`` rather than ``input`` so the builtin is not shadowed.
    request_body = json.dumps({'inputText': text})
    response = bedrock.invoke_model(
        body=request_body,
        modelId=model_id,
        accept='application/json',
        contentType='application/json',
    )
    # Index directly: a missing body should fail with a clear KeyError
    # instead of an AttributeError on None.
    decoded = json.loads(response['body'].read())
    return decoded['embedding']

def search(region, host, index, embedding, limit):
    """Send a k-NN query for ``embedding`` to ``index`` and return the JSON reply.

    Args:
        region: AWS region handed to the AWS4Auth signer.
        host: Base URL of the endpoint; the request goes to
            ``<host>/<index>/_search``.
        index: Name of the index to query.
        embedding: Query vector matched against the ``embedding`` field.
        limit: Used both as the response ``size`` and as the k-NN ``k``.

    Returns:
        The decoded JSON body of the search response.

    Raises:
        requests.HTTPError: If the response carries an error status.
    """
    creds = boto3.Session().get_credentials()
    signer = AWS4Auth(
        creds.access_key,
        creds.secret_key,
        region,
        "aoss",
        session_token=creds.token,
    )

    endpoint = '/'.join((host, index, '_search'))
    knn_clause = {'vector': embedding, 'k': limit}
    payload = {
        'size': limit,
        'query': {'knn': {'embedding': knn_clause}},
    }

    reply = requests.get(
        endpoint,
        auth=signer,
        json=payload,
        headers={'Content-Type': 'application/json'},
    )
    reply.raise_for_status()
    return reply.json()

# Search driver: embed the query text, run the k-NN search, print the hits.
query = 'Albert Einstein'
index = 'demo-index'
limit = 3

bedrock = boto3.client(service_name='bedrock-runtime')
embedding = get_embedding(bedrock, query)
result = search(region, host, index, embedding, limit)

# Print the 'content' field stored in each returned hit's '_source'.
for item in result['hits']['hits']:
    print(item['_source']['content'])


Albert Einstein teaches physics in London.
Albert Einstein teaches physics in Paris.
Albert Einstein works as a support engineer in London.
