In [2]:

# Import necessary modules for embeddings, LLMs, and document loading
from langchain.embeddings import BedrockEmbeddings
from langchain.llms.bedrock import Bedrock
import boto3


In [3]:

# Load the Bedrock API reference and User Guide PDFs with PyPDFLoader.
#
# The directory holding the PDFs is configurable through the BEDROCK_DOCS_DIR
# environment variable so the notebook is not tied to one machine. When the
# variable is unset, the original location is used, so existing setups keep
# working unchanged.
import os
from pathlib import Path

from langchain.document_loaders import PyPDFLoader

DOCS_DIR = Path(
    os.environ.get(
        "BEDROCK_DOCS_DIR",
        "/Users/sascha/Desktop/development/gen-ai-hackathon-aws/hackathon-workshop/files",
    )
)

# PyPDFLoader is handed a plain string path, exactly as before.
loader = PyPDFLoader(str(DOCS_DIR / "bedrock-api.pdf"))
api_pages = loader.load_and_split()

# `loader` is rebound for the second file, as in the original cell.
loader = PyPDFLoader(str(DOCS_DIR / "bedrock-ug.pdf"))
ug_pages = loader.load_and_split()


In [15]:
# Print the number of entries in each loaded page list (API reference first, then User Guide)
for pages in (api_pages, ug_pages):
    print(len(pages))

1232
1232


In [4]:

# Bring in json and create the boto3 client for the Bedrock runtime service
import json
bedrockruntime = boto3.client(service_name='bedrock-runtime')

# Wrap the Amazon Titan text-embedding model, served through that client
bedrock_embeddings = BedrockEmbeddings(
    client=bedrockruntime,
    model_id="amazon.titan-embed-text-v1",
)


In [5]:

# Build a Chroma vector store from the combined API and User Guide pages,
# using the Bedrock embeddings object to embed them
from langchain.vectorstores import chroma

bigvectorstore_chroma = chroma.Chroma.from_documents(
    documents=api_pages + ug_pages,
    embedding=bedrock_embeddings,
)


In [6]:

# Import modules for the retrieval-based QA system and prompt templates
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate

# Define the prompt template for the QA model
prompt_template = """
Human: Use the following pieces of context to provide an accurate answer to the question at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

<context>
{context}
</context>

Question: {question}

Assistant:
"""


In [7]:

# Anthropic Claude v2 through Bedrock, reusing the runtime client;
# max_tokens_to_sample is set to 1024
llm = Bedrock(
    client=bedrockruntime,
    model_id="anthropic.claude-v2",
    model_kwargs={'max_tokens_to_sample': 1024},
)

# Turn the raw template string into a PromptTemplate with its two input variables
PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)


In [8]:

# Assemble the RetrievalQA chain: a similarity retriever over the Chroma store
# (k=5) combined with the LLM and the custom prompt in a "stuff" chain
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    chain_type_kwargs={"prompt": PROMPT},
    retriever=bigvectorstore_chroma.as_retriever(
        search_kwargs={"k": 5},
        search_type="similarity",
    ),
    verbose=True,
    return_source_documents=False,
)


In [13]:

# Question to put to the RetrievalQA chain
query = "How to invoke a bedrock model with boto3? Provide a concrete example"

# Call the chain with a dict carrying the question under the "query" key
result = qa(dict(query=query))




[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


In [14]:

# Render the chain's answer text as Markdown in the cell output
from IPython.display import Markdown, display

answer_markdown = Markdown(result["result"])
display(answer_markdown)


 Here is an example of how to invoke a Bedrock model using the Boto3 Python SDK:

```python
import boto3

bedrock = boto3.client('bedrock')

response = bedrock.create_model_invocation_job(
    JobName='MyInferenceJob',
    ModelId='abc123', 
    InputDataConfig={
        'S3Uri': 's3://mybucket/input.jsonl'  
    },
    OutputDataConfig={
        'S3Uri': 's3://mybucket/output/'
    },
    RoleArn='arn:aws:iam::123456789012:role/BedrockAccessRole'
)

print(response['JobArn'])
```

The key steps are:

- Create a Bedrock client 
- Call `create_model_invocation_job()`
- Pass the model ID, input/output S3 locations, and an IAM role 
- The response will contain the `JobArn` to monitor the status of the batch inference job

So this shows how to kick off an asynchronous inference job in Bedrock using Boto3. You would then need to check the job status and retrieve the results from S3 once it completes.