In [1]:
import boto3
from langchain.llms.bedrock import Bedrock
from langchain.embeddings.bedrock import BedrockEmbeddings
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

import numpy as np
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
from langchain.document_loaders import PyPDFLoader, PyPDFDirectoryLoader

from botocore.config import Config
# botocore settings passed to the Bedrock clients created below:
# pin the region and retry failed calls up to 10 attempts in "standard" mode.
retry_config = Config(
    region_name="us-east-1",
    retries={"max_attempts": 10, "mode": "standard"},
)

In [2]:
# Create the boto3 session from a named AWS profile. The profile name can be
# parametrized depending on the environment you are using.
session = boto3.session.Session(profile_name='test-demo')

# boto3 provides two different clients to invoke Bedrock operations:
#   1. bedrock         : creating and managing Bedrock models.
#   2. bedrock-runtime : running inference using Bedrock models.
boto3_bedrock = session.client("bedrock", config=retry_config)
boto3_bedrock_runtime = session.client("bedrock-runtime", config=retry_config)

# We will implement a RAG architecture. The goal is to build a vector store
# (a knowledge base, to reduce hallucinations) so that the model can refer to
# the data we have provided.
#
# To achieve this we first need to source data (this can be archived
# PDF/docs/txt/csv or any other datastore, even SQL tables).
# So the pipeline will be:
#   1. Source the datasets.
#   2. Apply any transformation that is required.
#   3. Split the text and create chunks (used in NLP; chunking needs tuning
#      to get better output).
#   4. Create embeddings with an embedding model (various models are available).
EMBEDDINGS_MODEL_ID = 'amazon.titan-embed-text-v1'
brrkEmbeddings = BedrockEmbeddings(
    model_id=EMBEDDINGS_MODEL_ID,
    client=boto3_bedrock_runtime,
)


In [3]:
def create_embeddings(directory_path, chunk_size=1000, chunk_overlap=100):
    """Load the PDFs in a directory, split them into chunks and print statistics.

    The function loads every document the PDF directory loader returns, splits
    them with a ``RecursiveCharacterTextSplitter``, prints the average character
    length before and after splitting, and finally embeds the first chunk with
    the module-level ``brrkEmbeddings`` to show what an embedding looks like.

    Args:
        directory_path: Directory that contains the PDF files to load.
        chunk_size: Maximum chunk size handed to the text splitter.
        chunk_overlap: Overlap between consecutive chunks handed to the splitter.

    Returns:
        None. All results are printed.
    """

    def _average_length(items):
        # Integer average of the text length; callers guarantee `items` is non-empty.
        return sum(len(item.page_content) for item in items) // len(items)

    print(f"Loading directory {directory_path}")
    loader = PyPDFDirectoryLoader(directory_path)
    documents = loader.load()
    if not documents:
        # Without this guard the average below would divide by zero.
        print(f"No documents were loaded from {directory_path}; nothing to embed.")
        return

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    docs = text_splitter.split_documents(documents)
    avg_char_count_pre = _average_length(documents)
    print(f'Average length among {len(documents)} documents loaded is {avg_char_count_pre} characters.')
    if not docs:
        # The loaded documents produced no chunks, so there is nothing to
        # average or to embed as a sample.
        print("The text splitter produced no chunks; nothing to embed.")
        return

    avg_char_count_post = _average_length(docs)
    print(f'After the split we have {len(docs)} documents more than the original {len(documents)}.')
    print(f'Average length among {len(docs)} documents (after split) is {avg_char_count_post} characters.')

    # Embed only the first chunk, as a sample of the embedding's values and shape.
    sample_embedding = np.array(brrkEmbeddings.embed_query(docs[0].page_content))
    print("Sample embedding of a document chunk: ", sample_embedding)
    print("Size of the embedding: ", sample_embedding.shape)


In [4]:
# Directory holding the PDFs; the call prints chunk statistics and a sample embedding.
data_path = "C:\\Users\\RT\\OneDrive\\Desktop\\file"
create_embeddings(directory_path=data_path)

Loading directory C:\Users\RT\OneDrive\Desktop\file
Average length among 16 documents loaded is 1517 characters.
After the split we have 35 documents more than the original 16.
Average length among 35 documents (after split) is 744 characters.
Sample embedding of a document chunk:  [ 0.19335938 -0.01031494  0.01507568 ...  0.23535156 -0.06787109
 -0.37890625]
Size of the embedding:  (1536,)


# New way: question answering over a saved FAISS vector store with RetrievalQA

In [5]:
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
import boto3
from langchain.llms.bedrock import Bedrock
from langchain.embeddings.bedrock import BedrockEmbeddings
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.vectorstores import FAISS

import os
from datetime import datetime

import numpy as np
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
from langchain.document_loaders import PyPDFLoader, PyPDFDirectoryLoader

from botocore.config import Config

In [6]:
# botocore settings passed to the Bedrock clients created below:
# pin the region and retry failed calls up to 10 attempts in "standard" mode.
retry_config = Config(
    region_name='us-east-1',
    retries={
        'max_attempts': 10,
        'mode': 'standard',
    },
)

# Create the boto3 session from a named AWS profile. The profile name can be
# parametrized depending on the environment you are using.
session = boto3.session.Session(profile_name='test-demo')

# boto3 provides two different clients to invoke Bedrock operations:
#   1. bedrock         : creating and managing Bedrock models.
#   2. bedrock-runtime : running inference using Bedrock models.
boto3_bedrock = session.client("bedrock", config=retry_config)
boto3_bedrock_runtime = session.client("bedrock-runtime", config=retry_config)

In [11]:
# Embedding model used to embed queries against the saved vector store.
EMBEDDINGS_MODEL_ID = 'amazon.titan-embed-text-v1'
brrkEmbeddings = BedrockEmbeddings(
    client=boto3_bedrock_runtime,
    model_id=EMBEDDINGS_MODEL_ID,
)

# Text-generation model that answers the questions.
# Disabled variant that also passed model_kwargs, kept for reference:
#bedrock_llm = Bedrock(model_id="ai21.j2-ultra-v1", client=boto3_bedrock_runtime, model_kwargs={'max_tokens_to_sample':200})
bedrock_llm = Bedrock(
    client=boto3_bedrock_runtime,
    model_id="ai21.j2-ultra-v1",
)

# Directory of the previously saved vector store.
save_local_vector_store_path = 'C:\\Users\\RT/vectorstore/14112023235757.vs'

def load_local_vector_store(vector_store_path):
    """Load a previously saved FAISS vector store from ``vector_store_path``.

    The directory is expected to contain an ``embeddings_model_id`` file next to
    the FAISS index; a missing or unreadable file counts as a load failure.
    Loading is best-effort: any error is reported and ``None`` is returned so
    the caller can decide to build a new store instead.

    Args:
        vector_store_path: Directory the vector store was saved to.

    Returns:
        The loaded ``FAISS`` vector store, or ``None`` when loading failed.
    """
    try:
        with open(f"{vector_store_path}/embeddings_model_id", 'r') as f:
            stored_model_id = f.read().strip()

        # NOTE(review): presumably this file records the embedding model the
        # store was built with - confirm against the code that saves the store.
        # A mismatch is only reported; it does not prevent loading.
        active_model_id = getattr(brrkEmbeddings, "model_id", None)
        if active_model_id is not None and stored_model_id != active_model_id:
            print(
                f"Warning: {vector_store_path}/embeddings_model_id contains "
                f"{stored_model_id!r} but the active embeddings model is {active_model_id!r}"
            )

        # NOTE(review): FAISS.load_local deserializes files from disk; only
        # point this at vector stores you created yourself.
        vector_store = FAISS.load_local(vector_store_path, brrkEmbeddings)
    except Exception as exc:
        print("Failed to load vector store, continuing creating one...")
        # Surface the cause instead of silently swallowing it.
        print(f"Reason: {exc!r}")
        return None

    print("Loaded vector store")
    return vector_store
        
# Quick check of the loader: prints the loaded store object, or None if loading failed.
print(load_local_vector_store(vector_store_path=save_local_vector_store_path))

Loaded vector store
<langchain.vectorstores.faiss.FAISS object at 0x0000022C7A41CF90>


In [12]:
# Prompt text with two placeholders, {context} and {question}; it instructs the
# model to answer concisely and to admit when it does not know the answer.
prompt_template = """Human: Use the following pieces of context to provide a concise answer to
the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

{context}

Question: {question}
Assistant:"""

# Wrap the text in a PromptTemplate, declaring the placeholders it uses.
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)



In [13]:
def create_RetrievalQA_chain(query):
    """Answer ``query`` with a RetrievalQA chain over the saved vector store.

    Loads the vector store from ``save_local_vector_store_path``, builds a
    similarity retriever (k=5) on top of it, and runs a "stuff" RetrievalQA
    chain with the module-level ``bedrock_llm`` and ``prompt``.

    Args:
        query: The question to ask.

    Returns:
        The value returned by the chain call; the notebook reads the answer
        text from its ``'result'`` key.

    Raises:
        RuntimeError: If the vector store could not be loaded.
    """
    print("Connecting to bedrock")
    vector_store = load_local_vector_store(save_local_vector_store_path)
    if vector_store is None:
        # The loader returns None on failure; fail here with a clear message
        # instead of an AttributeError on `.as_retriever` below.
        raise RuntimeError(
            f"Could not load the vector store from {save_local_vector_store_path!r}; "
            "create it before querying."
        )

    retriever = vector_store.as_retriever(
        search_type="similarity", search_kwargs={"k": 5, "include_metadata": True}
    )
    chain = RetrievalQA.from_chain_type(
        llm=bedrock_llm,
        chain_type="stuff",
        retriever=retriever,
        chain_type_kwargs={"prompt": prompt}
    )
    return chain({"query": query})

In [21]:
# Ask a sample question and print only the answer text from the chain output.
query = "give names of LLM available to create QA app"
result = create_RetrievalQA_chain(query)
print(result["result"])

Connecting to bedrock
Loaded vector store

From the provided context, LLaMA and T5 by Google,  Einstein by Salesforce
