In [None]:
import json
import boto3
import os
import openai
from langchain.document_loaders import PyPDFLoader
from langchain.vectorstores import DeepLake 
from langchain_core.documents import Document 
from langchain_openai import OpenAIEmbeddings 
from langchain.retrievers.document_compressors import EmbeddingsFilter 
from langchain.retrievers import ContextualCompressionRetriever 
import requests 

In [None]:
# Assign Parameters
DEEP_LAKE_PATH = "s3://ragdata" #Path to S3 Bucket
PDF_FILE_PATH = "/path/to/your/file.pdf" #Add local file path to the PDF with your RAG data
# The OpenAI API key (used for the embedding model) is read from the environment
# instead of being pasted here, so the secret is never stored in the saved notebook
# or committed to version control. Set it before starting Jupyter, e.g.
#   export OPENAI_API_KEY=sk-...
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
if not OPENAI_API_KEY:
    # Surface the problem at configuration time; the embedding calls further down
    # are expected to fail without a key.
    print("Warning: OPENAI_API_KEY environment variable is not set; OpenAI embedding calls are expected to fail.")


In [None]:
# Load RAG data
# PyPDFLoader is pointed at PDF_FILE_PATH; load() produces the `documents` that are
# embedded into the vector store in the next step.
try:
    pdf_loader = PyPDFLoader(PDF_FILE_PATH)
    documents = pdf_loader.load()
except Exception as e:
    print(f"Error loading PDF: {e}")
    # Chain the original exception explicitly and name the offending path so the
    # root cause stays visible in the traceback. RuntimeError is still caught by
    # any `except Exception` handler.
    raise RuntimeError(f"Error: failed to load RAG file '{PDF_FILE_PATH}'") from e

In [None]:
# Build the embedding model, open the Deep Lake dataset, and index the loaded documents

embeddings = OpenAIEmbeddings(
    api_key=OPENAI_API_KEY,
    model="text-embedding-3-small",
)
vector_store = DeepLake(
    dataset_path=DEEP_LAKE_PATH,
    embedding=embeddings,
)

# Adds the documents loaded from the PDF to the dataset at DEEP_LAKE_PATH
vector_store.add_documents(documents)

In [None]:
# Helper for displaying retrieved documents in a readable form
def pretty_print_docs(docs):
    """Print each document's text under a numbered heading.

    Entries are printed in order as "Document N:" followed by a blank line and
    the document's ``page_content``; consecutive entries are separated by a
    line of 100 dashes.
    """
    divider = "\n" + "-" * 100 + "\n"
    sections = []
    for position, doc in enumerate(docs, start=1):
        sections.append(f"Document {position}:\n\n" + doc.page_content)
    print(divider.join(sections))

In [None]:
# Invoke basic retrieval across all documents

#embeddings = OpenAIEmbeddings(api_key=OPENAI_API_KEY, model="text-embedding-3-small") 
#vector_store = DeepLake(dataset_path=DEEP_LAKE_PATH, embedding_function=embeddings)
#retriever = vector_store.as_retriever()
#docs = retriever.invoke("How should I approach investing my 100000 dollars for retirement if I am 40 years old and risk adverse?")


In [None]:
# Optimized retrieval using an LLM search over documents

#from langchain.retrievers import ContextualCompressionRetriever
#from langchain.retrievers.document_compressors import LLMChainExtractor
#from langchain_openai import OpenAI

#choose scanner
#llm = OpenAI(api_key=OPENAI_API_KEY, temperature=0)
#choose extractor
#compressor = LLMChainExtractor.from_llm(llm)
# initialize retriever
#retriever = vector_store.as_retriever()
#compression_retriever = ContextualCompressionRetriever(base_compressor=compressor, base_retriever=retriever)

#compressed_docs = compression_retriever.invoke("How should I approach investing my 100000 dollars for retirement if I am 40 years old and risk adverse?")
#pretty_print_docs(compressed_docs)

In [None]:
# Optimized (and lower cost) retrieval: filter the retrieved documents by embedding similarity to the query
# https://python.langchain.com/docs/how_to/contextual_compression/


# embedding model used for the vector store and for the similarity filter
embeddings = OpenAIEmbeddings(
    api_key=OPENAI_API_KEY,
    model="text-embedding-3-small",
)
# open the vector store and expose it as a retriever
vector_store = DeepLake(
    dataset_path=DEEP_LAKE_PATH,
    embedding=embeddings,
)
retriever = vector_store.as_retriever()

# similarity threshold decides what information gets ignored (lower threshold = more relaxed constraint)
embeddings_filter = EmbeddingsFilter(
    embeddings=embeddings,
    similarity_threshold=0.3,
)
# wrap the base retriever so its results are passed through the embeddings filter
compression_retriever = ContextualCompressionRetriever(
    base_compressor=embeddings_filter,
    base_retriever=retriever,
)

# run the query through the filtering retriever
compressed_docs = compression_retriever.invoke(
    "How should I approach investing my 100000 dollars for retirement if I am 40 years old and risk adverse?"
)
# show the documents that were kept
pretty_print_docs(compressed_docs)

In [None]:
# Define function to combine RAG, prompt engineering, and user request
def get_combined_response(user_query, compressed_docs, character):
    """Assemble the LLM prompt from a persona, the user question, and retrieved context.

    This function only formats text; it performs no retrieval and no model call.

    Args:
        user_query: The question asked by the user.
        compressed_docs: Iterable of already-retrieved documents, each exposing a
            ``page_content`` string. ``None`` or an empty collection produces an
            empty context section.
        character: Persona / role description placed at the top of the prompt.

    Returns:
        The prompt string with the sections "Who you are", "User question" and
        "Context from documents", in that order.
    """
    # Treat a missing retrieval result like "no documents" instead of raising TypeError.
    docs = compressed_docs if compressed_docs is not None else []

    # Combine documents into a single text, one document per line break
    combined_docs_text = "\n".join(doc.page_content for doc in docs)

    # Combine persona, user query and retrieved documents
    return (
        f"Who you are: {character}\n\n"
        f"User question: {user_query}\n\n"
        f"Context from documents:\n{combined_docs_text}"
    )
 

In [None]:
# End-to-end test of the RAG vector-store implementation - the below would be added to a Lambda function for production.

# create the Bedrock clients (only bedrock_runtime is used for the call in this cell)
bedrock = boto3.client(service_name='bedrock', region_name='us-east-1')
bedrock_runtime = boto3.client(service_name='bedrock-runtime', region_name='us-east-1')

# fold persona, user question and retrieved context into a single prompt
prompt = get_combined_response(
    user_query="How should I approach investing my 100000 dollars for retirement if I am 40 years old and risk adverse?",
    compressed_docs=compressed_docs,
    character="you are a financial advisor helping a user make financial decisions that benefit them",
)

# model and content-type settings for the request
modelId = 'anthropic.claude-v2'
accept = 'application/json'
contentType = 'application/json'

# request body, serialized to JSON; the prompt is wrapped in Human/Assistant turn markers
inputs = json.dumps({
    "prompt": f"\n\nHuman: {prompt}\n\nAssistant:",
    "temperature": 0.7,
    "top_p": 0.901,
    "top_k": 250,
    "max_tokens_to_sample": 3000,
    "stop_sequences": ["\n\nHuman:"],
    "anthropic_version": 'bedrock-2023-05-31',
})

# call the model, then decode the JSON response body and pull out the completion text
response = bedrock_runtime.invoke_model(
    modelId=modelId,
    contentType=contentType,
    accept=accept,
    body=inputs,
)
response_body = json.loads(response.get('body').read())
result = response_body['completion']

In [None]:
# Print the completion text extracted from the Bedrock response
print(result)