In [None]:
pip install boto3 langchain langchain-pinecone

In [None]:
import boto3
# Run Amazon Textract text detection on an image stored in S3.
#
# No access keys or region are passed inline: boto3 resolves them from its
# standard configuration chain (for example the AWS_ACCESS_KEY_ID,
# AWS_SECRET_ACCESS_KEY and AWS_DEFAULT_REGION environment variables), which
# keeps secrets out of the notebook.
# NOTE(review): the configured region presumably has to match the bucket's
# region for S3-hosted documents; confirm against the Textract documentation.
S3_BUCKET = 'documentpark1'
S3_DOCUMENT_KEY = 'Apollo-11-Mission.jpeg'

client = boto3.client('textract')
response = client.detect_document_text(
    Document={
        'S3Object': {
            'Bucket': S3_BUCKET,
            'Name': S3_DOCUMENT_KEY,
        }
    }
)

# Display only the number of returned blocks rather than echoing the whole
# response, which can be very large; inspect `response` directly when needed.
len(response['Blocks'])

In [None]:
# Local text file used to hold the text extracted from the image.
output_file_name = "demo_rag_on_image.txt"

In [None]:
# Persist every LINE block of the Textract response, one detected line per row.
with open(output_file_name, 'w') as output_file_io:
    output_file_io.writelines(
        block["Text"] + '\n'
        for block in response['Blocks']
        if block['BlockType'] == 'LINE'
    )


In [None]:
import os
import json
# Bedrock runtime client, pinned to us-east-1.
# Credentials are deliberately not passed inline; boto3 resolves them from its
# standard chain (environment variables, shared config files, or an attached
# role), so no secret has to be pasted into the notebook.
boto3_bedrock = boto3.client('bedrock-runtime', region_name='us-east-1')

In [None]:
from langchain.docstore.document import Document
from langchain.document_loaders import TextLoader
from langchain.embeddings import BedrockEmbeddings
from langchain.text_splitter import CharacterTextSplitter

In [None]:
# Load the extracted text from the file named by `output_file_name`, so the
# path stays in sync with that variable instead of relying on a hard-coded
# absolute location.
with open(output_file_name) as f:
    doc = f.read()

# Fail early with a clear message instead of indexing nothing further down.
if not doc.strip():
    raise ValueError(f'{output_file_name!r} is empty; there is no extracted text to index.')

# Wrap the raw text in a single LangChain Document.
docs = [Document(page_content=doc)]

# Split the document on newlines into chunks (chunk_size=300, chunk_overlap=25).
text_splitter = CharacterTextSplitter(
    separator='\n',
    chunk_size=300,
    chunk_overlap=25,
)
split_docs = text_splitter.split_documents(docs)

In [None]:
# Display the chunks produced by the text splitter for inspection.
split_docs

In [None]:
# Make the Pinecone API key available through the environment.
# A key that is already set is left untouched; otherwise it is requested
# interactively with getpass so the secret is never stored in the notebook.
import getpass

if not os.environ.get('PINECONE_API_KEY'):
    os.environ['PINECONE_API_KEY'] = getpass.getpass('Pinecone API key: ')

In [None]:
from langchain_pinecone import PineconeVectorStore

# Name of the Pinecone index that receives the document chunks.
index_name = "samplevttextract"

# Embedding model backed by the Bedrock runtime client created earlier.
embedding_model = BedrockEmbeddings(
    model_id="amazon.titan-embed-text-v1",
    client=boto3_bedrock,
)

# Build the vector store from the split documents using the embedding model.
docsearch = PineconeVectorStore.from_documents(
    split_docs,
    embedding_model,
    index_name=index_name,
)

In [None]:

# Import from the `langchain.prompts` submodule; importing PromptTemplate from
# the package root is a deprecated shortcut.
from langchain.prompts import PromptTemplate

# Prompt with two placeholders: `{context}` for the retrieved document chunks
# and `{human_input}` for the user's question.
RAG_PROMPT_TEMPLATE = '''Here is some important context which can help inform the questions the Human asks.
Make sure to not make anything up to answer the question if it is not provided in the context.


{context}


H: {human_input}

A:
'''
PROMPT = PromptTemplate.from_template(RAG_PROMPT_TEMPLATE)

In [None]:
# Connect to the already-populated index through the `langchain_pinecone`
# integration, the same one this notebook uses to build the index, instead of
# the legacy `langchain.vectorstores.Pinecone` wrapper.
from langchain_pinecone import PineconeVectorStore

existing_search = PineconeVectorStore.from_existing_index(
    index_name=index_name,
    embedding=embedding_model,
)

In [None]:
# Ask a question, retrieve matching chunks, and generate an answer with Titan.
human_input = input("Enter your question : ").strip()
if not human_input:
    # An empty query has nothing to embed or answer; stop with a clear message.
    raise ValueError("Please enter a non-empty question.")

# Fetch the three most similar chunks and label them Document 1..3 for the prompt.
search_results = existing_search.similarity_search(human_input, k=3)
context_string = '\n\n'.join(
    f'Document {position}: ' + result.page_content
    for position, result in enumerate(search_results, start=1)
)
prompt_data = PROMPT.format(human_input=human_input, context=context_string)

# Request body for the Titan text model.
body_part = json.dumps({
    'inputText': prompt_data,
    'textGenerationConfig': {
        'maxTokenCount': 8192,
        'stopSequences': [],
        'temperature': 0,
        'topP': 1,
    },
})

# Kept under its own name so the Textract `response` is not overwritten;
# otherwise re-running the cell that writes the extracted text would fail.
bedrock_response = boto3_bedrock.invoke_model(
    body=body_part,
    contentType="application/json",
    accept="application/json",
    modelId='amazon.titan-text-express-v1',
)
output_text = json.loads(bedrock_response['body'].read())['results'][0]['outputText']
output_text