**Install Required Packages**

In [3]:
!pip install -q git+https://github.com/huggingface/transformers
!pip install -qU langchain Faiss-gpu tiktoken sentence-transformers
!pip install -qU trl Py7zr auto-gptq optimum
!pip install -q rank_bm25
!pip install -q PyPdf

  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
  Building wheel for transformers (pyproject.toml) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m13.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m85.5/85.5 MB[0m [31m9.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m92.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m86.0/86.0 kB[0m [31m12.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m177.8/177.8 kB[0m [31m26.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m48.2/48.2 kB[0m [31m6.8 MB/s[

**Import Necessary Packages**

In [4]:
import langchain
from langchain.embeddings import CacheBackedEmbeddings,HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.storage import LocalFileStore
from langchain.retrievers import BM25Retriever,EnsembleRetriever
from langchain.document_loaders import PyPDFLoader,DirectoryLoader
from langchain.llms import HuggingFacePipeline
from langchain.cache import InMemoryCache
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.schema import prompt
from langchain.chains import RetrievalQA
from langchain.callbacks import StdOutCallbackHandler
from langchain import PromptTemplate
#
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

**Data parsing and Loading using LangChain**

In [9]:
# Load every file matching *.pdf in the sample-data folder, using PyPDFLoader
# for each matched file.
dir_loader = DirectoryLoader(
    "/content/sample_data",
    glob="*.pdf",
    loader_cls=PyPDFLoader,
)
docs = dir_loader.load()

# Report how many Document objects the loader returned.
print(f"len of documents in :{len(docs)}")

len of documents in :88


**Create manageable pieces of text by using RecursiveCharacterTextSplitter to split the loaded documents into chunks**


In [10]:
# Splitter configured with chunk_size=500 and chunk_overlap=200, so
# neighbouring chunks share part of their text.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=200)

# Split the loaded documents and report the resulting chunk count.
esops_documents = text_splitter.transform_documents(docs)
print(f"number of chunks in amazon documents : {len(esops_documents)}")


number of chunks in amazon documents : 1039


**Create Vectorstore**

*   Here we will leverage CacheBackedEmbeddings to avoid re-embedding the same text over and over again.
*   The structured documents will be parsed into a format that is useful for querying, retrieving, and use in an LLM application.
*   Here we will use FAISS (Facebook AI Similarity Search) as the vectorstore.

In [11]:
# File-backed store under ./cache/ that holds the cached embedding bytes.
store = LocalFileStore("./cache/")

# Base sentence-embedding model.
embed_model_id = "BAAI/bge-small-en-v1.5"
core_embeddings_model = HuggingFaceEmbeddings(model_name=embed_model_id)

# Wrap the base model with the file-backed cache; the model id is used as the
# cache namespace.
embedder = CacheBackedEmbeddings.from_bytes_store(
    core_embeddings_model,
    store,
    namespace=embed_model_id,
)

# Create VectorStore: embed the chunks through the cache-backed embedder and
# index them in FAISS.
vectorstore = FAISS.from_documents(esops_documents, embedder)

**Create Sparse Embedding**

In [12]:
# Keyword-based (BM25) retriever built over the same chunks that were indexed
# in the vector store; k is set to 5 results per query.
bm25_retriever = BM25Retriever.from_documents(esops_documents)
bm25_retriever.k = 5

**Retrieve passages from the vectorstore that are similar to the query**

In [13]:
query = "what is the revenue of AWS in 2022?"

# Embed the query and print the embedding length.
# NOTE(review): the query is embedded with `core_embeddings_model` directly,
# not with the cache-backed `embedder`, so the embedding cache is not involved.
embedding_vector = core_embeddings_model.embed_query(query)
print(len(embedding_vector))

# Fetch the 3 nearest chunks to the query vector.
docs_resp = vectorstore.similarity_search_by_vector(embedding_vector, k=3)

# Print each hit's text followed by two blank lines as a separator.
for page in docs_resp:
    print(page.page_content, end="\n\n\n")

384
ever in 2023. Overall, we remain confident about our plans to lower costs, reduce delivery times, and build a
meaningfully larger retail business with healthy operating margins.
AWS has an $85B annualized revenue run rate, is still early in its adoption curve, but at a juncture where it’s
critical to stay focused on what matters most to customers over the long-haul . Despite growing 29% year-over-


billion , and $58.3 billion  in 2021  and 2022 , which primarily reflect investments in technology infrastructure (the majority of 
which is to support AWS business growth) and in additional capacity to support our fulfillment network . We expect to continue 
these investments over time, with increased spending on technology infrastructure . We made cash payments, net of acquired


Included in “ Other long-term liabilities ” on our consolidated balance sheets was $2.2 billion  and $2.9 billion  of unearned 
revenue as of December 31, 2021  and 2022 . 
Additionally, we have performance o

**Check to see how much time CacheBackedEmbeddings pattern saves us**

In [14]:
%%timeit -n 1 -r 1
query = "what is the revenue of AWS in 2022?"
#
embedding_vector = core_embeddings_model.embed_query(query)
docs_resp = vectorstore.similarity_search_by_vector(embedding_vector,k=5)


22.9 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


**Setup Ensemble Retriever (Hybrid Search)**

In [15]:
# Dense retriever view over the FAISS store, configured with k=5.
faiss_retriever = vectorstore.as_retriever(search_kwargs={"k": 5})

# Hybrid search: combine the BM25 and FAISS retrievers with equal weights.
ensemble_retriever = EnsembleRetriever(
    retrievers=[bm25_retriever, faiss_retriever],
    weights=[0.5, 0.5],
)

**Download the quantized GPTQ Model**

In [16]:
# Load the GPTQ-quantized Mistral-7B-Instruct checkpoint and its tokenizer.
# AutoModelForCausalLM and AutoTokenizer are already imported in the notebook's
# import cell, so they are not re-imported here.
model_name_or_path = "TheBloke/Mistral-7B-Instruct-v0.1-GPTQ"

# To use a different branch, change revision
# For example: revision="gptq-4bit-32g-actorder_True"
model = AutoModelForCausalLM.from_pretrained(
    model_name_or_path,
    device_map="auto",
    trust_remote_code=False,
    revision="gptq-8bit-32g-actorder_True",
)

# NOTE(review): no `revision` is passed here, so the tokenizer is resolved from
# the repository's default branch rather than the branch used for the weights.
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)

config.json:   0%|          | 0.00/962 [00:00<?, ?B/s]



model.safetensors:   0%|          | 0.00/8.17G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.46k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/72.0 [00:00<?, ?B/s]

**Create Pipeline**

In [17]:
# Text-generation pipeline around the loaded model and tokenizer.
pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    # Generation settings: sampling is enabled with a low temperature.
    do_sample=True,
    temperature=0.1,
    top_k=40,
    top_p=0.95,
    repetition_penalty=1.1,
    max_new_tokens=512,
)

**Initialize LLM using a quantized GPTQ Model**

In [18]:
# Wrap the transformers pipeline so LangChain chains can use it as their LLM.
# HuggingFacePipeline is already imported in the notebook's import cell, so it
# is not re-imported here.
llm = HuggingFacePipeline(pipeline=pipe)

**Setup Caching**

In [19]:
# Install an in-memory cache object as LangChain's module-level LLM cache.
langchain.llm_cache = InMemoryCache()

**Formulate the Prompt Template**

In [20]:
# Prompt text for the QA chain; {context} and {question} are the two
# placeholders filled in by the template.
PROMPT_TEMPLATE = """
You are my financial advisor. You are great at providing tips on investments, savings, and on financial markets with your knowledge in finances.
With the information being provided about Amazon's revenue dataset, try to answer the question.
If you can't answer the question based on the information, either say you can't find an answer or unable to find an answer.
So try to understand in depth about the context and answer only based on the information provided in the Amazon revenue dataset. Don't generate irrelevant answers.

Context: {context}
Question: {question}
Do provide only helpful answers

Helpful answer:
"""

# Names of the placeholders used in PROMPT_TEMPLATE.
input_variables = ["context", "question"]

custom_prompt = PromptTemplate(
    template=PROMPT_TEMPLATE,
    input_variables=input_variables,
)

**Setup Retrieval chain — without Hybrid Search**

In [26]:
# RetrievalQA chain that retrieves from the FAISS vector store only (k=5) and
# stuffs the retrieved chunks into the custom prompt.
# NOTE(review): the hybrid-search cell further down assigns the same name
# `qa_with_sources_chain`, so whichever of the two cells ran last decides which
# chain a query cell uses. This cell's saved execution count (26) is higher
# than the cells after it, i.e. it was re-run out of order.
handler = StdOutCallbackHandler()

qa_with_sources_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=vectorstore.as_retriever(search_kwargs={"k": 5}),
    chain_type="stuff",
    chain_type_kwargs={"prompt": custom_prompt},
    callbacks=[handler],
    verbose=True,
    return_source_documents=True,
)

**Process user query1**

In [23]:
%%time
query = "Give me an overview of aws revenue in 2022?"
response = qa_with_sources_chain({"query":query})
print(f"Response generated : \n {response['result']}")
print(f"Source Documents : \n {response['source_documents']}")



[1m> Entering new RetrievalQA chain...[0m





[1m> Finished chain.[0m
Response generated : 
 
The AWS segment of Amazon reported $84.9 billion in revenue in 2022, up 29% year-over-year from $62.0 billion in 2021. This represents an annualized revenue run rate of $85.0 billion.
Source Documents : 
 [Document(page_content='billion , and $58.3 billion  in 2021  and 2022 , which primarily reflect investments in technology infrastructure (the majority of \nwhich is to support AWS business growth) and in additional capacity to support our fulfillment network . We expect to continue \nthese investments over time, with increased spending on technology infrastructure . We made cash payments, net of acquired', metadata={'source': '/content/sample_data/NASDAQ_AMZN_2022.pdf', 'page': 32}), Document(page_content='Included in “ Other long-term liabilities ” on our consolidated balance sheets was $2.2 billion  and $2.9 billion  of unearned \nrevenue as of December 31, 2021  and 2022 . \nAdditionally, we have performance obligations, primarily

**Setup Retrieval chain — with Hybrid Search**

In [24]:
# RetrievalQA chain whose retriever is the BM25 + FAISS ensemble.
# NOTE: this rebinds `qa_with_sources_chain`, replacing any chain previously
# stored under that name.
handler = StdOutCallbackHandler()

qa_with_sources_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=ensemble_retriever,
    chain_type="stuff",
    chain_type_kwargs={"prompt": custom_prompt},
    callbacks=[handler],
    return_source_documents=True,
)

**Process user query**

In [25]:
%%time
query = "Give me an overview of aws revenue in 2022?"
response = qa_with_sources_chain({"query":query})
print(f"Response generated : \n {response['result']}")
print(f"Source Documents : \n {response['source_documents']}")




[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m
Response generated : 
 
The AWS segment of Amazon reported $85 billion in annualized revenue run rate in 2022. This represents a 29% year-over-year growth from the previous year's revenue base of $62 billion.
Source Documents : 
 [Document(page_content='memberships.  Our total unearned revenue as of December 31, 2021  was $14.0 billion , of which $11.3 billion  was recognized as \nrevenue during the year ended December 31, 2022  and our total unearned revenue as of December 31, 2022  was $16.1 billion . \nIncluded in “ Other long-term liabilities ” on our consolidated balance sheets was $2.2 billion  and $2.9 billion  of unearned \nrevenue as of December 31, 2021  and 2022 .', metadata={'source': '/content/sample_data/NASDAQ_AMZN_2022.pdf', 'page': 59}), Document(page_content='billion , and $58.3 billion  in 2021  and 2022 , which primarily reflect investments in technology infrastructure (the majority of \nwhich 