In [4]:
!pip install --upgrade --quiet langchain langchain-openai chromadb pypdf

In [20]:
import os
from google.colab import userdata
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAI, OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
from langchain.chains.question_answering import load_qa_chain
from langchain.chains import RetrievalQA, ConversationalRetrievalChain

os.environ['OPENAI_API_KEY'] = userdata.get('OPENAI_API_KEY')

In [6]:
loader = PyPDFLoader("/content/attention.pdf")
pages = loader.load()

In [7]:
text_splitter = RecursiveCharacterTextSplitter(
    # Set a really small chunk size, just to show.
    chunk_size=1000,
    chunk_overlap=800,
    length_function=len,
    is_separator_regex=False,
)

docs = text_splitter.split_documents(pages)

In [8]:
embeddings = OpenAIEmbeddings()

In [9]:
# save to disk
persist_directory="chroma_db"

index = Chroma.from_documents(docs, embeddings, persist_directory=persist_directory)

docs = index.similarity_search('What is attention mechanism')
print(docs)

[Document(page_content='described in section 3.2.\nSelf-attention, sometimes called intra-attention is an attention mechanism relating different positions\nof a single sequence in order to compute a representation of the sequence. Self-attention has been\nused successfully in a variety of tasks including reading comprehension, abstractive summarization,\ntextual entailment and learning task-independent sentence representations [4, 27, 28, 22].\nEnd-to-end memory networks are based on a recurrent attention mechanism instead of sequence-\naligned recurrence and have been shown to perform well on simple-language question answering and\nlanguage modeling tasks [34].\nTo the best of our knowledge, however, the Transformer is the first transduction model relying\nentirely on self-attention to compute representations of its input and output without using sequence-\naligned RNNs or convolution. In the following sections, we will describe the Transformer, motivate', metadata={'page': 1, 'source

In [10]:
# load from disk
persist_directory="chroma_db"

index = Chroma(persist_directory=persist_directory, embedding_function=embeddings)

docs = index.similarity_search('What is attention mechanism')
print(docs[0].page_content)

described in section 3.2.
Self-attention, sometimes called intra-attention is an attention mechanism relating different positions
of a single sequence in order to compute a representation of the sequence. Self-attention has been
used successfully in a variety of tasks including reading comprehension, abstractive summarization,
textual entailment and learning task-independent sentence representations [4, 27, 28, 22].
End-to-end memory networks are based on a recurrent attention mechanism instead of sequence-
aligned recurrence and have been shown to perform well on simple-language question answering and
language modeling tasks [34].
To the best of our knowledge, however, the Transformer is the first transduction model relying
entirely on self-attention to compute representations of its input and output without using sequence-
aligned RNNs or convolution. In the following sections, we will describe the Transformer, motivate


In [11]:
docs = index.similarity_search_with_score('What is attention mechanism')
docs

[(Document(page_content='described in section 3.2.\nSelf-attention, sometimes called intra-attention is an attention mechanism relating different positions\nof a single sequence in order to compute a representation of the sequence. Self-attention has been\nused successfully in a variety of tasks including reading comprehension, abstractive summarization,\ntextual entailment and learning task-independent sentence representations [4, 27, 28, 22].\nEnd-to-end memory networks are based on a recurrent attention mechanism instead of sequence-\naligned recurrence and have been shown to perform well on simple-language question answering and\nlanguage modeling tasks [34].\nTo the best of our knowledge, however, the Transformer is the first transduction model relying\nentirely on self-attention to compute representations of its input and output without using sequence-\naligned RNNs or convolution. In the following sections, we will describe the Transformer, motivate', metadata={'page': 1, 'sourc

# Retriever

In [12]:
retriever = index.as_retriever(search_type="mmr")

# Load QA Chain

In [13]:
llm = OpenAI(temperature=0)

chain = load_qa_chain(llm,
                      chain_type='stuff')

In [14]:
query = 'What is attention mechainsm?'
similarity_docs = index.similarity_search(query)
response = chain.run(question = query, input_documents = similarity_docs)
response

  warn_deprecated(


' Attention mechanism is a technique used in sequence modeling and transduction models to model dependencies between different positions of a sequence without considering their distance. It allows for parallelization and has been used successfully in various tasks such as reading comprehension, summarization, and language modeling.'

# Retrieval QA

In [15]:
query = 'What is attention mechanism ?'
qa_chain = RetrievalQA.from_chain_type(
    llm = llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True
)

In [16]:
result = qa_chain.invoke({'query':query})

In [17]:
result['result']

' Attention mechanism is a computational method used in natural language processing tasks to relate different positions of a single sequence in order to compute a representation of the sequence. It has been successfully used in tasks such as reading comprehension, summarization, and language modeling. The Transformer model is the first to rely entirely on self-attention to compute representations without using recurrent neural networks or convolution.'

## Run contineously

In [19]:
while True:
  query = input('Question: ')

  if query == 'q' or query == 'quit' or query == 'exit':
    break
  elif query == ' ':
    continue
  else:
    result = qa_chain.invoke({'query':query})
    print(result['result'])

Question: What is attention mechanism
 Attention mechanism is a computational method used in natural language processing tasks to relate different positions of a sequence in order to compute a representation of the sequence. It has been successfully used in tasks such as reading comprehension, summarization, and language modeling. The Transformer model is the first to rely entirely on self-attention to compute representations without using recurrent neural networks or convolution.
Question: What is the main area of focus in attention paper
 The main area of focus in the attention paper is the Transformer, a sequence transduction model based entirely on attention. The paper discusses the use of attention in various tasks, such as translation, and compares it to other architectures, such as recurrent and convolutional layers. The paper also introduces a specific type of attention called "Scaled Dot-Product Attention" and compares it to other commonly used attention functions.
Question: q

# Adding Memory

In [21]:
convo_qa_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    return_source_documents=True
)

In [22]:
# Todo: 6. Run chain
chat_history = []

while True:
    query = input('Question: ')

    if query == 'q' or query == 'quit' or query == 'exit':
      break
    elif query == ' ':
      continue
    else:
        result = convo_qa_chain.invoke({
            'question' : query,
            'chat_history' : chat_history
        })

        print(result)
        print("# ---------------------- #")
        print('History: ',result['chat_history'])
        print("# ---------------------- #")
        print('Answer: ',result['answer'])
        # Todo: We got to manually append chat history
        chat_history.append((query, result['answer']))

Question: What is attention mechanism
{'question': 'What is attention mechanism', 'chat_history': [], 'answer': ' Attention mechanism is a computational method used in natural language processing tasks to relate different positions of a sequence in order to compute a representation of the sequence. It has been successfully used in tasks such as reading comprehension, summarization, and language modeling. The Transformer model is the first to rely entirely on self-attention to compute representations without using recurrent neural networks or convolution.', 'source_documents': [Document(page_content='described in section 3.2.\nSelf-attention, sometimes called intra-attention is an attention mechanism relating different positions\nof a single sequence in order to compute a representation of the sequence. Self-attention has been\nused successfully in a variety of tasks including reading comprehension, abstractive summarization,\ntextual entailment and learning task-independent sentence re