In [1]:
from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import  RecursiveCharacterTextSplitter
from langchain.llms import cohere
from langchain.chains import RetrievalQA
from langchain.document_loaders import TextLoader
from langchain.document_loaders import DirectoryLoader
from langchain_community.document_loaders import PyPDFLoader
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.llms import Cohere
from langchain import HuggingFacePipeline
from langchain_groq import ChatGroq


In [2]:
from dotenv import load_dotenv
import os

# Load variables from a .env file into the environment, then read the two
# provider API keys (each is None when the variable is not set).
load_dotenv()

groq_api_key = os.environ.get("GROQ_API_KEY")
cohere_api_key = os.environ.get("COHERE_API_KEY")

In [3]:
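# Optional one-time environment setup. Uncomment a line to run it; `%pip`
# installs into the environment of the running kernel.
# %pip install ipywidgets
# %pip install transformers
# %pip install --upgrade --force-reinstall transformers tokenizers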

In [4]:
from tqdm.notebook import tqdm

In [5]:
# Location of the PDF to index. Set the PDF_PATH environment variable (for
# example in the .env file loaded earlier) to run the notebook on another
# machine; the original local path is kept as the fallback so existing setups
# behave exactly as before.
pdf_path = os.getenv(
    "PDF_PATH",
    "C:\\Users\\SAUNAK MITRA\\Downloads\\NIPS-2017-attention-is-all-you-need-Paper.pdf",
)

# Parse the PDF into a list of documents; document[0] is inspected in the next cell.
loader = PyPDFLoader(pdf_path)
document = loader.load()

In [6]:
# Sanity check: show the text extracted for the first loaded document.
# print() is used so newlines in the text render as line breaks.
print(document[0].page_content)

Attention Is All You Need
Ashish Vaswani∗
Google Brain
avaswani@google.com
Noam Shazeer∗
Google Brain
noam@google.com
Niki Parmar∗
Google Research
nikip@google.com
Jakob Uszkoreit∗
Google Research
usz@google.com
Llion Jones∗
Google Research
llion@google.com
Aidan N. Gomez∗†
University of Toronto
aidan@cs.toronto.edu
Łukasz Kaiser ∗
Google Brain
lukaszkaiser@google.com
Illia Polosukhin∗‡
illia.polosukhin@gmail.com
Abstract
The dominant sequence transduction models are based on complex recurrent or
convolutional neural networks that include an encoder and a decoder. The best
performing models also connect the encoder and decoder through an attention
mechanism. We propose a new simple network architecture, the Transformer,
based solely on attention mechanisms, dispensing with recurrence and convolutions
entirely. Experiments on two machine translation tasks show these models to
be superior in quality while being more parallelizable and requiring signiﬁcantly
less time to train. Our model 

In [7]:
# Splitter configured with a chunk size of 500 and an overlap of 50 between
# consecutive chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=50,
)

In [8]:
# Split the loaded documents into chunks for embedding.
text_chunks = text_splitter.split_documents(document)

# Show the first chunk to check where the splitter cut the text.
print(text_chunks[0].page_content)

Attention Is All You Need
Ashish Vaswani∗
Google Brain
avaswani@google.com
Noam Shazeer∗
Google Brain
noam@google.com
Niki Parmar∗
Google Research
nikip@google.com
Jakob Uszkoreit∗
Google Research
usz@google.com
Llion Jones∗
Google Research
llion@google.com
Aidan N. Gomez∗†
University of Toronto
aidan@cs.toronto.edu
Łukasz Kaiser ∗
Google Brain
lukaszkaiser@google.com
Illia Polosukhin∗‡
illia.polosukhin@gmail.com
Abstract


In [9]:
# Name the embedding model explicitly instead of relying on the library
# default, so the vectors in the index stay comparable if that default ever
# changes between library versions.
# NOTE(review): this id is believed to match the default HuggingFaceEmbeddings
# model, so results should be unchanged — confirm against the installed version.
embedding = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2",
)

In [10]:
# Embed every chunk and build the FAISS index from the resulting vectors.
vectorstore = FAISS.from_documents(
    documents=text_chunks,
    embedding=embedding,
)

In [11]:
# Expose the vector store as a retriever so it can be used as a step in the
# chains below. No search options are passed, so the library defaults apply.
retriever=vectorstore.as_retriever()

In [12]:
from langchain.prompts import ChatPromptTemplate

In [13]:
# Prompt text for the RAG chains. It has two placeholders, {question} and
# {context}, which the chains fill with the user's question and the retrieved
# context respectively.
template="""You are an assistant for question_answering tasks.
Use the following pieces of retrieved context to answer the question. 
If you don't know the answer ,just say you don't know.
Use ten sentences maximum and keep the answer concise. 
Question:{question}
context: {context}
Answer:
"""

In [14]:
# Turn the raw template string into a chat prompt object usable in a chain.
prompt = ChatPromptTemplate.from_template(template=template)

In [15]:
from langchain.schema.runnable import  RunnablePassthrough
from langchain.schema.output_parser import  StrOutputParser

In [17]:
# First LLM: Cohere, authenticated with the key read from the environment.
llm = Cohere(
    cohere_api_key=cohere_api_key,
)

In [18]:
# Retrieval-augmented chain backed by the Cohere LLM. The incoming question is
# sent to the retriever and also passed through unchanged; both values fill the
# prompt, the LLM answers, and the output is parsed to a plain string.
rag_chain = (
    {
        # Join the text of the retrieved chunks so the prompt receives readable
        # context instead of the repr of a list of Document objects.
        "context": retriever
        | (lambda docs: "\n\n".join(doc.page_content for doc in docs)),
        "question": RunnablePassthrough(),
    }
    | prompt
    | llm
    | StrOutputParser()
)

In [19]:
# Ask the Cohere-backed chain a question about the indexed paper; the returned
# string is displayed as the cell output.
rag_chain.invoke("what is the heading of this paper?")

' The title of the paper is "Attention Is All You Need". \nThis paper demonstrates the use of attention mechanisms in a variety of tasks including reading comprehension, abstractive summarization, textual entailment, and learning task-independent sentence representations. \nIt introduces self-attention, a novel attention mechanism relating different positions of a single sequence in order to compute a representation of the sequence. \nIt also introduces the concept of multi-head attention, which allows the model to jointly attend to information from different representation subspaces. \nThe authors provide a thorough analysis of the importance and efficacy of attention mechanisms in these tasks. \nThis paper has been published at the Neural Information Processing Systems conference in 2017. '

In [21]:
# Second LLM: a Groq-hosted chat model with temperature set to 0.
# NOTE(review): confirm the model id below is still offered by Groq.
llm1 = ChatGroq(
    model_name="mixtral-8x7b-32768",
    groq_api_key=groq_api_key,
    temperature=0,
)


In [22]:
# Retrieval-augmented chain backed by the Groq chat model. The incoming
# question is sent to the retriever and also passed through unchanged; both
# values fill the prompt, the model answers, and the output is parsed to a
# plain string.
rag_chain1 = (
    {
        # Join the text of the retrieved chunks so the prompt receives readable
        # context instead of the repr of a list of Document objects.
        "context": retriever
        | (lambda docs: "\n\n".join(doc.page_content for doc in docs)),
        "question": RunnablePassthrough(),
    }
    | prompt
    | llm1
    | StrOutputParser()
)

In [23]:
# Ask the Groq-backed chain a question about the indexed paper.
# NOTE(review): the context is whatever the retriever returns for the question
# text, which presumably is not selected by page number — verify this answer
# against the PDF before relying on it.
rag_chain1.invoke("What is the 5th page about?")

'The 5th page of the document discusses the training regime for the models used in the study. It mentions that self-attention layers have lower computational complexity than recurrent layers when the sequence length is smaller than the representation dimensionality. This is often the case in sentence representations used by state-of-the-art models in machine translations. The page also discusses the use of self-attention for tasks involving very long sequences, suggesting a restriction to consider only a neighborhood of a certain size.'