In [5]:
from dotenv import load_dotenv
load_dotenv()

import os
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Validate the OpenAI API key up front. `load_dotenv()` above has already put
# any value from .env into the process environment, so nothing needs to be
# written back; assigning `os.getenv(...)` into `os.environ` is a no-op when the
# key exists and raises an opaque TypeError (None is not a str) when it does not.
if not os.getenv('OPENAI_API_KEY'):
    raise RuntimeError(
        "OPENAI_API_KEY is not set; add it to your .env file or export it "
        "before running this notebook."
    )

# Load the PDF and split it into overlapping chunks
# (chunk_size / chunk_overlap are measured by the splitter's length function).
loader = PyPDFLoader('pdf_files/attention.pdf')
documents = loader.load()
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(documents)

# Create the embedding function
openai_embedding_function = OpenAIEmbeddings()

# Embed the chunks and store them in a Chroma collection persisted on disk.
# NOTE(review): re-running this cell against an existing persist_directory
# presumably appends the same chunks again - confirm, and clear the directory
# (or load the existing store) if duplicates matter.
openai_db = Chroma.from_documents(splits, openai_embedding_function, persist_directory="choma_embeddings/openai_embeddings")


In [2]:
# Question used for the similarity search by the retrieval cells in this notebook.
query: str = "what is transformer"

In [3]:
# Retriever over the OpenAI-embedded store; k=1 returns only the single best match.
retriever = openai_db.as_retriever(search_kwargs={"k": 1})

# Get relevant documents. `invoke` is the supported entry point; the older
# `get_relevant_documents` is deprecated and emits a deprecation warning.
relevant_documents = retriever.invoke(query)

# Print the results
print("Relevant Documents:")
for doc in relevant_documents:
    print(doc.page_content)

  warn_deprecated(


Relevant Documents:
Attention Is All You Need
Ashish Vaswani∗
Google Brain
avaswani@google.comNoam Shazeer∗
Google Brain
noam@google.comNiki Parmar∗
Google Research
nikip@google.comJakob Uszkoreit∗
Google Research
usz@google.com
Llion Jones∗
Google Research
llion@google.comAidan N. Gomez∗†
University of Toronto
aidan@cs.toronto.eduŁukasz Kaiser∗
Google Brain
lukaszkaiser@google.com
Illia Polosukhin∗‡
illia.polosukhin@gmail.com
Abstract
The dominant sequence transduction models are based on complex recurrent or
convolutional neural networks that include an encoder and a decoder. The best
performing models also connect the encoder and decoder through an attention
mechanism. We propose a new simple network architecture, the Transformer,
based solely on attention mechanisms, dispensing with recurrence and convolutions
entirely. Experiments on two machine translation tasks show these models to
be superior in quality while being more parallelizable and requiring signiﬁcantly


In [6]:
import os
from langchain_cohere import CohereEmbeddings
from langchain_chroma import Chroma
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import CharacterTextSplitter

# Validate the Cohere API key up front. Assigning `os.getenv(...)` back into
# `os.environ` is a no-op when the key exists and raises an opaque TypeError
# (None is not a str) when it does not, so check explicitly instead.
if not os.getenv('COHERE_API_KEY'):
    raise RuntimeError(
        "COHERE_API_KEY is not set; add it to your .env file or export it "
        "before running this notebook."
    )

# Create the embedding function
cohere_embedding_function = CohereEmbeddings()

# Embed the chunks in `splits` and store them in a Chroma collection persisted
# on disk.
# NOTE(review): re-running this cell against an existing persist_directory
# presumably appends the same chunks again - confirm.
cohere_db = Chroma.from_documents(splits, cohere_embedding_function, persist_directory="choma_embeddings/cohere_embeddings")


In [7]:
# Retriever over the Cohere-embedded store; k=1 returns only the single best match.
retriever = cohere_db.as_retriever(search_kwargs={"k": 1})

# Get relevant documents. `invoke` is the supported entry point; the older
# `get_relevant_documents` is deprecated.
relevant_documents = retriever.invoke(query)

# Print the results
print("Relevant Documents:")
for doc in relevant_documents:
    print(doc.page_content)

Relevant Documents:
multi-headed self-attention.
For translation tasks, the Transformer can be trained signiﬁcantly faster than architectures based
on recurrent or convolutional layers. On both WMT 2014 English-to-German and WMT 2014
English-to-French translation tasks, we achieve a new state of the art. In the former task our best
model outperforms even all previously reported ensembles.
We are excited about the future of attention-based models and plan to apply them to other tasks. We
plan to extend the Transformer to problems involving input and output modalities other than text and
to investigate local, restricted attention mechanisms to efﬁciently handle large inputs and outputs
such as images, audio and video. Making generation less sequential is another research goals of ours.
The code we used to train and evaluate our models is available at https://github.com/
tensorflow/tensor2tensor .
Acknowledgements We are grateful to Nal Kalchbrenner and Stephan Gouws for their fruitful


In [8]:
import bs4
from langchain_chroma import Chroma
from langchain_community.document_loaders import TextLoader
from langchain_community.embeddings.sentence_transformer import (
    SentenceTransformerEmbeddings,
)
from langchain_text_splitters import CharacterTextSplitter

# Re-split the already-loaded `documents` into non-overlapping chunks.
# Note: this rebinds `text_splitter` to a CharacterTextSplitter.
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
docs = text_splitter.split_documents(documents)

# Open-source sentence-transformers embedding model (all-mpnet-base-v2).
embedding_function = SentenceTransformerEmbeddings(model_name="all-mpnet-base-v2")

# Embed the chunks into a Chroma store; no persist_directory is passed here.
mpnetdb = Chroma.from_documents(
    documents=docs,
    embedding=embedding_function,
)




In [9]:
# Retriever over the all-mpnet-base-v2 store; k=1 returns only the single best match.
retriever = mpnetdb.as_retriever(search_kwargs={"k": 1})

# Get relevant documents. `invoke` is the supported entry point; the older
# `get_relevant_documents` is deprecated.
relevant_documents = retriever.invoke(query)

# Print the results
print("Relevant Documents:")
for doc in relevant_documents:
    print(doc.page_content)

Relevant Documents:
Figure 1: The Transformer - model architecture.
wise fully connected feed-forward network. We employ a residual connection [ 10] around each of
the two sub-layers, followed by layer normalization [ 1]. That is, the output of each sub-layer is
LayerNorm( x+ Sublayer( x)), where Sublayer(x)is the function implemented by the sub-layer
itself. To facilitate these residual connections, all sub-layers in the model, as well as the embedding
layers, produce outputs of dimension dmodel = 512 .
Decoder: The decoder is also composed of a stack of N= 6identical layers. In addition to the two
sub-layers in each encoder layer, the decoder inserts a third sub-layer, which performs multi-head
attention over the output of the encoder stack. Similar to the encoder, we employ residual connections
around each of the sub-layers, followed by layer normalization. We also modify the self-attention
sub-layer in the decoder stack to prevent positions from attending to subsequent positions. T

In [11]:

from langchain_chroma import Chroma
from langchain_community.document_loaders import TextLoader
from langchain_community.embeddings.sentence_transformer import (
    SentenceTransformerEmbeddings,
)
from langchain_text_splitters import CharacterTextSplitter

# Open-source sentence-transformers embedding model (all-MiniLM-L6-v2).
# Note: this rebinds `embedding_function`.
embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")

# Embed the existing `docs` chunks into a Chroma store persisted on disk.
minilmm = Chroma.from_documents(
    documents=docs,
    embedding=embedding_function,
    persist_directory="choma_embeddings/minilm_embeddings",
)




In [12]:
# Retriever over the all-MiniLM-L6-v2 store; k=1 returns only the single best match.
retriever = minilmm.as_retriever(search_kwargs={"k": 1})

# Get relevant documents. `invoke` is the supported entry point; the older
# `get_relevant_documents` is deprecated.
relevant_documents = retriever.invoke(query)

# Print the results
print("Relevant Documents:")
for doc in relevant_documents:
    print(doc.page_content)

Relevant Documents:
Figure 1: The Transformer - model architecture.
wise fully connected feed-forward network. We employ a residual connection [ 10] around each of
the two sub-layers, followed by layer normalization [ 1]. That is, the output of each sub-layer is
LayerNorm( x+ Sublayer( x)), where Sublayer(x)is the function implemented by the sub-layer
itself. To facilitate these residual connections, all sub-layers in the model, as well as the embedding
layers, produce outputs of dimension dmodel = 512 .
Decoder: The decoder is also composed of a stack of N= 6identical layers. In addition to the two
sub-layers in each encoder layer, the decoder inserts a third sub-layer, which performs multi-head
attention over the output of the encoder stack. Similar to the encoder, we employ residual connections
around each of the sub-layers, followed by layer normalization. We also modify the self-attention
sub-layer in the decoder stack to prevent positions from attending to subsequent positions. T