In [5]:
import os
from dotenv import load_dotenv
load_dotenv()
os.environ["GOOGLE_API_KEY"] = os.getenv("GEMINI_API_KEY")

In [1]:
# Data Ingestion using PDFLoader
from langchain_community.document_loaders import PyPDFLoader
loader=PyPDFLoader('attention.pdf')
docs=loader.load()
docs

[Document(metadata={'source': 'attention.pdf', 'page': 0, 'page_label': '1'}, page_content='Provided proper attribution is provided, Google hereby grants permission to\nreproduce the tables and figures in this paper solely for use in journalistic or\nscholarly works.\nAttention Is All You Need\nAshish Vaswani∗\nGoogle Brain\navaswani@google.com\nNoam Shazeer∗\nGoogle Brain\nnoam@google.com\nNiki Parmar∗\nGoogle Research\nnikip@google.com\nJakob Uszkoreit∗\nGoogle Research\nusz@google.com\nLlion Jones∗\nGoogle Research\nllion@google.com\nAidan N. Gomez∗ †\nUniversity of Toronto\naidan@cs.toronto.edu\nŁukasz Kaiser∗\nGoogle Brain\nlukaszkaiser@google.com\nIllia Polosukhin∗ ‡\nillia.polosukhin@gmail.com\nAbstract\nThe dominant sequence transduction models are based on complex recurrent or\nconvolutional neural networks that include an encoder and a decoder. The best\nperforming models also connect the encoder and decoder through an attention\nmechanism. We propose a new simple network arc

In [2]:
#Converting text docs into chunks
from langchain.text_splitter import RecursiveCharacterTextSplitter
text_splitter=RecursiveCharacterTextSplitter(chunk_size=1000,chunk_overlap=200)
documents=text_splitter.split_documents(docs)
documents, len(documents)

([Document(metadata={'source': 'attention.pdf', 'page': 0, 'page_label': '1'}, page_content='Provided proper attribution is provided, Google hereby grants permission to\nreproduce the tables and figures in this paper solely for use in journalistic or\nscholarly works.\nAttention Is All You Need\nAshish Vaswani∗\nGoogle Brain\navaswani@google.com\nNoam Shazeer∗\nGoogle Brain\nnoam@google.com\nNiki Parmar∗\nGoogle Research\nnikip@google.com\nJakob Uszkoreit∗\nGoogle Research\nusz@google.com\nLlion Jones∗\nGoogle Research\nllion@google.com\nAidan N. Gomez∗ †\nUniversity of Toronto\naidan@cs.toronto.edu\nŁukasz Kaiser∗\nGoogle Brain\nlukaszkaiser@google.com\nIllia Polosukhin∗ ‡\nillia.polosukhin@gmail.com\nAbstract\nThe dominant sequence transduction models are based on complex recurrent or\nconvolutional neural networks that include an encoder and a decoder. The best\nperforming models also connect the encoder and decoder through an attention\nmechanism. We propose a new simple network ar

In [6]:
from pinecone import Pinecone, ServerlessSpec

pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
index_name = "langchainvectors"
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        dimension=768,
        metric="cosine", 
        spec=ServerlessSpec(
            cloud="aws",
            region="us-east-1"
        ) 
    )

In [7]:
index = pc.Index(index_name)
index

<pinecone.data.index.Index at 0x1c4b692de80>

In [8]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings

embed_model = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
text_chunks = [doc.page_content for doc in documents]
embeddings = embed_model.embed_documents(text_chunks)
len(embeddings), embeddings

(52,
 [[-0.014783564954996109,
   -0.010832996107637882,
   -0.05402161180973053,
   0.03313504159450531,
   0.04836973920464516,
   0.023917362093925476,
   0.020124075934290886,
   -0.025185899809002876,
   -0.011640895158052444,
   -0.01757628470659256,
   -0.008158260956406593,
   0.027458935976028442,
   -0.025302782654762268,
   -0.03439885005354881,
   0.01890101097524166,
   -0.0075389365665614605,
   0.013638617470860481,
   0.013587983325123787,
   -0.0025064884684979916,
   -0.03545078635215759,
   -0.013095646165311337,
   -0.024831872433423996,
   0.03743739798665047,
   -0.0077382479794323444,
   -0.05231320858001709,
   -0.011458029970526695,
   0.021971793845295906,
   -0.05617021769285202,
   -0.05826670303940773,
   0.03535720333456993,
   -0.011493894271552563,
   -0.009352674707770348,
   -0.035181883722543716,
   0.03590897470712662,
   -0.004555846564471722,
   -0.01777585968375206,
   -0.003188343718647957,
   -0.016020115464925766,
   0.004911986645311117,
   0.

In [9]:
vectors = [
    (f"pdf-chunk-{i}", embeddings[i], {"text": text_chunks[i]}) 
    for i in range(len(text_chunks))
]
index.upsert(vectors)

{'upserted_count': 52}

In [10]:
query = "What is Attention?"
query_embedding = embed_model.embed_query(query)
results = index.query(vector=query_embedding, top_k=3, include_metadata=True)
results

{'matches': [{'id': 'pdf-chunk-12',
              'metadata': {'text': '3.2 Attention\n'
                                   'An attention function can be described as '
                                   'mapping a query and a set of key-value '
                                   'pairs to an output,\n'
                                   'where the query, keys, values, and output '
                                   'are all vectors. The output is computed as '
                                   'a weighted sum\n'
                                   '3'},
              'score': 0.745946646,
              'values': []},
             {'id': 'pdf-chunk-7',
              'metadata': {'text': 'in the distance between positions, '
                                   'linearly for ConvS2S and logarithmically '
                                   'for ByteNet. This makes\n'
                                   'it more difficult to learn dependencies '
                                   'between di