# Initialization

## Load libraries

In [36]:
import os
from pprint import pprint

from dotenv import load_dotenv

from pinecone import Pinecone

from langchain.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai.embeddings import OpenAIEmbeddings
from langchain_openai.llms import OpenAI
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.vectorstores import Pinecone as VSPinecome

## Set constants

In [None]:
INFORMATION_PATH = "pdfs"
INDEX_NAME = "pinecone-test"

## Load environment variables

In [14]:
_ = load_dotenv()

# Set up Pinecone connection

In [None]:
pc = Pinecone(
    api_key=os.getenv("PINECONE_API_KEY"),
)

# Initialize OpenAI LLM connection

In [None]:
llm = OpenAI(openai_api_key=os.getenv("OPENAI_API_KEY"))

# Load documents

In [None]:
loader = PyPDFDirectoryLoader(INFORMATION_PATH)
data = loader.load()

In [None]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=200)
text_chunks = text_splitter.split_documents(data)

# Set up OpenAI embedding

In [19]:
embedding = OpenAIEmbeddings(openai_api_key=os.getenv("OPENAI_API_KEY"))

In [25]:
doc_search = VSPinecome.from_texts(
    [chunk.page_content for chunk in text_chunks],
    embedding,
    index_name=INDEX_NAME
)

# Test query

In [26]:
query="What is the key vector?"
doc_search.similarity_search(query)

[Document(metadata={}, page_content='More succinctly, we can write it as\nwhere the matrix  is the matrix whose rows are . Note that the querying vector, , is not necessarily the same asthe key-value vector . In fact, it is theoretically possible for query, key, and value vectors to all be different, though that israrely done in practice.'),
 Document(metadata={}, page_content='This attention scheme has been compared to the Query-Key analogy of relational databases. That comparison suggests an asymmetric role for the Query and Keyvectors, where one item of interest (the Query vector "that") is matched against all possible items (the Key vectors of each word in the sentence). However,Attention\'s parallel calculations matches all words of a sentence with itself; therefore the roles of these vectors are symmetric. Possibly because the simplisticdatabase analogy is'),
 Document(metadata={}, page_content='This is then used to compute the context vector:\nwhere  are the value vectors, linea

# Use the Vector DB for Retrieval-Augmented Generation (RAG)

## Set up retriever

In [33]:
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="refine",
    retriever=doc_search.as_retriever()
)

In [37]:
answer = qa.invoke(query)

pprint(answer.get("result"))

('\n'
 '\n'
 'The key vector is a fundamental component of the attention mechanism in deep '
 'learning. It is used to calculate the similarity between the query and the '
 'key vectors, which in turn determines the importance of different parts of '
 'the input data. This process is similar to the Query-Key analogy of '
 'relational databases, where the query vector is matched against all possible '
 'key vectors. However, in attention, the roles of the query and key vectors '
 'are symmetric as all words of a sentence are matched with each other. This '
 'allows for more comprehensive and parallel calculations, improving the '
 "model's performance. Therefore, the key vector serves as an essential "
 'component in the attention mechanism, enabling the model to effectively '
 'process and understand complex data. Additionally, the key vector is used to '
 'compute the context vector, which is formed by linearly transforming the '
 'value vectors. This allows the model to have more fle