In [1]:
from langchain_community.document_loaders import PyPDFLoader, PyPDFDirectoryLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA

In [2]:
from dotenv import load_dotenv
load_dotenv()

True

In [3]:
loader = PyPDFDirectoryLoader("./data")
documents = loader.load()
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
docs = text_splitter.split_documents(documents)
len(docs)

52

In [4]:
embeddings = HuggingFaceBgeEmbeddings(model_name="BAAI/bge-large-en-v1.5", model_kwargs={"device":"cpu"}, encode_kwargs={"normalize_embeddings": True})



In [5]:
import numpy as np
print(np.array(embeddings.embed_query(docs[0].page_content)))
print(np.array(embeddings.embed_query(docs[0].page_content)).shape)


[ 0.01504208  0.01111973 -0.0143903  ...  0.02992566 -0.01091165
 -0.03430151]
(1024,)


In [6]:
db = FAISS.from_documents(docs, embeddings)

In [7]:
query = "What is positional encoding?"
relevant_documents = db.similarity_search(query)

In [8]:
print(relevant_documents[0].page_content)

tokens in the sequence. To this end, we add "positional encodings" to the input embeddings at the
bottoms of the encoder and decoder stacks. The positional encodings have the same dimension dmodel
as the embeddings, so that the two can be summed. There are many choices of positional encodings,
learned and fixed [9].
In this work, we use sine and cosine functions of different frequencies:
PE(pos,2i)=sin(pos/100002i/d model)
PE(pos,2i+1)=cos(pos/100002i/d model)
where posis the position and iis the dimension. That is, each dimension of the positional encoding
corresponds to a sinusoid. The wavelengths form a geometric progression from 2πto10000 ·2π. We
chose this function because we hypothesized it would allow the model to easily learn to attend by
relative positions, since for any fixed offset k,PEpos+kcan be represented as a linear function of
PEpos.
We also experimented with using learned positional embeddings [ 9] instead, and found that the two


In [9]:
retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": 5})
retriever

VectorStoreRetriever(tags=['FAISS', 'HuggingFaceBgeEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x000002B35B3D6230>, search_kwargs={'k': 5})

In [10]:
import os
os.environ["HUGGINGFACE_API_TOKEN"] = os.getenv("HUGGINGFACE_API_TOKEN")

In [11]:
from langchain_community.llms import HuggingFaceEndpoint 

hf = HuggingFaceEndpoint(
    repo_id="mistralai/Mistral-7B-Instruct-v0.2", 
    temperature=0.4,
    max_new_tokens=2048, 
    verbose=True, 
    huggingfacehub_api_token=os.getenv("HUGGINGFACE_API_TOKEN"),
    add_to_git_credential=True)
query = "What is positional encoding?"
hf.invoke(query)

                    add_to_git_credential was transferred to model_kwargs.
                    Please make sure that add_to_git_credential is what you intended.


The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to C:\Users\FARZIN\.cache\huggingface\token
Login successful


'\n\nPositional encoding is a technique used in deep learning models to provide the model with information about the position of the input data in the sequence. It is often used in models that process sequential data, such as recurrent neural networks (RNNs) and transformers.\n\nThe idea behind positional encoding is that the model should be able to learn the relationships between different positions in the sequence, without having to learn the position information itself. This allows the model to focus on learning the underlying patterns in the data, rather than memorizing the positions of the inputs.\n\nPositional encoding is typically implemented as a fixed, learnable vector that is added to each input vector in the sequence. The vector is designed to have specific patterns that correspond to different positions in the sequence. For example, in the case of sinusoidal positional encoding, the vector has components that are sinusoidal functions of the position.\n\nPositional encoding 

In [12]:
# Hugging face models run locally thorugh hugging face pipelines
from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline

hf_pipeline = HuggingFacePipeline.from_model_id(
    model_id="mistralai/Mistral-7B-Instruct-v0.2",
    task="text-generation",
    pipeline_kwargs={"temperature": 0.4, "max_new_tokens": 2048},
)
hf_pipeline.invoke(query)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]



In [None]:
prompt_template = """
Use the following context to generate a response to the following question:
Please try to answer to the question concisely and accurately.
Context: {context}
Question: {question}
"""

In [None]:
prompt = PromptTemplate(prompt_template, input_variable =["context,", "question"])

In [None]:
retrieval_qa = RetrievalQA.from_chain_type(llm=hf_pipeline, chain_type="stuff", retriever=retriever, return_source_document=True, chain_type_kwargs={"prompt": prompt})

In [None]:
query = "Differences in the uninsured rate by state in 2022" 

In [None]:
result = RetrievalQA.invoke(query)
print(result)