# Langchain with HuggingFace and Mistral

In [141]:
# Iteract with Operating System
import os
# Array Preprocessing
import numpy as np 
# Load Data Sources
from langchain_community.document_loaders import PyPDFLoader, PyPDFDirectoryLoader
# Chunk Method
from langchain_text_splitters import RecursiveCharacterTextSplitter
# Vector Store
from langchain.vectorstores import LanceDB, FAISS
# Open Source Embeddings
from langchain_community.embeddings import HuggingFaceEmbeddings
# Prompt Template
from langchain.prompts import PromptTemplate
# Sequence of actions for question answering - deprecated, use create_retrieval_chain instead
from langchain.chains import RetrievalQA, create_retrieval_chain
# Pass list of documents to the model
from langchain.chains.combine_documents import create_stuff_documents_chain
# HuggingFace Hub
from langchain_community.llms import HuggingFaceHub, HuggingFacePipeline





## Load PDF data

In [12]:
pdf_dir_loader = PyPDFDirectoryLoader('./pdf_sources')
pdf_dir_loader



<langchain_community.document_loaders.pdf.PyPDFDirectoryLoader at 0x125d011f0>

In [14]:
# pdf_docs = pdf_dir_loader.load_and_split()
pdf_docs = pdf_dir_loader.load()

pdf_docs

[Document(metadata={'source': 'pdf_sources/pdf_data_source1.pdf', 'page': 0}, page_content='1\nRetrieval-Augmented Generation for Large\nLanguage Models: A Survey\nYunfan Gaoa, Yun Xiongb, Xinyu Gaob, Kangxiang Jiab, Jinliu Panb, Yuxi Bic, Yi Daia, Jiawei Suna, Meng\nWangc, and Haofen Wanga,c\naShanghai Research Institute for Intelligent Autonomous Systems, Tongji University\nbShanghai Key Laboratory of Data Science, School of Computer Science, Fudan University\ncCollege of Design and Innovation, Tongji University\nAbstract ‚ÄîLarge Language Models (LLMs) showcase impres-\nsive capabilities but encounter challenges like hallucination,\noutdated knowledge, and non-transparent, untraceable reasoning\nprocesses. Retrieval-Augmented Generation (RAG) has emerged\nas a promising solution by incorporating knowledge from external\ndatabases. This enhances the accuracy and credibility of the\ngeneration, particularly for knowledge-intensive tasks, and allows\nfor continuous knowledge updates an

## Split Text Data

In [15]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
text_splitter

<langchain_text_splitters.character.RecursiveCharacterTextSplitter at 0x12624a390>

In [16]:
# Chunk the document data
docs_split = text_splitter.split_documents(pdf_docs)
docs_split

[Document(metadata={'source': 'pdf_sources/pdf_data_source1.pdf', 'page': 0}, page_content='1\nRetrieval-Augmented Generation for Large\nLanguage Models: A Survey\nYunfan Gaoa, Yun Xiongb, Xinyu Gaob, Kangxiang Jiab, Jinliu Panb, Yuxi Bic, Yi Daia, Jiawei Suna, Meng\nWangc, and Haofen Wanga,c\naShanghai Research Institute for Intelligent Autonomous Systems, Tongji University\nbShanghai Key Laboratory of Data Science, School of Computer Science, Fudan University\ncCollege of Design and Innovation, Tongji University\nAbstract ‚ÄîLarge Language Models (LLMs) showcase impres-\nsive capabilities but encounter challenges like hallucination,\noutdated knowledge, and non-transparent, untraceable reasoning\nprocesses. Retrieval-Augmented Generation (RAG) has emerged\nas a promising solution by incorporating knowledge from external\ndatabases. This enhances the accuracy and credibility of the\ngeneration, particularly for knowledge-intensive tasks, and allows\nfor continuous knowledge updates an

In [17]:
len(docs_split)

345

## Convert text into embeddings (HuggingFace)

In [37]:
# Pre-trained embedding model from HuggingFace Model Hub
hf_embeddings = HuggingFaceEmbeddings(
    # Models are from HuggingFace Model Hub
    model_name="BAAI/bge-small-en-v1.5", # sentence-transfromers/all-MiniLM-16-v2
    # model_name = "SciPhi/Triplex",
    # model_name = "sentence-transfromers/all-MiniLM-16-v2",
    # model_name = "nisten/Biggie-SmoLlm-0.15B-Base",
    # model_name = "intervitens/mini-magnum-12b-v1.1",
    model_kwargs={"device": "cpu"},
    encode_kwargs={"normalize_embeddings": True}
)

In [38]:
hf_embeddings

HuggingFaceEmbeddings(client=SentenceTransformer(
  (0): Transformer({'max_seq_length': 512, 'do_lower_case': True}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
), model_name='BAAI/bge-small-en-v1.5', cache_folder=None, model_kwargs={'device': 'cpu'}, encode_kwargs={'normalize_embeddings': True}, multi_process=False, show_progress=False)

In [46]:
# Example of extracting the first element in final documents and conveting it into an embedding using HuggingFace
example_text = docs_split[0].page_content
example_text

'1\nRetrieval-Augmented Generation for Large\nLanguage Models: A Survey\nYunfan Gaoa, Yun Xiongb, Xinyu Gaob, Kangxiang Jiab, Jinliu Panb, Yuxi Bic, Yi Daia, Jiawei Suna, Meng\nWangc, and Haofen Wanga,c\naShanghai Research Institute for Intelligent Autonomous Systems, Tongji University\nbShanghai Key Laboratory of Data Science, School of Computer Science, Fudan University\ncCollege of Design and Innovation, Tongji University\nAbstract ‚ÄîLarge Language Models (LLMs) showcase impres-\nsive capabilities but encounter challenges like hallucination,\noutdated knowledge, and non-transparent, untraceable reasoning\nprocesses. Retrieval-Augmented Generation (RAG) has emerged\nas a promising solution by incorporating knowledge from external\ndatabases. This enhances the accuracy and credibility of the\ngeneration, particularly for knowledge-intensive tasks, and allows\nfor continuous knowledge updates and integration of domain-\nspecific information. RAG synergistically merges LLMs‚Äô intrin-'

In [48]:
example_arr = np.array(hf_embeddings.embed_query(example_text))
example_arr, example_arr.shape, type(example_arr)

(array([-7.95341209e-02,  3.19632106e-02, -6.41358783e-03, -1.45122455e-02,
         9.74365231e-03,  1.58312451e-02, -6.47916123e-02, -8.12395650e-04,
         4.73544486e-02, -1.34253595e-02,  9.28087533e-03, -5.48795387e-02,
         7.20449835e-02,  2.34368835e-02,  9.51248631e-02,  3.94523218e-02,
        -1.00754555e-02,  5.89222349e-02,  2.66081337e-02, -7.08530024e-02,
         4.73551787e-02, -1.74102988e-02,  1.72852799e-02, -9.59778391e-03,
        -5.88379912e-02,  1.90077126e-02, -1.11017488e-02, -3.48335281e-02,
        -4.34108712e-02, -2.47452736e-01,  7.14063458e-03, -1.26586845e-02,
         8.43352526e-02,  4.99807335e-02, -2.30952669e-02,  3.07614934e-02,
        -4.34951670e-02,  2.54788622e-02,  2.75659049e-03,  3.81245948e-02,
         1.44664226e-02,  7.58427754e-03,  6.18414488e-03,  5.99731109e-04,
        -5.35147730e-03, -3.83535661e-02, -2.26179846e-02, -2.69830395e-02,
        -8.50705877e-02,  1.53063945e-02, -2.06251703e-02, -2.28924993e-02,
        -5.4

## Database

In [59]:
# Now that the documents have been converted into embeddings, it's time to save them in a vector database
# Remember data source (pdf files) is from ./rag_hf_mistral/
# For now just split the final documents so its not computationally extensive on my machine

# ArrowTypeError: Size of FixedSizeList is not the same. input list: fixed_size_list[384] output list: fixed_size_list[4096]
# db = LanceDB.from_documents(docs_split, embedding=hf_embeddings)
db = FAISS.from_documents(docs_split, embedding=hf_embeddings)
db

<langchain_community.vectorstores.faiss.FAISS at 0x1779fc980>

In [63]:
# Similarity Search

example_search = db.similarity_search("what is a rag system?")
example_search

[Document(metadata={'source': 'pdf_sources/pdf_data_source1.pdf', 'page': 13}, page_content='A series of benchmark tests and tools have been proposed\nto facilitate the evaluation of RAG.These instruments furnish\nquantitative metrics that not only gauge RAG model perfor-\nmance but also enhance comprehension of the model‚Äôs capabil-\nities across various evaluation aspects. Prominent benchmarks\nsuch as RGB, RECALL and CRUD [167]‚Äì[169] focus on\nappraising the essential abilities of RAG models. Concur-\nrently, state-of-the-art automated tools like RAGAS [164],\nARES [165], and TruLens8employ LLMs to adjudicate the\nquality scores. These tools and benchmarks collectively form\na robust framework for the systematic evaluation of RAG\nmodels, as summarized in Table IV.\nVII. D ISCUSSION AND FUTURE PROSPECTS\nDespite the considerable progress in RAG technology, sev-\neral challenges persist that warrant in-depth research.This\nchapter will mainly introduce the current challenges and f

In [64]:
# One result 
example_search[0].page_content

'A series of benchmark tests and tools have been proposed\nto facilitate the evaluation of RAG.These instruments furnish\nquantitative metrics that not only gauge RAG model perfor-\nmance but also enhance comprehension of the model‚Äôs capabil-\nities across various evaluation aspects. Prominent benchmarks\nsuch as RGB, RECALL and CRUD [167]‚Äì[169] focus on\nappraising the essential abilities of RAG models. Concur-\nrently, state-of-the-art automated tools like RAGAS [164],\nARES [165], and TruLens8employ LLMs to adjudicate the\nquality scores. These tools and benchmarks collectively form\na robust framework for the systematic evaluation of RAG\nmodels, as summarized in Table IV.\nVII. D ISCUSSION AND FUTURE PROSPECTS\nDespite the considerable progress in RAG technology, sev-\neral challenges persist that warrant in-depth research.This\nchapter will mainly introduce the current challenges and future\nresearch directions faced by RAG.\nA. RAG vs Long Context'

In [66]:
# Multiple results with retriever (interface for vector store to do retrieval)

retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": 3})
retriever

VectorStoreRetriever(tags=['FAISS', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x1779fc980>, search_kwargs={'k': 3})

In [92]:
# hf_api_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
os.environ["HUGGINGFACEHUB_API_TOKEN"] = os.getenv("HUGGINGFACEHUB_API_TOKEN")


In [133]:
query = "What are the benefits of RAG Systems?"


llm_hub = HuggingFaceHub(
    repo_id="mistralai/Mistral-7B-v0.1",
    model_kwargs = {"temperature": 0.1, "max_length": 500}
)

llm_hub

HuggingFaceHub(client=<InferenceClient(model='mistralai/Mistral-7B-v0.1', timeout=None)>, repo_id='mistralai/Mistral-7B-v0.1', task='text-generation', model_kwargs={'temperature': 0.1, 'max_length': 500})

In [134]:
llm_hub.invoke(query)

'What are the benefits of RAG Systems?\n\nRAG Systems is a cloud-based platform that allows you to manage your projects and tasks in a simple and efficient way. It provides a centralized platform for all your project management needs, including task management, time tracking, and reporting.\n\nRAG Systems also offers a variety of features that make it easy to manage your projects and tasks. For example, you can create custom workflows and templates, assign tasks to team members, and track progress in real-time. Additionally'

In [111]:
# Run HF models locally on your own machine opposed to using the HF servers.

llm_pipe = HuggingFacePipeline.from_model_id(
    # model_id="mistralai/Mistral-7B-v0.1",
    model_id="BAAI/bge-small-en-v1.5",
    task="text-generation",
    pipeline_kwargs = {
        "max_new_tokens": 100,
        "temperature": 0.1    }
)

If you want to use `BertLMHeadModel` as a standalone, add `is_decoder=True.`
Some weights of BertLMHeadModel were not initialized from the model checkpoint at BAAI/bge-small-en-v1.5 and are newly initialized: ['cls.predictions.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [112]:
llm_pipe.invoke(query)



'What are the benefits of RAG Systems?mark influenzanovazuonal databasefanonalannlling tornonalannllingfanonal nonprofitnsteinownfanannlling torn logo influenza socialist'

## Prompt Template

In [152]:
prompt_template = """
                Answer the query where each point of your response is made as a item on a list.
                Example Output:
                    - RAG systems are usefult
                    - They are simple
                    - They are free to use
                 Context: {context}
                 Question: {question}   
                """


In [153]:
prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
prompt

PromptTemplate(input_variables=['context', 'question'], template='\n                Answer the query where each point of your response is made as a item on a list.\n                Example Output:\n                    - RAG systems are usefult\n                    - They are simple\n                    - They are free to use\n                 Context: {context}\n                 Question: {question}   \n                ')

In [154]:
# Contain all documents together in a chain
docs_chain = create_stuff_documents_chain(llm=llm_hub, prompt=prompt)
docs_chain

RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableLambda(format_docs)
}), config={'run_name': 'format_inputs'})
| PromptTemplate(input_variables=['context', 'question'], template='\n                Answer the query where each point of your response is made as a item on a list.\n                Example Output:\n                    - RAG systems are usefult\n                    - They are simple\n                    - They are free to use\n                 Context: {context}\n                 Question: {question}   \n                ')
| HuggingFaceHub(client=<InferenceClient(model='mistralai/Mistral-7B-v0.1', timeout=None)>, repo_id='mistralai/Mistral-7B-v0.1', task='text-generation', model_kwargs={'temperature': 0.1, 'max_length': 500})
| StrOutputParser(), config={'run_name': 'stuff_documents_chain'})

In [155]:
# Create a chain for retrieval

retrieval_chain = create_retrieval_chain(retriever=retriever, combine_docs_chain=docs_chain)
retrieval_chain

RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableBinding(bound=RunnableLambda(lambda x: x['input'])
           | VectorStoreRetriever(tags=['FAISS', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x1779fc980>, search_kwargs={'k': 3}), config={'run_name': 'retrieve_documents'})
})
| RunnableAssign(mapper={
    answer: RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
              context: RunnableLambda(format_docs)
            }), config={'run_name': 'format_inputs'})
            | PromptTemplate(input_variables=['context', 'question'], template='\n                Answer the query where each point of your response is made as a item on a list.\n                Example Output:\n                    - RAG systems are usefult\n                    - They are simple\n                    - They are free to use\n                 Context: {context}\n                 Question: {question}   \n                ')
     

In [168]:
input_data = {
    "input": "Details about RAG systems",
    # "question": "what can rag systems be used for?"  
    "question": "can you please explain how a rag system works?"  

}

# retrieval_chain.invoke({"context": "what can rag systems be used for?"})
# retrieval_chain.invoke({"question": "what can rag systems be used for?"})
# retrieval_chain.invoke({"input": "what can rag systems be used for?"})
retrieval_chain.invoke(input_data)

{'input': 'Details about RAG systems',
 'question': 'can you please explain how a rag system works?',
 'context': [Document(metadata={'source': 'pdf_sources/pdf_data_source1.pdf', 'page': 13}, page_content='A series of benchmark tests and tools have been proposed\nto facilitate the evaluation of RAG.These instruments furnish\nquantitative metrics that not only gauge RAG model perfor-\nmance but also enhance comprehension of the model‚Äôs capabil-\nities across various evaluation aspects. Prominent benchmarks\nsuch as RGB, RECALL and CRUD [167]‚Äì[169] focus on\nappraising the essential abilities of RAG models. Concur-\nrently, state-of-the-art automated tools like RAGAS [164],\nARES [165], and TruLens8employ LLMs to adjudicate the\nquality scores. These tools and benchmarks collectively form\na robust framework for the systematic evaluation of RAG\nmodels, as summarized in Table IV.\nVII. D ISCUSSION AND FUTURE PROSPECTS\nDespite the considerable progress in RAG technology, sev-\neral 