<a href="https://colab.research.google.com/github/HajarahM/Mistral-7B/blob/main/Chat_with_MultiplePDFs_Mistral_7B_Instruct1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
%pip install langchain
%pip install torch
%pip install sentence_transformers
%pip install faiss-cpu
%pip install huggingface-hub
%pip install pypdf
%pip -q install accelerate
%pip install llama-cpp-python
%pip -q install git+https://github.com/huggingface/transformers


Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Collecting torch>=1.6.0 (from sentence_transformers)
  Using cached torch-2.0.1-cp39-none-macosx_11_0_arm64.whl (55.8 MB)
Installing collected packages: torch
  Attempting uninstall: torch
    Found existing installation: torch 2.1.0
    Uninstalling torch-2.1.0:
      Successfully uninstalled torch-2.1.0
Successfully installed torch-2.0.1
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [2]:
from langchain.chains import RetrievalQA
from langchain.embeddings import OllamaEmbeddings
from langchain.llms import Ollama
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.document_loaders import PyPDFDirectoryLoader
import os

In [None]:
#load pdf files
#os.environ["TOKENIZERS_PARALLELISM"] = "true"
# Every sub-directory of ./data is loaded with its own PyPDFDirectoryLoader so
# that one unreadable directory is reported and skipped instead of aborting
# the whole run. Results accumulate in `documents`.
documents = []
processed_directories = 0
data_root = "data"
for entry in os.listdir(data_root):
    dir_path = os.path.join(data_root, entry)
    if not os.path.isdir(dir_path):
        # Stray files next to the PDF folders are not directories to load.
        continue
    try:
        loaded = PyPDFDirectoryLoader(dir_path).load()
    except Exception as exc:
        # Best-effort loading: keep going, but show why this directory failed
        # (a bare `except:` would also swallow KeyboardInterrupt).
        print("issue with ", entry, ":", exc)
        continue
    documents.extend(loaded)
    processed_directories += 1
print("processed ", processed_directories, " directories")

In [None]:
# Debug aid: dump the full list of loaded documents (one entry per item in `documents`).
print(documents)

In [None]:
#Step 05: Split the Extracted Data into Text Chunks
# chunk_size=500 with a 50-unit overlap between neighbouring chunks; the splitter
# tries the separators in order: paragraph break, line break, then sentence end.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50, separators=['\n\n', '\n', '.'])

# `text_chunks` is the list consumed by the embedding / vector-store cells below.
text_chunks = text_splitter.split_documents(documents)


In [None]:
# Number of chunks produced by the splitter.
len(text_chunks)

In [None]:
# Inspect a sample chunk: index 30 is the 31st chunk (raises IndexError if
# fewer than 31 chunks were produced).
text_chunks[30]

In [None]:
#Step 06: Download the Embeddings
# Embeddings object backed by the Ollama model named "mistral". In the cells
# shown here it is only used by the FAISS fallback snippet; the vector stores
# themselves are built with `embedding_model`.
embeddings = OllamaEmbeddings(model="mistral")

In [None]:
from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings
# Embedding model used to build and to reload both the Chroma and the FAISS stores;
# the same model must be used for indexing and for querying.
embedding_model = SentenceTransformerEmbeddings(model_name='all-mpnet-base-v2')

In [None]:
from langchain.vectorstores import Chroma
# Embed all chunks with `embedding_model` and store them in a Chroma database
# persisted under ./vector_db.
vector_store = Chroma.from_documents(text_chunks, embedding=embedding_model, persist_directory="./vector_db")
# Flush the collection to disk.
vector_store.persist()
print('saved embeddings to vector_store')

In [None]:
# Step 08: embed every text chunk into a FAISS index and persist it on disk.
import os

# Index holding only the chunks from this run (rebinds `vector_store`).
vector_store = FAISS.from_documents(text_chunks, embedding=embedding_model)
print('saving embeddings to vector_store')
folder_path = "FAISS_vector_store"
if not os.path.exists(folder_path):
    # Nothing saved yet: the fresh index becomes the on-disk store.
    vector_store.save_local(folder_path)
else:
    # A store already exists: load it, append the new vectors, write it back.
    # `vector_store` itself still contains only this run's chunks afterwards;
    # the merged index lives in `faiss_index` and on disk.
    faiss_index = FAISS.load_local(folder_path, embedding_model)
    faiss_index.merge_from(vector_store)
    faiss_index.save_local(folder_path)

If there is an error with FAISS while adding new docs, use this fix code to replace vector_store:

chunk_texts = [chunk.page_content for chunk in text_chunks]  # embed_documents expects plain strings, not Document objects
chunk_metadatas = [chunk.metadata for chunk in text_chunks]  # keep source/page metadata alongside each vector
text_embeddings = embeddings.embed_documents(chunk_texts)
text_embedding_pairs = zip(chunk_texts, text_embeddings)  # (text, vector) pairs, as FAISS.from_embeddings requires
text_embedding_pairs_list = list(text_embedding_pairs)
vector_store = FAISS.from_embeddings(text_embedding_pairs_list, embeddings, metadatas=chunk_metadatas)  # NOTE: `embeddings` must be the same model as any index this store is later merged with

In [None]:
#Import Model
from langchain.llms import Ollama
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
# LLM served by an Ollama instance at localhost:11434; the stdout callback
# handler streams output to the cell as it is generated.
llm = Ollama(
    base_url="http://localhost:11434",
    model = "mistral",
    verbose=True,
    callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
    )
#vector_store_path = "./FAISS_vector_store"
#vector_store = Chroma(vector_store_path, embedding_model)
# "stuff" chain over the 4 nearest chunks from whichever `vector_store` was
# built last (Chroma or FAISS, depending on which cell ran most recently).
qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=vector_store.as_retriever(search_kwargs={"k": 4}))
query = "Summarize the Mining Act commencement instrument of 2004"
# Runs the chain on `query`; as the last expression of the cell, the answer
# string is also displayed as the cell result.
qa.run(query)


Try a different model: Zephyr

In [None]:
from langchain.llms import Ollama
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
# Rebinds `llm` to the Ollama model named "zephyr" (same server and streaming
# setup as the mistral cell). Chains created earlier keep their previous LLM;
# rebuild them to use this one.
llm = Ollama(
    base_url="http://localhost:11434",
    model = "zephyr",
    verbose=True,
    callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
    )

Or use a Mistral-based model (Dolphin 2.2 Mistral 7B, q6_K quantization)

In [None]:
# Rebinds `llm` to the Ollama model tagged "dolphin2.2-mistral:7b-q6_K".
# Relies on Ollama, CallbackManager and StreamingStdOutCallbackHandler having
# been imported by an earlier cell.
llm = Ollama(
    base_url="http://localhost:11434",
    model = "dolphin2.2-mistral:7b-q6_K",
    verbose=True,
    callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
    )

In [None]:
from langchain import hub
# Fetch the shared prompt "rlm/rag-prompt-mistral" from the LangChain hub; it is
# passed as the `prompt` of the RetrievalQA variant of retrieval_qa_chain below.
QA_CHAIN_PROMPT = hub.pull("rlm/rag-prompt-mistral")

Alternative: a custom prompt

In [None]:
from langchain.prompts import PromptTemplate
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from decouple import config

In [None]:
# create prompt
# Custom answer prompt with three placeholders: {chat_history}, {text} (the
# retrieved document context) and {question}.
# NOTE(review): LangChain's "stuff" documents chain names its document variable
# `context` by default; a chain using this prompt presumably has to be built
# with document_variable_name="text" — confirm against the chain construction.
QA_prompt = PromptTemplate(
    template="""You are a lawyer. Be thorough in your search within the context of only the documents in the database and detailed in your response. 
    After end of each sentence move to the next line. At the end of the response name the specific source document.
    Use the following pieces of context to answer the user question. 
chat_history: {chat_history}
Context: {text}
Question: {question}
Answer:""",
    input_variables=["text", "question", "chat_history"]
)   

Create Memory

In [None]:
# create memory
# Conversation history is exposed to the chain under "chat_history" as message
# objects. `output_key` tells the memory which chain output to record: a chain
# built with return_source_documents=True returns both "answer" and
# "source_documents", and the memory refuses to guess between several keys.
memory = ConversationBufferMemory(
    return_messages=True, memory_key="chat_history", output_key="answer")

Create Retriever Chain

In [None]:
# create conversational retriever chain
def retrieval_qa_chain(llm, vectorstore):
    """Build a ConversationalRetrievalChain that answers with ``QA_prompt``.

    Args:
        llm: Language model used for question condensing and answering.
        vectorstore: Vector store exposing ``as_retriever``.

    Returns:
        A chain taking ``{"question": ...}`` and returning a dict with
        ``"answer"`` and ``"source_documents"``. The module-level ``memory``
        object supplies and records the chat history.
    """
    # MMR search: consider 4 candidates, keep the 3 most diverse/relevant.
    retriever = vectorstore.as_retriever(
        search_type='mmr', search_kwargs={'fetch_k': 4, 'k': 3})
    return ConversationalRetrievalChain.from_llm(
        llm=llm,
        memory=memory,
        retriever=retriever,
        # QA_prompt is a single context+question prompt, which is what the
        # "stuff" chain takes; "refine" needs separate question/refine prompts.
        chain_type="stuff",
        # from_llm forwards prompt options through combine_docs_chain_kwargs
        # (it has no chain_type_kwargs parameter).
        combine_docs_chain_kwargs={
            "prompt": QA_prompt,
            # Must match the document placeholder used in QA_prompt ({text}).
            "document_variable_name": "text",
        },
        # Two output keys result ("answer", "source_documents"); the memory
        # object must therefore declare output_key="answer".
        return_source_documents=True,
    )

OR:

In [None]:
def retrieval_qa_chain(llm, vectorstore):
    """Build a RetrievalQA chain that answers with ``QA_CHAIN_PROMPT``.

    Args:
        llm: Language model used to generate the answer.
        vectorstore: Vector store exposing ``as_retriever``.

    Returns:
        A chain taking ``{"query": ...}`` and returning a dict with
        ``"result"`` and ``"source_documents"``.
    """
    qa_chain = RetrievalQA.from_chain_type(
        llm,
        # MMR search: consider 4 candidates, keep the 3 most diverse/relevant.
        retriever=vectorstore.as_retriever(
            search_kwargs={'fetch_k': 4, 'k': 3}, search_type='mmr'),
        # "stuff" = put all retrieved chunks into one prompt ("staff" is not a
        # valid chain type and makes from_chain_type raise).
        chain_type="stuff",
        chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
        return_source_documents=True
    )
    return qa_chain

In [None]:
def qa_bot():
    """Open the persisted Chroma store and return a QA chain over it.

    Uses the notebook-level ``llm`` and ``embedding_model``.
    """
    db_path = "vector_db/"
    # Keyword arguments are required here: Chroma's first positional parameter
    # is the collection name, not the persistence directory.
    vectorstore = Chroma(persist_directory=db_path, embedding_function=embedding_model)
    qa = retrieval_qa_chain(llm, vectorstore)
    return qa

# NOTE: `qa` is whatever chain is currently bound to the notebook-level name
# `qa`; qa_bot() above is only defined here, not called.
query = "What are the national content requirements during procurement of goods.When answering, after end of each sentence, move to the next line"
qa.run(query)

In [None]:
def qa_bot():
    """Reload the FAISS index saved on disk and wrap it in a QA chain.

    Relies on the notebook-level ``llm`` and ``embedding_model``.
    """
    # Index written by the FAISS cell under FAISS_vector_store/.
    store = FAISS.load_local("FAISS_vector_store/", embedding_model)
    return retrieval_qa_chain(llm, store)

In [None]:
# Runs the query through the chain currently bound to the notebook-level name
# `qa`; this cell does not call qa_bot() itself.
query = "What are the national content requirements during procurement of goods. When answering, after end of each sentence, move to the next line"
qa.run(query)

Create RAG function


In [None]:
def rag(question: str) -> str:
    """Answer *question* with the notebook-level conversational chain ``qa``.

    Args:
        question: The user's question, passed under the chain's "question" key.

    Returns:
        The value stored under "answer" in the chain's output.
    """
    # Call the chain directly to get its full output dict. `qa.run(...)`
    # returns a bare string (and rejects chains with more than one output key),
    # so the "answer" lookup below would fail on it.
    response = qa({"question": question})

    return response.get("answer")

Archived for reference: the interactive prompt loop below

In [None]:
import sys

# Simple console loop over the `qa` chain: an empty line re-prompts, the word
# 'exit' terminates, anything else is sent as the chain's 'query' input.
while True:
    user_input = input("Input Prompt: ")
    if not user_input:
        continue
    if user_input == 'exit':
        print('Exiting')
        sys.exit()
    result = qa({'query': user_input})
    print(f"Answer: {result['result']}")