<a href="https://colab.research.google.com/github/ShamsRupak/ai-doc-processing-suite/blob/main/Integrating_Open_Source_LLMs_into_RAG.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [7]:
!pip install pymupdf
!pip install llama-index-llms-llama-cpp
!pip install llama-index-embeddings-huggingface



In [3]:
import fitz  # PyMuPDF

# Load the sample contract PDF
pdf_path = "sample_contract.pdf"

# Open the document in a context manager so the underlying file handle is
# released as soon as the text has been extracted.
with fitz.open(pdf_path) as doc:
    # Extract text from all pages, joined with a newline between pages
    text = "\n".join(page.get_text() for page in doc)

# Whitespace-delimited word count as a quick sanity check on the extraction
print(f"Extracted {len(text.split())} words from the PDF.")

Extracted 315 words from the PDF.


In [4]:
from pathlib import Path

from llama_index.core import VectorStoreIndex, Document, get_response_synthesizer
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.settings import Settings
from llama_index.llms.llama_cpp import LlamaCPP
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# Configure the LLM
MODEL_PATH = "/content/mistral.gguf"  # Path the download cell writes the GGUF weights to

# Fail fast with an actionable message: the weights are fetched by a separate
# cell (the wget download), so on a fresh kernel the file may not exist yet.
if not Path(MODEL_PATH).is_file():
    raise FileNotFoundError(
        f"Model weights not found at {MODEL_PATH}. "
        "Run the 'Download the Mistral 7B model' cell before configuring the LLM."
    )

llm = LlamaCPP(
    model_path=MODEL_PATH,
    temperature=0.7,
    max_new_tokens=512,
    context_window=2048,
    model_kwargs={"n_gpu_layers": 1},
)

# Open-source embedding model: lightweight but effective
EMBED_MODEL_NAME = "BAAI/bge-small-en-v1.5"
embed_model = HuggingFaceEmbedding(model_name=EMBED_MODEL_NAME)

# Register the LLM and the embedding model as the library-wide defaults
Settings.llm = llm
Settings.embed_model = embed_model

# Wrap the extracted text ('text' holds the document content) in a single Document
documents = [Document(text=text)]

# Build the vector index over those documents
index = VectorStoreIndex.from_documents(documents)

# Retriever: return the 2 most similar chunks for each query
retriever = VectorIndexRetriever(index=index, similarity_top_k=2)

# Response synthesizer running in "tree_summarize" mode
response_synthesizer = get_response_synthesizer(response_mode="tree_summarize")

llama_model_loader: loaded meta data with 20 key-value pairs and 291 tensors from /content/mistral.gguf (version GGUF V2)
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = mistralai_mistral-7b-instruct-v0.1
llama_model_loader: - kv   2:                       llama.context_length u32              = 32768
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 32
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 14336
llama_model_loader: - kv   6:                 llama.rope.dimension_count u32              = 128
llama_model_loader: - kv   7:                 llama.attention.head_coun

In [5]:
# Assemble the query engine from the retriever and response synthesizer
query_engine = RetrieverQueryEngine(
    retriever=retriever, response_synthesizer=response_synthesizer
)

# Test query: run one question through the engine and print the answer
query = "What are the late payment penalties in this contract?"
response = query_engine.query(query)
print(response)

llama_perf_context_print:        load time =  133799.42 ms
llama_perf_context_print: prompt eval time =  133798.76 ms /   557 tokens (  240.21 ms per token,     4.16 tokens per second)
llama_perf_context_print:        eval time =    8610.37 ms /    15 runs   (  574.02 ms per token,     1.74 tokens per second)
llama_perf_context_print:       total time =  142416.80 ms /   572 tokens


1.5% per month from the due date until paid in full.


In [6]:
# Download the Mistral 7B model
!wget https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_0.gguf -O /content/mistral.gguf

--2025-07-20 15:36:36--  https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_0.gguf
Resolving huggingface.co (huggingface.co)... 13.35.202.34, 13.35.202.121, 13.35.202.40, ...
Connecting to huggingface.co (huggingface.co)|13.35.202.34|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://cdn-lfs.hf.co/repos/46/12/46124cd8d4788fd8e0879883abfc473f247664b987955cc98a08658f7df6b826/c0fff3ee02f4b8f7296fbb560155b68a13644b12b9e1e761744c05fb637ade7c?response-content-disposition=inline%3B+filename*%3DUTF-8%27%27mistral-7b-instruct-v0.1.Q4_0.gguf%3B+filename%3D%22mistral-7b-instruct-v0.1.Q4_0.gguf%22%3B&Expires=1753029396&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTc1MzAyOTM5Nn19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5oZi5jby9yZXBvcy80Ni8xMi80NjEyNGNkOGQ0Nzg4ZmQ4ZTA4Nzk4ODNhYmZjNDczZjI0NzY2NGI5ODc5NTVjYzk4YTA4NjU4ZjdkZjZiODI2L2MwZmZmM2VlMDJmNGI4ZjcyOTZmYmI1NjAxNTViNjhhMT