<a href="https://colab.research.google.com/github/AXB2024/RAG-Pipline-Project/blob/main/RAG_PIPELINE_FINAL_PROJECT.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install llama-index llama-index-embeddings-huggingface transformers accelerate sentence-transformers faiss-cpu llama-cpp-python unstructured PyMuPDF



In [None]:
import os
import fitz  # PyMuPDF
import time
import faiss
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from sentence_transformers import SentenceTransformer
from llama_cpp import Llama


In [None]:
!wget https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_M.gguf -O {"/content/mistral-7b-instruct-v0.2.Q4_K_M.gguf"}

--2025-08-04 14:25:24--  https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_M.gguf
Resolving huggingface.co (huggingface.co)... 3.166.152.65, 3.166.152.44, 3.166.152.105, ...
Connecting to huggingface.co (huggingface.co)|3.166.152.65|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://cdn-lfs-us-1.hf.co/repos/72/62/726219e98582d16c24a66629a4dec1b0761b91c918e15dea2625b4293c134a92/3e0039fd0273fcbebb49228943b17831aadd55cbcbf56f0af00499be2040ccf9?response-content-disposition=inline%3B+filename*%3DUTF-8%27%27mistral-7b-instruct-v0.2.Q4_K_M.gguf%3B+filename%3D%22mistral-7b-instruct-v0.2.Q4_K_M.gguf%22%3B&Expires=1754321124&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTc1NDMyMTEyNH19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmhmLmNvL3JlcG9zLzcyLzYyLzcyNjIxOWU5ODU4MmQxNmMyNGE2NjYyOWE0ZGVjMWIwNzYxYjkxYzkxOGUxNWRlYTI2MjViNDI5M2MxMzRhOTIvM2UwMDM5ZmQwMjczZmNiZWJiNDk

In [None]:
# STEP 1: Mount / Create Document Folder
!mkdir documents

In [None]:
"""from google.colab import files
uploaded = files.upload()"""

Saving appraisal_report.pdf to appraisal_report.pdf
Saving payslip_sample_image.pdf to payslip_sample_image.pdf
Saving sample_bank_statement.pdf to sample_bank_statement.pdf


In [None]:
"""import shutil

for filename in uploaded.keys():
    shutil.move(filename, f'documents/{filename}')"""

In [None]:
# STEP 2: Extract Text from PDFs
def extract_text_from_pdfs(folder="/content/documents"):
    docs = {}
    for fname in os.listdir(folder):
        if fname.endswith(".pdf"):
            with fitz.open(os.path.join(folder, fname)) as doc:
                full_text = ""
                for page in doc:
                    full_text += page.get_text()
                docs[fname] = full_text
    return docs

In [None]:
# STEP 3: RAG Components
queries = {
    "appraisal.pdf": "What is the estimated home value?",
    "sample_bank_statement.pdf": "How much was the last transaction?",
    "payslip_sample_image.pdf": "What is the total net salary for this month?",
    "sample_contract.pdf" : "What are the penalties for late payments?",
    "LenderFeesWorksheetNew.pdf" : "What is the total estimated monthly payment?"
}

In [None]:
from llama_index.core.node_parser import SemanticSplitterNodeParser
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core.schema import Document

def embed_documents(docs, embedder, chunk_size=300, chunk_overlap=30, use_semantic=True):
    if use_semantic:
        print(f"\nüîß Semantic Chunking | Size: {chunk_size} | Overlap: {chunk_overlap}")

        # Set up semantic chunking
        embed_model = HuggingFaceEmbedding(model_name="sentence-transformers/all-MiniLM-L6-v2")
        splitter = SemanticSplitterNodeParser(
            embed_model=embed_model,
            chunk_size=chunk_size,
            chunk_overlap=chunk_overlap,
        )

        raw_documents = [Document(text=content, metadata={"name": name}) for name, content in docs.items()]
        nodes = splitter.get_nodes_from_documents(raw_documents)

        passages = [node.text for node in nodes]
        doc_map = [node.metadata["name"] for node in nodes]
        embeddings = embedder.encode(passages, convert_to_tensor=True).cpu().numpy()
        print(f"‚úÖ Total Chunks Created: {len(passages)}")
        return passages, doc_map, embeddings
    else:
        # fallback to fixed chunking
        passages = []
        doc_map = []
        for name, text in docs.items():
            for i in range(0, len(text), chunk_size):
                chunk = text[i:i+chunk_size]
                passages.append(chunk)
                doc_map.append(name)
        embeddings = embedder.encode(passages, convert_to_tensor=True).cpu().numpy()
        return passages, doc_map, embeddings


In [None]:
import numpy as np

def search(query, embedder, passages, embeddings):
    query_vec = embedder.encode([query])[0]
    query_vec = np.array(query_vec).astype('float32').reshape(1, -1)

    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(embeddings)
    D, I = index.search(query_vec, 1)
    return passages[I[0][0]]

In [None]:
def load_model(name, model_type):
    if model_type == "transformers":
        tokenizer = AutoTokenizer.from_pretrained(name)
        model = AutoModelForCausalLM.from_pretrained(name, device_map="auto", torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32)
        pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
        return lambda prompt: pipe(prompt, max_new_tokens=128, do_sample=True)[0]['generated_text']
    elif model_type == "llama-cpp":
        return Llama(model_path=name, n_ctx=2048, n_threads=4)

In [None]:
def generate_answer(model, query, context, model_type):
    prompt = f"Answer this question based on the context:\nContext: {context}\nQuestion: {query}"
    if model_type == "llama-cpp":
        return model(prompt)["choices"][0]["text"].strip()
    else:
        return model(prompt)

In [None]:
# STEP 4: Run RAG
import pandas as pd
results = []  # this will hold all results across experiments
def run_rag(model_name, model_type, embedder_name="all-MiniLM-L6-v2", chunk_size=300, chunk_overlap=30):
    print(f"\nüîç Running RAG with model: {model_name}")
    embedder = SentenceTransformer(embedder_name)
    documents = extract_text_from_pdfs()

    passages, doc_map, embeddings = embed_documents(
        documents,
        embedder,
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        use_semantic=True
    )

    model = load_model(model_name, model_type)

    for doc, query in queries.items():
        print(f"\nüìÑ Document: {doc}")
        print(f"‚ùì Query: {query}")
        start = time.time()
        relevant = search(query, embedder, passages, embeddings)
        answer = generate_answer(model, query, relevant, model_type)
        end = time.time()
        print(f"üìå Retrieved: {relevant[:80]}...")
        print(f"üí¨ Answer: {answer.strip()}")
        print(f"‚ö° Speed: {round(end - start, 2)}s")

        # Append result to global list
        results.append({
            "Model": model_name,
            "Chunk Size": chunk_size,
            "Chunk Overlap": chunk_overlap,
            "Document": doc,
            "Query": query,
            "Retrieved Context": relevant[:80],
            "Answer": answer.strip(),
            "Time (s)": round(end - start, 2)
        })


In [None]:
# Clear results
results = []

# Small chunks (100 tokens), no overlap
run_rag("microsoft/phi-2", "transformers", chunk_size=100, chunk_overlap=0)

# Medium chunks (300 tokens), small overlap
run_rag("microsoft/phi-2", "transformers", chunk_size=300, chunk_overlap=30)

# Large chunks (500 tokens), large overlap
run_rag("microsoft/phi-2", "transformers", chunk_size=500, chunk_overlap=100)



üîç Running RAG with model: microsoft/phi-2


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]


üîß Semantic Chunking | Size: 100 | Overlap: 0


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

  return forward_call(*args, **kwargs)


‚úÖ Total Chunks Created: 23


tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

added_tokens.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/99.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/735 [00:00<?, ?B/s]

model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/564M [00:00<?, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

Device set to use cuda:0
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.



üìÑ Document: appraisal.pdf
‚ùì Query: What is the estimated home value?


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


üìå Retrieved: Describe common elements and recreational facilities.
PUD INFORMATION
Freddie Ma...
üí¨ Answer: Answer this question based on the context:
Context: Describe common elements and recreational facilities.
PUD INFORMATION
Freddie Mac Form 70   March 2005
Fannie Mae Form 1004   March 2005
Page
of
Track No.
Site derived by extraction of land values from 
improved sales, due to the lack of recent comparable vacant land sales. 
X
Marshall & Swift Residential Cost Guide
Good
2006
-Reproduction cost and depreciation based on Marshall Swift cost 
guide, local builders, supplemented by appraisers knowledge. 
-Due to the scarcity of buildable land the Site/Value ratios tend to be 
higher than typical. 
-Site derived by extraction of land values from improved sales, due to 
the lack of recent comparable vacant land sales. 
-For square footage calculations, see Apex Addendum.
40-42
1,200,000
2,930
325
952,250
0
Porch/Patio/Fireplace/Decking
76,700
0
1,028,950
34
349,843
0
0
349,843
6

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


üìå Retrieved: Closing Costs (+)
Loan Amount (-)
Principal & Interest
Other Financing (P & I)
H...
üí¨ Answer: Answer this question based on the context:
Context: Closing Costs (+)
Loan Amount (-)
Principal & Interest
Other Financing (P & I)
Hazard Insurance
Real Estate Taxes
Mortgage Insurance
Homeowner Assn. Dues
Other
* PFC
F
POC
= Prepaid Finance Charge
= FHA Allowable Closing Cost
= Paid Outside of Closing
** B
S
Br
L
TP
C
= Borrower
= Seller
= Broker
= Lender
= Third Party
= Correspondent
Calyx Form - feews.frm (09/2015)
FEES WORKSHEET
John Q. Smith / Mary A. Smith
samplesmith
10/05/2015
30 YEAR FIXED -Purchase
XYZ Lender
$ 380,000
4.250 %
360 / 360 mths
475,000.00
1,121.53
4,520.00
380,000.00
Cash Deposit
5,000.00
needed to close
95,641.53
1,869.37
39.58
400.00
2,308.95
ORIGINATION CHARGES
Underwriting Fee
XYZ Lender
Borrower
$
550.00
Wire Transfer Fee
XYZ Lender
Borrower
$
75.00
Administration Fee
XYZ Lender
Borrower
$
445.00
OTHER CHARGES
Appraisal Fee
XYZ Lender
Borrower
$


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


üìå Retrieved: Payslip 
Zoonodle  Inc 
21023 Pearson Point Road 
Gate Avenue 
 
 
Date of Joini...
üí¨ Answer: Answer this question based on the context:
Context: Payslip 
Zoonodle  Inc 
21023 Pearson Point Road 
Gate Avenue 
 
 
Date of Joining 
: 2018-06-23 
Pay Period 
: August 2021 
Worked Days 
26 
Employee Name 
: Sally Harley 
Designation 
: Marketing Evecutive 
Department 
: Marketing 
 
 
 
Earnings 
Amount 
Deductions 
Amount 
Basic Pay 
10000
Provident Fund 
1200
Icentive Pay 
1000
Profesional Tax 
500
House Rent Allowance 
400
Loan 
400
Meal Allowance 
200
Total Earnings
11600
Total Deductions
2100
 
 
Net Pay
9500
 
 
9500 
Nine Thousand Five Hundred 
 
 
Employer Signature 
Employee Signature 
 
 
 
 
 
 
 
This is system generated payslip 

Question: What is the total net salary for this month?



First, calculate the total deductions. This can be done by adding up all the amounts deducted from the earnings, which gives us 2100.

Then, subtract the total deductions fro

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


üìå Retrieved: SERVICES
1.1 Service Provider agrees to provide Client with consulting services ...
üí¨ Answer: Answer this question based on the context:
Context: SERVICES
1.1 Service Provider agrees to provide Client with consulting services ("Services") as described in
Exhibit A attached hereto.
1.2 Service Provider shall use reasonable efforts to perform the Services in accordance with generally
accepted industry standards and practices.
2. PAYMENT
2.1 Client agrees to pay Service Provider for the Services at the rates specified in Exhibit B attached
hereto.
2.2 Service Provider shall invoice Client on a monthly basis for Services performed. Payment terms are
net 30 days from receipt of invoice.
2.3 Late payments shall bear interest at the rate of 1.5% per month from the due date until paid in full.
3. TERM AND TERMINATION
3.1 This Agreement shall commence on the Effective Date and shall continue for a period of one (1)
year, unless earlier terminated as provided herein.
3.2 Eithe

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Device set to use cuda:0
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.



üìÑ Document: appraisal.pdf
‚ùì Query: What is the estimated home value?


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


üìå Retrieved: Describe common elements and recreational facilities.
PUD INFORMATION
Freddie Ma...
üí¨ Answer: Answer this question based on the context:
Context: Describe common elements and recreational facilities.
PUD INFORMATION
Freddie Mac Form 70   March 2005
Fannie Mae Form 1004   March 2005
Page
of
Track No.
Site derived by extraction of land values from 
improved sales, due to the lack of recent comparable vacant land sales. 
X
Marshall & Swift Residential Cost Guide
Good
2006
-Reproduction cost and depreciation based on Marshall Swift cost 
guide, local builders, supplemented by appraisers knowledge. 
-Due to the scarcity of buildable land the Site/Value ratios tend to be 
higher than typical. 
-Site derived by extraction of land values from improved sales, due to 
the lack of recent comparable vacant land sales. 
-For square footage calculations, see Apex Addendum.
40-42
1,200,000
2,930
325
952,250
0
Porch/Patio/Fireplace/Decking
76,700
0
1,028,950
34
349,843
0
0
349,843
6

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


üìå Retrieved: Closing Costs (+)
Loan Amount (-)
Principal & Interest
Other Financing (P & I)
H...
üí¨ Answer: Answer this question based on the context:
Context: Closing Costs (+)
Loan Amount (-)
Principal & Interest
Other Financing (P & I)
Hazard Insurance
Real Estate Taxes
Mortgage Insurance
Homeowner Assn. Dues
Other
* PFC
F
POC
= Prepaid Finance Charge
= FHA Allowable Closing Cost
= Paid Outside of Closing
** B
S
Br
L
TP
C
= Borrower
= Seller
= Broker
= Lender
= Third Party
= Correspondent
Calyx Form - feews.frm (09/2015)
FEES WORKSHEET
John Q. Smith / Mary A. Smith
samplesmith
10/05/2015
30 YEAR FIXED -Purchase
XYZ Lender
$ 380,000
4.250 %
360 / 360 mths
475,000.00
1,121.53
4,520.00
380,000.00
Cash Deposit
5,000.00
needed to close
95,641.53
1,869.37
39.58
400.00
2,308.95
ORIGINATION CHARGES
Underwriting Fee
XYZ Lender
Borrower
$
550.00
Wire Transfer Fee
XYZ Lender
Borrower
$
75.00
Administration Fee
XYZ Lender
Borrower
$
445.00
OTHER CHARGES
Appraisal Fee
XYZ Lender
Borrower
$


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


üìå Retrieved: Payslip 
Zoonodle  Inc 
21023 Pearson Point Road 
Gate Avenue 
 
 
Date of Joini...
üí¨ Answer: Answer this question based on the context:
Context: Payslip 
Zoonodle  Inc 
21023 Pearson Point Road 
Gate Avenue 
 
 
Date of Joining 
: 2018-06-23 
Pay Period 
: August 2021 
Worked Days 
26 
Employee Name 
: Sally Harley 
Designation 
: Marketing Evecutive 
Department 
: Marketing 
 
 
 
Earnings 
Amount 
Deductions 
Amount 
Basic Pay 
10000
Provident Fund 
1200
Icentive Pay 
1000
Profesional Tax 
500
House Rent Allowance 
400
Loan 
400
Meal Allowance 
200
Total Earnings
11600
Total Deductions
2100
 
 
Net Pay
9500
 
 
9500 
Nine Thousand Five Hundred 
 
 
Employer Signature 
Employee Signature 
 
 
 
 
 
 
 
This is system generated payslip 

Question: What is the total net salary for this month?


Calculate the total earnings

Subtract the total deductions from the total earnings

Answer: The total net salary for this month is $9500.
‚ö° Speed: 1.52s

üìÑ Document: sam

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


üìå Retrieved: SERVICES
1.1 Service Provider agrees to provide Client with consulting services ...
üí¨ Answer: Answer this question based on the context:
Context: SERVICES
1.1 Service Provider agrees to provide Client with consulting services ("Services") as described in
Exhibit A attached hereto.
1.2 Service Provider shall use reasonable efforts to perform the Services in accordance with generally
accepted industry standards and practices.
2. PAYMENT
2.1 Client agrees to pay Service Provider for the Services at the rates specified in Exhibit B attached
hereto.
2.2 Service Provider shall invoice Client on a monthly basis for Services performed. Payment terms are
net 30 days from receipt of invoice.
2.3 Late payments shall bear interest at the rate of 1.5% per month from the due date until paid in full.
3. TERM AND TERMINATION
3.1 This Agreement shall commence on the Effective Date and shall continue for a period of one (1)
year, unless earlier terminated as provided herein.
3.2 Eithe

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Device set to use cuda:0
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.



üìÑ Document: appraisal.pdf
‚ùì Query: What is the estimated home value?


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


üìå Retrieved: Describe common elements and recreational facilities.
PUD INFORMATION
Freddie Ma...
üí¨ Answer: Answer this question based on the context:
Context: Describe common elements and recreational facilities.
PUD INFORMATION
Freddie Mac Form 70   March 2005
Fannie Mae Form 1004   March 2005
Page
of
Track No.
Site derived by extraction of land values from 
improved sales, due to the lack of recent comparable vacant land sales. 
X
Marshall & Swift Residential Cost Guide
Good
2006
-Reproduction cost and depreciation based on Marshall Swift cost 
guide, local builders, supplemented by appraisers knowledge. 
-Due to the scarcity of buildable land the Site/Value ratios tend to be 
higher than typical. 
-Site derived by extraction of land values from improved sales, due to 
the lack of recent comparable vacant land sales. 
-For square footage calculations, see Apex Addendum.
40-42
1,200,000
2,930
325
952,250
0
Porch/Patio/Fireplace/Decking
76,700
0
1,028,950
34
349,843
0
0
349,843
6

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


üìå Retrieved: Closing Costs (+)
Loan Amount (-)
Principal & Interest
Other Financing (P & I)
H...
üí¨ Answer: Answer this question based on the context:
Context: Closing Costs (+)
Loan Amount (-)
Principal & Interest
Other Financing (P & I)
Hazard Insurance
Real Estate Taxes
Mortgage Insurance
Homeowner Assn. Dues
Other
* PFC
F
POC
= Prepaid Finance Charge
= FHA Allowable Closing Cost
= Paid Outside of Closing
** B
S
Br
L
TP
C
= Borrower
= Seller
= Broker
= Lender
= Third Party
= Correspondent
Calyx Form - feews.frm (09/2015)
FEES WORKSHEET
John Q. Smith / Mary A. Smith
samplesmith
10/05/2015
30 YEAR FIXED -Purchase
XYZ Lender
$ 380,000
4.250 %
360 / 360 mths
475,000.00
1,121.53
4,520.00
380,000.00
Cash Deposit
5,000.00
needed to close
95,641.53
1,869.37
39.58
400.00
2,308.95
ORIGINATION CHARGES
Underwriting Fee
XYZ Lender
Borrower
$
550.00
Wire Transfer Fee
XYZ Lender
Borrower
$
75.00
Administration Fee
XYZ Lender
Borrower
$
445.00
OTHER CHARGES
Appraisal Fee
XYZ Lender
Borrower
$


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


üìå Retrieved: Payslip 
Zoonodle  Inc 
21023 Pearson Point Road 
Gate Avenue 
 
 
Date of Joini...
üí¨ Answer: Answer this question based on the context:
Context: Payslip 
Zoonodle  Inc 
21023 Pearson Point Road 
Gate Avenue 
 
 
Date of Joining 
: 2018-06-23 
Pay Period 
: August 2021 
Worked Days 
26 
Employee Name 
: Sally Harley 
Designation 
: Marketing Evecutive 
Department 
: Marketing 
 
 
 
Earnings 
Amount 
Deductions 
Amount 
Basic Pay 
10000
Provident Fund 
1200
Icentive Pay 
1000
Profesional Tax 
500
House Rent Allowance 
400
Loan 
400
Meal Allowance 
200
Total Earnings
11600
Total Deductions
2100
 
 
Net Pay
9500
 
 
9500 
Nine Thousand Five Hundred 
 
 
Employer Signature 
Employee Signature 
 
 
 
 
 
 
 
This is system generated payslip 

Question: What is the total net salary for this month?


First, add all the deductions from the total earnings, which are 1200 for the provident fund, 1000 for incentive pay, 500 for professional tax, 400 for house rent allowance, a

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


üìå Retrieved: SERVICES
1.1 Service Provider agrees to provide Client with consulting services ...
üí¨ Answer: Answer this question based on the context:
Context: SERVICES
1.1 Service Provider agrees to provide Client with consulting services ("Services") as described in
Exhibit A attached hereto.
1.2 Service Provider shall use reasonable efforts to perform the Services in accordance with generally
accepted industry standards and practices.
2. PAYMENT
2.1 Client agrees to pay Service Provider for the Services at the rates specified in Exhibit B attached
hereto.
2.2 Service Provider shall invoice Client on a monthly basis for Services performed. Payment terms are
net 30 days from receipt of invoice.
2.3 Late payments shall bear interest at the rate of 1.5% per month from the due date until paid in full.
3. TERM AND TERMINATION
3.1 This Agreement shall commence on the Effective Date and shall continue for a period of one (1)
year, unless earlier terminated as provided herein.
3.2 Eithe

In [None]:
run_rag("TinyLlama/TinyLlama-1.1B-Chat-v1.0", "transformers", chunk_size=100, chunk_overlap=0)
run_rag("TinyLlama/TinyLlama-1.1B-Chat-v1.0", "transformers", chunk_size=300, chunk_overlap=30)
run_rag("TinyLlama/TinyLlama-1.1B-Chat-v1.0", "transformers", chunk_size=500, chunk_overlap=100)


üîç Running RAG with model: TinyLlama/TinyLlama-1.1B-Chat-v1.0

üîß Semantic Chunking | Size: 100 | Overlap: 0
‚úÖ Total Chunks Created: 23


tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/551 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/608 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.20G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

Device set to use cuda:0



üìÑ Document: appraisal.pdf
‚ùì Query: What is the estimated home value?
üìå Retrieved: Describe common elements and recreational facilities.
PUD INFORMATION
Freddie Ma...
üí¨ Answer: Answer this question based on the context:
Context: Describe common elements and recreational facilities.
PUD INFORMATION
Freddie Mac Form 70   March 2005
Fannie Mae Form 1004   March 2005
Page
of
Track No.
Site derived by extraction of land values from 
improved sales, due to the lack of recent comparable vacant land sales. 
X
Marshall & Swift Residential Cost Guide
Good
2006
-Reproduction cost and depreciation based on Marshall Swift cost 
guide, local builders, supplemented by appraisers knowledge. 
-Due to the scarcity of buildable land the Site/Value ratios tend to be 
higher than typical. 
-Site derived by extraction of land values from improved sales, due to 
the lack of recent comparable vacant land sales. 
-For square footage calculations, see Apex Addendum.
40-42
1,200,000
2,930
325
952,250


Device set to use cuda:0



üìÑ Document: appraisal.pdf
‚ùì Query: What is the estimated home value?
üìå Retrieved: Describe common elements and recreational facilities.
PUD INFORMATION
Freddie Ma...
üí¨ Answer: Answer this question based on the context:
Context: Describe common elements and recreational facilities.
PUD INFORMATION
Freddie Mac Form 70   March 2005
Fannie Mae Form 1004   March 2005
Page
of
Track No.
Site derived by extraction of land values from 
improved sales, due to the lack of recent comparable vacant land sales. 
X
Marshall & Swift Residential Cost Guide
Good
2006
-Reproduction cost and depreciation based on Marshall Swift cost 
guide, local builders, supplemented by appraisers knowledge. 
-Due to the scarcity of buildable land the Site/Value ratios tend to be 
higher than typical. 
-Site derived by extraction of land values from improved sales, due to 
the lack of recent comparable vacant land sales. 
-For square footage calculations, see Apex Addendum.
40-42
1,200,000
2,930
325
952,250


Device set to use cuda:0



üìÑ Document: appraisal.pdf
‚ùì Query: What is the estimated home value?
üìå Retrieved: Describe common elements and recreational facilities.
PUD INFORMATION
Freddie Ma...
üí¨ Answer: Answer this question based on the context:
Context: Describe common elements and recreational facilities.
PUD INFORMATION
Freddie Mac Form 70   March 2005
Fannie Mae Form 1004   March 2005
Page
of
Track No.
Site derived by extraction of land values from 
improved sales, due to the lack of recent comparable vacant land sales. 
X
Marshall & Swift Residential Cost Guide
Good
2006
-Reproduction cost and depreciation based on Marshall Swift cost 
guide, local builders, supplemented by appraisers knowledge. 
-Due to the scarcity of buildable land the Site/Value ratios tend to be 
higher than typical. 
-Site derived by extraction of land values from improved sales, due to 
the lack of recent comparable vacant land sales. 
-For square footage calculations, see Apex Addendum.
40-42
1,200,000
2,930
325
952,250


In [None]:
run_rag("/content/mistral-7b-instruct-v0.2.Q4_K_M.gguf", "llama-cpp", chunk_size=100, chunk_overlap=0)
run_rag("/content/mistral-7b-instruct-v0.2.Q4_K_M.gguf", "llama-cpp", chunk_size=300, chunk_overlap=30)
run_rag("/content/mistral-7b-instruct-v0.2.Q4_K_M.gguf", "llama-cpp", chunk_size=500, chunk_overlap=100)


üîç Running RAG with model: /content/mistral-7b-instruct-v0.2.Q4_K_M.gguf

üîß Semantic Chunking | Size: 100 | Overlap: 0


llama_model_loader: loaded meta data with 24 key-value pairs and 291 tensors from /content/mistral-7b-instruct-v0.2.Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = mistralai_mistral-7b-instruct-v0.2
llama_model_loader: - kv   2:                       llama.context_length u32              = 32768
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 32
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 14336
llama_model_loader: - kv   6:                 llama.rope.dimension_count u32              = 128
llama_model_loader: - kv   7:         

‚úÖ Total Chunks Created: 23


init_tokenizer: initializing tokenizer for type 1
load: control token:      2 '</s>' is not marked as EOG
load: control token:      1 '<s>' is not marked as EOG
load: special_eos_id is not in special_eog_ids - the tokenizer config may be incorrect
load: special tokens cache size = 3
load: token to piece cache size = 0.1637 MB
print_info: arch             = llama
print_info: vocab_only       = 0
print_info: n_ctx_train      = 32768
print_info: n_embd           = 4096
print_info: n_layer          = 32
print_info: n_head           = 32
print_info: n_head_kv        = 8
print_info: n_rot            = 128
print_info: n_swa            = 0
print_info: is_swa_any       = 0
print_info: n_embd_head_k    = 128
print_info: n_embd_head_v    = 128
print_info: n_gqa            = 4
print_info: n_embd_k_gqa     = 1024
print_info: n_embd_v_gqa     = 1024
print_info: f_norm_eps       = 0.0e+00
print_info: f_norm_rms_eps   = 1.0e-05
print_info: f_clamp_kqv      = 0.0e+00
print_info: f_max_alibi_bias = 0.0e


üìÑ Document: appraisal.pdf
‚ùì Query: What is the estimated home value?


llama_perf_context_print:        load time =  216279.88 ms
llama_perf_context_print: prompt eval time =  216271.74 ms /   559 tokens (  386.89 ms per token,     2.58 tokens per second)
llama_perf_context_print:        eval time =   11409.06 ms /    15 runs   (  760.60 ms per token,     1.31 tokens per second)
llama_perf_context_print:       total time =  227697.96 ms /   574 tokens
Llama.generate: 12 prefix-match hit, remaining 813 prompt tokens to eval


üìå Retrieved: Describe common elements and recreational facilities.
PUD INFORMATION
Freddie Ma...
üí¨ Answer: Answer: The estimated home value is $1,918,
‚ö° Speed: 228.99s

üìÑ Document: sample_bank_statement.pdf
‚ùì Query: How much was the last transaction?


llama_perf_context_print:        load time =  216279.88 ms
llama_perf_context_print: prompt eval time =  272842.07 ms /   813 tokens (  335.60 ms per token,     2.98 tokens per second)
llama_perf_context_print:        eval time =    9890.10 ms /    15 runs   (  659.34 ms per token,     1.52 tokens per second)
llama_perf_context_print:       total time =  282745.41 ms /   828 tokens
Llama.generate: 12 prefix-match hit, remaining 305 prompt tokens to eval


üìå Retrieved: Closing Costs (+)
Loan Amount (-)
Principal & Interest
Other Financing (P & I)
H...
üí¨ Answer: What type of loan was it? Who were the involved parties? Who paid the
‚ö° Speed: 282.85s

üìÑ Document: payslip_sample_image.pdf
‚ùì Query: What is the total net salary for this month?


llama_perf_context_print:        load time =  216279.88 ms
llama_perf_context_print: prompt eval time =   99793.10 ms /   305 tokens (  327.19 ms per token,     3.06 tokens per second)
llama_perf_context_print:        eval time =    8824.62 ms /    15 runs   (  588.31 ms per token,     1.70 tokens per second)
llama_perf_context_print:       total time =  108627.70 ms /   320 tokens
Llama.generate: 12 prefix-match hit, remaining 402 prompt tokens to eval


üìå Retrieved: Payslip 
Zoonodle  Inc 
21023 Pearson Point Road 
Gate Avenue 
 
 
Date of Joini...
üí¨ Answer: Answer: The total net salary for this month for Sally Harley,
‚ö° Speed: 108.65s

üìÑ Document: sample_contract.pdf
‚ùì Query: What are the penalties for late payments?


llama_perf_context_print:        load time =  216279.88 ms
llama_perf_context_print: prompt eval time =  132442.27 ms /   402 tokens (  329.46 ms per token,     3.04 tokens per second)
llama_perf_context_print:        eval time =    9553.78 ms /    15 runs   (  636.92 ms per token,     1.57 tokens per second)
llama_perf_context_print:       total time =  142006.14 ms /   417 tokens
Llama.generate: 12 prefix-match hit, remaining 287 prompt tokens to eval


üìå Retrieved: SERVICES
1.1 Service Provider agrees to provide Client with consulting services ...
üí¨ Answer: Answer: The late payments shall bear interest at the rate of
‚ö° Speed: 142.02s

üìÑ Document: LenderFeesWorksheetNew.pdf
‚ùì Query: What is the total estimated monthly payment?


llama_perf_context_print:        load time =  216279.88 ms
llama_perf_context_print: prompt eval time =   94897.01 ms /   287 tokens (  330.65 ms per token,     3.02 tokens per second)
llama_perf_context_print:        eval time =    8328.96 ms /    15 runs   (  555.26 ms per token,     1.80 tokens per second)
llama_perf_context_print:       total time =  103236.03 ms /   302 tokens


üìå Retrieved: Your actual rate, payment, and cost could be higher. Get an official Loan Estima...
üí¨ Answer: Answer: The total estimated monthly payment is indicated in the document as T
‚ö° Speed: 103.26s

üîç Running RAG with model: /content/mistral-7b-instruct-v0.2.Q4_K_M.gguf

üîß Semantic Chunking | Size: 300 | Overlap: 30


llama_model_loader: loaded meta data with 24 key-value pairs and 291 tensors from /content/mistral-7b-instruct-v0.2.Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = mistralai_mistral-7b-instruct-v0.2
llama_model_loader: - kv   2:                       llama.context_length u32              = 32768
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 32
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 14336
llama_model_loader: - kv   6:                 llama.rope.dimension_count u32              = 128
llama_model_loader: - kv   7:         

‚úÖ Total Chunks Created: 23


load_tensors: tensor 'token_embd.weight' (q4_K) (and 98 others) cannot be used with preferred buffer type CPU_REPACK, using CPU instead
load_tensors:   CPU_REPACK model buffer size =  3204.00 MiB
load_tensors:   CPU_Mapped model buffer size =  4165.37 MiB
repack: repack tensor blk.0.attn_q.weight with q4_K_8x8
repack: repack tensor blk.0.attn_k.weight with q4_K_8x8
repack: repack tensor blk.0.attn_output.weight with q4_K_8x8
repack: repack tensor blk.0.ffn_gate.weight with q4_K_8x8
.repack: repack tensor blk.0.ffn_up.weight with q4_K_8x8
repack: repack tensor blk.1.attn_q.weight with q4_K_8x8
.repack: repack tensor blk.1.attn_k.weight with q4_K_8x8
repack: repack tensor blk.1.attn_output.weight with q4_K_8x8
repack: repack tensor blk.1.ffn_gate.weight with q4_K_8x8
.repack: repack tensor blk.1.ffn_up.weight with q4_K_8x8
repack: repack tensor blk.2.attn_q.weight with q4_K_8x8
.repack: repack tensor blk.2.attn_k.weight with q4_K_8x8
repack: repack tensor blk.2.attn_output.weight with q4


üìÑ Document: appraisal.pdf
‚ùì Query: What is the estimated home value?


llama_perf_context_print:        load time =  193936.74 ms
llama_perf_context_print: prompt eval time =  193935.51 ms /   559 tokens (  346.93 ms per token,     2.88 tokens per second)
llama_perf_context_print:        eval time =   13067.34 ms /    15 runs   (  871.16 ms per token,     1.15 tokens per second)
llama_perf_context_print:       total time =  207014.02 ms /   574 tokens
Llama.generate: 12 prefix-match hit, remaining 813 prompt tokens to eval


üìå Retrieved: Describe common elements and recreational facilities.
PUD INFORMATION
Freddie Ma...
üí¨ Answer: Answer: The estimated home value is $1,918,
‚ö° Speed: 207.16s

üìÑ Document: sample_bank_statement.pdf
‚ùì Query: How much was the last transaction?


llama_perf_context_print:        load time =  193936.74 ms
llama_perf_context_print: prompt eval time =  270954.25 ms /   813 tokens (  333.28 ms per token,     3.00 tokens per second)
llama_perf_context_print:        eval time =    9780.14 ms /    15 runs   (  652.01 ms per token,     1.53 tokens per second)
llama_perf_context_print:       total time =  280748.01 ms /   828 tokens
Llama.generate: 12 prefix-match hit, remaining 305 prompt tokens to eval


üìå Retrieved: Closing Costs (+)
Loan Amount (-)
Principal & Interest
Other Financing (P & I)
H...
üí¨ Answer: What type of loan was it? Who were the involved parties? Who paid the
‚ö° Speed: 280.87s

üìÑ Document: payslip_sample_image.pdf
‚ùì Query: What is the total net salary for this month?


llama_perf_context_print:        load time =  193936.74 ms
llama_perf_context_print: prompt eval time =   97422.11 ms /   305 tokens (  319.42 ms per token,     3.13 tokens per second)
llama_perf_context_print:        eval time =    9302.67 ms /    15 runs   (  620.18 ms per token,     1.61 tokens per second)
llama_perf_context_print:       total time =  106734.30 ms /   320 tokens
Llama.generate: 12 prefix-match hit, remaining 402 prompt tokens to eval


üìå Retrieved: Payslip 
Zoonodle  Inc 
21023 Pearson Point Road 
Gate Avenue 
 
 
Date of Joini...
üí¨ Answer: Answer: The total net salary for this month for Sally Harley,
‚ö° Speed: 106.77s

üìÑ Document: sample_contract.pdf
‚ùì Query: What are the penalties for late payments?


llama_perf_context_print:        load time =  193936.74 ms
llama_perf_context_print: prompt eval time =  127491.53 ms /   402 tokens (  317.14 ms per token,     3.15 tokens per second)
llama_perf_context_print:        eval time =    9398.27 ms /    15 runs   (  626.55 ms per token,     1.60 tokens per second)
llama_perf_context_print:       total time =  136899.45 ms /   417 tokens
Llama.generate: 12 prefix-match hit, remaining 287 prompt tokens to eval


üìå Retrieved: SERVICES
1.1 Service Provider agrees to provide Client with consulting services ...
üí¨ Answer: Answer: The late payments shall bear interest at the rate of
‚ö° Speed: 136.92s

üìÑ Document: LenderFeesWorksheetNew.pdf
‚ùì Query: What is the total estimated monthly payment?


llama_perf_context_print:        load time =  193936.74 ms
llama_perf_context_print: prompt eval time =   91621.55 ms /   287 tokens (  319.24 ms per token,     3.13 tokens per second)
llama_perf_context_print:        eval time =    8124.70 ms /    15 runs   (  541.65 ms per token,     1.85 tokens per second)
llama_perf_context_print:       total time =   99755.45 ms /   302 tokens


üìå Retrieved: Your actual rate, payment, and cost could be higher. Get an official Loan Estima...
üí¨ Answer: Answer: The total estimated monthly payment is indicated in the document as T
‚ö° Speed: 99.77s

üîç Running RAG with model: /content/mistral-7b-instruct-v0.2.Q4_K_M.gguf

üîß Semantic Chunking | Size: 500 | Overlap: 100


llama_model_loader: loaded meta data with 24 key-value pairs and 291 tensors from /content/mistral-7b-instruct-v0.2.Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = mistralai_mistral-7b-instruct-v0.2
llama_model_loader: - kv   2:                       llama.context_length u32              = 32768
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 32
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 14336
llama_model_loader: - kv   6:                 llama.rope.dimension_count u32              = 128
llama_model_loader: - kv   7:         

‚úÖ Total Chunks Created: 23


load_tensors:   CPU_REPACK model buffer size =  3204.00 MiB
load_tensors:   CPU_Mapped model buffer size =  4165.37 MiB
repack: repack tensor blk.0.attn_q.weight with q4_K_8x8
repack: repack tensor blk.0.attn_k.weight with q4_K_8x8
repack: repack tensor blk.0.attn_output.weight with q4_K_8x8
repack: repack tensor blk.0.ffn_gate.weight with q4_K_8x8
.repack: repack tensor blk.0.ffn_up.weight with q4_K_8x8
repack: repack tensor blk.1.attn_q.weight with q4_K_8x8
.repack: repack tensor blk.1.attn_k.weight with q4_K_8x8
repack: repack tensor blk.1.attn_output.weight with q4_K_8x8
repack: repack tensor blk.1.ffn_gate.weight with q4_K_8x8
.repack: repack tensor blk.1.ffn_up.weight with q4_K_8x8
repack: repack tensor blk.2.attn_q.weight with q4_K_8x8
.repack: repack tensor blk.2.attn_k.weight with q4_K_8x8
repack: repack tensor blk.2.attn_output.weight with q4_K_8x8
repack: repack tensor blk.2.ffn_gate.weight with q4_K_8x8
.repack: repack tensor blk.2.ffn_up.weight with q4_K_8x8
repack: repack


üìÑ Document: appraisal.pdf
‚ùì Query: What is the estimated home value?


llama_perf_context_print:        load time =  187035.07 ms
llama_perf_context_print: prompt eval time =  187034.28 ms /   559 tokens (  334.59 ms per token,     2.99 tokens per second)
llama_perf_context_print:        eval time =   11732.59 ms /    15 runs   (  782.17 ms per token,     1.28 tokens per second)
llama_perf_context_print:       total time =  198775.92 ms /   574 tokens
Llama.generate: 12 prefix-match hit, remaining 813 prompt tokens to eval


üìå Retrieved: Describe common elements and recreational facilities.
PUD INFORMATION
Freddie Ma...
üí¨ Answer: Answer: The estimated home value is $1,918,
‚ö° Speed: 198.89s

üìÑ Document: sample_bank_statement.pdf
‚ùì Query: How much was the last transaction?


llama_perf_context_print:        load time =  187035.07 ms
llama_perf_context_print: prompt eval time =  255601.58 ms /   813 tokens (  314.39 ms per token,     3.18 tokens per second)
llama_perf_context_print:        eval time =    8366.00 ms /    15 runs   (  557.73 ms per token,     1.79 tokens per second)
llama_perf_context_print:       total time =  263981.05 ms /   828 tokens
Llama.generate: 12 prefix-match hit, remaining 305 prompt tokens to eval


üìå Retrieved: Closing Costs (+)
Loan Amount (-)
Principal & Interest
Other Financing (P & I)
H...
üí¨ Answer: What type of loan was it? Who were the involved parties? Who paid the
‚ö° Speed: 264.11s

üìÑ Document: payslip_sample_image.pdf
‚ùì Query: What is the total net salary for this month?


llama_perf_context_print:        load time =  187035.07 ms
llama_perf_context_print: prompt eval time =   90535.27 ms /   305 tokens (  296.84 ms per token,     3.37 tokens per second)
llama_perf_context_print:        eval time =    8786.87 ms /    15 runs   (  585.79 ms per token,     1.71 tokens per second)
llama_perf_context_print:       total time =   99331.41 ms /   320 tokens
Llama.generate: 12 prefix-match hit, remaining 402 prompt tokens to eval


üìå Retrieved: Payslip 
Zoonodle  Inc 
21023 Pearson Point Road 
Gate Avenue 
 
 
Date of Joini...
üí¨ Answer: Answer: The total net salary for this month for Sally Harley,
‚ö° Speed: 99.36s

üìÑ Document: sample_contract.pdf
‚ùì Query: What are the penalties for late payments?


llama_perf_context_print:        load time =  187035.07 ms
llama_perf_context_print: prompt eval time =  119978.46 ms /   402 tokens (  298.45 ms per token,     3.35 tokens per second)
llama_perf_context_print:        eval time =    8779.51 ms /    15 runs   (  585.30 ms per token,     1.71 tokens per second)
llama_perf_context_print:       total time =  128766.71 ms /   417 tokens
Llama.generate: 12 prefix-match hit, remaining 287 prompt tokens to eval


üìå Retrieved: SERVICES
1.1 Service Provider agrees to provide Client with consulting services ...
üí¨ Answer: Answer: The late payments shall bear interest at the rate of
‚ö° Speed: 128.79s

üìÑ Document: LenderFeesWorksheetNew.pdf
‚ùì Query: What is the total estimated monthly payment?


llama_perf_context_print:        load time =  187035.07 ms
llama_perf_context_print: prompt eval time =   85066.68 ms /   287 tokens (  296.40 ms per token,     3.37 tokens per second)
llama_perf_context_print:        eval time =    8741.38 ms /    15 runs   (  582.76 ms per token,     1.72 tokens per second)
llama_perf_context_print:       total time =   93816.77 ms /   302 tokens


üìå Retrieved: Your actual rate, payment, and cost could be higher. Get an official Loan Estima...
üí¨ Answer: Answer: The total estimated monthly payment is indicated in the document as T
‚ö° Speed: 93.83s


In [None]:
""" # All model setups
model_configs = [
    {"name": "microsoft/phi-2", "type": "transformers"},
    {"name": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "type": "transformers"},
    {"name": "/content/mistral-7b-instruct-v0.2.Q4_K_M.gguf", "type": "llama-cpp"},
]

# Chunk sizes and overlaps to test
chunk_configs = [
    {"chunk_size": 100, "chunk_overlap": 0},
    {"chunk_size": 300, "chunk_overlap": 30},
    {"chunk_size": 500, "chunk_overlap": 100},
]
"""

' # All model setups\nmodel_configs = [\n    {"name": "microsoft/phi-2", "type": "transformers"},\n    {"name": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "type": "transformers"},\n    {"name": "/content/mistral-7b-instruct-v0.2.Q4_K_M.gguf", "type": "llama-cpp"},\n]\n\n# Chunk sizes and overlaps to test\nchunk_configs = [\n    {"chunk_size": 100, "chunk_overlap": 0},\n    {"chunk_size": 300, "chunk_overlap": 30},\n    {"chunk_size": 500, "chunk_overlap": 100},\n]\n'

In [None]:
""" results = []

for model_config in model_configs:
    for chunk_config in chunk_configs:
        run_rag(
            model_name=model_config["name"],
            model_type=model_config["type"],
            chunk_size=chunk_config["chunk_size"],
            chunk_overlap=chunk_config["chunk_overlap"]
        )
"""

' results = []\n\nfor model_config in model_configs:\n    for chunk_config in chunk_configs:\n        run_rag(\n            model_name=model_config["name"],\n            model_type=model_config["type"],\n            chunk_size=chunk_config["chunk_size"],\n            chunk_overlap=chunk_config["chunk_overlap"]\n        )\n'

In [None]:
df_results = pd.DataFrame(results)
df_results

Unnamed: 0,Model,Chunk Size,Chunk Overlap,Document,Query,Retrieved Context,Answer,Time (s)
0,microsoft/phi-2,100,0,appraisal.pdf,What is the estimated home value?,Describe common elements and recreational faci...,Answer this question based on the context:\nCo...,7.03
1,microsoft/phi-2,100,0,sample_bank_statement.pdf,How much was the last transaction?,Closing Costs (+)\nLoan Amount (-)\nPrincipal ...,Answer this question based on the context:\nCo...,0.82
2,microsoft/phi-2,100,0,payslip_sample_image.pdf,What is the total net salary for this month?,Payslip \nZoonodle Inc \n21023 Pearson Point ...,Answer this question based on the context:\nCo...,3.03
3,microsoft/phi-2,100,0,sample_contract.pdf,What are the penalties for late payments?,SERVICES\n1.1 Service Provider agrees to provi...,Answer this question based on the context:\nCo...,4.83
4,microsoft/phi-2,100,0,LenderFeesWorksheetNew.pdf,What is the total estimated monthly payment?,"Your actual rate, payment, and cost could be h...",Answer this question based on the context:\nCo...,5.69
5,microsoft/phi-2,300,30,appraisal.pdf,What is the estimated home value?,Describe common elements and recreational faci...,Answer this question based on the context:\nCo...,5.88
6,microsoft/phi-2,300,30,sample_bank_statement.pdf,How much was the last transaction?,Closing Costs (+)\nLoan Amount (-)\nPrincipal ...,Answer this question based on the context:\nCo...,4.92
7,microsoft/phi-2,300,30,payslip_sample_image.pdf,What is the total net salary for this month?,Payslip \nZoonodle Inc \n21023 Pearson Point ...,Answer this question based on the context:\nCo...,1.52
8,microsoft/phi-2,300,30,sample_contract.pdf,What are the penalties for late payments?,SERVICES\n1.1 Service Provider agrees to provi...,Answer this question based on the context:\nCo...,5.52
9,microsoft/phi-2,300,30,LenderFeesWorksheetNew.pdf,What is the total estimated monthly payment?,"Your actual rate, payment, and cost could be h...",Answer this question based on the context:\nCo...,4.8


In [None]:
from google.colab import sheets
sheet = sheets.InteractiveSheet(df=df_results)

https://docs.google.com/spreadsheets/d/1uPXMG_3qPJ4ETCBQUCCPk-Q899eHK6XRs38oIh0DJLk/edit#gid=0


In [None]:
# Save to CSV
df_results.to_csv("rag_results.csv", index=False)

# Download to your local machine
from google.colab import files
files.download("rag_results.csv")