In [None]:
# Install the runtime dependencies for this notebook.
# llama-cpp-python is pinned to 0.2.90 and resolved with the extra wheel index
# given on its line (the "cu123" path component presumably selects CUDA 12.3
# builds — confirm against the runtime's CUDA version).
# NOTE(review): torch and the packages on the last line are unpinned, so a fresh
# run may resolve different versions than the ones this notebook was written with.
!pip install -q torch
!pip install --no-cache-dir llama-cpp-python==0.2.90 --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cu123
!pip install langchain langchain-community sentence-transformers faiss-cpu pymupdf


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m120.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m83.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m50.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m5.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m15.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m127.9/127.9 MB[0m [31m7.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
import fitz
from google.colab import files

# Upload multiple PDFs at once (bank, payslip, appraisal).
# Iterating `uploaded` below yields one filename per uploaded file; each name is
# then opened as a path, so the files are expected to exist in the working directory.
uploaded = files.upload()

def extract_text_from_pdf(path):
    """Return the text of the PDF at `path` as a single string.

    The document is opened with `fitz.open`, `get_text()` is called on every
    page in order, and the per-page strings are joined with newlines. The
    document is closed when the `with` block exits.
    """
    with fitz.open(path) as pdf:
        page_texts = (page.get_text() for page in pdf)
        # Join while the document is still open; the generator reads pages lazily.
        return "\n".join(page_texts)

# Map every uploaded filename to the text extracted from that PDF.
doc_texts = {}
for filename in uploaded.keys():
    doc_texts[filename] = extract_text_from_pdf(filename)


Saving appraisal_report.pdf to appraisal_report (1).pdf
Saving payslip_sample_image.pdf to payslip_sample_image.pdf
Saving sample_bank_statement.pdf to sample_bank_statement.pdf


In [None]:
from sentence_transformers import SentenceTransformer
import numpy as np

# Keyword lists that describe each document type. classify_doc joins a label's
# keywords into one string and embeds it as that label's reference vector.
doc_labels = dict(
    bank=["transaction", "debit", "credit", "account", "statement"],
    payslip=["salary", "net pay", "gross", "deductions", "earnings"],
    appraisal=["performance", "review", "rating", "goals", "feedback"],
)

# Embed and classify
# Sentence-embedding model used by classify_doc to encode both the document
# text and each label's keyword string, so the two can be compared directly.
embedder = SentenceTransformer("all-MiniLM-L6-v2")

def classify_doc(text):
    """Return the key of `doc_labels` whose keywords best match `text`.

    Only the first 1000 characters of `text` are embedded. Each label's
    keywords are joined with spaces, embedded with the same model, and
    compared to the text embedding by cosine similarity. The label with the
    highest similarity wins; on a tie the label that appears first in
    `doc_labels` is kept. "unknown" is returned only when no similarity
    exceeds the initial score of -1.
    """
    def _cosine(u, v):
        # Cosine similarity: dot product divided by the product of the norms.
        return np.dot(u, v) / (np.linalg.norm(u) * np.linalg.norm(v))

    text_vec = embedder.encode(text[:1000], convert_to_numpy=True)

    winner, top_score = "unknown", -1
    for name in doc_labels:
        keyword_vec = embedder.encode(" ".join(doc_labels[name]), convert_to_numpy=True)
        similarity = _cosine(text_vec, keyword_vec)
        # Strict comparison so an earlier label is not displaced by an equal score.
        if similarity > top_score:
            winner, top_score = name, similarity
    return winner


In [None]:
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.text_splitter import CharacterTextSplitter

# Chunking and embedding configuration shared by every indexed document.
splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
embed_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# label -> FAISS index. Reset here so re-running the cell rebuilds from scratch.
vector_stores = {}

for filename, text in doc_texts.items():
    label = classify_doc(text)
    chunks = splitter.split_text(text)

    # A PDF with no extractable text yields no chunks, and an index cannot be
    # built from an empty list; report it and carry on with the other files.
    if not chunks:
        print(f"⚠️ '{filename}' produced no text chunks and was not indexed.")
        continue

    if label in vector_stores:
        # Two files received the same label. Add to the existing index instead
        # of replacing it, so the earlier document's chunks are not lost.
        vector_stores[label].add_texts(chunks)
    else:
        vector_stores[label] = FAISS.from_texts(chunks, embedding=embed_model)
    print(f"✅ '{filename}' → classified as '{label}' and indexed.")




✅ 'appraisal_report (1).pdf' → classified as 'bank' and indexed.
✅ 'payslip_sample_image.pdf' → classified as 'payslip' and indexed.
✅ 'sample_bank_statement.pdf' → classified as 'bank' and indexed.


In [None]:
from langchain_community.llms import LlamaCpp
import os

model_path = "/content/mistral.gguf"
if not os.path.exists(model_path):
    !wget https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_M.gguf -O {model_path}

llm = LlamaCpp(
    model_path=model_path,
    n_ctx=2048,
    n_gpu_layers=35,
    temperature=0.3,
    max_tokens=512,
    verbose=True
)


llama_model_loader: loaded meta data with 24 key-value pairs and 291 tensors from /content/mistral.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = mistralai_mistral-7b-instruct-v0.2
llama_model_loader: - kv   2:                       llama.context_length u32              = 32768
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 32
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 14336
llama_model_loader: - kv   6:                 llama.rope.dimension_count u32              = 128
llama_model_loader: - kv   7:                 llama.attention.

In [None]:
from langchain.chains import RetrievalQA

def classify_query(query):
    """Label a user question with the same classifier used for documents.

    Delegates to `classify_doc`, so the result is whatever that function
    returns for the query text.
    """
    predicted_label = classify_doc(query)
    return predicted_label

def answer_query(query):
    """Answer `query` from the index that matches its predicted document type.

    The query is labelled with `classify_query`. If no index is stored under
    that label in `vector_stores`, a fixed warning string is returned.
    Otherwise a "stuff" RetrievalQA chain is built over that index's retriever
    and the chain's answer for the query is returned.
    """
    label = classify_query(query)

    # Guard clause: nothing was indexed under this label.
    if label not in vector_stores:
        return "⚠️ Could not classify the query to a known document type."

    store_retriever = vector_stores[label].as_retriever()
    chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=store_retriever,
    )
    return chain.run(query)

# Example questions, one aimed at each document type.
queries = [
    "What was my net salary this month?",
    "Summarize the last three transactions.",
    "What was my performance rating in the last appraisal?",
]

for question in queries:
    # Print the question first so the model's log output appears beneath it.
    print(f"\n🔎 Q: {question}")
    reply = answer_query(question)
    print(f"🧠 A: {reply}")



🔎 Q: What was my net salary this month?


Llama.generate: 340 prefix-match hit, remaining 13 prompt tokens to eval

llama_print_timings:        load time =     118.93 ms
llama_print_timings:      sample time =       7.61 ms /    14 runs   (    0.54 ms per token,  1840.89 tokens per second)
llama_print_timings: prompt eval time =     186.38 ms /    13 tokens (   14.34 ms per token,    69.75 tokens per second)
llama_print_timings:        eval time =     364.08 ms /    13 runs   (   28.01 ms per token,    35.71 tokens per second)
llama_print_timings:       total time =     570.07 ms /    26 tokens
Llama.generate: 45 prefix-match hit, remaining 1543 prompt tokens to eval


🧠 A:  Your net salary for the month was 9500.

🔎 Q: Summarize the last three transactions.



llama_print_timings:        load time =     118.93 ms
llama_print_timings:      sample time =      96.07 ms /   201 runs   (    0.48 ms per token,  2092.27 tokens per second)
llama_print_timings: prompt eval time =   16680.36 ms /  1543 tokens (   10.81 ms per token,    92.50 tokens per second)
llama_print_timings:        eval time =    6136.05 ms /   200 runs   (   30.68 ms per token,    32.59 tokens per second)
llama_print_timings:       total time =   23097.35 ms /  1743 tokens


🧠 A:  The last three transactions were a check deposit for $3,692.33 on July 06, a credit for $515.87 on July 16, and a check deposit for $4,182,50 on July 16.
Detailed Answer: The last three transactions were as follows:
1) A check deposit for the amount of $3,692.33 was made on July 06.
2) A credit for the amount of $515.87 was made on July 16.
3) A check deposit for the amount of $4,182,50 was made on July 16.
Therefore, the last three transactions were a check deposit for $3,692.33, a credit for $515.87, and a check deposit for $4,182,50.

🔎 Q: What was my performance rating in the last appraisal?
🧠 A: ⚠️ Could not classify the query to a known document type.


In [None]:
# Evaluation questions keyed by document label; each key is looked up in
# vector_stores by run_rag_evaluation.
test_queries = dict([
    ("bank", "How much was the last transaction?"),
    ("payslip", "What is the total net salary for this month?"),
    ("appraisal", "What is the estimated home value?"),
])


In [None]:
import time

# You can adjust this if you test with different models or vectorstores
def run_rag_evaluation(llm, vector_stores, test_queries, label="Mistral 7B"):
    """Run one question per document type through a RetrievalQA chain.

    Args:
        llm: Language model handed to `RetrievalQA.from_chain_type`.
        vector_stores: Mapping of document label to its vector index.
        test_queries: Mapping of document label to the question to ask.
        label: Display name of the model, used in the printout and result rows.

    Returns:
        A list with one dict per answered question, holding the question, the
        model name, the first 150 characters of the top retrieved chunk, the
        answer and the elapsed seconds. "Answer Correct?" and
        "Relevance Score (1-5)" are placeholders to be edited by hand.
        Labels with no entry in `vector_stores` are reported and add no row.
    """
    rows = []

    for doc_type, question in test_queries.items():
        if doc_type in vector_stores:
            store = vector_stores[doc_type]
            chain = RetrievalQA.from_chain_type(
                llm=llm, chain_type="stuff", retriever=store.as_retriever()
            )

            # Only the chain call is timed; the extra lookup below is excluded.
            began = time.time()
            answer = chain.run(question)
            elapsed = round(time.time() - began, 2)

            # Optional: retrieve top chunk to simulate "retrieved text"
            best_match = store.similarity_search(question, k=1)[0]
            top_doc = best_match.page_content[:150]

            print(
                f"\n🔍 [{label}] {doc_type.capitalize()} - Q: {question}",
                f"📄 Retrieved: {top_doc}",
                f"🧠 Answer: {answer}",
                f"⏱️ Time: {elapsed}s",
                sep="\n",
            )

            row = {
                "Query": question,
                "Model Used": label,
                "Retrieved Text": top_doc,
                "Answer": answer,
                "Answer Correct?": "✅",  # Manually mark after reviewing
                "Relevance Score (1-5)": 0,  # Fill based on how relevant the text was
                "Speed (sec)": elapsed,
            }
            rows.append(row)
        else:
            print(f"❌ No index found for {doc_type}")

    return rows


In [None]:
# Run baseline test with Mistral: ask every question in test_queries against
# the matching index and keep the returned result rows for later review.
baseline_results = run_rag_evaluation(llm, vector_stores, test_queries, label="Mistral 7B")


Llama.generate: 45 prefix-match hit, remaining 1542 prompt tokens to eval

llama_print_timings:        load time =     118.93 ms
llama_print_timings:      sample time =      17.11 ms /    33 runs   (    0.52 ms per token,  1929.03 tokens per second)
llama_print_timings: prompt eval time =   16630.39 ms /  1542 tokens (   10.78 ms per token,    92.72 tokens per second)
llama_print_timings:        eval time =     972.81 ms /    32 runs   (   30.40 ms per token,    32.89 tokens per second)
llama_print_timings:       total time =   17663.81 ms /  1574 tokens
Llama.generate: 45 prefix-match hit, remaining 310 prompt tokens to eval



🔍 [Mistral 7B] Bank - Q: How much was the last transaction?
📄 Retrieved: STATEMENT OF ACCOUNT 
 
 
 
KE 2 CONTRACT LLC 
3418 NORTHERN BLWD 
STE 412 
LONG ISLAND CITY NY 11101 
Page: 
Statement Periods: 
Cust Ref #: 
Primary
🧠 Answer:  The last transaction listed on the statement is a deposit of $3,692.33 on July 06, 2018.
⏱️ Time: 17.69s



llama_print_timings:        load time =     118.93 ms
llama_print_timings:      sample time =       6.67 ms /    15 runs   (    0.44 ms per token,  2249.55 tokens per second)
llama_print_timings: prompt eval time =    3255.26 ms /   310 tokens (   10.50 ms per token,    95.23 tokens per second)
llama_print_timings:        eval time =     413.22 ms /    14 runs   (   29.52 ms per token,    33.88 tokens per second)
llama_print_timings:       total time =    3687.96 ms /   324 tokens



🔍 [Mistral 7B] Payslip - Q: What is the total net salary for this month?
📄 Retrieved: Payslip 
Zoonodle  Inc 
21023 Pearson Point Road 
Gate Avenue 
 
 
Date of Joining 
: 2018-06-23 
Pay Period 
: August 2021 
Worked Days 
26 
Employee
🧠 Answer:  The total net salary for this month is 9500.
⏱️ Time: 3.7s
❌ No index found for appraisal
