# Imports

In [None]:
!pip install faiss-cpu
!pip install faiss-gpu-cu12

Collecting faiss-gpu-cu12
  Downloading faiss_gpu_cu12-1.11.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (11 kB)
Collecting numpy<2 (from faiss-gpu-cu12)
  Downloading numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.0/61.0 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
Downloading faiss_gpu_cu12-1.11.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (48.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m48.0/48.0 MB[0m [31m20.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (18.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m18.3/18.3 MB[0m [31m99.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: numpy, faiss-gpu-cu12
  Attempting uninstall: numpy
    Found existing installation: numpy 2.0.2
    Uninstalling numpy-

In [None]:
import json
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
from transformers import pipeline
from typing import List

# RAG Dataset

## JSON QA Pairs

In [None]:
# Parse the FAQ export first, then keep only the list stored under "questions".
with open("qa_pairs.json", "r", encoding="utf-8") as f:
    qa_payload = json.load(f)
qa_data = qa_payload["questions"]

In [None]:
# Peek at the first five records; each is a dict with 'question' and 'answer' keys.
qa_data[:5]

[{'question': 'What are the profit payment and profit rate for NUST Asaan Account Savings Account?',
  'answer': 'Profit Payment is Semi-Annually, Profit Rate is 19.00%.'},
 {'question': 'What are the profit payment and profit rate for Profit Loss Sharing Savings Account?',
  'answer': 'Profit Payment is Semi-Annually, Profit Rate is 19.00%.'},
 {'question': 'What are the profit payment and profit rate for Little Champs Account Savings Account?',
  'answer': 'Profit Payment is Semi-Annually, Profit Rate is 19.00%.'},
 {'question': 'What are the profit payment and profit rate for NUST Special Deposit Account (ASDA) Savings Account?',
  'answer': 'Profit Payment is Monthly/Semi-Annually, Profit Rate is 19.00%.'},
 {'question': 'What are the profit payment and profit rate for NUST Waqaar Account - Senior Citizen Savings Account?',
  'answer': 'Profit Payment is Monthly, Profit Rate is 19.00%.'}]

## Chunking

In [None]:
# One retrievable chunk per FAQ entry: the question and its answer kept together
# so a match on the question text brings the answer along with it.
documents = [
    "Q: {question}\nA: {answer}".format(question=qa["question"], answer=qa["answer"])
    for qa in qa_data
]

In [None]:
# Preview the first five formatted "Q: ...\nA: ..." chunks.
documents[:5]

['Q: What are the profit payment and profit rate for NUST Asaan Account Savings Account?\nA: Profit Payment is Semi-Annually, Profit Rate is 19.00%.',
 'Q: What are the profit payment and profit rate for Profit Loss Sharing Savings Account?\nA: Profit Payment is Semi-Annually, Profit Rate is 19.00%.',
 'Q: What are the profit payment and profit rate for Little Champs Account Savings Account?\nA: Profit Payment is Semi-Annually, Profit Rate is 19.00%.',
 'Q: What are the profit payment and profit rate for NUST Special Deposit Account (ASDA) Savings Account?\nA: Profit Payment is Monthly/Semi-Annually, Profit Rate is 19.00%.',
 'Q: What are the profit payment and profit rate for NUST Waqaar Account - Senior Citizen Savings Account?\nA: Profit Payment is Monthly, Profit Rate is 19.00%.']

In [None]:
# Number of chunks that will be embedded and added to the index.
len(documents)

327

# Embedding Model

In [None]:
# Sentence-embedding model shared by indexing (documents) and retrieval (queries);
# both sides must use the same model so their vectors are comparable.
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [None]:
# Embed every chunk in one call; convert_to_numpy=True yields a NumPy array,
# which is what the FAISS index below consumes.
embeddings = embedding_model.encode(documents, convert_to_numpy=True)

In [None]:
# Spot-check the first ten components of the first chunk's embedding.
embeddings[0][:10]

array([ 0.02455929,  0.08280013, -0.05625945, -0.04772451,  0.02212636,
        0.04610027,  0.01438646,  0.04925155,  0.0171691 ,  0.03099133],
      dtype=float32)

# FAISS Index

## Building

In [None]:
# Build a FAISS IndexFlatL2 sized to the embedding width and add all chunk vectors.
# Row i of `embeddings` corresponds to documents[i]; retrieve() relies on that
# alignment when it maps search results back to text.
dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(embeddings)

## Persisting

In [None]:
# Write the populated index to bank-data_index.faiss in the working directory.
faiss.write_index(index, "bank-data_index.faiss")

In [None]:
# Reload the index from bank-data_index.faiss, rebinding the global `index`.
# NOTE(review): only the vectors are stored in this file; `documents` must still be
# rebuilt in the same order for search results to map to the right text.
index = faiss.read_index("bank-data_index.faiss")

# Retrieval

## Retrieving Docs

In [None]:
def retrieve(query: str, top_k: int = 3) -> List[str]:
    """Return the indexed documents closest to ``query``.

    The query is embedded with the global ``embedding_model`` and looked up in the
    global FAISS ``index``; hits are mapped back to text through ``documents``.

    Args:
        query: Free-text user question.
        top_k: Maximum number of documents to return.

    Returns:
        Up to ``top_k`` entries of ``documents`` in the order ``index.search``
        returned them. Fewer entries are returned when the index holds fewer
        than ``top_k`` vectors, and an empty list when ``top_k`` is below 1.
    """
    if top_k < 1:
        return []

    # Request NumPy output explicitly, matching how the document embeddings were made.
    query_vec = embedding_model.encode([query], convert_to_numpy=True)
    _distances, indices = index.search(query_vec, top_k)

    # FAISS fills missing neighbours with -1. Without this filter, Python's negative
    # indexing would silently hand back the *last* document as if it were a hit.
    return [documents[i] for i in indices[0] if i >= 0]

## Instruct Model

In [None]:
# Load the generator model and its tokenizer from the Hugging Face Hub.
# device_map="auto" leaves device placement to the library; torch_dtype=torch.float16
# requests half-precision weights.
model_name = "Qwen/Qwen3-4B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", torch_dtype=torch.float16)

tokenizer_config.json:   0%|          | 0.00/9.68k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/2.78M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/1.67M [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


tokenizer.json:   0%|          | 0.00/11.4M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/726 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/32.8k [00:00<?, ?B/s]

Fetching 3 files:   0%|          | 0/3 [00:00<?, ?it/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model-00001-of-00003.safetensors:   0%|          | 0.00/3.96G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/99.6M [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/3.99G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

# Querying

## Prompting

In [None]:
def build_prompt(query: str, context_docs: List[str]) -> str:
    """Assemble the text prompt sent to the language model.

    The prompt has three sections: a system instruction followed by the retrieved
    FAQ context, the user's question, and an open assistant tag for the model to
    continue from.

    Args:
        query: The user's question, inserted verbatim.
        context_docs: Retrieved FAQ chunks; joined with ``---`` separator lines.

    Returns:
        The full prompt string, ending in ``<|assistant|>``.
    """
    context = "\n---\n".join(context_docs)
    # NOTE(review): these <|system|>/<|user|>/<|assistant|> and </s> markers are
    # hand-written. The notebook loads Qwen/Qwen3-4B, whose tokenizer presumably
    # ships its own chat template - consider tokenizer.apply_chat_template. TODO confirm.
    return (
        # The system tag previously had a stray extra '>' ("<|system|>>").
        "<|system|>\nYou are a helpful banking assistant. You are provided with NUST Bank FAQ relevant to the query of the user. Your job is to give direct answers according to the provided context. NOT from your own knowledge. Just give the answer in a user friendly manner nothing more\n"
        f"Context:\n\n{context}\n"
        "---\n</s>"
        f"<|user|>\n {query}\n</s>"
        "<|assistant|>"
    )

## Asking

In [None]:
def ask(query: str) -> None:
    """Answer ``query`` from retrieved FAQ context and print the model's reply.

    Retrieves context with ``retrieve``, formats it with ``build_prompt``, samples a
    continuation of at most 200 new tokens from the global ``model``, and prints
    only the newly generated text under an ``Answer:`` heading.

    Args:
        query: The user's question.
    """
    context_docs = retrieve(query)
    prompt = build_prompt(query, context_docs)

    # Keep the whole tokenizer output (not just input_ids) so generate() also
    # receives the attention mask.
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True).to(model.device)
    output_ids = model.generate(
        **inputs,
        max_new_tokens=200,
        do_sample=True,  # temperature only has an effect when sampling is enabled
        temperature=0.7,
    )

    # generate() returns prompt tokens followed by the continuation. Drop the prompt
    # so the system text and retrieved context are not echoed back as the answer.
    prompt_len = inputs["input_ids"].shape[-1]
    answer = tokenizer.decode(output_ids[0][prompt_len:], skip_special_tokens=True).strip()
    print(f"Answer:\n{answer}")

In [None]:
# Smoke-test the full retrieve -> prompt -> generate pipeline with a sample question.
ask("What is the profit rate for NUST Maximiser Savings Account?")

Answer:
<|system|>>
You are a helpful banking assistant. You are provided with NUST Bank FAQ relevant to the query of the user. Your job is to give direct answers according to the provided context. NOT from your own knowledge. Just give the answer in a user friendly manner nothing more
Context:

Q: What are the profit payment and profit rate for NUST Maximiser Savings Account?
A: When Profit Payment is Monthly, Profit Rate is 19.00%. When Profit Payment is Quarterly, Profit Rate is 19.05%. When Profit Payment is Semi-Annually, Profit Rate is 19.10%. When Profit Payment is Annually, Profit Rate is 19.15%.
---
Q: What are the profit payment and profit rate for NUST Asaan Account Savings Account?
A: Profit Payment is Semi-Annually, Profit Rate is 19.00%.
---
Q: What are the main Features of NUST Maximiser Savings Account? for NUST Maximiser Account
A: -  Attractive profit payment on monthly, quarterly, semi-annual & annual basis: - Minimum monthly average balance requirement Rs. 100,000 -