In [1]:
import torch
import json
import faiss
import numpy as np
from pathlib import Path
from peft import PeftModel
from transformers import AutoTokenizer, AutoModelForCausalLM
from sentence_transformers import SentenceTransformer




In [2]:
# All artifacts used by this notebook sit under one project folder, so the
# shared prefix is spelled once and each location is derived from it.
project_root = r"E:\PGDBA course\IIT KGP\Project"

base_model_path = Path(project_root + r"\Model")   # or any other HF base
lora_adapter_path = Path(project_root + r"\lora_adapter-20250514T114804Z-1-001\lora_adapter")
faiss_index_path = Path(project_root + r"\tata_vector_improved.index")
metadata_path = Path(project_root + r"\tata_metadata_improved.json")

In [3]:
# Options for loading the base causal LM: half-precision weights, automatic
# device placement, and permission to run custom model code from the checkpoint.
base_model_load_kwargs = dict(
    torch_dtype=torch.float16,
    device_map="auto",
    trust_remote_code=True,
)
base_model = AutoModelForCausalLM.from_pretrained(base_model_path, **base_model_load_kwargs)

# The tokenizer comes from the same checkpoint directory as the base model.
tokenizer = AutoTokenizer.from_pretrained(base_model_path, use_fast=True)
# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token

In [4]:
# Attach the LoRA adapter stored at lora_adapter_path to the base model.
model = PeftModel.from_pretrained(model=base_model, model_id=lora_adapter_path)
# Put the wrapped model in evaluation mode; as the cell's last expression,
# the returned module is also what the notebook displays.
model.eval()

The installed version of bitsandbytes was compiled without GPU support. 8-bit optimizers, 8-bit multiplication, and GPU quantization are unavailable.


PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): LlamaForCausalLM(
      (model): LlamaModel(
        (embed_tokens): Embedding(32000, 2048)
        (layers): ModuleList(
          (0-21): 22 x LlamaDecoderLayer(
            (self_attn): LlamaAttention(
              (q_proj): lora.Linear(
                (base_layer): Linear(in_features=2048, out_features=2048, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.05, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=2048, out_features=8, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=8, out_features=2048, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
              (k_proj): Linear(in_feat

In [5]:
# Vector index used for retrieval (faiss.read_index is given a plain string path).
index = faiss.read_index(str(faiss_index_path))

# Metadata that accompanies the index, parsed from a UTF-8 JSON file.
metadata = json.loads(metadata_path.read_text(encoding="utf-8"))

# Sentence-embedding model used to encode queries.
embedder = SentenceTransformer("BAAI/bge-base-en-v1.5")

In [11]:
def answer_query(user_query, top_k=3, max_tokens=200):
    """Answer a buyer question with retrieved context and the fine-tuned model.

    The query is embedded with the module-level ``embedder``, the ``top_k``
    nearest entries are looked up in the FAISS ``index``, their ``"text"``
    fields from ``metadata`` are placed in a prompt, and ``model`` generates
    the dealer's reply.

    Args:
        user_query: The buyer's question as a string.
        top_k: Number of neighbours requested from the FAISS index.
        max_tokens: Upper bound on newly generated tokens.

    Returns:
        The generated dealer reply as a string. Only the newly generated
        tokens are decoded; the prompt (context and question) is not echoed
        back. Generation uses sampling, so repeated calls can differ.

    Raises:
        IndexError: If the index returns a non-negative id that has no
            matching entry in ``metadata``.
    """
    # FAISS expects a C-contiguous float32 matrix of shape (n_queries, dim).
    query_embedding = np.ascontiguousarray(
        embedder.encode([user_query]), dtype=np.float32
    )
    _, neighbor_ids = index.search(query_embedding, k=top_k)

    # FAISS fills missing neighbours with -1 when fewer than top_k vectors are
    # available; metadata[-1] would silently pull in the last chunk, so skip them.
    context_chunks = [
        metadata[int(chunk_id)]["text"]
        for chunk_id in neighbor_ids[0]
        if int(chunk_id) >= 0
    ]
    context = "\n\n".join(context_chunks)

    prompt = f"""### Context from Tata docs:
{context}

### Buyer:
{user_query}

### Dealer (persuasive):"""

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    # Remember the prompt length so the echoed prompt can be removed below.
    prompt_len = inputs["input_ids"].shape[-1]
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_tokens,
            do_sample=True,
            temperature=0.8,
            top_p=0.95,
            repetition_penalty=1.1
        )
    # For a causal LM, generate() returns the prompt tokens followed by the
    # continuation; decode only the continuation.
    generated_ids = outputs[0][prompt_len:]
    return tokenizer.decode(generated_ids, skip_special_tokens=True).strip()


In [None]:
if __name__ == "__main__":
    # Demo: ask one sample buyer question and print the generated reply.
    user_input = "Does Tata Safari come with ventilated seats?"

    # The base model was loaded with device_map="auto", so it is already
    # placed on a device, and answer_query sends its inputs to model.device.
    # Calling model.to(...) here is redundant and may warn or fail for a
    # model dispatched that way, so the model is left where it is.
    response = answer_query(user_input)
    print("\n🗣️ Dealer:", response)