In [5]:
from huggingface_hub import login
# Authenticate this session with the Hugging Face Hub. In a notebook the call
# renders the interactive login widget that appears in this cell's output.
login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
# Imports

import os
import pickle

import faiss
import numpy as np
from huggingface_hub import InferenceClient
from sentence_transformers import SentenceTransformer


#  Config paths

FAISS_INDEX_PATH = "../vector_store/faiss_recursive_sample.index"
METADATA_PATH = "../vector_store/recursive_sample_metadata.pkl"


#  Load index and metadata

print("🔄 Loading FAISS index...")
index = faiss.read_index(FAISS_INDEX_PATH)

print("🔄 Loading metadata...")
# CAUTION: pickle.load can execute arbitrary code embedded in the file.
# Only load metadata files that were produced by this project.
with open(METADATA_PATH, "rb") as f:
    metadata = pickle.load(f)

print(f"✅ Loaded index with {index.ntotal} vectors.")
print(f"✅ Loaded metadata with {len(metadata)} records.")

# Retrieval maps a FAISS row id straight onto metadata[row_id], so the two
# stores must have the same length; otherwise look-ups can fail or return
# the wrong record. Warn (rather than raise) so exploration can continue.
if index.ntotal != len(metadata):
    print(
        f"⚠️ Index/metadata size mismatch: {index.ntotal} vectors vs "
        f"{len(metadata)} records — retrieved chunks may be misaligned."
    )


#  Load embedding model

# NOTE(review): presumably the same model the FAISS index was built with;
# a different model would make the similarity search meaningless — confirm.
embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

#  Set up Hugging Face Inference API for Mistral

# InferenceClient is already imported in the import block at the top of this
# cell, so it is not re-imported here.
# model: the hosted Mistral-7B instruct checkpoint used for generation.
# provider="auto": leave the choice of inference provider to huggingface_hub.
client = InferenceClient(
    model="mistralai/Mistral-7B-Instruct-v0.2",
    provider="auto",
)


#  Define RAG answer function

def generate_answer(query, top_k=5):
    """Answer ``query`` using retrieval-augmented generation.

    The query is embedded with the module-level ``embedder``, the nearest
    vectors are looked up in the module-level FAISS ``index``, the matching
    ``metadata`` records are joined into a context block, and the resulting
    prompt is sent to the module-level inference ``client``.

    Args:
        query: Natural-language question to answer.
        top_k: Maximum number of chunks to retrieve from the index.

    Returns:
        A ``(response, retrieved_chunks)`` tuple: the value returned by
        ``client.text_generation`` and the list of metadata records used as
        context, in the order the index search returned them. The list may
        hold fewer than ``top_k`` records when the index cannot supply that
        many neighbours.
    """
    # Embed the query; FAISS expects a float32 matrix.
    query_embedding = np.asarray(embedder.encode([query]), dtype="float32")

    # Perform similarity search (the distances are not needed here).
    _, neighbor_ids = index.search(query_embedding, top_k)

    # FAISS pads the id row with -1 when it finds fewer than top_k neighbours.
    # Indexing metadata with -1 would silently return the LAST record and
    # feed an unrelated chunk to the model, so those slots are dropped.
    retrieved_chunks = [metadata[i] for i in neighbor_ids[0] if i >= 0]

    context_text = "\n\n".join(
        chunk.get("chunk", "N/A") for chunk in retrieved_chunks
    )

    # Prompt template
    prompt = f"""
You are a financial analyst assistant for CrediTrust. Your task is to answer questions about customer complaints.
Use the following retrieved complaint excerpts to formulate your answer.
If the context does not contain the answer, say you don't have enough information.

Context:
{context_text}

Question:
{query}

Answer:
"""

    # Run Mistral on Hugging Face Inference API
    response = client.text_generation(
        prompt,
        max_new_tokens=256,
        temperature=0.2,
    )

    return response, retrieved_chunks


# Test

question = "Why are people unhappy with their credit cards?"
answer, retrieved_sources = generate_answer(question)

print("\n✅ AI Answer:")
print(answer)

print("\n🔍 Retrieved Chunks (first 2 shown):")
for i, chunk in enumerate(retrieved_sources[:2]):
    # Use the same tolerant lookup as generate_answer (.get with "N/A") so a
    # record without a "chunk" key is previewed instead of raising KeyError.
    preview = chunk.get("chunk", "N/A")[:200]  # Show first 200 chars
    print(f"#{i+1}: {preview}...")
