In [7]:
import os
import pandas as pd
from langchain_huggingface import HuggingFaceEmbeddings, HuggingFaceEndpoint
from langchain_chroma import Chroma
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain

# ==========================================
# 1. Setup & Configuration
# ==========================================
# ðŸ›‘ PASTE YOUR TOKEN BELOW (Keep the quotes "")
HF_TOKEN = "PASTE_YOUR_HF_TOKEN_HERE" 

os.environ["HUGGINGFACEHUB_API_TOKEN"] = HF_TOKEN

VECTOR_DB_PATH = '../vector_store'

# ==========================================
# 2. Load the Vector Database
# ==========================================
print("Loading Vector Store...")
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

vector_store = Chroma(
    persist_directory=VECTOR_DB_PATH,
    embedding_function=embedding_model
)

# Create the Retriever (The tool that searches for files)
# k=5 means "Find the top 5 most relevant complaints"
retriever = vector_store.as_retriever(search_kwargs={"k": 5})

# Test the Retriever quickly
print("\nTesting retrieval for 'hidden fees'...")
docs = retriever.invoke("hidden fees")
print(f"Found {len(docs)} relevant documents.")
print(f"Sample snippet: {docs[0].page_content[:100]}...")

# ==========================================
# 3. Initialize the LLM (The "Mouth")
# ==========================================
print("\nConnecting to Mistral-7B via Hugging Face...")

# We use Mistral-7B-Instruct. It's fast, smart, and free via API.
llm = HuggingFaceEndpoint(
    repo_id="mistralai/Mistral-7B-Instruct-v0.3",
    task="text-generation",
    max_new_tokens=512,
    do_sample=False,
    temperature=0.1,  # Low temperature = more factual, less creative
)

# ==========================================
# 4. Define the Prompt Template
# ==========================================
# This tells the AI how to behave.
system_prompt = (
    "You are a senior financial analyst at CrediTrust. "
    "Use the following pieces of retrieved context to answer the question. "
    "If you don't know the answer, say that you don't know. "
    "Keep the answer concise and professional.\n\n"
    "{context}"
)

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)

# ==========================================
# 5. Build the RAG Chain
# ==========================================
# This glues everything together: Retriever -> Prompt -> LLM
question_answer_chain = create_stuff_documents_chain(llm, prompt)
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

# ==========================================
# 6. Evaluation Loop
# ==========================================
print("\nRunning Evaluation Questions...")

test_questions = [
    "What are the main complaints regarding Credit Cards?",
    "Why are customers upset about Personal Loans?",
    "Have there been issues with money transfers?",
    "What kind of fraud is being reported?",
    "Are there any billing disputes mentioned?"
]

results = []

for q in test_questions:
    print(f"\nAsking: {q}")
    # Run the chain
    response = rag_chain.invoke({"input": q})
    
    # Store result
    answer = response["answer"]
    
    # Get the sources (evidence) used
    sources = [doc.metadata['product'] for doc in response["context"]]
    
    results.append({
        "Question": q,
        "Answer": answer,
        "Sources": list(set(sources)) # Unique sources
    })
    
    print(f"Answer: {answer.strip()}")
    print("-" * 50)

# ==========================================
# 7. Save Results
# ==========================================
results_df = pd.DataFrame(results)
results_df.to_csv("../data/processed/rag_evaluation_results.csv", index=False)
print("\nEvaluation complete! Results saved.")

ModuleNotFoundError: No module named 'langchain.chains'

In [4]:
import sys
print(sys.executable)
import langchain
print(langchain.__version__)

c:\Users\Maireg\Documents\GitHub\creditrust-complaint-rag\.venv\Scripts\python.exe
1.2.0
