In [1]:
import faiss
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer
from transformers import pipeline

In [2]:
index = faiss.read_index("../vector_store/sample_faiss.index")
print("Total vectors in index:", index.ntotal)
metadata_df = pd.read_parquet("../vector_store/sample_metadata.parquet")
metadata_df.head()

Total vectors in index: 39577


Unnamed: 0,text,product_category,complaint_id,chunk_index,total_chunks
0,my soon to be ex wife took out several credit ...,Credit Cards,7471816,0,1
1,after informing bank of america of my identity...,Credit Cards,3952712,0,1
2,i am deeply troubled by the inclusion of this ...,Credit Cards,9669839,0,1
3,on xx xx xxxx i reached out to discover bank t...,Credit Cards,7420081,0,23
4,positive balance since i had no balance due on...,Credit Cards,7420081,1,23


In [3]:
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

In [15]:
def retrieve_chunks(query, k=5):
    query_embedding = embedding_model.encode([query]).astype("float32")
    distances, indices = index.search(query_embedding, k)
    retrieved_rows = metadata_df.iloc[indices[0]] 
    return retrieved_rows 

In [16]:
query = "Why are customers unhappy with credit cards?"
retrieved_chunks = retrieve_chunks(query)
retrieved_chunks[["text", "product_category"]].head()

Unnamed: 0,text,product_category
13914,cards who have all been wonderful by the way w...,Credit Cards
3257,my credit again on this bank be aware of store...,Credit Cards
3255,scheme as a good credit card company as i ve w...,Credit Cards
14971,about charges at the end of the year so they c...,Credit Cards
9798,card as they are unwilling to protect their cu...,Credit Cards


In [17]:
from transformers import pipeline
llm = pipeline(
    "text-generation",
    model="sshleifer/tiny-gpt2",
    max_new_tokens=100
)

Device set to use cpu


In [18]:
from transformers import pipeline
llm = pipeline(
    "text-generation",
    model="sshleifer/tiny-gpt2",
    max_new_tokens=120,
    do_sample=True,
    temperature=0.7
)

Device set to use cpu


In [21]:
def generate_answer(query, retrieved_docs):
    if hasattr(retrieved_docs, "columns"):
        texts = retrieved_docs["text"].tolist()
    else:
        texts = retrieved_docs 
    context = "\n\n".join(texts[:3])
    prompt = f"""
You are a financial analyst assistant for CrediTrust.
Context:
{context}
Question:
{query}
Answer:
"""
    result = llm(prompt)[0]["generated_text"]
    return result

In [22]:
query = "Why are customers unhappy with credit card services?"
docs = retrieve_chunks(query, k=5) 
response = generate_answer(query, docs)
print(response)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.



You are a financial analyst assistant for CrediTrust.
Context:
the same thing i am not sure if this is a dead stop to the service i was asking for or if its a form of prejudice being refused a service i have other credit cards with high limits xxxx charge cards platinum and gold and have never missed a payment having a credit line of only is more harmful then helpful if i should spend then it would be reported that i have credit available to me which will probably give an adverse effect to my credit rating

cards who have all been wonderful by the way who were so quick to get me into lower interest rates payment plan so that it doesn t affect me and my credit instead i get to waste hours of my time on the phone with people who can t help me and i m getting harassed all hours of the day with their phone calls i m like didn t i in good faith do my part originally didn t they not hold up their end of this like why offer a chat and cs when you are going to just lie or not take care of you