In [2]:
import pandas as pd
import os
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from groq import Groq

class SecureLlamaRAG:
    def __init__(self, csv_path, api_key):
        self.df = pd.read_csv(csv_path)
        self.vectorizer = TfidfVectorizer()
        
        # Ingesting the data as text chunks
        self.chunks = self.df.apply(lambda row: 
            f"Customer: {row['customer name']} | "
            f"Account: {row['account number']} | "
            f"Phone: {row['phone number']} | "
            f"Date: {row['transaction date']} | "
            f"Amount: {row['transaction amount']} | "
            f"Balance: {row['balance']} | "
            f"Merchant: {row['merchant']}", axis=1).tolist()
        
        self.tfidf_matrix = self.vectorizer.fit_transform(self.chunks)
        self.client = Groq(api_key=api_key)

    def retrieve_context(self, query, k=2):
        query_vec = self.vectorizer.transform([query])
        similarities = cosine_similarity(query_vec, self.tfidf_matrix).flatten()
        top_indices = similarities.argsort()[-k:][::-1]
        results = [self.chunks[i] for i in top_indices if similarities[i] > 0]
        return "\n".join(results) if results else "No matching records found."

    def secure_query(self, user_query):
        context = self.retrieve_context(user_query)
        
        # SECURITY PROMPT ENGINEERING
        system_prompt = """
        You are a Secure Banking AI. You have access to bank records in the context provided.
        
        STRICT PRIVACY RULES:
        1. account number: NEVER reveal full account numbers. Mask them (e.g., ****4321).
        2. phone number: NEVER reveal phone numbers. State: "Access to contact details is restricted."
        3. transaction date:I DON'T WANT TO  SHOW THE INFORMATION ABOUT MY BANK BALANCE DATA
        4. transaction amount: transaction amount is restricted not able to show the content
        5. balance :It is restricted to show the how much balance in this account 
        """

        # CHANGED MODEL NAME TO LLAMA 3.3
        chat_completion = self.client.chat.completions.create(
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": f"Context:\n{context}\n\nQuestion: {user_query}"}
            ],
            model="llama-3.3-70b-versatile", # This is the active replacement for Llama 3 70B
            temperature=0.3,
        )
        return chat_completion.choices[0].message.content

if __name__ == "__main__":
    MY_GROQ_KEY = "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" 
    MY_FILE_PATH = r"C:\Users\KIRAN GHANTASALA\Downloads\sample_transactions.csv"
    
    if os.path.exists(MY_FILE_PATH):
        rag = SecureLlamaRAG(MY_FILE_PATH, MY_GROQ_KEY)
        print("--- Llama 3.3 Secure RAG Active ---\n")
        
        # Example Query
        test_q = "What is the balance for John Doe?"
        print(f"Query: {test_q}")
        print("Response:", rag.secure_query(test_q))
    else:
        print(f"ERROR: File not found at {MY_FILE_PATH}")

question = "What is the John Doe transaction amount?"
response = rag.secure_query(question)
print(f"Query: {question}")
print(f"Response: {response}")

KeyError: 'customer name'