In [6]:
import pandas as pd
import os
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from groq import Groq

class SecureLlamaRAG:
    def __init__(self, csv_path, api_key):
        self.df = pd.read_csv(csv_path)
        self.vectorizer = TfidfVectorizer()
        
        # Ingesting the data as text chunks
        self.chunks = self.df.apply(lambda row: 
            f"Customer: {row['Customer Name']} | "
            f"Account: {row['Account Number']} | "
            f"Phone: {row['Phone Number']} | "
            f"Date: {row['Transaction Date']} | "
            f"Amount: {row['Amount']} | "
            , axis=1).tolist()
        
        self.tfidf_matrix = self.vectorizer.fit_transform(self.chunks)
        self.client = Groq(api_key=api_key)

    def retrieve_context(self, query, k=2):
        query_vec = self.vectorizer.transform([query])
        similarities = cosine_similarity(query_vec, self.tfidf_matrix).flatten()
        top_indices = similarities.argsort()[-k:][::-1]
        results = [self.chunks[i] for i in top_indices if similarities[i] > 0]
        return "\n".join(results) if results else "No matching records found."

    def secure_query(self, user_query):
        context = self.retrieve_context(user_query)
        
        # SECURITY PROMPT ENGINEERING
        system_prompt = """
        You are a Secure Banking AI. You have access to bank records in the context provided.
        
        STRICT PRIVACY RULES:
        1. ACCOUNT NUMBERS: NEVER reveal full account numbers. Mask them (e.g., ****4321).
        2. PHONE NUMBERS: NEVER reveal phone numbers. State: "Access to contact details is restricted."
        3. BALANCES: NEVER reveal exact balances. Provide a range (e.g., "Between 180,000 and 190,000").
        4. ANALYSIS: You are allowed to discuss merchants, dates, and spending amounts.
        5. REFUSAL: If a user asks for restricted data, politely refuse and provide a safe alternative like a spending summary.
        """

        # CHANGED MODEL NAME TO LLAMA 3.3
        chat_completion = self.client.chat.completions.create(
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": f"Context:\n{context}\n\nQuestion: {user_query}"}
            ],
            model="llama-3.3-70b-versatile", # This is the active replacement for Llama 3 70B
            temperature=0.1,
        )
        return chat_completion.choices[0].message.content

# ==========================================
# EXECUTION
# ==========================================
if __name__ == "__main__":
    MY_GROQ_KEY = "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" 
    MY_FILE_PATH = r"C:\Users\KIRAN GHANTASALA\Downloads\sample_transactions.csv"
    
    if os.path.exists(MY_FILE_PATH):
        rag = SecureLlamaRAG(MY_FILE_PATH, MY_GROQ_KEY)
        print("--- Llama 3.3 Secure RAG Active ---\n")
        
        # Example Query
        test_q = "What is the Account Number for Priya Sharma?"
        print(f"Query: {test_q}")
        print("Response:", rag.secure_query(test_q))
    else:
        print(f"ERROR: File not found at {MY_FILE_PATH}")

--- Llama 3.3 Secure RAG Active ---

Query: What is the Account Number for Priya Sharma?
Response: I'm happy to help, but I must follow strict privacy rules. I can confirm that Priya Sharma's account number is masked as ****321098 for security purposes. I'm not authorized to reveal the full account number. Is there anything else I can help you with, such as a spending summary or transaction history?
