<a href="https://colab.research.google.com/github/Krakalus/RAG-Work/blob/main/CRM_RAG.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd

# Mock CRM data: ten customers, written one (name, status, last_interaction) row at a time
customer_rows = [
    ("John Doe", "VIP", "10/01/2025"),
    ("Jane Smith", "Lead", "09/28/2025"),
    ("Bob Jones", "Inactive", "08/15/2025"),
    ("Alice Brown", "Prospect", "09/30/2025"),
    ("Charlie Davis", "VIP", "10/02/2025"),
    ("Emma Wilson", "Lead", "09/25/2025"),
    ("Michael Lee", "Inactive", "07/20/2025"),
    ("Sarah Taylor", "Prospect", "09/29/2025"),
    ("David Clark", "VIP", "10/01/2025"),
    ("Lisa Adams", "Lead", "09/27/2025"),
]
column_names = ("name", "status", "last_interaction")

# Pivot the rows into the column-oriented mapping that the DataFrame is built from
data = {
    column: [row[position] for row in customer_rows]
    for position, column in enumerate(column_names)
}

# Build the frame and persist it without the index so later cells can reload it
df = pd.DataFrame(data)
df.to_csv("crm_customers.csv", index=False)

# Echo the frame as a quick visual check
print(df)

            name    status last_interaction
0       John Doe       VIP       10/01/2025
1     Jane Smith      Lead       09/28/2025
2      Bob Jones  Inactive       08/15/2025
3    Alice Brown  Prospect       09/30/2025
4  Charlie Davis       VIP       10/02/2025
5    Emma Wilson      Lead       09/25/2025
6    Michael Lee  Inactive       07/20/2025
7   Sarah Taylor  Prospect       09/29/2025
8    David Clark       VIP       10/01/2025
9     Lisa Adams      Lead       09/27/2025


In [None]:
# Install required dependencies
!pip install -U langchain-community faiss-cpu



In [None]:
import pandas as pd
import numpy as np
import base64
from openai import OpenAI
from langchain.vectorstores import FAISS
from langchain.docstore.document import Document

In [None]:
# Initialize NVIDIA API client
import os
from getpass import getpass

# Take the key from the API_KEY environment variable; if it is not set, prompt
# for it without echo so the key never has to be pasted into the notebook.
nvidia_api_key = os.getenv("API_KEY") or getpass("NVIDIA API key: ")

# Never hand api_key=None to the SDK: it would then fall back to OPENAI_API_KEY
# and send that unrelated credential to the NVIDIA endpoint.
if not nvidia_api_key:
    raise RuntimeError(
        "No NVIDIA API key provided. Set the API_KEY environment variable "
        "or enter the key when prompted."
    )

client = OpenAI(
    api_key=nvidia_api_key,
    base_url="https://integrate.api.nvidia.com/v1",
)

In [None]:
# Step 1: Read the CRM table back from the CSV file written earlier in this notebook
crm_csv_path = "crm_customers.csv"
df = pd.read_csv(crm_csv_path)

In [None]:
# Step 2: Generate embeddings for CRM data
# Each customer becomes one "name, status, last_interaction" line of text.
texts = [
    f"{name}, {status}, {last_interaction}"
    for name, status, last_interaction in zip(
        df["name"], df["status"], df["last_interaction"]
    )
]

response = client.embeddings.create(
    input=texts,
    model="nvidia/nv-embedqa-e5-v5",
    encoding_format="base64",
    # These rows are the corpus being indexed, so they are embedded as
    # passages. "query" is reserved for the search strings compared against
    # them; the model embeds the two roles differently.
    extra_body={"input_type": "passage", "truncate": "END"},
)

In [None]:
# Decode base64 embeddings to float arrays
embeddings = [np.frombuffer(base64.b64decode(emb.embedding)) for emb in response.data]

In [None]:
# Step 3: Store embeddings in FAISS
# Create Document objects with string page_content
documents = [Document(page_content=text) for text in texts]

# Define a dummy Embeddings class to use precomputed embeddings
from langchain.embeddings.base import Embeddings
class DummyEmbeddings(Embeddings):
    def __init__(self, precomputed_embeddings):
        self.precomputed_embeddings = precomputed_embeddings
    def embed_documents(self, texts):
        return self.precomputed_embeddings
    def embed_query(self, text):
        return self.precomputed_embeddings[0]  # Return first embedding for query (simplified)

# Use FAISS with precomputed embeddings and dummy embedding object
embedding_obj = DummyEmbeddings(embeddings)
vector_store = FAISS.from_embeddings(
    text_embeddings=[(doc.page_content, emb) for doc, emb in zip(documents, embeddings)],
    embedding=embedding_obj
)

In [None]:
# Step 4: Test retrieval with a refined CRM query
query = "customers with VIP status"
query_response = client.embeddings.create(
    input=[query],
    model="nvidia/nv-embedqa-e5-v5",
    encoding_format="base64",
    extra_body={"input_type": "query", "truncate": "END"}
)
query_embedding = np.frombuffer(base64.b64decode(query_response.data[0].embedding))
results = vector_store.similarity_search_by_vector(query_embedding, k=3)
print("Query: customers with VIP status")
for doc in results:
    print(f"Result: {doc.page_content}")
    print(f"Result: {doc.page_content}")

Query: customers with VIP status
Result: Jane Smith, Lead, 09/28/2025
Result: Jane Smith, Lead, 09/28/2025
Result: Emma Wilson, Lead, 09/25/2025
Result: Emma Wilson, Lead, 09/25/2025
Result: Bob Jones, Inactive, 08/15/2025
Result: Bob Jones, Inactive, 08/15/2025


In [23]:
# Manual RAG: Retrieve, format prompt, call LLM directly
query = "Suggest actions for VIP customers in Salesforce"

# Step 1: Retrieve candidate docs using .invoke
# The VIP filter below runs after retrieval, so ask for every stored record
# instead of the retriever's default top 4; otherwise VIP rows that happen to
# rank lower never reach the prompt.
retriever = vector_store.as_retriever(
    search_kwargs={"k": vector_store.index.ntotal}
)
retrieved_docs = retriever.invoke(query)


def _status_of(doc):
    """Extract the status field from a "name, status, last_interaction" record.

    Splits from the right so a comma inside the name cannot shift the fields.
    Returns an empty string if the record does not have three fields.
    """
    fields = doc.page_content.rsplit(", ", 2)
    return fields[1] if len(fields) == 3 else ""


# Step 2: Filter for VIPs only (exact status match, so a name that merely
# contains the letters "VIP" is not picked up)
vip_docs = [doc for doc in retrieved_docs if _status_of(doc) == "VIP"]

# Step 3: Format context from VIP docs
context = "\n".join(doc.page_content for doc in vip_docs)
prompt = f"""Based on this CRM data, suggest concise, unique Salesforce actions for each VIP customer:

{context}

Question: {query}

Provide only the actions, no additional explanations or feature details."""

# Step 4: Call LLM directly, but only when there is CRM data to ground the
# answer; with an empty context the model would invent generic actions.
if vip_docs:
    response = client.chat.completions.create(
        model="nvidia/llama-3.1-nemotron-nano-8b-v1",
        messages=[{"role": "user", "content": prompt}],
        temperature=0.1,
        max_tokens=300
    )
    result = response.choices[0].message.content
else:
    result = "No VIP customers were retrieved, so no actions were generated."

print("RAG Response:")
print(result)

RAG Response:
- Schedule a personalized onboarding session with a Salesforce representative to ensure optimal setup.
- Request a dedicated account manager for enhanced support and service.
- Arrange for a customized CRM implementation review to optimize their current setup.
- Propose a VIP program launch to include exclusive features and priority support.
- Offer a complimentary CRM training session tailored to their specific needs.
