<a href="https://colab.research.google.com/github/Krakalus/RAG-Work/blob/main/CRM_Agentic_RAG.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd

# Enhanced CRM dataset for Agentic AI Lead Scoring & Prioritization
# Use case: Autonomous CRM Agent that retrieves leads, scores them based on fit (demographics, behavior),
# reasons on priority (e.g., high-value VIP prospects with recent engagement), and suggests actions
# like personalized outreach, lead routing, or Salesforce workflow triggers.
# This dataset supports multi-step agentic flow: Retrieve → Score → Reason → Act (e.g., simulate update lead status).
# Fields inspired by Salesforce lead schemas: identity (name, company), descriptive (title, industry, size),
# quantitative (revenue, score), behavioral (engagement level, last activity), status (Prospect, Lead, VIP).

data = {
    "name": [
        "John Doe", "Jane Smith", "Bob Jones", "Alice Brown", "Charlie Davis", "Emma Wilson", "Michael Lee",
        "Sarah Taylor", "David Clark", "Lisa Adams", "Tom Harris", "Rachel Green", "Mike Ross", "Donna Paulsen",
        "Harvey Specter", "Louis Litt", "Jessica Pearson", "Alex Williams"
    ],
    "company": [
        "TechCorp Inc.", "Innovate Solutions", "Global Enterprises", "FutureTech LLC", "Elite Consulting",
        "Dynamic Marketing", "Strategic Partners", "Visionary Ventures", "Premier Analytics", "Nexus Group",
        "Quantum Dynamics", "Synergy Labs", "Pinnacle Strategies", "Apex Innovations", "Summit Partners",
        "Vertex Solutions", "Horizon Tech", "Catalyst Corp"
    ],
    "title": [
        "CEO", "Marketing Director", "Sales Manager", "CTO", "VP of Sales", "CMO", "Account Executive",
        "Business Development Manager", "Data Analyst", "Product Manager", "Founder", "Head of Operations",
        "Senior Consultant", "Director of Finance", "CIO", "Head of HR", "General Counsel", "Chief Revenue Officer"
    ],
    "industry": [
        "Technology", "Marketing", "Manufacturing", "Technology", "Consulting", "Marketing", "Finance",
        "Technology", "Analytics", "Software", "Tech Startup", "Biotech", "Management Consulting", "Finance",
        "IT Services", "HR Tech", "Legal Tech", "Revenue Management"
    ],
    "company_size": [500, 200, 1000, 150, 300, 250, 400, 180, 220, 350, 50, 120, 280, 90, 600, 140, 320, 450],
    "estimated_revenue": [50000000, 20000000, 100000000, 15000000, 30000000, 25000000, 40000000, 18000000, 22000000, 35000000, 5000000, 12000000, 28000000, 9000000, 60000000, 14000000, 32000000, 45000000],
    "lead_source": [
        "Webinar", "Referral", "Trade Show", "Inbound Email", "Social Media", "Partnership", "Cold Call",
        "Content Download", "Event", "LinkedIn", "Google Ads", "Email Campaign", "Partner Referral",
        "Organic Search", "Conference", "Demo Request", "Blog Read", "Newsletter Signup"
    ],
    "engagement_level": ["High", "Medium", "Low", "High", "High", "Medium", "Low", "High", "Medium", "High", "High", "Medium", "Low", "High", "Medium", "High", "Low", "Medium"],
    "last_activity_date": [
        "10/01/2025", "09/28/2025", "08/15/2025", "09/30/2025", "10/02/2025", "09/25/2025", "07/20/2025",
        "09/29/2025", "10/01/2025", "09/27/2025", "10/03/2025", "09/26/2025", "08/10/2025", "09/24/2025",
        "10/01/2025", "09/22/2025", "08/05/2025", "09/23/2025"
    ],
    "status": [
        "VIP Prospect", "Lead", "Prospect", "VIP Lead", "VIP Prospect", "Lead", "Prospect", "VIP Lead",
        "Lead", "VIP Prospect", "High-Value Lead", "Prospect", "VIP Lead", "Lead", "VIP Prospect",
        "High-Value Lead", "Prospect", "VIP Lead"
    ],
    "lead_score": [95, 75, 40, 90, 98, 80, 35, 85, 70, 92, 88, 65, 45, 82, 96, 78, 50, 89]  # AI-predicted score (0-100)
}

# Create DataFrame and save to CSV
df = pd.DataFrame(data)
df.to_csv("crm_leads_enhanced.csv", index=False)

# Display the DataFrame to verify
print(df)

               name              company                         title  \
0          John Doe        TechCorp Inc.                           CEO   
1        Jane Smith   Innovate Solutions            Marketing Director   
2         Bob Jones   Global Enterprises                 Sales Manager   
3       Alice Brown       FutureTech LLC                           CTO   
4     Charlie Davis     Elite Consulting                   VP of Sales   
5       Emma Wilson    Dynamic Marketing                           CMO   
6       Michael Lee   Strategic Partners             Account Executive   
7      Sarah Taylor   Visionary Ventures  Business Development Manager   
8       David Clark    Premier Analytics                  Data Analyst   
9        Lisa Adams          Nexus Group               Product Manager   
10       Tom Harris     Quantum Dynamics                       Founder   
11     Rachel Green         Synergy Labs            Head of Operations   
12        Mike Ross  Pinnacle Strategi

In [2]:
!pip install -U langchain-community faiss-cpu

Collecting langchain-community
  Downloading langchain_community-0.3.30-py3-none-any.whl.metadata (3.0 kB)
Collecting faiss-cpu
  Downloading faiss_cpu-1.12.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (5.1 kB)
Collecting requests<3.0.0,>=2.32.5 (from langchain-community)
  Downloading requests-2.32.5-py3-none-any.whl.metadata (4.9 kB)
Collecting dataclasses-json<0.7.0,>=0.6.7 (from langchain-community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7.0,>=0.6.7->langchain-community)
  Downloading marshmallow-3.26.1-py3-none-any.whl.metadata (7.3 kB)
Collecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7.0,>=0.6.7->langchain-community)
  Downloading typing_inspect-0.9.0-py3-none-any.whl.metadata (1.5 kB)
Collecting mypy-extensions>=0.3.0 (from typing-inspect<1,>=0.4.0->dataclasses-json<0.7.0,>=0.6.7->langchain-community)
  Downloading mypy_extensions-1.1.0-py3-none-an

In [4]:
import pandas as pd
import numpy as np
import base64
from openai import OpenAI
from langchain.vectorstores import FAISS
from langchain.docstore.document import Document

In [5]:
# Initialize NVIDIA API client
import os
client = OpenAI(
    api_key=os.getenv("API_KEY"),  # Replace with your NVIDIA API key
    base_url="https://integrate.api.nvidia.com/v1"
)

In [6]:
# Step 1: Load your existing CRM data from the CSV (already created in Colab)
df = pd.read_csv("crm_leads_enhanced.csv")

In [37]:
# Step 2: Generate embeddings for CRM data
texts = df.apply(lambda x: f"{x['name']}, {x['status']}, score:{x['lead_score']}, engagement:{x['engagement_level']}, last_activity:{x['last_activity_date']}", axis=1).tolist()
response = client.embeddings.create(
    input=texts,
    model="nvidia/nv-embedqa-e5-v5",
    encoding_format="base64",
    extra_body={"input_type": "query", "truncate": "END"}
)

In [38]:
# Decode base64 embeddings to float arrays
embeddings = [np.frombuffer(base64.b64decode(emb.embedding)) for emb in response.data]

In [39]:
# Step 3: Store embeddings in FAISS
# Create Document objects with string page_content
documents = [Document(page_content=text) for text in texts]

# Define a dummy Embeddings class to use precomputed embeddings
from langchain.embeddings.base import Embeddings
class DummyEmbeddings(Embeddings):
    def __init__(self, precomputed_embeddings):
        self.precomputed_embeddings = precomputed_embeddings
    def embed_documents(self, texts):
        return self.precomputed_embeddings
    def embed_query(self, text):
        return self.precomputed_embeddings[0]  # Return first embedding for query (simplified)

# Use FAISS with precomputed embeddings and dummy embedding object
embedding_obj = DummyEmbeddings(embeddings)
vector_store = FAISS.from_embeddings(
    text_embeddings=[(doc.page_content, emb) for doc, emb in zip(documents, embeddings)],
    embedding=embedding_obj
)

In [40]:
# Step 4: Gradio UI for Agentic RAG
!pip install gradio -q

import gradio as gr
from langchain.vectorstores import FAISS

def get_agentic_response(query):
    # Reuse RAG logic from Cell 9
    retriever = vector_store.as_retriever()
    retrieved_docs = retriever.invoke(query)
    print(f"Retrieved {len(retrieved_docs)} documents")

    def score_lead(doc):
        content = doc.page_content
        score = int(''.join(filter(str.isdigit, content.split('score:')[1].split(',')[0]))) if 'score:' in content else 0
        is_vip = any(status in content for status in ["VIP Prospect", "VIP Lead", "High-Value Lead"])
        date = content.split('last_activity:')[1].split(',')[0] if 'last_activity:' in content else "01/01/2020"
        return score > 80 and is_vip, score, date

    high_priority_leads = [doc for doc in retrieved_docs if score_lead(doc)[0]]
    leads_with_scores = [(doc, score_lead(doc)[1], score_lead(doc)[2]) for doc in high_priority_leads]
    leads_with_scores.sort(key=lambda x: (x[1], x[2]), reverse=True)  # Score desc, date asc

    if not high_priority_leads:
        return "No high-priority leads identified.", "", ""

    context = "\n".join([f"{doc.page_content} (Score: {score}, Date: {date})" for doc, score, date in leads_with_scores])
    prompt = f"""You are an autonomous CRM agent. Based on this lead data, follow these steps:
    1. Assess priority: Rank leads by score (highest first), then by latest last_activity_date.
    2. Reason: Determine if outreach is needed (score > 80 and VIP/High-Value status justify action).
    3. Act: Suggest 2-3 unique Salesforce actions per lead, tailored to their industry or title.

    Lead Data:
    {context}

    Query: {query}

    Provide output as: 'Priority Ranking: [list]\nReasoning: [summary]\nActions: [action list]'"""

    try:
        response = client.chat.completions.create(
            model="nvidia/llama-3.1-nemotron-nano-8b-v1",
            messages=[{"role": "user", "content": prompt}],
            temperature=0.1,
            max_tokens=500
        )
        output = response.choices[0].message.content
        # Split output into sections
        ranking = output.split('\nReasoning:')[0].replace('Priority Ranking:', '').strip()
        reasoning = output.split('\nReasoning:')[1].split('\nActions:')[0].strip()
        actions = output.split('\nActions:')[1].strip()
        return ranking, reasoning, actions
    except Exception as e:
        return f"API error: {e}", "", ""

# Create enhanced Gradio interface with custom styling
with gr.Blocks(title="CRM Lead Prioritization Agent", theme=gr.themes.Soft()) as interface:
    gr.Markdown("# CRM Lead Prioritization Agent\nPowered by NVIDIA NIM & AWS")
    with gr.Row():
        with gr.Column():
            query_input = gr.Textbox(label="Enter Query", placeholder="e.g., 'Identify and prioritize high-value leads for Salesforce outreach'", lines=3)
            submit_btn = gr.Button("Analyze Leads", variant="primary")
        with gr.Column():
            ranking_output = gr.Textbox(label="Priority Ranking", interactive=False, lines=5)
            reasoning_output = gr.Textbox(label="Reasoning", interactive=False, lines=5)
            actions_output = gr.Textbox(label="Actions", interactive=False, lines=8)

    submit_btn.click(
        fn=get_agentic_response,
        inputs=query_input,
        outputs=[ranking_output, reasoning_output, actions_output]
    )

interface.launch()

It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://96be294c9a191fb4b2.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


