In [46]:
# Install libraries
!pip install fastembed==0.3.4 llama-index-core==0.11.13 \
             llama-index-llms-gemini==0.3.5 \
             llama-index-utils-workflow==0.2.1 \
             llama-index llama-index-experimental==0.4.0 \
             llama-index-vector-stores-chroma \
             llama-index-embeddings-huggingface==0.3.1

Collecting llama-index-embeddings-huggingface
  Downloading llama_index_embeddings_huggingface-0.3.1-py3-none-any.whl.metadata (718 bytes)
Collecting minijinja>=1.0 (from huggingface-hub[inference]>=0.19.0->llama-index-embeddings-huggingface)
  Downloading minijinja-2.2.0-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (8.8 kB)
Downloading llama_index_embeddings_huggingface-0.3.1-py3-none-any.whl (8.6 kB)
Downloading minijinja-2.2.0-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (861 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m861.9/861.9 kB[0m [31m40.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: minijinja, llama-index-embeddings-huggingface
Successfully installed llama-index-embeddings-huggingface-0.3.1 minijinja-2.2.0


In [52]:
# Load libraries
import random
import tiktoken
import chromadb
import pandas as pd

from llama_index.llms.gemini import Gemini
from llama_index.core.agent import ReActAgent
from llama_index.core.tools import FunctionTool
from llama_index.core.prompts import PromptTemplate
from llama_index.core.memory import ChatMemoryBuffer
from llama_index.core import Settings as LLMSettings
from llama_index.core.storage.chat_store import SimpleChatStore
from google.generativeai.types import HarmCategory, HarmBlockThreshold
from llama_index.core.callbacks import CallbackManager, TokenCountingHandler

from llama_index.core.schema import TextNode
from llama_index.core import VectorStoreIndex, StorageContext
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.core.retrievers import VectorIndexAutoRetriever
from llama_index.experimental.query_engine import PandasQueryEngine
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core.vector_stores.types import MetadataInfo, VectorStoreInfo

### Data Mockup

In [85]:
# Create Mockup Conversation on QA Sales & Customer
nodes = [

    # Tech Industry - Software Campaign
    TextNode(
        text=(
            "Sales: I noticed your company is scaling rapidly. Have you considered automating your HR processes?\n"
            "Customer: Yes, we're struggling with manual onboarding for our new hires.\n"
            "Sales: Our HR automation suite could reduce onboarding time by 60%.\n"
            "Customer: That's interesting. What's the implementation timeline?\n"
            "Sales: Typically 2-3 weeks, with minimal disruption to your operations."
        ),
        metadata={
            "industry": "Technology",
            "campaign": "HR Automation Suite",
            "conversation_id": "TECH001",
            "customer_size": "Enterprise"
        }
    ),

    # Healthcare - Digital Transformation
    TextNode(
        text=(
            "Sales: How are you currently managing patient records?\n"
            "Customer: We're using a mix of digital and paper records, it's quite inefficient.\n"
            "Sales: Our healthcare digitization platform could streamline this.\n"
            "Customer: What about compliance and security?\n"
            "Sales: We're fully HIPAA compliant with end-to-end encryption."
        ),
        metadata={
            "industry": "Healthcare",
            "campaign": "Digital Transformation",
            "conversation_id": "HEALTH001",
            "customer_size": "Mid-size"
        }
    ),

    # Financial Services - Investment Platform
    TextNode(
        text=(
            "Sales: What's your biggest challenge in portfolio management?\n"
            "Customer: Real-time analytics and reporting for our clients.\n"
            "Sales: Our platform offers instant portfolio insights and automated reporting.\n"
            "Customer: Do you support custom branding?\n"
            "Sales: Yes, full white-labeling is available in our enterprise plan."
        ),
        metadata={
            "industry": "Financial Services",
            "campaign": "Investment Analytics",
            "conversation_id": "FIN001",
            "customer_size": "Enterprise"
        }
    ),

    # Retail - E-commerce Solution
    TextNode(
        text=(
            "Sales: How's your online sales performance been this quarter?\n"
            "Customer: We're seeing increased cart abandonment rates.\n"
            "Sales: Our smart checkout solution has reduced abandonment by 35% for similar retailers.\n"
            "Customer: What about integration with our existing system?\n"
            "Sales: We offer seamless integration with all major e-commerce platforms."
        ),
        metadata={
            "industry": "Retail",
            "campaign": "Smart Checkout",
            "conversation_id": "RETAIL001",
            "customer_size": "Small"
        }
    ),

    # Manufacturing - IoT Solutions
    TextNode(
        text=(
            "Sales: What's your current approach to equipment monitoring?\n"
            "Customer: Manual checks every shift, but we sometimes miss issues.\n"
            "Sales: Our IoT sensors could provide real-time monitoring and predictive maintenance.\n"
            "Customer: What's the ROI timeline?\n"
            "Sales: Most clients see positive ROI within 6-8 months through reduced downtime."
        ),
        metadata={
            "industry": "Manufacturing",
            "campaign": "IoT Monitoring",
            "conversation_id": "MFG001",
            "customer_size": "Large"
        }
    ),

    # Technology - Cloud Migration
    TextNode(
        text=(
            "Sales: What's holding you back from moving to the cloud?\n"
            "Customer: Concerns about data security and migration downtime.\n"
            "Sales: We offer zero-downtime migration with military-grade encryption.\n"
            "Customer: How long would the migration take?\n"
            "Sales: For your size, approximately 4-6 weeks with staged migration."
        ),
        metadata={
            "industry": "Technology",
            "campaign": "Cloud Migration",
            "conversation_id": "TECH002",
            "customer_size": "Mid-size"
        }
    ),

    # Healthcare - Telemedicine Platform
    TextNode(
        text=(
            "Sales: How are you handling remote patient consultations?\n"
            "Customer: We're using basic video calls, but it's not integrated with our EMR.\n"
            "Sales: Our telemedicine platform offers full EMR integration and patient scheduling.\n"
            "Customer: What about patient data privacy?\n"
            "Sales: We maintain strict HIPAA compliance with audit trails."
        ),
        metadata={
            "industry": "Healthcare",
            "campaign": "Telemedicine",
            "conversation_id": "HEALTH002",
            "customer_size": "Large"
        }
    ),

    # Financial Services - Fraud Detection
    TextNode(
        text=(
            "Sales: What's your current fraud detection success rate?\n"
            "Customer: Around 85%, but we're seeing new types of fraud emerge.\n"
            "Sales: Our AI-powered system achieves 97% accuracy with real-time adaptation.\n"
            "Customer: Can it integrate with our existing security stack?\n"
            "Sales: Yes, we support all major security protocols and platforms."
        ),
        metadata={
            "industry": "Financial Services",
            "campaign": "Fraud Prevention",
            "conversation_id": "FIN002",
            "customer_size": "Enterprise"
        }
    ),

    # Retail - Inventory Management
    TextNode(
        text=(
            "Sales: How do you currently handle inventory forecasting?\n"
            "Customer: Mostly manual with spreadsheets, it's not very accurate.\n"
            "Sales: Our AI forecasting has reduced overstock by 40% for similar retailers.\n"
            "Customer: Does it consider seasonal variations?\n"
            "Sales: Yes, it analyzes multiple years of data for seasonal patterns."
        ),
        metadata={
            "industry": "Retail",
            "campaign": "Smart Inventory",
            "conversation_id": "RETAIL002",
            "customer_size": "Mid-size"
        }
    ),

    # Manufacturing - Quality Control
    TextNode(
        text=(
            "Sales: What's your current defect detection rate?\n"
            "Customer: About 92%, but we're aiming for higher accuracy.\n"
            "Sales: Our computer vision system achieves 99.9% accuracy in defect detection.\n"
            "Customer: What about integration time?\n"
            "Sales: Two weeks for basic setup, one month for full optimization."
        ),
        metadata={
            "industry": "Manufacturing",
            "campaign": "Quality Control",
            "conversation_id": "MFG002",
            "customer_size": "Enterprise"
        }
    )
]

vector_store_info = VectorStoreInfo(
    content_info="Sales conversation transcripts between sales representatives and potential customers across different industries and campaigns",
    metadata_info=[
        MetadataInfo(
            name="industry",
            type="str",
            description=(
                "Industry sector of the customer company"
            ),
        ),
        MetadataInfo(
            name="campaign",
            type="str",
            description=(
                "Marketing campaign type"
            ),
        ),
        MetadataInfo(
            name="conversation_id",
            type="str",
            description=(
                "Unique identifier for the conversation"
            ),
        ),
        MetadataInfo(
            name="customer_size",
            type="str",
            description=(
                "Size category of the customer company"
            ),
        ),
    ],
)

In [87]:
# Create Mockup Vector Database
collection_name = "sales_appointment"
chroma_client = chromadb.EphemeralClient()
chroma_collection = chroma_client.create_collection(collection_name, get_or_create=True)

vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex(nodes, storage_context=storage_context)

In [86]:
# Create Mockup Dataframe
df_appointment = pd.DataFrame([node.metadata.copy() for node in nodes])
df_appointment["is_appointment_success"] = random.choices([True, False], k=df_appointment.shape[0])
df_appointment

Unnamed: 0,industry,campaign,conversation_id,customer_size,is_appointment_success
0,Technology,HR Automation Suite,TECH001,Enterprise,True
1,Healthcare,Digital Transformation,HEALTH001,Mid-size,True
2,Financial Services,Investment Analytics,FIN001,Enterprise,False
3,Retail,Smart Checkout,RETAIL001,Small,True
4,Manufacturing,IoT Monitoring,MFG001,Large,True
5,Technology,Cloud Migration,TECH002,Mid-size,False
6,Healthcare,Telemedicine,HEALTH002,Large,True
7,Financial Services,Fraud Prevention,FIN002,Enterprise,True
8,Retail,Smart Inventory,RETAIL002,Mid-size,False
9,Manufacturing,Quality Control,MFG002,Enterprise,False


### Agent Guide

In [None]:
class Settings:
    GEMINI_MODEL_NAME: str = "models/gemini-1.5-pro-latest"
    GEMINI_API_KEY: str = ""
    TEMPERATURE: float = 0.5
    TOKEN_CONVERSATION_LIMIT: int = 10_000
    EMBEDDING_NAME = "BAAI/bge-small-en-v1.5"

In [102]:
class SalesAnalyticsAgent:
    """
    A class to handle sales analytics through natural language processing.

    This class integrates various components for analyzing sales data and conversations:
    - Query engine for historical sales appointment data
    - Vector retriever for sales conversation analysis
    - Gemini LLM integration with custom settings
    - ReAct agent for handling complex queries
    - Memory management for maintaining conversation context

    The agent can:
    - Process historical sales appointment data
    - Analyze sales conversations
    - Maintain conversation context
    - Execute complex queries using tool-based reasoning
    """

    def __init__(
        self,
        df_appointment: pd.DataFrame,
        index: any,
        vector_store_info: VectorStoreInfo,
        session_id: str = "123"
    ):
        self.df_appointment = df_appointment
        self.index = index
        self.vector_store_info = vector_store_info
        self.session_id = session_id

        self.token_counter = TokenCountingHandler(
            tokenizer=tiktoken.encoding_for_model("gpt-3.5-turbo").encode
        )

        self.query_engine = PandasQueryEngine(df=df_appointment, verbose=True)
        self.retriever = VectorIndexAutoRetriever(index, vector_store_info=vector_store_info)

        self._setup_llm_settings()
        self._setup_tools()
        self._setup_memory()
        self._setup_agent()

    def _setup_llm_settings(self):
        self.embed_model = HuggingFaceEmbedding(
            model_name=Settings.EMBEDDING_NAME
        )

        self.llm = Gemini(
            model_name=Settings.GEMINI_MODEL_NAME,
            api_key=Settings.GEMINI_API_KEY,
            temperature=Settings.TEMPERATURE,
            safety_settings={
                HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
                HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
                HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
                HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
            }
        )

        self.callback_manager = CallbackManager([self.token_counter])

    def historical_sales_appointment(self, query: str) -> str:
        """
        Retrieve Historical Data between Sales & Customer on Appointment Meeting,
        contains information such as industry name, campaign, conversation id,
        customer size, and appointment success
        """
        context = self.query_engine.query(query)
        return context

    def conversation_sales_interaction(self, query: str) -> str:
        """
        Retrieve Conversation Data between Sales & Customer on Appointment Meeting
        """
        resp_ctx = []
        response = self.retriever.retrieve(query)
        for resp in response:
            response_formatter = "Content:\n{content}\n\nMetadata:\n{metadata}".format(
                content=resp.text,
                metadata=resp.metadata
            )
            resp_ctx.append(response_formatter)
        context = "\n\n".join(resp_ctx)
        return context

    def _setup_tools(self):
        self.tools = [
            FunctionTool.from_defaults(fn=self.historical_sales_appointment),
            FunctionTool.from_defaults(fn=self.conversation_sales_interaction)
        ]

    def _setup_memory(self):
        self.chat_memory = ChatMemoryBuffer.from_defaults(
            token_limit=Settings.TOKEN_CONVERSATION_LIMIT,
            chat_store_key=self.session_id,
            chat_store=SimpleChatStore(),
        )

    def _setup_agent(self):
        self.agent = ReActAgent.from_tools(
            llm=self.llm,
            tools=self.tools,
            memory=self.chat_memory,
            verbose=True
        )

    def query(self, user_query: str) -> str:
        """
        Process a user query using the ReAct agent

        Args:
            user_query (str): The query to process

        Returns:
            str: The agent's response
        """
        return self.agent.chat(user_query).response

In [103]:
# Instantiate agent
sales_agent = SalesAnalyticsAgent(
    df_appointment, index, vector_store_info
)

In [104]:
# Chat Q&A Test
user_prompt = "Halo, nama saya alif."
response = sales_agent.query(user_prompt)
print(response)

> Running step dacb6ca4-2a88-40a3-927b-972f58d0e2fd. Step input: Halo, nama saya alif.
[1;3;38;5;200mThought: The current language of the user is: Indonesian.  The user introduced themselves. I don't have a question yet. I will wait for the user to ask a question.
Answer: Halo Alif, senang bertemu denganmu. Ada yang bisa saya bantu?
[0m

'Halo Alif, senang bertemu denganmu. Ada yang bisa saya bantu?'

In [105]:
# Chat Q&A Test
user_prompt = "Siapa nama saya"
response = sales_agent.query(user_prompt)
print(response)

> Running step d23da3ee-34bf-4bb5-988f-ae7acd79fffa. Step input: Siapa nama saya
[1;3;38;5;200mThought: The current language of the user is: Indonesian.  The user asked for their name, which they provided in the first message.
Answer: Nama Anda adalah Alif.
[0m

'Nama Anda adalah Alif.'

In [106]:
user_prompt = "Berapa total appointment yang sukses di industri retail"
response = sales_agent.query(user_prompt)
print(response)

> Running step 395afd5f-759d-4c4b-991e-476e1e3f08f3. Step input: Berapa total appointment yang sukses di industri retail
[1;3;38;5;200mThought: The current language of the user is: Indonesian. I need to use a tool to find the total successful appointments in the retail industry.
Action: historical_sales_appointment
Action Input: {'query': 'Berapa total janji temu penjualan yang berhasil di industri ritel?'}
[0m> Pandas Instructions:
```
df[df['industry'] == 'Retail']['is_appointment_success'].sum()

```
> Pandas Output: 1
[1;3;34mObservation: 1
[0m> Running step eea8a08d-9a65-4882-9938-c83e2347d255. Step input: None
[1;3;38;5;200mThought: I now know the total number of successful appointments.
Answer: Total janji temu yang berhasil di industri ritel adalah 1.
[0m

'Total janji temu yang berhasil di industri ritel adalah 1.'

In [107]:
user_prompt = "Bisa berikan contoh conversation nya?"
response = sales_agent.query(user_prompt)
print(response)

> Running step aef33b9a-8463-4c95-8236-780ae0fae856. Step input: Bisa berikan contoh conversation nya?
[1;3;38;5;200mThought: The current language of the user is: Indonesian. I need to use a tool to retrieve the conversation examples related to successful appointments in the retail industry.
Action: conversation_sales_interaction
Action Input: {'query': 'Contoh percakapan dari janji temu yang sukses di industri ritel'}
[0m[1;3;34mObservation: 
[0m> Running step b25a8e4c-af8e-458d-b5a4-b65a476ae14d. Step input: None
[1;3;38;5;200mThought: I need more information to provide a relevant conversation example.  The previous query was too broad. I should specify a successful appointment.
Action: conversation_sales_interaction
Action Input: {'query': 'Contoh percakapan dari janji temu yang sukses di industri ritel, khususnya yang membahas tentang implementasi sistem POS'}
[0m[1;3;34mObservation: 
[0m> Running step 0a531704-dfe7-4df7-84a4-cdcee2f7f628. Step input: None
[1;3;38;5;200mTh



[1;3;38;5;200mThought: Now I have a specific conversation ID (RETAIL001). I can use this to retrieve the conversation details.
Action: conversation_sales_interaction
Action Input: {'query': 'Percakapan untuk ID janji temu RETAIL001'}
[0m[1;3;34mObservation: Content:
Sales: How's your online sales performance been this quarter?
Customer: We're seeing increased cart abandonment rates.
Sales: Our smart checkout solution has reduced abandonment by 35% for similar retailers.
Customer: What about integration with our existing system?
Sales: We offer seamless integration with all major e-commerce platforms.

Metadata:
{'industry': 'Retail', 'campaign': 'Smart Checkout', 'conversation_id': 'RETAIL001', 'customer_size': 'Small'}
[0m> Running step a58c67d0-dd54-4c7f-9cb5-f1a957d54432. Step input: None
[1;3;38;5;200mThought: I can answer without using any more tools. I'll use the user's language to answer
Answer: Berikut contoh percakapan dari janji temu yang sukses di industri ritel dengan 

'Berikut contoh percakapan dari janji temu yang sukses di industri ritel dengan ID RETAIL001:\\\n\\\nSales: "Bagaimana kinerja penjualan online Anda kuartal ini?"\\\nCustomer: "Kami melihat peningkatan tingkat pengabaian keranjang." \\\nSales: "Solusi checkout pintar kami telah mengurangi pengabaian sebesar 35% untuk retailer serupa."\\\nCustomer: "Bagaimana dengan integrasi dengan sistem kami yang ada?"\\\nSales: "Kami menawarkan integrasi yang mulus dengan semua platform e-commerce utama."'

In [108]:
user_prompt = "Buatkan 1 topik krusial appointment meeting dgn salah satu brand industri retail ternama di thailand"
sales_agent.query(user_prompt)

> Running step 994aa729-f0da-495c-b4f1-69adadc3b55a. Step input: Buatkan 1 topik krusial appointment meeting dgn salah satu brand industri retail ternama di thailand
[1;3;38;5;200mThought: The current language of the user is: Indonesian. I need to use a tool to help me answer the question.
Action: historical_sales_appointment
Action Input: {'query': 'topik krusial appointment meeting dengan salah satu brand industri retail ternama di Thailand'}
[0m> Pandas Instructions:
```
df[df['industry'] == 'Retail']['campaign'].iloc[0]

```
> Pandas Output: Smart Checkout
[1;3;34mObservation: Smart Checkout
[0m> Running step 5a3665a9-d603-43a0-8786-99ef38b93b66. Step input: None
[1;3;38;5;200mThought: I can answer without using any more tools. I'll use the user's language to answer
Answer: Topik krusial untuk appointment meeting dengan salah satu brand industri retail ternama di Thailand adalah implementasi strategi Smart Checkout untuk meningkatkan konversi penjualan dan mengurangi tingkat p

'Topik krusial untuk appointment meeting dengan salah satu brand industri retail ternama di Thailand adalah implementasi strategi Smart Checkout untuk meningkatkan konversi penjualan dan mengurangi tingkat pengabaian keranjang belanja (cart abandonment).  Topik ini relevan karena tren belanja online yang terus meningkat dan persaingan yang ketat di pasar ritel Thailand.'