In [60]:
from haystack import Pipeline, component
from haystack.components.embedders import SentenceTransformersTextEmbedder
from haystack.components.agents import Agent
from haystack.components.generators.chat import OpenAIChatGenerator
from haystack_integrations.document_stores.mongodb_atlas import MongoDBAtlasDocumentStore
from haystack.utils import Secret
from haystack.components.builders import ChatPromptBuilder, PromptBuilder
from haystack.dataclasses import ChatMessage
from haystack.tools.tool import Tool
from haystack_experimental.chat_message_stores.in_memory import InMemoryChatMessageStore
from haystack_experimental.components.retrievers import ChatMessageRetriever
from haystack_experimental.components.writers import ChatMessageWriter
from haystack_integrations.components.retrievers.mongodb_atlas import MongoDBAtlasEmbeddingRetriever
from haystack.components.generators import OpenAIGenerator
from pymongo import MongoClient
from typing import List,Annotated
from haystack.components.routers import ConditionalRouter
from haystack.dataclasses import Document
from getpass import getpass
import re
import json
import os

### LOGGER

In [61]:
# import logging
# from haystack import tracing
# from haystack.tracing.logging_tracer import LoggingTracer

# logging.basicConfig(format="%(levelname)s - %(name)s -  %(message)s", level=logging.WARNING)
# logging.getLogger("haystack").setLevel(logging.DEBUG)

# tracing.tracer.is_content_tracing_enabled = True # to enable tracing/logging content (inputs/outputs)
# tracing.enable_tracing(LoggingTracer(tags_color_strings={"haystack.component.input": "\x1b[1;31m", "haystack.component.name": "\x1b[1;34m"}))

### ENV VARIABLES

In [62]:
mongo_connection_string = Secret.from_token(os.getenv("MONGO_CONNECTION_STRING"))
openai_api_key = Secret.from_token(os.getenv("OPENAI_API_KEY"))

In [63]:
chat_message_store = InMemoryChatMessageStore()
document_store_product = MongoDBAtlasDocumentStore(
    database_name="depato_store",
    collection_name="products",
    vector_search_index="vector_index",
    full_text_search_index="search_index",
    mongo_connection_string=mongo_connection_string
)
document_store_common = MongoDBAtlasDocumentStore(
    database_name="depato_store",
    collection_name="common_information",
    vector_search_index="vector_index_common",
    full_text_search_index=None,
    mongo_connection_string=mongo_connection_string
)

## Membuat Paraphraser Tool

In [64]:
class ParaphraserPipeline:
    def __init__(self,chat_message_store):
        self.memory_retriever = ChatMessageRetriever(chat_message_store)
        self.pipeline = Pipeline()
        self.pipeline.add_component("prompt_builder",ChatPromptBuilder(variables=["query","memories"],required_variables=["query", "memories"],))
        self.pipeline.add_component("generator", OpenAIChatGenerator(model="gpt-4.1-2025-04-14", api_key=Secret.from_token(os.environ["OPENAI_API_KEY"])))
        self.pipeline.add_component("memory_retriever", self.memory_retriever)

        self.pipeline.connect("prompt_builder.prompt", "generator.messages")
        self.pipeline.connect("memory_retriever", "prompt_builder.memories")
    
    def run(self, query):
        messages = [
            ChatMessage.from_system(
                "You are a helpful assistant that paraphrases user queries based on previous conversations."
            ),
            ChatMessage.from_user(
                """
                Please paraphrase the following query based on the conversation history provided below. If the conversation history is empty, please return the query as is.
                history:
                {% for memory in memories %}
                    {{memory.text}}
                {% endfor %}
                query: {{query}}
                answer:
                """
            )
        ]

        res = self.pipeline.run(
            data = {
                "prompt_builder":{
                    "query": query,
                    "template": messages
                },
            
            },
            include_outputs_from=["generator"]
        )
        print("Pipeline Input", query)
        return res["generator"]["replies"][0].text

## Membuat History Tool

In [65]:
class ChatHistoryPipeline:
    def __init__(self, chat_message_store):
        self.chat_message_store = chat_message_store
        self.pipeline = Pipeline()
        self.pipeline.add_component("memory_retriever", ChatMessageRetriever(chat_message_store))
        self.pipeline.add_component("prompt_builder", PromptBuilder(variables=["memories"], required_variables=["memories"], template="""
        Previous Conversations history:
        {% for memory in memories %}
            {{memory.text}}
        {% endfor %}
        """)
        )
        self.pipeline.connect("memory_retriever", "prompt_builder.memories")

    def run(self):
        res = self.pipeline.run(
            data = {},
            include_outputs_from=["prompt_builder"]
        )

        return res["prompt_builder"]["prompt"]

## Membuat Route Common

In [66]:
class  CommonRoute:
    def __init__(self, document_store):
        self.document_store = document_store
        template_common_message = [
            ChatMessage.from_system(
            """
            You are smart personal assistant system to help customer find common information
            Answer the user's question using only the provided context.
            Maintain the same language as the question.   
            Context:
            {{ context | map(attribute='content') | join(" ") | replace("\n", " ")}}   
            Instructions:
            1. Only use information from the context to answer.
            2. If the context does not contain the required Context, respond with:
            "I'm sorry, I can't answer that right now."
            3. Keep the answer concise and clear.          
            """
        ),
            ChatMessage.from_user(
                "{{ query }}"
            )
        ]
        self.pipeline = Pipeline()
        self.pipeline.add_component("embedder_common", SentenceTransformersTextEmbedder())
        self.pipeline.add_component("retriever_common", MongoDBAtlasEmbeddingRetriever( document_store=document_store, top_k=6))
        self.pipeline.add_component("prompt_builder_common", ChatPromptBuilder(template=template_common_message))
        self.pipeline.add_component("generator_common", OpenAIChatGenerator(api_key=openai_api_key, model="gpt-4.1"))

        self.pipeline.connect("embedder_common.embedding", "retriever_common.query_embedding")
        self.pipeline.connect("retriever_common.documents", "prompt_builder_common.context")
        self.pipeline.connect("prompt_builder_common.prompt", "generator_common.messages")
    def run(self, query):
        res = self.pipeline.run(
            data={
                "embedder_common":{"text" : query},
                "prompt_builder_common" : {"query" : query},
            }, include_outputs_from=["generator_common"]
        )
        return res["generator_common"]["replies"][0].text

## Membuat Metadata Filtering

In [67]:
class MongoDBAtlas:
    def __init__(self, mongo_connection_string:str):
        self.client = MongoClient(mongo_connection_string)
        self.db = self.client.depato_store
        self.material_collection = self.db.materials
        self.category_collection = self.db.categories

    def get_materials(self):
        return [doc['name'] for doc in self.material_collection.find()]

    def get_categories(self):
        return [doc['name'] for doc in self.category_collection.find()]

In [68]:
@component
class GetMaterials:
    def __init__(self):
        self.db = MongoDBAtlas(os.getenv("MONGO_CONNECTION_STRING"))
    @component.output_types(materials=List[str])
    def run(self):
        materials = self.db.get_materials()
        return {"materials": materials}

In [69]:
@component
class GetCategories:
    def __init__(self):
        self.db = MongoDBAtlas(os.environ['MONGO_CONNECTION_STRING'])
    
    @component.output_types(categories=List[str])
    def run(self):
        categories = self.db.get_categories()
        return {"categories": categories}

In [70]:
METADATA_FILTER_TEMPLATE = """
You are a json generator that have a job to generate json based on the input.
The return json should be in the format:
```json
{
    "operator": "AND",
    "conditions":[
        {"field": "meta.category", "operator":"==", "value": <category>},
        {"field": "meta.material", "operator":"==", "value": <material>},
        {"field": "meta.gender", "operator":"==", "value" : <male|female|unisex>},
        {"field": "meta.price", "operator":<"<="|">="|"==">, "value": <price>}
    ]
}
```
The json key above can be omiitted if the value is not provided in the input, so please make sure to only return the keys that are provided in the input.

For the material and category, you can only use the material and category that are provided below:
Materials: [ {% for material in materials %} {{ material }} {% if not loop.last %}, {% endif %} {% endfor %} ]

Categories: [ {% for category in categories %} {{ category }} {% if not loop.last %}, {% endif %} {% endfor %} ]

if the input does not contain any of the keys above, you should return an empty json object like this:
```json
{}
```
Sometimes the material and category can be negated, so you should also handle that by using the operator "!=" for material and category. 

Sometimes the material and category is not explicitly mentioned, you should analyze which material and category is the most suitable based on the input, and return the json with the material and category that you think is the most suitable.

Nestede conditions are allowed, for nested conditions, you can use "OR" and "AND" as the operator, and the conditions should be in the "conditions" array.

if user said the price around some value, please find the price between those value -10 and value +10.

The example of the result are expected to be like this:

1. Input: "can you give me a adress with cotton material?"
output:
```json
{
    "operator": "AND",
    "conditions": [
        {"field": "meta.material", "operator": "==", "value": "Cotton"},
        {"field": "meta.category", "operator": "==", "value": "Dresses/Jumpsuits"}
    ]
}
```

2. Input: "Give me Shirt that is not made of cotton and has a price less than $100"
output:
```json
{
    "operator": "AND",
    "conditions": [
        {"field": "meta.category", "operator": "==", "value": "Tops"},
        {"field": "meta.material", "operator": "!=", "value": "Cotton"},
        {"field": "meta.price", "operator": "<=", "value": 100}
    ]
}
3. Input: "I want a dress that is not hot and has a price greater than $50"
output:
```json
{
    "operator": "AND",
    "conditions": [
        {"field": "meta.category", "operator": "==", "value": "Dresses/Jumpsuits"},
        {"field": "meta.price", "operator": ">=", "value": 50},
        {
            "operator": "OR",
            "conditions": [
                {"field": "meta.material", "operator": "==", "value": "Cotton"},
                {"field": "meta.material", "operator": "==", "value": "Polyester"}
            ]
        }
    ]
}

4. Input i want tops that have price between $20 and $50
output:
```json
{
    "operator": "AND",
    "conditions": [
        {"field": "meta.category", "operator": "==", "value": "Tops"},
        {
            "operator": "AND",
            "conditions":[
                {"field": "meta.price", "operator": ">=", "value": 20},
                {"field": "meta.price", "operator": "<=", "value": 50}
            ]
        }
    ]
}
```
5. Input: I want the dress price around $50
output: 
```json
{
    "operator": "AND",
    "conditions": [
        {"field": "meta.category", "operator": "==", "value": "Dresses/Jumpsuits"},
        {
            "operator": "AND",
            "conditions":[
                {"field": "meta.price", "operator": ">=", "value": 40},
                {"field": "meta.price", "operator": "<=", "value": 60}
            ]
        }
    ]
}
```
6. Input: {{input}}
output:

```

"""

In [71]:
class MetaDataFilterPipeline:
    def __init__(self, get_materials, get_categories, template):
        self.get_materials = get_materials
        self.get_categories = get_categories
        self.template = template

        self.pipeline = Pipeline()
        self.pipeline.add_component("materials", GetMaterials())
        self.pipeline.add_component("categories", GetCategories())
        self.pipeline.add_component(
            "prompt_builder",
            PromptBuilder(
                template=self.template,
                required_variables=["input", "materials", "categories"],
            )
        )
        self.pipeline.add_component("generator", OpenAIGenerator(
            model="gpt-4.1-2025-04-14",
            api_key=Secret.from_token(os.environ['OPENAI_API_KEY'])
        ))
        self.pipeline.connect("materials.materials", "prompt_builder.materials")
        self.pipeline.connect("categories.categories", "prompt_builder.categories")
        self.pipeline.connect("prompt_builder","generator")

    def run(self, query: str):
        res = self.pipeline.run(
            data={
                "prompt_builder": {
                    "input": query,
                },
            },
        )
        return res["generator"]["replies"][0]
      

## Membuat Products Route

In [72]:
class ProductsRoute:
    def __init__(self, chat_message_store, document_store):
        self.chat_message_store = chat_message_store
        self.document_store = document_store
        self.pipeline = Pipeline()
        self.pipeline.add_component("embedder_products" , SentenceTransformersTextEmbedder())
        self.pipeline.add_component("retriever_products", MongoDBAtlasEmbeddingRetriever(document_store=document_store,top_k=10))
        self.pipeline.add_component("prompt_builder_products", ChatPromptBuilder(variables=["query","documents"],required_variables=["query", "documents"]))
        self.pipeline.add_component("generator_products", OpenAIChatGenerator(model="gpt-4.1-2025-04-14", api_key=Secret.from_token(os.environ["OPENAI_API_KEY"])))        
    
        self.pipeline.connect("embedder_products.embedding" , "retriever_products.query_embedding")
        self.pipeline.connect("retriever_products.documents", "prompt_builder_products.documents")
        self.pipeline.connect("prompt_builder_products.prompt", "generator_products.messages")

    def run(self, query: str,  filter: dict = {}):
        messages = [
            ChatMessage.from_system(
                  """
                    You are a shop assiistant that helps users find the best products in a shopping mall.
                    You will be give a query and list of products. Your task is to generate a list of products that best match the query.
                    USE THE SAME LANGUAGE AS THE QUERY!
                    The output should be a list of products in the following format:
                    .  
                    1. Title: 
                    Price: 
                    Material: 
                    Category: 
                    Brand: 
                    Recommendation: 

                    From the format above, you should pay attention to the following:
                    1.  should be a short summary of the query.
                    2.  should be a number starting from 1.
                    3.  should be the name of the product, this product name can be found from the product_name field.
                    4.  should be the price of the product, this product price can be found from the product_price field.
                    5.  should be the material of the product, this product material can be found from the product_material field.
                    6.  should be the category of the product, this product category can be found from the product_category field.
                    7.  should be the brand of the product, this product brand can be found from the product_brand field.
                    8.  should be the recommendation of the product, you should give a recommendation why this product is recommended, please pay attentation to the product_content field. 


                    You should only return the list of products that best match the query, do not return any other information.
                    the products are:
                    {% for product in documents %}
                    ===========================================================
                    {{loop.index + 1}}. product_name: {{ product.meta.title }}
                    product_price: {{ product.meta.price }}
                    product_material: {{ product.meta.material }}
                    product_category: {{ product.meta.category }}
                    product_brand: {{ product.meta.brand }}
                    product_content: {{ product.content}}
                    {% endfor %}

                    ===========================================================

                    Answer:

                    """
            ),
            ChatMessage.from_user(
                """
                 The query is: {{query}}
                """
            )
        ]
        res = self.pipeline.run(
            data={
                 "embedder_products":{
                    "text": query,
                },
                "retriever_products":{
                    "filters":filter
                },
               "prompt_builder_products":{
                   "query": query,
                   "template": messages
               },

            },
            include_outputs_from=["generator_prodcts", "prompt_builder_products"]
        )
        print(res["prompt_builder_products"]["prompt"])
        return res["generator_products"]["replies"][0].text

In [73]:
paraprahser_pipeline = ParaphraserPipeline(chat_message_store=chat_message_store)

## Tools for CommonRoute

In [74]:
common_route = CommonRoute(document_store=document_store_common)
products_route = ProductsRoute(chat_message_store=chat_message_store, document_store=document_store_product)
def retrieve_and_generate_common(query:str):
    pharaprased_query = paraprahser_pipeline.run(query)
    return common_route.run(query=pharaprased_query)
common_tool = Tool(
    name="retrieve_and_generate_common_information",
    description="use this to retrieve common information based on the query",
    function=retrieve_and_generate_common,
    parameters= {
        "type": "object",
        "properties": {
            "query": {
                "type": "string",
                "description": "The user query to retrieve common_information and generate an answer."
            }
        },
        "required": ["query"]
    }
)

ChatPromptBuilder has 2 prompt variables, but `required_variables` is not set. By default, all prompt variables are treated as optional, which may lead to unintended behavior in multi-branch pipelines. To avoid unexpected execution, ensure that variables intended to be required are explicitly set in `required_variables`.


## Tools for Products

In [75]:
retrieve_and_generate_pipeline = ProductsRoute(chat_message_store=chat_message_store, document_store=document_store_product)
metadata_filter_pipeline = MetaDataFilterPipeline(
    get_materials=GetMaterials(),
    get_categories=GetCategories(),
    template=METADATA_FILTER_TEMPLATE
)

def retrieve_and_generate_products(query: Annotated[str, "User query"]):
    """
    This tool retrieves products based on user query and generates an answer.
    """
    pharaprased_query = paraprahser_pipeline.run(query)
    result = metadata_filter_pipeline.run(pharaprased_query)
    data = {}
    try:
        json_match = re.search(r'```json\n(.*?)\n```', result, re.DOTALL)
        if json_match:
           json_str = json_match.group(1)
           data = json.loads(json_str)
        else:
            data = {}
    except Exception as e:
          data = {}
    return retrieve_and_generate_pipeline.run(pharaprased_query,data)

product_tool = Tool(
    name="retrieve_and_generate_recommendation",
    description="Use this tool to create metadata filter, retrieve products based on user query, and generate an answer.",
    function=retrieve_and_generate_products,
    parameters= {
        "type": "object",
        "properties": {
            "query": {
                "type": "string",
                "description": "The user query to retrieve products and generate an answer."
            }
        },
        "required": ["query"]
    }
)

In [76]:
agent = Agent(
    chat_generator = OpenAIChatGenerator(model="gpt-4.1-2025-04-14", api_key=Secret.from_token(os.environ["OPENAI_API_KEY"])),
    tools=[product_tool, common_tool],
    system_prompt="""
   You are a helpful shop assistant AI agent. Your job is to provide:

    1. Product recommendations (based on material, price, and category).
    2. Common shop information (Shipping, Returns, Privacy Policy, Payment, Terms & Conditions, etc).

    DECISION LOGIC:
    - If the user asks about common/shop information → use the common information tool.
    - If the user asks about products:
        • Analyze the user query and conversation history.
        • If enough information is provided (material/price/category), call the product tool.
        • If information is insufficient, ask the user for clarification.

    WORKFLOW RULES:
    - Only call ONE tool at a time.
    - After a tool returns results, evaluate:
        “Am I done?”
        • If yes → respond with the final answer.
        • If no → call another tool.
    - If the user's request is outside product or shop information, politely decline.

    Your responses must stay focused on these two domains only.
    """,
    exit_conditions=["text"],
    max_agent_steps= 20,
)

In [77]:
chat_history_pipeline = ChatHistoryPipeline(chat_message_store=chat_message_store)

In [78]:
agent.warm_up()
chat_message_writer = ChatMessageWriter(chat_message_store)
query = input("Masukkan query: ")

history = chat_history_pipeline.run()
messages = [ChatMessage.from_system(history),ChatMessage.from_user(query)]
chat_message_writer.run([ChatMessage.from_user(query)])
response = agent.run(messages=messages)
response_text = response["messages"][-1].text

messages_save = [
    ChatMessage.from_assistant(response_text)
]
chat_message_writer.run(messages_save)
print(f"Response: {response_text}")

Pipeline Input baju pria harga di bawah 50 ribu


Batches: 100%|██████████| 1/1 [00:00<00:00, 12.72it/s]


Response: Berikut beberapa rekomendasi baju pria di bawah 50 ribu:

1. buXsbaum T-Shirt Hungary-Logo (Rp19.990)
   - Bahan: 100% katun, nyaman, desain unik, print tahan lama.
2. buXsbaum T-Shirt Morocco (Rp14.490)
   - Bahan: Katun, desain kreatif, fashionable dengan harga ekonomis.
3. Billabong Boys' Kids Permahang 10 Short Sleeve T-Shirt (Rp15.940)
   - Bahan: Katun ring spun premium, desain simpel, kualitas terjamin.
4. Soul Eater-Character Panels T-Shirt (Rp17.720)
   - Bahan: Katun, bergrafis karakter anime Soul Eater, cocok untuk daily look.
5. Men's Life Behind Bars MTB Mountain Biking T Shirt (Rp13.720)
   - Desain biker, sporty, ramah di kantong.
6. buXsbaum T-Shirt Chess-Set (Rp18.990)
   - Bahan: 100% katun, desain minimalis pecinta catur, harga ekonomis.

Semua pilihan di atas terjangkau dengan bahan utama katun yang nyaman. Jika ingin rekomendasi berdasarkan model tertentu (kaos polos, kemeja, dsb.), silakan informasikan lebih detail!
