In [1]:
!pip install python-dotenv yake langchain-groq chromadb langdetect nltk sentence-transformers torch

Collecting yake
  Downloading yake-0.6.0-py3-none-any.whl.metadata (10 kB)
Collecting langchain-groq
  Downloading langchain_groq-0.3.7-py3-none-any.whl.metadata (2.6 kB)
Collecting chromadb
  Downloading chromadb-1.0.20-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.3 kB)
Collecting langdetect
  Downloading langdetect-1.0.9.tar.gz (981 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m981.5/981.5 kB[0m [31m12.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting jellyfish (from yake)
  Downloading jellyfish-1.2.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (2.6 kB)
Collecting segtok (from yake)
  Downloading segtok-1.5.11-py3-none-any.whl.metadata (9.0 kB)
Collecting groq<1,>=0.30.0 (from langchain-groq)
  Downloading groq-0.31.1-py3-none-any.whl.metadata (16 kB)
Collecting pybase64>=1.4.1 (from chromadb)
  Downloading pybase64-1.4.2-cp312-cp312-manylinux1_x86_64.manylinux20

In [2]:
# Upload the `.env` and your `.txt` data file
from google.colab import files
print("📁 Please upload your `.env` file (with GROQ_API_KEY) and your input `.txt` data file:")
uploaded = files.upload()

📁 Please upload your `.env` file (with GROQ_API_KEY) and your input `.txt` data file:


Saving .env to .env
Saving products.txt to products.txt


In [3]:
# Load the .env variables
import os
from dotenv import load_dotenv

load_dotenv()
groq_api_key = os.getenv("GROQ_API_KEY")

In [4]:
import json
import numpy as np
import pandas as pd
from langchain_groq import ChatGroq
import re
import uuid
import pickle
import gzip
from collections import Counter
import yake
import chromadb
from chromadb.config import Settings
import logging
from langdetect import detect
import nltk
from nltk.corpus import stopwords
from sentence_transformers import SentenceTransformer
from typing import List, Dict, Any
import torch

In [5]:
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

In [6]:
USE_NLP_TRANSLATION = True

In [7]:
# Custom embedding function class for ChromaDB
class AlibabaEmbeddingFunction:
    def __init__(self, model_name: str = "Alibaba-NLP/gte-multilingual-base"):
        """
        Initialize the custom embedding function with Alibaba's model
        """
        print(f"Loading embedding model: {model_name}")

        # Load the model with trust_remote_code=True
        self.model = SentenceTransformer(
            model_name,
            trust_remote_code=True,
            device='cpu'  # Use 'cuda' if you have GPU available
        )
        print(f"✅ Model loaded successfully!")
        print(f"✅ Model dimension: {self.model.get_sentence_embedding_dimension()}")

    def __call__(self, input: List[str]) -> List[List[float]]:
        """
        Generate embeddings for a list of texts
        """
        try:
            # Generate embeddings
            embeddings = self.model.encode(input, convert_to_tensor=False)

            # Ensure embeddings are in the right format (list of lists)
            if isinstance(embeddings, np.ndarray):
                embeddings = embeddings.tolist()

            return embeddings

        except Exception as e:
            logger.error(f"Error generating embeddings: {e}")
            # Return zero embeddings as fallback
            embedding_dim = self.model.get_sentence_embedding_dimension()
            return [[0.0] * embedding_dim for _ in input]

In [8]:
# Initialize the custom embedding function
print("Initializing custom embedding function...")
embedding_function = AlibabaEmbeddingFunction()

Initializing custom embedding function...
Loading embedding model: Alibaba-NLP/gte-multilingual-base


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/55.0 [00:00<?, ?B/s]

config.json: 0.00B [00:00, ?B/s]

configuration.py: 0.00B [00:00, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/Alibaba-NLP/new-impl:
- configuration.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


modeling.py: 0.00B [00:00, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/Alibaba-NLP/new-impl:
- modeling.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


model.safetensors:   0%|          | 0.00/611M [00:00<?, ?B/s]

Some weights of the model checkpoint at Alibaba-NLP/gte-multilingual-base were not used when initializing NewModel: ['classifier.bias', 'classifier.weight']
- This IS expected if you are initializing NewModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing NewModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.json:   0%|          | 0.00/17.1M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/964 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

✅ Model loaded successfully!
✅ Model dimension: 768


In [9]:
# Modified ChromaDB initialization
print("Initializing ChromaDB with custom embedding...")
client = chromadb.PersistentClient(path="./chroma_db")

# Create or get collection with custom embedding function
try:
    collection = client.get_collection(
        name="product_collection_alibaba",
        embedding_function=embedding_function
    )
    print("✅ Using existing collection with custom embedding")
except Exception:
    collection = client.create_collection(
        name="product_collection_alibaba",
        embedding_function=embedding_function,
        metadata={"description": "Product and QA embeddings with Alibaba model"}
    )
    print("✅ Created new collection with custom embedding")

Initializing ChromaDB with custom embedding...
✅ Created new collection with custom embedding


In [10]:
llm = ChatGroq(
    groq_api_key=groq_api_key,
    model_name="meta-llama/llama-4-scout-17b-16e-instruct",
    temperature=0.0
)

In [11]:
def parse_product_line(line, category=None):
    """
    Parse a single product line to extract all information.
    """
    try:
        # Extract product name (between first [ and ])
        name_match = re.match(r'\[([^\]]+)\]', line)
        if not name_match:
            return None
        name = name_match.group(1)

        # Extract URL (between first ( and ))
        url_match = re.search(r'\(([^)]+)\)', line)
        url = url_match.group(1) if url_match else ""

        # Extract description (between "Product Description: " and ") price:")
        desc_match = re.search(r'Product Description:\s*([^)]+)\)\s*price:', line)
        description = desc_match.group(1).strip() if desc_match else ""

        # Extract price (after "price:" and before comma or end)
        price_match = re.search(r'price:(\d+)', line)
        price = int(price_match.group(1)) if price_match else 0

        return {
            "id": str(uuid.uuid4()),
            "name": name,
            "url": url,
            "description": description,
            "price": price,
            "category": category,
            "text": f"{name} - {description} - Price: {price}"
        }

    except Exception as e:
        print(f"Error parsing line: {line[:50]}... - {e}")
        return None

In [12]:
def parse_structured_data(txt):
    """
    Parse product data from structured text file containing categories and products.
    """
    prod = []

    # Split text into lines and process each line
    lines = txt.strip().split('\n')
    current_category = None

    for line in lines:
        line = line.strip()

        # Skip empty lines
        if not line:
            continue

        # Check if line is a category header (starts with #)
        if line.startswith('#'):
            current_category = line.replace('#', '').strip()
            continue

        # Check if line contains a product (starts with [ and contains price:)
        if line.startswith('[') and 'price:' in line:
            product_data = parse_product_line(line, current_category)
            if product_data:
                prod.append(product_data)

    return prod

In [13]:
def load_data_file():
    """
    Load and parse data from uploaded text files.

    Returns:
        tuple: (docs, content) where docs is list of parsed items and content is raw text
    """
    for fname in uploaded:
        if fname.endswith(".txt"):
            try:
                with open(fname, 'r', encoding='utf-8') as f:
                    content = f.read()

                prod_items = parse_structured_data(content)
                docs = prod_items

                print(f"✅ Successfully parsed:")
                print(f"   🛍️  {len(prod_items)} products")
                print(f"   📊 Total documents: {len(docs)}")

                # Show sample products by category
                categories = {}
                for item in prod_items:
                    cat = item.get('category', 'Unknown')
                    categories[cat] = categories.get(cat, 0) + 1

                print(f"   📂 Categories found:")
                for cat, count in categories.items():
                    print(f"      • {cat}: {count} items")

                return docs, content

            except Exception as e:
                print(f"❌ Error reading file {fname}: {e}")
                return None, None

    print("❌ No `.txt` file found in uploaded files.")
    return None, None

In [14]:
def build_chroma_db(docs):
    """Build ChromaDB with documents using custom embedding"""
    print("Building ChromaDB with custom Alibaba embedding...")

    # Clear existing collection
    try:
        collection.delete()
        print("Cleared existing collection")
    except Exception as e:
        print(f"Note: {e}")

    if not docs:
        print("No documents to add")
        return collection

    batch_size = 32  # Adjust based on your memory constraints
    for i in range(0, len(docs), batch_size):
        batch_docs = docs[i:i+batch_size]

        # Prepare data for ChromaDB
        texts = [doc["text"] for doc in batch_docs]
        ids = [doc["id"] for doc in batch_docs]

        metadatas = []
        for doc in batch_docs:
            metadata = {
                "name": doc.get("name", ""),
                "category": doc.get("category", "Unknown").lower(),
                "price": doc.get("price", 0),
                "url": doc.get("url", ""),
                "type": "product" if "price" in doc else "qa"
            }
            metadatas.append(metadata)

        print(f"Processing batch {i//batch_size + 1}/{(len(docs)-1)//batch_size + 1}")

        try:
            collection.add(
                documents=texts,
                metadatas=metadatas,
                ids=ids
            )
        except Exception as e:
            print(f"Error adding batch: {e}")
            continue

    doc_count = collection.count()
    print(f"✅ ChromaDB built with {doc_count} documents using Alibaba embedding")
    return collection

In [15]:
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


True

In [16]:
# Compile regex once
WORD_REGEX = re.compile(r'\b[\w\u0980-\u09FF]+\b')

# Stopwords (supports both English and Bangla here)
STOPWORDS = {
    'en': set(stopwords.words('english')),
    # 'bn': {'কি', 'কেন', 'এই', 'ওই', 'একটি', 'এবং', 'তবে', 'যা', 'কিছু'}
}

def extract_keywords(text, max_k=5):
    """
    Extracts keywords from given text using YAKE and falls back to token frequency.
    """

    try:
        kw_extractor = yake.KeywordExtractor(lan="en", n=3, dedupLim=0.7, top=max_k)
        kws = kw_extractor.extract_keywords(text)
        extracted = [kw for kw, score in kws if isinstance(kw, str)]
        if extracted:
            logger.info(f"Extracted Keywords: {extracted[:max_k]}")
            return extracted[:max_k]
    except Exception as e:
        logger.warning(f"YAKE keyword extraction failed: {e}")

    # Fallback: frequency-based method
    words = WORD_REGEX.findall(text.lower())
    stopword_set = STOPWORDS.get("en", set())
    counts = Counter(words)
    keywords = [w for w, _ in counts.most_common() if w not in stopword_set and len(w) > 2]
    return keywords[:max_k] or [text]

In [17]:
def translate_to_en(text_bn: str) -> str:
    """
    Translate Bengali text to English using an LLM prompt.
    Returns original text if translation is not enabled or fails.
    """
    if not USE_NLP_TRANSLATION:
        logger.info("Translation disabled, returning original text.")
        return text_bn

    text_bn = text_bn.strip()
    if not text_bn:
        logger.warning("Empty text received for translation.")
        return ""

    prompt = f"""You are a professional translator for product queries.
Translate the following Bengali text to English. Return ONLY the English translation, nothing else:

Bengali: {text_bn}

English:"""

    try:
        response = llm.invoke(prompt)
        if hasattr(response, "content"):
            translated = response.content.strip()
            logger.info("Translation successful.")
            return translated or text_bn
        else:
            logger.warning("Response from LLM has no 'content'.")
            return text_bn

    except Exception as e:
        logger.error(f"Translation error: {e}")
        return text_bn

In [18]:
def parse_intent_response(content):
    """Parse structured text response into dictionary"""
    intent_dict = {
        "main_intent": "Unknown",
        "product_category": "Unknown",
        "action_type": "Unknown",
        "target_user": "Unknown",
        "key_terms": []
    }

    # Define patterns to extract each field
    patterns = {
        "main_intent": r"Main Intent:\s*(.+?)(?:\n|$)",
        "product_category": r"Product Category:\s*(.+?)(?:\n|$)",
        "action_type": r"Action Type:\s*(.+?)(?:\n|$)",
        "target_user": r"Target User:\s*(.+?)(?:\n|$)",
        "key_terms": r"Key Terms:\s*(.+?)(?:\n|$)"
    }

    for key, pattern in patterns.items():
        match = re.search(pattern, content, re.IGNORECASE)
        if match:
            value = match.group(1).strip()
            if key == "key_terms":
                # Split key terms by comma and clean them
                terms = [term.strip() for term in value.split(',')]
                # Remove empty terms and brackets
                terms = [term.strip('[]') for term in terms if term.strip()]
                intent_dict[key] = terms
            else:
                intent_dict[key] = value

    return intent_dict

In [19]:
def extract_intent(text_bn, text_en):
    """Extract intent details from query"""
    if not USE_NLP_TRANSLATION:
        return {
            "main_intent": "NLP translation & intent disabled",
            "product_category": "Unknown",
            "action_type": "Unknown",
            "target_user": "Unknown",
            "key_terms": []
        }

    # Define your exact product categories
    VALID_CATEGORIES = [
        "Baby Products",
        "Electronics & Technology",
        "Home & Kitchen Appliances",
        "Fashion & Clothing",
        "Beauty & Personal Care",
        "Sports & Fitness",
        "Books & Education",
        "Food & Beverages",
        "Automotive & Tools",
        "Health & Wellness",
        "Pet Supplies",
        "Garden & Outdoor",
        "Office & Stationery"
    ]

    categories_list = "\n".join([f"- {cat}" for cat in VALID_CATEGORIES])

    prompt = f"""Extract intent details from this product query:

Bengali: {text_bn}
English: {text_en}

IMPORTANT: For Product Category, you MUST choose ONLY from these exact categories:
{categories_list}

Provide in this exact format:
Main Intent: [description]
Product Category: [choose from list above ONLY]
Action Type: [action]
Target User: [user type]
Key Terms: [term1, term2, term3]

Examples:
Main Intent: Looking for a smartphone
Product Category: Electronics & Technology
Action Type: Product Search
Target User: General Consumer
Key Terms: phone, smartphone, mobile

Main Intent: Need baby diapers
Product Category: Baby Products
Action Type: Product Purchase
Target User: Parents
Key Terms: baby, diapers, infant"""

    try:
        response = llm.invoke(prompt)
        content = response.content.strip()
        intent_dict = parse_intent_response(content)
        logger.info(f"Extracted Intents: {intent_dict}")
        return intent_dict

    except Exception as e:
        print(f"Intent extraction error: {e}")
        return {
            "main_intent": "Intent extraction failed",
            "product_category": "Unknown",
            "action_type": "Unknown",
            "target_user": "Unknown",
            "key_terms": []
        }

In [20]:
def search_chromadb_custom(query, method="semantic", top_k=6, filters=None):
    """
    Search function using ChromaDB with custom Alibaba embedding
    """
    print(f"[CHROMADB_CUSTOM_SEARCH] Query: {query}, Method: {method}")

    if not query or not query.strip():
        return [], [], query

    try:
        # ChromaDB query with custom embedding
        results = collection.query(
            query_texts=[query],
            n_results=top_k,
            where=filters  # Metadata filtering
        )

        texts = results['documents'][0] if results['documents'] else []
        distances = results['distances'][0] if results['distances'] else []

        # Convert distances to similarity scores
        scores = [1 - dist for dist in distances]

        print(f"[CHROMADB_CUSTOM_SEARCH] Found {len(texts)} results")
        print(f"[CHROMADB_CUSTOM_SEARCH] Top score: {scores[0]:.4f}" if scores else "No results")

        return texts, scores, query

    except Exception as e:
        print(f"ChromaDB custom search error: {e}")
        return [], [], query

In [21]:
def answer_bengali_custom(query_bn, top_k=4):
    """
    Uses ChromaDB with custom Alibaba embedding and multiple search strategies
    """
    print(f"\n{'='*50}")
    print(f"CHROMADB CUSTOM EMBEDDING SEARCH")
    print(f"{'='*50}")

    # Extract preprocessing data
    en_query = translate_to_en(query_bn)
    intent_info = extract_intent(query_bn, en_query)
    kw_bn = extract_keywords(query_bn)
    kw_en = extract_keywords(en_query)

    # Combine unique results from multiple ChromaDB searches
    combined_results = set()
    all_scores = []

    # 1. English semantic search
    print(f"\n[1] English Search: {en_query}")
    try:
        en_texts, en_scores, _ = search_chromadb_custom(
            query=en_query,
            method="semantic",
            top_k=top_k
        )
        for doc, score in zip(en_texts, en_scores):
            combined_results.add(doc)
            all_scores.append(score)
        print(f"    Added {len(en_texts)} English results")
        print(f"    English results {(en_texts)} ")
    except Exception as e:
        print(f"English search error: {e}")

    # 2. Intent-based search with category filtering
    print(f"\n[2] Intent Search")
    intent_parts = []
    product_category = intent_info.get('product_category', '')
    if product_category and product_category.lower() != 'unknown':
        intent_parts.append(product_category)

    intent_key_terms = intent_info.get('key_terms', [])
    if intent_key_terms:
        intent_parts.extend(intent_key_terms)

    if intent_parts:
        intent_query = " ".join(intent_parts)
        print(f"    Intent query: {intent_query}")

        # Add category filter if available
        filters = None
        if product_category and product_category.lower() != 'unknown':
            filters = {"category": {"$eq": product_category.lower()}}

        try:
            intent_texts, intent_scores, _ = search_chromadb_custom(
                query=intent_query,
                method="semantic",
                top_k=top_k,
                filters=filters
            )
            for doc, score in zip(intent_texts, intent_scores):
                combined_results.add(doc)
                all_scores.append(score)
            print(f"    Added {len(intent_texts)} intent results")
            print(f"    Intent results {intent_texts}")
        except Exception as e:
            print(f"Intent search error: {e}")
    else:
        print("    No valid intent parts found")

    # 3. Keyword search
    if kw_en:
        en_keyword_query = " ".join(kw_en)

    if kw_bn:
        bn_keyword_query = " ".join(kw_bn)

        try:
            print(f"\n English Keyword query: {en_keyword_query}")
            en_kw_texts, en_kw_scores, _ = search_chromadb_custom(
                query=en_keyword_query,
                method="semantic",
                top_k=top_k
            )
            print(f"\n Bangla Keyword query: {bn_keyword_query}")
            bn_kw_texts, bn_kw_scores, _ = search_chromadb_custom(
                query=bn_keyword_query,
                method="semantic",
                top_k=top_k
            )
            for doc, score in zip(en_kw_texts, en_kw_scores):
                combined_results.add(doc)
                all_scores.append(score)
            print(f"    Added {len(en_kw_texts)} English keyword results")
            print(f"    English keyword results {en_kw_texts} ")
            for doc, score in zip(bn_kw_texts, bn_kw_scores):
                combined_results.add(doc)
                all_scores.append(score)
            print(f"    Added {len(bn_kw_texts)} Bangla keyword results")
            print(f"    Bangla keyword results {bn_kw_texts} ")
        except Exception as e:
            print(f"Keyword search error: {e}")
    else:
        print("No keywords found")

    # 4. Bengali direct search
    print(f"\n[4] Bengali Search: {query_bn}")
    try:
        bn_texts, bn_scores, _ = search_chromadb_custom(
            query=query_bn,
            method="semantic",
            top_k=top_k
        )
        for doc, score in zip(bn_texts, bn_scores):
            combined_results.add(doc)
            all_scores.append(score)
        print(f"    Added {len(bn_texts)} Bengali results")
        print(f"    Bengali results {bn_texts} ")
    except Exception as e:
        print(f"Bengali search error: {e}")

    # Get top results
    top = list(combined_results)[:top_k]
    ctx = "\n".join(f"{i+1}. {t}" for i, t in enumerate(top))

    print(f"\n[DEBUG] Combined {len(combined_results)} unique results")
    print(f"[DEBUG] Top {len(top)} results:")
    [print(f"  {i+1}. {item[:80]}...") for i, item in enumerate(top)]
    print(f"[DEBUG] Average score: {sum(all_scores)/len(all_scores):.4f}" if all_scores else "No scores")

    # Generate response using LLM
    llm = ChatGroq(
        groq_api_key=os.getenv("GROQ_API_KEY"),
        model_name="meta-llama/llama-4-scout-17b-16e-instruct",
        temperature=0.0
    )

    prompt = f"""STRICT RULES:
- Only mention items in CONTEXT below.
- Respond in Bengali.
- Consider the user's intent.

User Query: {query_bn}
English Translation: {en_query}
User Intent: {intent_info}

CONTEXT:
{ctx}

Your Bengali answer:"""

    try:
        llm_temp = ChatGroq(
            groq_api_key=os.getenv("GROQ_API_KEY"),
            model_name="meta-llama/llama-4-scout-17b-16e-instruct",
            temperature=0.2
        )
        response = llm_temp.invoke(prompt)
        return response.content.strip()
    except Exception as e:
        print(f"Response error: {e}")
        return "দুঃখিত, উত্তর তৈরি করতে সমস্যা হয়েছে।"

In [22]:
# Load data & build or load ChromaDB
docs, content = load_data_file()
if docs:
    try:
        # Check if ChromaDB already has data
        doc_count = collection.count()
        if doc_count > 0:
            print(f"✅ Using existing ChromaDB with {doc_count} documents")
        else:
            print("Building new ChromaDB...")
            collection = build_chroma_db(docs)
    except Exception as e:
        print(f"ChromaDB error: {e}")

✅ Successfully parsed:
   🛍️  132 products
   📊 Total documents: 132
   📂 Categories found:
      • Baby Products: 12 items
      • Electronics & Technology: 10 items
      • Home & Kitchen Appliances: 10 items
      • Fashion & Clothing: 10 items
      • Beauty & Personal Care: 10 items
      • Sports & Fitness: 10 items
      • Books & Education: 10 items
      • Food & Beverages: 10 items
      • Automotive & Tools: 10 items
      • Health & Wellness: 10 items
      • Pet Supplies: 10 items
      • Garden & Outdoor: 10 items
      • Office & Stationery: 10 items
Building new ChromaDB...
Building ChromaDB with custom Alibaba embedding...
Note: At least one of ids, where, or where_document must be provided in delete.
Processing batch 1/5
Processing batch 2/5
Processing batch 3/5
Processing batch 4/5
Processing batch 5/5
✅ ChromaDB built with 132 documents using Alibaba embedding


In [None]:
# Interactive loop
print(f"\n🔥 System Ready! NLP Translation: {USE_NLP_TRANSLATION}")
print("Commands:")
print("  'quit' - Exit")
print("  'toggle' - Switch NLP mode")
print("  Query your product")

while True:
    q = input("\nবাংলা প্রশ্ন: ").strip()
    if q.lower() == "quit":
        break
    elif q.lower() == "toggle":
        USE_NLP_TRANSLATION = not USE_NLP_TRANSLATION
        print(f"NLP Translation toggled to: {USE_NLP_TRANSLATION}")
        continue

    if q:
        print("\n✅ Answer:", answer_bengali_custom(q))


🔥 System Ready! NLP Translation: True
Commands:
  'quit' - Exit
  'toggle' - Switch NLP mode
  Query your product

বাংলা প্রশ্ন: pet supplier ki ki ache?

CHROMADB CUSTOM EMBEDDING SEARCH


INFO:__main__:Translation successful.
INFO:__main__:Extracted Intents: {'main_intent': 'Looking for pet products', 'product_category': 'Pet Supplies', 'action_type': 'Product Inquiry', 'target_user': 'Pet Owner', 'key_terms': ['pet', 'supplier', 'products']}
INFO:__main__:Extracted Keywords: ['pet supplier', 'pet', 'ache', 'supplier']
INFO:__main__:Extracted Keywords: ['pet products', 'pet', 'products']



[1] English Search: What pet products do you have?
[CHROMADB_CUSTOM_SEARCH] Query: What pet products do you have?, Method: semantic
[CHROMADB_CUSTOM_SEARCH] Found 4 results
[CHROMADB_CUSTOM_SEARCH] Top score: 0.3222
    Added 4 English results
    English results ['Pet Grooming Kit - Complete grooming kit with brushes, nail clippers, scissors, and comb for maintaining pet hygiene and appearance. - Price: 1200', 'Pet Carrier Travel Bag - Airline-approved pet carrier with mesh ventilation, comfortable padding, and secure zipper closure for safe travel. - Price: 2200', 'Royal Canin Dog Food 15kg - Premium dry dog food with balanced nutrition, high-quality proteins, and essential vitamins for adult dogs. - Price: 4500', 'Bebble Shampoo & Shower Gel, Strawberry - A 2-in-1 shampoo and shower gel with a mild formula and pleasant strawberry scent. cleanses gently without drying the skin or hair. - Price: 999'] 

[2] Intent Search
    Intent query: Pet Supplies pet supplier products
[CHROMADB_

INFO:__main__:Translation successful.
INFO:__main__:Extracted Intents: {'main_intent': 'Looking for a mobile phone', 'product_category': 'Electronics & Technology', 'action_type': 'Product Inquiry', 'target_user': 'General Consumer', 'key_terms': ['mobile', 'phone', 'available']}
INFO:__main__:Extracted Keywords: ['mobile']
INFO:__main__:Extracted Keywords: ['mobile phones', 'mobile', 'phones']



[1] English Search: Which mobile phones are available with you?
[CHROMADB_CUSTOM_SEARCH] Query: Which mobile phones are available with you?, Method: semantic
[CHROMADB_CUSTOM_SEARCH] Found 4 results
[CHROMADB_CUSTOM_SEARCH] Top score: 0.3275
    Added 4 English results
    English results ["iPhone 15 Pro Max - Apple's most advanced iPhone with titanium design, A17 Pro chip, 48MP camera system, and USB-C connectivity. - Price: 149000", 'Samsung Galaxy S24 Ultra - Premium flagship smartphone with 200MP camera, S Pen stylus, 6.8-inch Dynamic AMOLED display, and 5000mAh battery. - Price: 135000', 'AirPods Pro 2nd Gen - Wireless earbuds with active noise cancellation, spatial audio, and up to 6 hours listening time per charge. - Price: 25000', 'MacBook Air M3 - Ultra-thin laptop with Apple M3 chip, 13.6-inch Liquid Retina display, up to 18 hours battery life, and 8GB unified memory. - Price: 125000'] 

[2] Intent Search
    Intent query: Electronics & Technology mobile phone available
[CHR