In [9]:

import google.generativeai as genai
from PIL import Image
import os
import requests
import io
from urllib.parse import urlparse
import time
import logging
from abc import ABC, abstractmethod

In [10]:
class LLMProvider(ABC):
    @abstractmethod
    def generate(self, prompt: str, image_url: str = None) -> str:
        pass

class GeminiProvider(LLMProvider):
    def __init__(self, model_name: str = "gemini-2.5-flash"):
        genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
        self.model = genai.GenerativeModel(model_name)

    def generate(self, prompt: str, image_url: str = None) -> str:
        content = [prompt]

        # Add image if provided
        if image_url:
            try:
                response = requests.get(image_url, timeout=10)
                response.raise_for_status()
                img = Image.open(io.BytesIO(response.content))
                content.append(img)
            except Exception as e:
                logger.error(f"Error processing image: {e}")

        max_retries = 3
        for attempt in range(max_retries):
            try:
                response = self.model.generate_content(
                    content,
                    generation_config=genai.types.GenerationConfig(
                        temperature=0,
                        max_output_tokens=500,
                        response_mime_type="application/json"
                    )
                )

                if hasattr(response, 'text') and response.text:
                    return response.text

            except Exception as e:
                logger.error(f"API error (attempt {attempt + 1}): {e}")
                if attempt == max_retries - 1:
                    return '{"reasoning": "Technical error", "FunctionCall": []}'
                time.sleep(1)

        return '{"reasoning": "Connection issues", "FunctionCall": []}'

In [11]:
def create_prompt(query: str, image_url: str = None) -> str:
    image_filename = os.path.basename(urlparse(image_url).path) if image_url else None

    return f"""You are a Customer Service Agent. Analyze the user query and respond with JSON.

OUTPUT JSON SCHEMA:
{{
    "reasoning": "Explanation of user intent and why specific tools are needed",
    "FunctionCall": [
        {{
            "name": "search_products",
            "args": {{
                "text": "combined search text with image descriptions",
                "image": {str(bool(image_url)).lower()},
                "image_url": {[image_filename] if image_filename else []},
                "filters": {{
                    "category": "string or null",
                    "price_range": {{
                        "min": 0,
                        "max": 0,
                        "operation": "eq"
                    }},
                    "attributes": {{
                        "color": "string or null",
                        "size": "string or null",
                        "brand": "string or null",
                        "material": "string or null"
                    }}
                }}
            }}
        }}
    ]
}}

Instructions:
- Extract keywords from text query
- If image provided: describe product style, color, material, category, brand
- Combine text + image description in the "text" field
- Set "image" to true/false based on whether image was provided
- Include image filename in "image_url" array if image exists

User Query: {query}"""

# Test functions
def test_text_only():
    print("=== TEXT ONLY TEST ===")
    llm = GeminiProvider()
    prompt = create_prompt("I want to buy iPhone 15 pro red color")
    response = llm.generate(prompt)
    print(f"Response:\n{response}\n")

def test_with_image():
    print("=== TEXT + IMAGE TEST ===")
    llm = GeminiProvider()
    image_url = "https://demo2.wpthemego.com/themes/sw_himarket/wp-content/uploads/2016/04/20.jpg"
    prompt = create_prompt("I want to buy something like this", image_url)
    response = llm.generate(prompt, image_url)
    print(f"Response:\n{response}\n")

if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    test_text_only()
    test_with_image()

=== TEXT ONLY TEST ===


2025-07-13 11:43:05,793 - DEBUG - Starting new HTTPS connection (1): demo2.wpthemego.com:443


Response:
{
    "reasoning": "The user is looking to purchase a specific product, 'iPhone 15 pro red color'. The 'search_products' tool is appropriate for this query. I will extract the product name and color as search text and filter attributes.",
    "FunctionCall": [
        {
            "name": "search_products",
            "args": {
                "text": "iPhone 15 pro red color",
                "image": false,
                "image_url": [],
                "filters": {
                    "category": null,
                    "price_range": {
                        "min": 0,
                        "max": 0,
                        "operation": "eq"
                    },
                    "attributes": {
                        "color": "red",
                        "size": null,
                        "brand": "iPhone",
                        "material": null
                    }
                }
            }
        }
    ]
}

=== TEXT + IMAGE TEST ===


2025-07-13 11:43:06,390 - DEBUG - https://demo2.wpthemego.com:443 "GET /themes/sw_himarket/wp-content/uploads/2016/04/20.jpg HTTP/1.1" 200 21384
2025-07-13 11:43:06,622 - DEBUG - Importing AvifImagePlugin
2025-07-13 11:43:06,687 - DEBUG - Importing BlpImagePlugin
2025-07-13 11:43:06,689 - DEBUG - Importing BmpImagePlugin
2025-07-13 11:43:06,689 - DEBUG - Importing BufrStubImagePlugin
2025-07-13 11:43:06,690 - DEBUG - Importing CurImagePlugin
2025-07-13 11:43:06,690 - DEBUG - Importing DcxImagePlugin
2025-07-13 11:43:06,692 - DEBUG - Importing DdsImagePlugin
2025-07-13 11:43:06,694 - DEBUG - Importing EpsImagePlugin
2025-07-13 11:43:06,695 - DEBUG - Importing FitsImagePlugin
2025-07-13 11:43:06,696 - DEBUG - Importing FliImagePlugin
2025-07-13 11:43:06,697 - DEBUG - Importing FpxImagePlugin
2025-07-13 11:43:06,698 - DEBUG - Image: failed to import FpxImagePlugin: No module named 'olefile'
2025-07-13 11:43:06,698 - DEBUG - Importing FtexImagePlugin
2025-07-13 11:43:06,699 - DEBUG - Impor

Response:
{
  "reasoning": "The user wants to find a product similar to the one shown in the image. The image displays a table lamp with a distinctive design. I need to extract the key visual attributes from the image, such as the product type, colors, materials, and style, and combine them with the user's intent to search for similar items. The 'search_products' tool is appropriate for this task.",
  "FunctionCall": [
    {
      "name": "search_products",
      "args": {
        "text": "table lamp with a dark, textured, spherical base and a light beige fabric lampshade, possibly with a woven or braided pattern on the base",
        "image": true,
        "image_url": [
          "20.jpg"
        ],
        "filters": {
          "category": "lamp",
          "price_range": {
            "min": 0



In [12]:
1/0

ZeroDivisionError: division by zero

In [5]:
import os
import logging
from dotenv import load_dotenv
import tqdm as notebook_tqdm
import google.generativeai as genai
from abc import ABC, abstractmethod
import time

load_dotenv()

logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

In [7]:
def create_test_prompt(query: str, history: str = "") -> str:
    return f"""You are a Customer Service Agent. Analyze the user query and any images to respond with JSON.

Your task:
1. Understand what the user wants
2. If image provided, describe the product shown
3. Choose tool: search_products OR search_faqs
4. Combine text query + image description for search

Tools available:
- search_products: Find products and recommendations
- search_faqs: Answer policy, shipping, return questions

Return JSON format:
{{
    "reasoning": "explain user intent and tool choice",
    "FunctionCall": [
        {{
            "name": "search_products",
            "args": {{
                "text": "combined search text with image descriptions",
                "filters": {{
                    "category": null,
                    "price_range": {{"min": 0, "max": 0, "operation": "eq"}},
                    "attributes": {{"color": null, "size": null, "brand": null, "material": null}}
                }}
            }}
        }}
    ]
}}

Instructions:
- Extract keywords from text query
- If image shown: describe style, color, material, shape, category, brand
- Combine both into the "text" field
- Example: "iPhone red" + image of "sleek smartphone titanium frame" = "iPhone red sleek smartphone titanium frame"
- Focus on searchable product features only

User Query: {query}
History: {history}"""


llm_provider = GeminiProvider()

prompt1 = create_test_prompt("I want to buy the latest iPhone available in your cataluoge 15 pro red calor")
# print(f"PROMPT:\n{prompt1}")
# print("\n" + "-" * 30)

response1 = llm_provider.generate(prompt1)
print(f"JSON RESPONSE:\n{response1}")


2025-07-13 11:31:19,245 - DEBUG - Generating response for prompt length: 1371
2025-07-13 11:31:19,246 - DEBUG - Gemini API call attempt 1
2025-07-13 11:31:21,545 - DEBUG - Gemini response received: ```json
{
    "reasoning": "The user is looking to purchase a specific product, the 'latest iPhone 15 Pro' in 'red color'. This requires searching the product catalog, so the 'search_products' tool is...


JSON RESPONSE:
```json
{
    "reasoning": "The user is looking to purchase a specific product, the 'latest iPhone 15 Pro' in 'red color'. This requires searching the product catalog, so the 'search_products' tool is appropriate.",
    "FunctionCall": [
        {
            "name": "search_products",
            "args": {
                "text": "latest iPhone 15 Pro red color",
                "filters": {
                    "category": "smartphone",
                    "price_range": null,
                    "attributes": {
                        "color": "red",
                        "brand": "iPhone",
                        "model": "15 Pro"
                    }
                }
            }
        }
    ]
}
```


In [6]:
class LLMProvider(ABC):
    @abstractmethod
    def generate(self, prompt: str) -> str:
        pass

class GeminiProvider(LLMProvider):
    def __init__(self, model_name: str = "gemini-2.5-flash"):
        genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
        self.model = genai.GenerativeModel(model_name)

    def generate(self, prompt: str) -> str:
        logger.debug(f"Generating response for prompt length: {len(prompt)}")
        max_retries = 3
        for attempt in range(max_retries):
            try:
                logger.debug(f"Gemini API call attempt {attempt + 1}")
                response = self.model.generate_content(
                    prompt,
                    generation_config=genai.types.GenerationConfig(
                        temperature=0,
                        max_output_tokens=500,
                        top_p=0,
                        top_k=40
                    )
                )

                if hasattr(response, 'text') and response.text:
                    logger.debug(f"Gemini response received: {response.text[:200]}...")
                    return response.text

                logger.warning(f"Empty response from Gemini (attempt {attempt + 1})")

            except Exception as e:
                logger.error(f"Gemini API error (attempt {attempt + 1}): {e}")
                if attempt == max_retries - 1:
                    fallback_response = '{"thought": "Technical difficulties", "answer": "I\'m experiencing technical issues. Please try again."}'
                    logger.debug(f"Returning fallback response: {fallback_response}")
                    return fallback_response


                time.sleep(1)

        fallback_response = '{"thought": "Max retries exceeded", "answer": "Connection issues. Please try again later."}'
        logger.debug(f"Max retries exceeded, returning: {fallback_response}")
        return fallback_response

In [11]:
def create_test_prompt(query: str, history: str = "") -> str:
    return f"""You are an expert Customer Service Agent. Analyze the user's query to understand their intent and plan the appropriate response.

1. REASONING: Understand what the user wants and determine what tools are needed
2. TOOL SELECTION: Decide which functions to call (search_products, search_faqs, or both)
3. PARAMETER EXTRACTION: Extract search parameters and filters from the query

AVAILABLE TOOLS:
- search_products: For finding products, recommendations, product details in Milvus database
- search_faqs: For questions about the business, shipping, returns, general info

OUTPUT JSON SCHEMA:
{{
    "reasoning": "Explanation of user intent and why specific tools are needed",
    "FunctionCall": [
        {{
            "name": "search_products",
            "args": {{
                "text": "combined search text with image descriptions",
                "filters": {{
                    "category": "string or null",
                    "price_range": {{
                        "min": number,
                        "max": number,
                        "operation": "eq" | "lt" | "gt" | "between"
                    }},
                    "attributes": {{
                        "color": "string or null",
                        "size": "string or null",
                        "brand": "string or null",
                        "material": "string or null"
                    }}
                }}
            }}
        }}
    ]
}}

REASONING GUIDELINES:
- If user asks about policies, shipping, returns (use search_faqs)
- If user wants to find products, recommendations (use search_products)
- If user needs both product info AND policy info use both tools
- Explain your reasoning clearly

TEXT OPTIMIZATION RULES:
- Combine user query keywords with image descriptions
- Remove conversational words, keep only searchable product attributes: colors, materials, styles, functions
- For images: describe style, color, material, shape, function, category

FILTER EXTRACTION RULES:
- category: Extract product category from query/image (e.g., "Desks / Office Desks", "Clothing / Dresses")
- price_range: Extract budget mentions (e.g., "under $100" → max: 100, operation: "lt")
- attributes: Extract specific product features (color, size, brand, material)

IMAGE ANALYSIS REQUIREMENTS:
- Generate dense, factual descriptions focusing on searchable attributes
- Include: style, color, material, shape, size indicators, function, category
- Example: "minimalist white desk with rectangular top and thin metal legs"
- DO NOT make up information about products you don't recognize

PRICE OPERATIONS:
- "eq": exact price match
- "lt": less than
- "gt": greater than
- "between": range between min and max

User Query: {query}
History: {history}
"""

llm_provider = GeminiProvider()

prompt1 = create_test_prompt("I want to buy the latest iPhone available in your cataluoge 15 pro red calor")
# print(f"PROMPT:\n{prompt1}")
# print("\n" + "-" * 30)

response1 = llm_provider.generate(prompt1)
print(f"JSON RESPONSE:\n{response1}")


2025-07-13 10:29:18,414 - DEBUG - Generating response for prompt length: 2804
2025-07-13 10:29:18,415 - DEBUG - Gemini API call attempt 1
2025-07-13 10:29:21,571 - DEBUG - Gemini response received: ```json
{
    "reasoning": "The user is explicitly asking to find a specific product, 'iPhone 15 Pro', and has specified a color 'red'. This indicates a clear intent to search for products in the cata...


JSON RESPONSE:
```json
{
    "reasoning": "The user is explicitly asking to find a specific product, 'iPhone 15 Pro', and has specified a color 'red'. This indicates a clear intent to search for products in the catalog.",
    "FunctionCall": [
        {
            "name": "search_products",
            "args": {
                "text": "iPhone 15 Pro",
                "filters": {
                    "category": "Electronics / Mobile Phones",
                    "attributes": {
                        "color": "red",
                        "brand": "Apple"
                    }
                }
            }
        }
    ]
}
```


In [13]:
history = "I want to buy the latest iPhone available in your cataluoge 15 pro red calor\nassistant: " + response1
prompt2 = create_test_prompt("how much it cost and how many days of shipping", history)

response2 = llm_provider.generate(prompt2)
print(f"JSON RESPONSE:\n{response2}")

# print("\n" + "=" * 50)
# print("=" * 50)
# print(f"Response 1: {response1}")
# print(f"Response 2: {response2}")

2025-07-13 10:30:11,148 - DEBUG - Generating response for prompt length: 3487
2025-07-13 10:30:11,149 - DEBUG - Gemini API call attempt 1
2025-07-13 10:30:13,533 - DEBUG - Gemini response received: ```json
{
    "reasoning": "The user is asking two distinct questions. The first part, 'how much it cost', refers to the product previously discussed ('iPhone 15 Pro red'), indicating a need to retrie...


JSON RESPONSE:
```json
{
    "reasoning": "The user is asking two distinct questions. The first part, 'how much it cost', refers to the product previously discussed ('iPhone 15 Pro red'), indicating a need to retrieve product details including price. The 'search_products' tool is suitable for this. The second part, 'how many days of shipping', is a general query about shipping policy, which falls under frequently asked questions. The 'search_faqs' tool is appropriate for this.",
    "FunctionCall": [
        {
            "name": "search_products",
            "args": {
                "text": "iPhone 15 Pro",
                "filters": {
                    "category": "Electronics / Mobile Phones",
                    "attributes": {
                        "color": "red",
                        "brand": "Apple"
                    }
                }
            }
        },
        {
            "name": "search_faqs",
            "args": {
                "text": "shipping days"
 