In [1]:
import os
import logging
from dotenv import load_dotenv
import tqdm as notebook_tqdm
import google.generativeai as genai
from abc import ABC, abstractmethod
import time

load_dotenv()

logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
class LLMProvider(ABC):
    @abstractmethod
    def generate(self, prompt: str) -> str:
        pass

class GeminiProvider(LLMProvider):
    def __init__(self, model_name: str = "gemini-2.5-flash"):
        genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
        self.model = genai.GenerativeModel(model_name)

    def generate(self, prompt: str) -> str:
        logger.debug(f"Generating response for prompt length: {len(prompt)}")
        max_retries = 3
        for attempt in range(max_retries):
            try:
                logger.debug(f"Gemini API call attempt {attempt + 1}")
                response = self.model.generate_content(
                    prompt,
                    generation_config=genai.types.GenerationConfig(
                        temperature=0,
                        max_output_tokens=500,
                        top_p=0,
                        top_k=40
                    )
                )

                if hasattr(response, 'text') and response.text:
                    logger.debug(f"Gemini response received: {response.text[:200]}...")
                    return response.text

                logger.warning(f"Empty response from Gemini (attempt {attempt + 1})")

            except Exception as e:
                logger.error(f"Gemini API error (attempt {attempt + 1}): {e}")
                if attempt == max_retries - 1:
                    fallback_response = '{"thought": "Technical difficulties", "answer": "I\'m experiencing technical issues. Please try again."}'
                    logger.debug(f"Returning fallback response: {fallback_response}")
                    return fallback_response


                time.sleep(1)

        fallback_response = '{"thought": "Max retries exceeded", "answer": "Connection issues. Please try again later."}'
        logger.debug(f"Max retries exceeded, returning: {fallback_response}")
        return fallback_response

In [3]:
def create_test_prompt(query: str, history: str = "") -> str:
    return f"""You are an expert Customer Service Agent. Analyze the user's query to understand their intent and plan the appropriate response.

1. REASONING: Understand what the user wants and determine what tools are needed
2. TOOL SELECTION: Decide which functions to call (search_products, search_faqs, or both)
3. PARAMETER EXTRACTION: Extract search parameters and filters from the query

AVAILABLE TOOLS:
- search_products: For finding products, recommendations, product details in Milvus database
- search_faqs: For questions about the business, shipping, returns, general info

OUTPUT JSON SCHEMA:
{{
    "reasoning": "Explanation of user intent and why specific tools are needed",
    "FunctionCall": [
        {{
            "name": "search_products",
            "args": {{
                "text": "combined search text with image descriptions",
                "filters": {{
                    "category": "string or null",
                    "price_range": {{
                        "min": number,
                        "max": number,
                        "operation": "eq" | "lt" | "gt" | "between"
                    }},
                    "attributes": {{
                        "color": "string or null",
                        "size": "string or null",
                        "brand": "string or null",
                        "material": "string or null"
                    }}
                }}
            }}
        }}
    ]
}}

REASONING GUIDELINES:
- If user asks about policies, shipping, returns (use search_faqs)
- If user wants to find products, recommendations (use search_products)
- If user needs both product info AND policy info use both tools
- Explain your reasoning clearly

TEXT OPTIMIZATION RULES:
- Combine user query keywords with image descriptions
- Remove conversational words, keep only searchable product attributes: colors, materials, styles, functions
- For images: describe style, color, material, shape, function, category

FILTER EXTRACTION RULES:
- category: Extract product category from query/image (e.g., "Desks / Office Desks", "Clothing / Dresses")
- price_range: Extract budget mentions (e.g., "under $100" → max: 100, operation: "lt")
- attributes: Extract specific product features (color, size, brand, material)

IMAGE ANALYSIS REQUIREMENTS:
- Generate dense, factual descriptions focusing on searchable attributes
- Include: style, color, material, shape, size indicators, function, category
- Example: "minimalist white desk with rectangular top and thin metal legs"
- DO NOT make up information about products you don't recognize

PRICE OPERATIONS:
- "eq": exact price match
- "lt": less than
- "gt": greater than
- "between": range between min and max

User Query: {query}
History: {history}
"""

llm_provider = GeminiProvider()

prompt1 = create_test_prompt("I want to buy the latest iPhone available in your cataluoge 15 pro red calor")
# print(f"PROMPT:\n{prompt1}")
# print("\n" + "-" * 30)

response1 = llm_provider.generate(prompt1)
print(f"JSON RESPONSE:\n{response1}")


2025-07-13 10:25:41,297 - DEBUG - Generating response for prompt length: 2804
2025-07-13 10:25:41,298 - DEBUG - Gemini API call attempt 1
2025-07-13 10:25:44,615 - DEBUG - Gemini response received: ```json
{
    "reasoning": "The user is explicitly asking to find a specific product, 'iPhone 15 Pro', and has specified a color 'red'. This indicates a clear intent to search for products in the cata...


JSON RESPONSE:
```json
{
    "reasoning": "The user is explicitly asking to find a specific product, 'iPhone 15 Pro', and has specified a color 'red'. This indicates a clear intent to search for products in the catalog.",
    "FunctionCall": [
        {
            "name": "search_products",
            "args": {
                "text": "iPhone 15 Pro",
                "filters": {
                    "category": "Electronics / Mobile Phones",
                    "attributes": {
                        "color": "red",
                        "brand": "Apple"
                    }
                }
            }
        }
    ]
}
```


In [4]:
history = "I want to buy the latest iPhone available in your cataluoge 15 pro red calor\nassistant: " + response1
prompt2 = create_test_prompt("how much it cost")

response2 = llm_provider.generate(prompt2)
print(f"JSON RESPONSE:\n{response2}")

# print("\n" + "=" * 50)
# print("=" * 50)
# print(f"Response 1: {response1}")
# print(f"Response 2: {response2}")

2025-07-13 10:25:44,638 - DEBUG - Generating response for prompt length: 2744
2025-07-13 10:25:44,639 - DEBUG - Gemini API call attempt 1
2025-07-13 10:25:48,129 - ERROR - Gemini API error (attempt 1): Invalid operation: The `response.text` quick accessor requires the response to contain a valid `Part`, but none were returned. The candidate's [finish_reason](https://ai.google.dev/api/generate-content#finishreason) is 2.
2025-07-13 10:25:49,133 - DEBUG - Gemini API call attempt 2
2025-07-13 10:25:52,537 - ERROR - Gemini API error (attempt 2): Invalid operation: The `response.text` quick accessor requires the response to contain a valid `Part`, but none were returned. The candidate's [finish_reason](https://ai.google.dev/api/generate-content#finishreason) is 2.
2025-07-13 10:25:53,543 - DEBUG - Gemini API call attempt 3
2025-07-13 10:25:56,765 - ERROR - Gemini API error (attempt 3): Invalid operation: The `response.text` quick accessor requires the response to contain a valid `Part`, but

JSON RESPONSE:
{"thought": "Technical difficulties", "answer": "I'm experiencing technical issues. Please try again."}
