# Comprehensive query processing

In [1]:
import os
import sys

# Add the project root to the Python path
# The root is computed as the parent of the current working directory, so the
# result depends on where the kernel was started.
# NOTE(review): presumably the notebook lives one level below the project root
# and the models/services/config imports below rely on this entry - confirm.
project_root = os.path.abspath(os.path.join(os.getcwd(), ".."))
sys.path.append(project_root)

import asyncio
from typing import Dict, List
from models.message import Message
from models.product import Product
from services.weaviate_service import WeaviateService
from services.openai_service import OpenAIService
from services.query_processor import QueryProcessor
from config import Config

# Load configuration
# Only three attributes of the config object are read in this cell:
# OPENAI_API_KEY, WEAVIATE_URL and RESET_WEAVIATE.
config = Config()

# Initialize services
# The top-level `await` relies on the notebook kernel's support for awaiting
# at cell level; this cell is not valid as a plain Python script.
# NOTE(review): RESET_WEAVIATE presumably controls whether existing data is
# wiped on start - confirm before re-running against a populated instance.
weaviate_service = WeaviateService()
await weaviate_service.initialize_weaviate(config.OPENAI_API_KEY, config.WEAVIATE_URL, config.RESET_WEAVIATE)

# The OpenAI service is given both the API key and the whole config object.
# `query_processor` wraps it and is what later cells call for reranking and
# query expansion.
openai_service = OpenAIService(config.OPENAI_API_KEY, config)
query_processor = QueryProcessor(openai_service=openai_service)

INFO:httpx:HTTP Request: GET http://localhost:8080/v1/schema "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: GET http://localhost:8080/v1/schema "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: GET http://localhost:8080/v1/schema "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: GET http://localhost:8080/v1/schema "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/graphql "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://localhost:8080/v1/graphql "HTTP/1.1 200 OK"
INFO:root: Weaviate schema is valid: True
INFO:root: Weaviate schema info: Weaviate Schema Information

Class: Product (Entries: 768)
Properties:
  - Name: name, Type: text, Description: The name of the product.
  - Name: ids, Type: text, Description: ids of the products
  - Name: manufacturer, Type: text, Description: The manufacturer of the product.
  - Name: form_factor, Type: text, Description: The form of the product.
  - Name: processor, Type: text, Description: The processor of the product.
  - Name: core_coun

===:> Initializing Weaviate


### Helper functions

In [2]:
async def display_search_results(query: str, features: List[str], limit: int = 5):
    """Run a product search and print the requested fields of every hit.

    Args:
        query: Text handed to ``weaviate_service.search_products``.
        features: Keys to read from each hit; each is printed on its own line
            as ``"<Capitalized key>: <value>"``.
        limit: Forwarded to the search call as ``limit``.

    Returns:
        Whatever ``search_products`` returned, unchanged.
    """
    divider = "-" * 80

    print(f"Searching for: {query}")
    hits = await weaviate_service.search_products(query, limit=limit)

    print(divider)
    for hit in hits:
        for field in features:
            label = str(field).capitalize()
            print(f"{label}: {hit[field]}")
    print(divider)

    return hits

In [3]:
def get_unique_products(products):
    """Return one ``Product`` per distinct product name.

    Records are scanned in order and only the first record seen for a given
    ``"name"`` is converted; later records with the same name are ignored.

    Args:
        products: Iterable of mappings that each provide every key listed in
            ``field_names`` below.

    Returns:
        A list of ``Product`` objects in first-seen order.
    """
    # Keys copied verbatim from each record into the Product constructor.
    field_names = (
        "name",
        "ids",
        "manufacturer",
        "form_factor",
        "processor",
        "core_count",
        "processor_tdp",
        "memory",
        "io",
        "operating_system",
        "environmentals",
        "certifications",
        "short_summary",
        "full_summary",
        "full_product_description",
    )

    first_by_name = {}
    for record in products:
        product_name = record["name"]
        if product_name in first_by_name:
            continue  # keep the first occurrence only
        first_by_name[product_name] = Product(**{field: record[field] for field in field_names})

    return [*first_by_name.values()]

In [4]:
def compare_products(extended_products: List[str], original_products: List[str]) -> None:
    """Print original and extended product names side by side.

    The original names go in the left column and the extended names in the
    right one, each left-aligned to 80 characters. The shorter list is padded
    with empty strings so both columns have the same number of rows.

    Args:
        extended_products: Names retrieved with the expanded queries.
        original_products: Names retrieved with the original query.
    """
    row_count = max(len(extended_products), len(original_products))

    # Pad both columns to the same height so they can be walked in lockstep.
    left_column = list(original_products) + [""] * (row_count - len(original_products))
    right_column = list(extended_products) + [""] * (row_count - len(extended_products))

    print(f"{'Original Products':<80}{'Extended Products':<80}")
    print("-" * 160)

    for left_cell, right_cell in zip(left_column, right_column):
        print(f"{left_cell:<80}{right_cell:<80}")

### Prompt 1


In [5]:
# Query under test for this section. `chat_history` starts empty and is passed
# alongside the query to the comprehensive query-expansion call further down.
clear_intent_1 = "What are the top Computer on Modules available with high memory and I/O count?"
chat_history = []

#### Original prompt search results

In [6]:
# Baseline: search with the unmodified prompt, printing only each hit's name
# and asking for up to 10 hits instead of the helper's default of 5.
original_products = await display_search_results(clear_intent_1, ["name"], 10)

Searching for: What are the top Computer on Modules available with high memory and I/O count?


INFO:httpx:HTTP Request: POST http://localhost:8080/v1/graphql "HTTP/1.1 200 OK"


--------------------------------------------------------------------------------
Name: SOM 7th Gen Intel Xeon Core Celeron Processors COM Express Basic Module
Name: ET COM Express
Name: Com Express Basic Module
Name: SOM 13th Gen Intel Core Processor
Name: SOM Intel Xeon D Processor
Name: CPU COM Express Basic Type
Name: COM Express Basic Type CPU Module
Name: COMecRP E
Name: EmQi C
Name: Computer On Module
--------------------------------------------------------------------------------


In [7]:
# Peek at the first raw hit to see which fields a search result carries.
original_products[0]

{'certifications': 'Class I, Division 2 Solution',
 'core_count': 'Quad/Dual Cores',
 'environmentals': 'Operating: 0°C to 60°C, Storage: -40°C to 85°C, Humidity: 10% to 90% non-condensing',
 'form_factor': 'COM Express Basic Module Type 6',
 'full_product_description': 'The SOM 7th Gen Intel Xeon/Core/Celeron Processors COM Express Basic Module from Advantech is designed for embedded applications requiring high performance and flexibility. It supports dual DDR4 memory up to 32GB, multiple I/O interfaces including PCIe, SATA, USB, and Ethernet, and various display outputs such as HDMI, DVI, and DisplayPort. The module operates within a temperature range of 0°C to 60°C and supports Windows IoT Enterprise LTSB.',
 'full_summary': 'The SOM 7th Gen Intel Xeon/Core/Celeron Processors COM Express Basic Module offers flexible I/O support, dual DDR4 memory up to 32GB, and various processor options with different TDPs. It supports multiple display outputs and is suitable for embedded applicatio

In [8]:
# Collapse the baseline hits to one Product per distinct name, then display them.
unique_original_products = get_unique_products(original_products)
unique_original_products

[Product(name='SOM 7th Gen Intel Xeon Core Celeron Processors COM Express Basic Module', ids='SOM-5897C7-U0A2', manufacturer='Advantech', form_factor='COM Express Basic Module Type 6', processor='7th Gen Intel Xeon/Core/Celeron', core_count='Quad/Dual Cores', processor_tdp='Varies (25W, 35W, 45W)', memory='Dual DDR4, up to 32GB', io='PCIe x16, PCIe x1, SATA III, USB 3.0, USB 2.0, Intel HD Audio, Ethernet, GPIO, COM Port', operating_system='Windows IoT Enterprise LTSB', environmentals='Operating: 0°C to 60°C, Storage: -40°C to 85°C, Humidity: 10% to 90% non-condensing', certifications='Class I, Division 2 Solution', short_summary='SOM 7th Gen Intel Xeon/Core/Celeron Processors COM Express Basic Module with flexible I/O support and dual DDR4 memory.', full_summary='The SOM 7th Gen Intel Xeon/Core/Celeron Processors COM Express Basic Module offers flexible I/O support, dual DDR4 memory up to 32GB, and various processor options with different TDPs. It supports multiple display outputs and 

In [10]:
# Rerank variant 1: the reranker sees only each product's name and short summary.
# Token counts are printed so this variant's cost can be compared with the
# full_summary and full_product_description variants in the following cells.
products_for_reranking = [{"name": p.name, "short_summary": p.short_summary} for p in unique_original_products]
res_initial, input_tokens, output_tokens = await query_processor.rerank_products(
    clear_intent_1, products_for_reranking, top_k=5
)
print(f"Input tokens: {input_tokens}, Output tokens: {output_tokens}")
res_initial

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


rerank_products response from OpenAI: [{'name': 'Computer On Module', 'relevance_score': 0.95}, {'name': 'SOM 7th Gen Intel Xeon Core Celeron Processors COM Express Basic Module', 'relevance_score': 0.9}, {'name': 'ET COM Express', 'relevance_score': 0.88}, {'name': 'SOM 13th Gen Intel Core Processor', 'relevance_score': 0.85}, {'name': 'Com Express Basic Module', 'relevance_score': 0.83}]
Input tokens: 601, Output tokens: 149


['Computer On Module',
 'SOM 7th Gen Intel Xeon Core Celeron Processors COM Express Basic Module',
 'ET COM Express',
 'SOM 13th Gen Intel Core Processor',
 'Com Express Basic Module']

In [11]:
# Rerank variant 2: same query and top_k, but each product is described by its
# full summary instead of the short one.
# Note that `products_for_reranking`, `input_tokens` and `output_tokens` are
# rebound here, so they always reflect the most recently run rerank cell.
products_for_reranking = [{"name": p.name, "full_summary": p.full_summary} for p in unique_original_products]
res_initial2, input_tokens, output_tokens = await query_processor.rerank_products(
    clear_intent_1, products_for_reranking, top_k=5
)
print(f"Input tokens: {input_tokens}, Output tokens: {output_tokens}")
res_initial2

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


rerank_products response from OpenAI: [{'name': 'SOM Intel Xeon D Processor', 'relevance_score': 0.95}, {'name': 'Computer On Module', 'relevance_score': 0.92}, {'name': 'Com Express Basic Module', 'relevance_score': 0.9}, {'name': 'SOM 13th Gen Intel Core Processor', 'relevance_score': 0.88}, {'name': 'ET COM Express', 'relevance_score': 0.85}]
Input tokens: 1066, Output tokens: 137


['SOM Intel Xeon D Processor',
 'Computer On Module',
 'Com Express Basic Module',
 'SOM 13th Gen Intel Core Processor',
 'ET COM Express']

In [12]:
# Rerank variant 3: each product is described by its full product description,
# sent under the key "description" (not "full_product_description").
products_for_reranking = [
    {"name": p.name, "description": p.full_product_description} for p in unique_original_products
]
res_initial3, input_tokens, output_tokens = await query_processor.rerank_products(
    clear_intent_1, products_for_reranking, top_k=5
)
print(f"Input tokens: {input_tokens}, Output tokens: {output_tokens}")
res_initial3

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


rerank_products response from OpenAI: [{'name': 'Computer On Module', 'relevance_score': 0.95}, {'name': 'SOM Intel Xeon D Processor', 'relevance_score': 0.92}, {'name': 'Com Express Basic Module', 'relevance_score': 0.9}, {'name': 'SOM 13th Gen Intel Core Processor', 'relevance_score': 0.88}, {'name': 'ET COM Express', 'relevance_score': 0.85}]
Input tokens: 1829, Output tokens: 137


['Computer On Module',
 'SOM Intel Xeon D Processor',
 'Com Express Basic Module',
 'SOM 13th Gen Intel Core Processor',
 'ET COM Express']

#### Comprehensive query expansion

In [10]:
# Expand the original prompt using the (empty) chat history.
# The displayed result is a dict with the keys "extracted_attributes",
# "expanded_queries" and "search_params"; `res` is later passed to
# generate_semantic_search_queries.
res, input_tokens, output_tokens = await query_processor.process_query_comprehensive3(clear_intent_1, chat_history)
print(f"Input tokens: {input_tokens}, Output tokens: {output_tokens}")
res

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Input tokens: 734, Output tokens: 159


{'extracted_attributes': {'form_factor': 'Computer on Module',
  'memory': 'high',
  'io': 'high'},
 'expanded_queries': ['Computer on Modules with high RAM and storage capacity and extensive I/O interfaces',
  'Top Computer on Modules featuring large memory capacities and multiple I/O options',
  'High-memory Computer on Modules with a wide range of I/O interfaces'],
 'search_params': {'form_factor': ['Computer on Module'],
  'memory': ['RAM: >= 1 GB', 'Storage: >= 8 GB'],
  'io': ['Ethernet', 'USB', 'UART', 'I2C', 'SPI', 'GPIO']}}

In [16]:
from typing import Any, Dict

def generate_semantic_search_queries(comprehensive_result: Dict[str, Any]) -> List[str]:
    """Build the semantic search queries for a comprehensive query result.

    The returned list contains, in order: every expanded query, then one
    ``"key: value, key: value"`` string built from the search parameters, then
    one built from the extracted attributes.

    Args:
        comprehensive_result: Mapping with the keys ``"expanded_queries"``
            (list of query strings), ``"search_params"`` and
            ``"extracted_attributes"`` (mappings from attribute name to either
            a single value or a list/tuple of values). A missing or ``None``
            entry is treated as empty.

    Returns:
        A new list of query strings. The input is not modified, and a
        parameter or attribute mapping that is empty contributes no query
        (rather than a blank one).
    """

    def _as_text(value: Any) -> str:
        # A bare string must be kept whole: joining it would split it into
        # characters ("high" -> "h, i, g, h").
        if isinstance(value, str):
            return value
        if isinstance(value, (list, tuple)):
            return ", ".join(str(item) for item in value)
        return str(value)

    expanded_queries = comprehensive_result.get("expanded_queries") or []
    search_params = comprehensive_result.get("search_params") or {}
    extracted_attributes = comprehensive_result.get("extracted_attributes") or {}

    # Start from a copy so the caller's list of expanded queries is untouched.
    queries = list(expanded_queries)

    # One flattened "key: value" query per mapping, search parameters first.
    for mapping in (search_params, extracted_attributes):
        flattened = ", ".join(f"{key}: {_as_text(value)}" for key, value in mapping.items())
        if flattened:  # skip empty mappings instead of searching for ""
            queries.append(flattened)

    return queries

In [17]:
# Turn the expansion result into the list of query strings to search with.
# `res` must still be the dict returned by the query-expansion cell here.
queries = generate_semantic_search_queries(res)
queries

['Computer on Modules with high RAM and storage capacity and extensive I/O interfaces',
 'Top Computer on Modules featuring large memory capacities and multiple I/O options',
 'High-memory Computer on Modules with a wide range of I/O interfaces',
 'form_factor: Computer on Module, memory: RAM: >= 1 GB, Storage: >= 8 GB, io: Ethernet, USB, UART, I2C, SPI, GPIO',
 'form_factor: Computer on Module, memory: high, io: high']

In [18]:
# Run every generated query with the helper's default limit of 5 hits and pool
# the results. Hits are appended as-is, so a product returned by several
# queries appears several times in `extended_products`.
extended_products = []
for query in queries:
    products = await display_search_results(query, ["name"])
    extended_products.extend(products)

Searching for: Computer on Modules with high RAM and storage capacity and extensive I/O interfaces


INFO:httpx:HTTP Request: POST http://localhost:8080/v1/graphql "HTTP/1.1 200 OK"


--------------------------------------------------------------------------------
Name: ET COM Express
Name: Computer On Module
Name: SOM 7th Gen Intel Xeon Core Celeron Processors COM Express Basic Module
Name: CongaBE COM Express
Name: CPU COM Express Basic Type
--------------------------------------------------------------------------------
Searching for: Top Computer on Modules featuring large memory capacities and multiple I/O options


INFO:httpx:HTTP Request: POST http://localhost:8080/v1/graphql "HTTP/1.1 200 OK"


--------------------------------------------------------------------------------
Name: SOM 7th Gen Intel Xeon Core Celeron Processors COM Express Basic Module
Name: Computer On Module
Name: ET COM Express
Name: COMecRP E
Name: Congabxd
--------------------------------------------------------------------------------
Searching for: High-memory Computer on Modules with a wide range of I/O interfaces


INFO:httpx:HTTP Request: POST http://localhost:8080/v1/graphql "HTTP/1.1 200 OK"


--------------------------------------------------------------------------------
Name: ET COM Express
Name: Computer On Module
Name: ExpressBD
Name: CongaBE COM Express
Name: SOM 7th Gen Intel Xeon Core Celeron Processors COM Express Basic Module
--------------------------------------------------------------------------------
Searching for: form_factor: Computer on Module, memory: RAM: >= 1 GB, Storage: >= 8 GB, io: Ethernet, USB, UART, I2C, SPI, GPIO


INFO:httpx:HTTP Request: POST http://localhost:8080/v1/graphql "HTTP/1.1 200 OK"


--------------------------------------------------------------------------------
Name: EDCMIO Computer
Name: CM Industrial
Name: PC CPU Module
Name: QS QFN Style Computer On Module
Name: Phycore Imx M Mininano
--------------------------------------------------------------------------------
Searching for: form_factor: Computer on Module, memory: high, io: high


INFO:httpx:HTTP Request: POST http://localhost:8080/v1/graphql "HTTP/1.1 200 OK"


--------------------------------------------------------------------------------
Name: SOM COMExpress Basic Module
Name: QS QFN Style Computer On Module
Name: Computer On Module
Name: QSXM QFN Style SolderDown Computer On Module
Name: CPU COM Express Basic Type
--------------------------------------------------------------------------------


In [19]:
# Collapse the pooled hits to one Product per distinct name (first occurrence
# wins), then display them.
unique_extended_products = get_unique_products(extended_products)
unique_extended_products

[Product(name='ET COM Express', ids='ET970K-i7', manufacturer='IBASE', form_factor='125 mm x 95 mm', processor='Intel Xeon E Core iii', core_count='Not available', processor_tdp='Not available', memory='DDR4 SODIMM, Max 32 GB', io='4x PCIe, 4x USB 3.0, 8x USB 2.0, 2x COM, 4x SATA III, 2x GbE LAN', operating_system='Windows 10 (64-bit)', environmentals='Operating Temperature: 0°C to 60°C, Storage Temperature: -40°C to 85°C, Relative Humidity: 5% to 95% non-condensing', certifications='CE, FCC Class B, RoHS', short_summary='ET COM Express module with Intel Xeon E Core iii processors, DDR4 SODIMM, and multiple I/O interfaces.', full_summary='The ET COM Express module features Intel Xeon E Core iii processors, supports up to 32 GB DDR4 SODIMM memory, and offers a variety of I/O interfaces including PCIe, USB, COM, SATA, and GbE LAN. It is designed for embedded systems and supports multiple independent displays.', full_product_description='The ET COM Express module from IBASE is equipped wi

In [20]:
# Rerank the pooled products against the original prompt, describing each one
# by its memory, io and form_factor fields rather than a summary.
# NOTE(review): this rebinds `res`, which the query-expansion cell set to the
# dict consumed by generate_semantic_search_queries(res). Re-running that
# earlier cell after this one would receive a list of names instead; consider
# a distinct name once it is confirmed no later cell reads `res`.
products_for_reranking = [
    {"name": p.name, "memory": p.memory, "io": p.io, "form_factor": p.form_factor} for p in unique_extended_products
]
res, input_tokens, output_tokens = await query_processor.rerank_products(
    clear_intent_1, products_for_reranking, top_k=5
)
print(f"Input tokens: {input_tokens}, Output tokens: {output_tokens}")
res

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


rerank_products response from OpenAI: [{'name': 'Computer On Module', 'relevance_score': 0.95}, {'name': 'Congabxd', 'relevance_score': 0.9}, {'name': 'CPU COM Express Basic Type', 'relevance_score': 0.88}, {'name': 'ExpressBD', 'relevance_score': 0.85}, {'name': 'CongaBE COM Express', 'relevance_score': 0.83}]
Input tokens: 1661, Output tokens: 129


['Computer On Module',
 'Congabxd',
 'CPU COM Express Basic Type',
 'ExpressBD',
 'CongaBE COM Express']