# Manager -> **Query pre process**

In [1]:
# Bootstrapping components

import os
import sys

# Make the project package (`src`) importable. The checkout location can be
# overridden with the SQL_ASSISTANT_ROOT environment variable; the default is
# the path this notebook was originally written against, so existing setups
# keep working unchanged.
PROJECT_ROOT = os.environ.get(
    "SQL_ASSISTANT_ROOT",
    "C:\\Users\\lauth\\OneDrive\\Desktop\\sql_assistant_v3",
)
# Guarded so that re-running this cell does not stack duplicate entries.
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

from src.components.memory.memory import Memory
from src.components.collector.collector import AppDataCollector, LLMResponseCollector
from src.components.models.llms.llms import Langchain_OpenAI_LLM, HF_Llama38b_LLM
from src.components.models.embeddings.embeddings import HF_MultilingualE5_Embeddings, Openai_Embeddings
from src.components.memory.memory_interfaces import AIMessage, HumanMessage
from typing import List, Union

# Create and initialise the LLM wrappers
langchain_llm = Langchain_OpenAI_LLM()
langchain_llm.init_model()
hf_llm = HF_Llama38b_LLM()
hf_llm.init_model()

# Create and initialise the embedding wrappers
mle5_embeddings = HF_MultilingualE5_Embeddings()
mle5_embeddings.init_model()

openai_embeddings = Openai_Embeddings()
openai_embeddings.init_model()


In [2]:
# Shared state objects handed to the pipeline calls in the cells below.
collector = AppDataCollector()
llm_collector = LLMResponseCollector()

# Later cells call `memory.add_user_message(...)` and pass `memory` to
# `query_pre_process`, so it must exist after a fresh-kernel run.
# NOTE(review): assumes Memory() needs no constructor arguments — confirm.
memory = Memory()

In [4]:
# Record the user's turn in memory and mirror the returned message on the collector.
user_text = "I want information from firmware EMED-2012-LR-P12"
last_user_message = memory.add_user_message(user_text)
collector.current_conversation_data.last_user_message = last_user_message

In [None]:
# Identify the user intent and classify the request as simple or complex

import traceback
from src.app.pipeline_processes.query_pre_process.manager import query_pre_process

try:
    r = query_pre_process(
        hf_llm,
        memory,
        collector,
        llm_collector,
    )
except Exception as error:
    # Short message first, then the full stack trace.
    print(error)
    traceback.print_exc()
    

In [None]:
# Handling a simple request

import traceback
from src.app.pipeline_processes.query_pre_process.manager import simple_request_process

# Seed the collector with a request that is already labelled "simple".
collector.user_request = "The user is introducing themselves."
collector.request_type = "simple"

try:
    r = simple_request_process(llm=hf_llm, collector=collector, llm_collector=llm_collector)
except Exception as error:
    # Short message first, then the full stack trace.
    print(error)
    traceback.print_exc()

In [3]:
# Handling a complex request. Part 1:

import traceback
from src.app.pipeline_processes.sql_pre_process.manager import complex_request_process_modification

# Seed the collector with a sample conversation state, request and term list.
collector.current_conversation_data.current_conversation_summary = (
    'The conversation starts with the user saying "hi there" to initiate a conversation. The assistant responds with a greeting and asks how it can assist. The user then asks to find the max static pressure for EMED-3135.01-067.'
)
collector.current_conversation_data.current_slots = "product=EMED-3135.01-067"
collector.user_request = (
    "The user wants to find the max static pressure for a specific product (EMED-3135.01-067)."
)
collector.technical_terms = [
    "static pressure",
    "flow computer",
    "EMED-3135.01-067",
]

try:
    r = complex_request_process_modification(llm=hf_llm, embeddings=mle5_embeddings, collector=collector, llm_collector=llm_collector)
except Exception as error:
    # Short message first, then the full stack trace.
    print(error)
    traceback.print_exc()

enhanced-request
{'text': '\nresponse:  The user is looking for the maximum static pressure for the product EMED-3135.01-067.'} 


technical-terms
{'text': '\nterms: static pressure, product, EMED-3135.01-067'} 


flavored-request
{'text': '\nresponse: static pressure, variable, EMED-3135.01-067, measurement system'} 




In [None]:
# Handling a complex request. Part 2:

import traceback
from src.app.pipeline_processes.sql_pre_process.manager import complex_request_process_semantics

# Seed the collector with a sample conversation state and request.
collector.current_conversation_data.current_conversation_summary = "The user wants information from firmware EMED-2012-LR-P12."
collector.current_conversation_data.current_slots = "firmware=EMED-2012-LR-P12"
collector.user_request = "The user wants to get information about the firmware EMED-2012-LR-P12."

try:
    r = complex_request_process_semantics(llm=hf_llm, embeddings=openai_embeddings, collector=collector, llm_collector=llm_collector)
except Exception as error:
    # Short message first, then the full stack trace.
    print(error)
    traceback.print_exc()

In [None]:
# Handling a complex request. Part 3:

import traceback
from src.app.pipeline_processes.sql_generation_process.manager import complex_request_sql_generation

# Sample conversation state plus the original and modified user request.
collector.current_conversation_data.current_conversation_summary = "The user wants information from firmware EMED-2012-LR-P12."
collector.current_conversation_data.current_slots = "firmware=EMED-2012-LR-P12"
collector.user_request = "The user wants to get information about the firmware EMED-2012-LR-P12."
collector.modified_user_request = "The user wants to get information about the firmware EMED-2012-LR-P12."

# Hand-written semantic-info fixture: table name -> list of column tuples.
# NOTE(review): each tuple looks like (column name, SQL type, description,
# integer rank, owning table or a (table, table) pair) — confirm against the
# code that consumes `semantic_info`.
collector.semantic_info = {
    "fcs_firmware": [
        ("Id", "INT", "Unique identifier for each flow computer firmware", 1, ("fcs_computadores", "fcs_firmware")),
        ("Id", "INT", "Unique identifier for each flow computer firmware", 1, "fcs_firmware"),
        ("Firmware", "VARCHAR(100)", "Name or version of the firmware", 2, "fcs_firmware"),
        ("Estado", "VARCHAR(10)", "Status of the firmware, active ('ACT') or inactive ('INA')", 3, "fcs_firmware"),
        ("IdTipo_Computador", "INT", "Foreign key linking firmwares with flow computer types", 999, ("fcs_firmware", "fcs_tipo_computador")),
    ],
    "fcs_tipo_computador": [
        ("Id", "INT", "Unique identifier for each flow computer type", 1, ("fcs_firmware", "fcs_tipo_computador")),
        ("Id", "INT", "Unique identifier for each flow computer type", 1, "fcs_tipo_computador"),
        ("Nombre", "VARCHAR(50)", "Name or designation of the flow computer type (e.g., OMNI, KHRONE, S600, FC302, FLOWBOSS, F407, F107, Thermofischer)", 2, "fcs_tipo_computador"),
        ("Estado", "VARCHAR(10)", "Status of the computer flow type, active ('ACT') or inactive ('INA')", 3, "fcs_tipo_computador"),
    ],
}

# Term-definition fixture: one entry per technical term, each carrying the
# candidate definitions for that term.
collector.terms_dictionary = [
    dict(
        original_term="firmware",
        cleaned_term="firmware",
        definitions=[
            dict(
                sql_instructions="",
                table_name="fcs_firmware",
                standard_term="firmware",
                definition="Firmware is known as the version of the flow computer.",
                replace_instruction="Use 'firmware' instead of version.",
            ),
        ],
    ),
    dict(
        original_term="EMED-2012-LR-P12",
        cleaned_term="emed lrp",
        definitions=[
            dict(
                sql_instructions="",
                table_name="med_sistema_medicion",
                standard_term="measurement system",
                definition="When talking about the term 'EMED' or similar, it refers to the name/tag of the measurement system.",
                replace_instruction="When talking about 'EMED' as a measurement system replace with 'measurement system' instead.",
            ),
            dict(
                sql_instructions="When context is related to flow computer firmware, 'EMED-010' could be the name of the flow computer firmware.",
                table_name="fcs_firmware",
                standard_term="firmware",
                definition="Terms related to 'EMED-010' sometimes are names of flow computers firmwares/type",
                replace_instruction="When talking about 'EMED-010' as a flow computer firmware, replace with 'firmware' instead of EMED-010.",
            ),
            dict(
                sql_instructions="",
                table_name="fcs_computadores",
                standard_term="flow computer",
                definition="When talking about the term 'EMED' or similar, it refers to the name of flow computer.",
                replace_instruction="When talking about 'EMED' as a flow computer replace with 'flow computer' instead.",
            ),
        ],
    ),
]

# Run the SQL-generation step against the collector state prepared above.
try:
    r = complex_request_sql_generation(
        llm=hf_llm,
        collector=collector,
        llm_collector=llm_collector,
    )
except Exception as error:
    # Short message first, then the full stack trace.
    print(error)
    traceback.print_exc()

In [6]:
# Term-definition fixture for the static-pressure scenario: one entry per
# technical term, each carrying the candidate definitions for that term.
collector.terms_dictionary = [
    dict(
        original_term='static pressure',
        cleaned_term='static pressure',
        definitions=[
            dict(
                sql_instructions="Use 'Pressão Estática (kPa)' in the WHERE statement when refers to static pressure.",
                table_name='var_tipo_variable',
                standard_term='variable',
                definition="When talking about the term 'static pressure' or similar, this refers to a variable read in the measurement system.",
                replace_instruction="The term 'static pressure' or related refers to a variable read in measurement system, replace with 'variable'.",
            ),
            dict(
                sql_instructions='Use idVariable_fk=12 in the WHERE statement when refers to static pressure.',
                table_name='var_variable_datos',
                standard_term='variable',
                definition=' ',
                replace_instruction=' ',
            ),
        ],
    ),
    dict(
        original_term='EMED-3135.01-067',
        cleaned_term='emed',
        definitions=[
            dict(
                sql_instructions='',
                table_name='med_sistema_medicion',
                standard_term='measurement system',
                definition="When talking about the term 'EMED' or similar, it refers to the name/tag of the measurement system.",
                replace_instruction="When talking about 'EMED' as a measurement system replace with 'measurement system' instead.",
            ),
            dict(
                sql_instructions="When context is related to flow computer firmware, 'EMED-010' could be the name of the flow computer firmware.",
                table_name='fcs_firmware',
                standard_term='firmware',
                definition="Terms related to 'EMED-010' sometimes are names of flow computers firmwares/type",
                replace_instruction="When talking about 'EMED-010' as a flow computer firmware, replace with 'firmware' instead of EMED-010.",
            ),
            dict(
                sql_instructions='',
                table_name='fcs_computadores',
                standard_term='flow computer',
                definition="When talking about the term 'EMED...' or similar, it refers to the name of flow computer.",
                replace_instruction="When talking about 'EMED...' as a flow computer replace with 'flow computer' instead.",
            ),
        ],
    ),
]

# Semantic-info fixture: maps each table name to a set of column tuples.
# NOTE(review): each tuple looks like (column name, SQL type, description,
# integer rank — 999 on every foreign-key column here, owning table or a
# (table, table) pair) — confirm against the code that consumes semantic_info.
# NOTE(review): the values here are sets, while the "Part 3" cell assigns
# lists of tuples to the same attribute — confirm both shapes are accepted.
# NOTE(review): several column names carry a trailing space
# ('Numero_Maximo_Diarios ', 'Tipo_Protocolo ', 'Unidad_Fuente ',
# 'Codigo_Medidor ') — verify that this matches the real schema.
collector.semantic_info = {'pla_plataforma': {('Estado',
   'VARCHAR(10)',
   "Platform status, active ('ACT') or inactive ('INA')",
   3,
   'pla_plataforma'),
  ('Id', 'INT', 'Unique Identifier for each platform', 1, 'pla_plataforma'),
  ('Id',
   'INT',
   'Unique Identifier for each platform',
   1,
   ('med_sistema_medicion', 'pla_plataforma')),
  ('Nombre', 'VARCHAR(MAX)', 'Platform name', 2, 'pla_plataforma')},
 'flu_tipo_fluido': {('Estado',
   'VARCHAR(100)',
   "Fluid type status, active ('ACT') or inactive ('INA')",
   3,
   'flu_tipo_fluido'),
  ('Id', 'INT', 'Unique identifier for the fluid type', 1, 'flu_tipo_fluido'),
  ('Id',
   'INT',
   'Unique identifier for the fluid type',
   1,
   ('flu_tipo_fluido', 'med_sistema_medicion')),
  ('Nombre',
   'VARCHAR(100)',
   'Name of the fluid type, must be in: (Gás Natural / Óleo Cru / Água / Condensado / Vapour / Água + MEG / MEG)',
   2,
   'flu_tipo_fluido')},
 'med_tipo_medicion': {('Estado',
   'VARCHAR(10)',
   "Status of the measurement type, active ('ACT') or inactive ('INA')",
   3,
   'med_tipo_medicion'),
  ('Id',
   'INT',
   'Unique identifier for each measure type',
   1,
   'med_tipo_medicion'),
  ('Id',
   'INT',
   'Unique identifier for each measure type',
   1,
   ('med_tipo_medicion', 'med_sistema_medicion')),
  ('Nombre',
   'VARCHAR(150)',
   'Name of the measurement type',
   2,
   'med_tipo_medicion')},
 'med_sistema_medicion': {('Estado',
   'VARCHAR(100)',
   "Status of the measurement system, active ('ACT') or inactive ('INA')",
   4,
   'med_sistema_medicion'),
  ('Id',
   'INT',
   'Unique identifier for the measurement system',
   1,
   'med_sistema_medicion'),
  ('IdAplicabilidad_fk',
   'INT',
   'Foreign key linking measurement system with their measure type',
   999,
   ('med_tipo_medicion', 'med_sistema_medicion')),
  ('IdPlataforma_fk',
   'INT',
   'Foreign key linking measurement systems with platforms',
   999,
   ('med_sistema_medicion', 'pla_plataforma')),
  ('IdTipoFluido_fk',
   'INT',
   'Foreign key linking fluid type with measurement systems',
   999,
   ('flu_tipo_fluido', 'med_sistema_medicion')),
  ('Nombre',
   'VARCHAR(100)',
   'Name of the measurement system',
   2,
   'med_sistema_medicion'),
  ('Tag',
   'VARCHAR(100)',
   'Tag associated to the measurement system',
   3,
   'med_sistema_medicion'),
  ('Uso',
   'VARCHAR(100)',
   'Usage description of the measurement system could be: CONTINUA / WELL_TESTING',
   6,
   'med_sistema_medicion')},
 'fcs_tipo_computador': {('Estado',
   'VARCHAR(10)',
   "Status of the computer flow type, active ('ACT') or inactive ('INA')",
   3,
   'fcs_tipo_computador'),
  ('Id',
   'INT',
   'Unique identifier for each flow computer type',
   1,
   'fcs_tipo_computador'),
  ('Id',
   'INT',
   'Unique identifier for each flow computer type',
   1,
   ('fcs_firmware', 'fcs_tipo_computador')),
  ('Nombre',
   'VARCHAR(50)',
   'Name or designation of the flow computer type (e.g., OMNI, KHRONE, S600, FC302, FLOWBOSS, F407, F107, Thermofischer)',
   2,
   'fcs_tipo_computador')},
 'fcs_firmware': {('Estado',
   'VARCHAR(10)',
   "Status of the firmware, active ('ACT') or inactive ('INA')",
   3,
   'fcs_firmware'),
  ('Firmware',
   'VARCHAR(100)',
   'Name or version of the firmware',
   2,
   'fcs_firmware'),
  ('Id',
   'INT',
   'Unique identifier for each flow computer firmware',
   1,
   'fcs_firmware'),
  ('Id',
   'INT',
   'Unique identifier for each flow computer firmware',
   1,
   ('fcs_computadores', 'fcs_firmware')),
  ('IdTipo_Computador',
   'INT',
   'Foreign key linking firmwares with flow computer types',
   999,
   ('fcs_firmware', 'fcs_tipo_computador'))},
 'fcs_computadores': {('Estado',
   'VARCHAR(100)',
   "Current status of the flow computer, active ('1') or inactive ('2')",
   5,
   'fcs_computadores'),
  ('IP',
   'VARCHAR(100)',
   'Flow computer IP address of the flow computer',
   2,
   'fcs_computadores'),
  ('Id',
   'INT',
   'Unique identifier for each flow computer',
   1,
   'fcs_computadores'),
  ('Id',
   'INT',
   'Unique identifier for each flow computer',
   1,
   ('fcs_computador_medidor', 'fcs_computadores')),
  ('IdFirmware_fk',
   'INT',
   'Foreign key linking flow computers with respective firmware',
   999,
   ('fcs_computadores', 'fcs_firmware')),
  ('Numero_Maximo_Diarios ',
   'INT',
   'Maximum number of daily logs that can be handled',
   18,
   'fcs_computadores'),
  ('Numero_Maximo_Horarios',
   'INT',
   'Maximum number of schedules that can be handled',
   17,
   'fcs_computadores'),
  ('Numero_Maximo_Proves',
   'INT',
   'Maximum number of proves (verification processes) that can be handled',
   19,
   'fcs_computadores'),
  ('Puerto',
   'INT',
   'Flow computer primary port used for communications',
   3,
   'fcs_computadores'),
  ('Tag', 'VARCHAR(100)', 'Flow computer name', 4, 'fcs_computadores'),
  ('Tipo_Protocolo ',
   'VARCHAR(100)',
   'Type of protocol used for communication',
   21,
   'fcs_computadores'),
  ('Unidad_Destino',
   'VARCHAR(100)',
   'Target unit within the target group for data or commands',
   14,
   'fcs_computadores'),
  ('Unidad_Fuente ',
   'VARCHAR(100)',
   'Source unit within the source group from which data or commands are issued',
   16,
   'fcs_computadores')},
 'fcs_computador_medidor': {('Codigo_Medidor ',
   'VARCHAR(100)',
   'Unique identifier meter code (1,2,3,4,5...10)',
   2,
   'fcs_computador_medidor'),
  ('Estado',
   'VARCHAR(100)',
   "Current status of the computer-meter association active ('2') or inactive ('1') or not available ('0')",
   3,
   'fcs_computador_medidor'),
  ('IdComputador_fk',
   'INT',
   'Foreign key linking meters with flow computers',
   999,
   ('fcs_computador_medidor', 'fcs_computadores'))},
 'var_tipo_variable': {('Estado',
   'VARCHAR(100)',
   "Current status of the variable, active ('ACT') or inactive ('INA')",
   3,
   'var_tipo_variable'),
  ('Id', 'INT', 'Unique identifier for each variable', 1, 'var_tipo_variable'),
  ('Nombre', 'VARCHAR(100)', 'Measured variable name', 2, 'var_tipo_variable'),
  ('Reporte_Manual',
   'VARCHAR(100)',
   'Indicates whether manual reporting is applicable or required for this variable type',
   4,
   'var_tipo_variable')},
 # NOTE(review): in 'var_variable_datos' the relation pairs look swapped —
 # 'IdVariable_fk' is paired with med_sistema_medicion and
 # 'idSistemaMedicion_fk' with var_tipo_variable, and the description of
 # 'idSistemaMedicion_fk' mentions the variable name rather than the
 # measurement system. Left unchanged; confirm against the source schema.
 'var_variable_datos': {('Fecha',
   'DATETIME',
   'Date and time when the variable data was measured',
   1,
   'var_variable_datos'),
  ('IdVariable_fk',
   'INT',
   'Foreign key linking variable name with respective measured value',
   999,
   ('med_sistema_medicion', 'var_variable_datos')),
  ('Valor',
   'FLOAT',
   'Numeric value of the variable measured',
   2,
   'var_variable_datos'),
  ('Valor_String',
   'VARCHAR(100)',
   'String representation of the variable value, for non-numeric data',
   3,
   'var_variable_datos'),
  ('idSistemaMedicion_fk',
   'INT',
   'Foreign key linking variable measured value with respective variable name',
   999,
   ('var_tipo_variable', 'var_variable_datos'))},
 'equ_equipo': {('Caracteristica',
   'VARCHAR(200)',
   'Characteristics or specifications of the equipment',
   6,
   'equ_equipo'),
  ('Categoria',
   'VARCHAR(3)',
   'Category code of the equipment',
   8,
   'equ_equipo'),
  ('Estado',
   'VARCHAR(10)',
   "Current status of the equipment, active ('ACT') or inactive ('INA')",
   3,
   'equ_equipo'),
  ('Id', 'INT', 'Unique identifier for each equipment', 1, 'equ_equipo'),
  ('IdTipoEquipo_fk',
   'INT',
   'Foreign key linking equipment type with equipment',
   999,
   ('equ_equipo', 'teq_tipo_equipo')),
  ('Modelo', 'VARCHAR(200)', 'Model of the equipment', 5, 'equ_equipo'),
  ('Serial',
   'VARCHAR(550)',
   'Serial number of the equipment',
   2,
   'equ_equipo')},
 'teq_tipo_equipo': {('Estado',
   'VARCHAR(10)',
   "Current status of the equipment type, active ('ACT') or inactive ('INA')",
   3,
   'teq_tipo_equipo'),
  ('Id',
   'INT',
   'Unique identifier for each equipment type',
   1,
   'teq_tipo_equipo'),
  ('Id',
   'INT',
   'Unique identifier for each equipment type',
   1,
   ('equ_equipo', 'teq_tipo_equipo')),
  ('IdClasificacion_fk',
   'INT',
   'Foreign key linking equipment type with respective equipment classification',
   999,
   ('teq_clasificacion', 'teq_tipo_equipo')),
  ('Nombre',
   'VARCHAR(150)',
   'Name or description of the equipment type',
   2,
   'teq_tipo_equipo'),
  ('ReporteDS',
   'VARCHAR(100)',
   'Indicates whether a DS (Data Sheet) report is associated with this equipment type',
   7,
   'teq_tipo_equipo')},
 'teq_clasificacion': {('Estado',
   'VARCHAR(10)',
   "Current status of the classification, active ('ACT') or inactive ('INA')",
   3,
   'teq_clasificacion'),
  ('Id',
   'INT',
   'Unique identifier for each equipment classification',
   1,
   'teq_clasificacion'),
  ('Id',
   'INT',
   'Unique identifier for each equipment classification',
   1,
   ('teq_clasificacion', 'teq_tipo_equipo')),
  ('Nombre',
   'VARCHAR(150)',
   "Name of the classification (e.g: 'Medidor primario', ' Medidor secundario', ' Medidor terciario', ' Válvulas e accesorios')",
   2,
   'teq_clasificacion')},
 'med_tag': {('Estado',
   'VARCHAR(10)',
   "Current status of the tag, active ('ACT') or inactive ('INA')",
   3,
   'med_tag'),
  ('IdEquipo_fk',
   'INT',
   'Foreign key linking equipment with respective measurement system tag',
   999,
   ('equ_equipo', 'med_tag')),
  ('IdSistemaMedicion_fk',
   'INT',
   'Foreign key linking measurement systems with their respective measurement system tag',
   999,
   ('med_sistema_medicion', 'med_tag')),
  ('Nombre',
   'VARCHAR(MAX)',
   'Combined name from measurement system and equipment',
   2,
   'med_tag'),
  ('SensoresHabilitadosMv',
   'VARCHAR(100)',
   'Information about enabled sensors for multivariable equipment',
   4,
   'med_tag')}}

# Technical terms and user request for the static-pressure scenario.
collector.technical_terms = [
    "static pressure",
    "measurement system",
    "EMED-3135.01-067",
]
collector.user_request = (
    "The user wants to find the max static pressure for a specific product (EMED-3135.01-067)."
)

In [12]:
from src.app.pipeline_processes.sql_pre_process.retrievers import (
    retrieve_semantic_term_definitions,
)
from src.app.pipeline_processes.sql_pre_process.generation import (
    generate_flavored_request,
)

# Retrieve the term definitions for `collector.technical_terms`. The third
# return value is not used here; it is bound to a throwaway name other than
# `_` because IPython stops refreshing its `_` last-output shortcut once user
# code assigns to `_`.
terms_dictionary, has_replacement_definitions, _unused = retrieve_semantic_term_definitions(
    mle5_embeddings, collector.technical_terms
)
# Generate the "flavored" request from the user request and those definitions.
output = generate_flavored_request(
    hf_llm, llm_collector, collector.user_request, terms_dictionary
)
print(output)

flavored-request
{'text': '\nmodified_sentence: The user wants to find the max variable for a specific measurement system (measurement system).'} 


{'modified_sentence': 'The user wants to find the max variable for a specific measurement system (measurement system).'}


In [15]:
# Dump the term definitions retrieved in the previous cell for inspection.
print(terms_dictionary)

[{'original_term': 'static pressure', 'cleaned_term': 'static pressure', 'definitions': [{'sql_instructions': "Use 'Pressão Estática (kPa)' in the WHERE statement when refers to static pressure.", 'table_name': 'var_tipo_variable', 'standard_term': 'variable', 'definition': "When talking about the term 'static pressure' or similar, this refers to a variable read in the measurement system.", 'replace_instruction': "The term 'static pressure' or related refers to a variable read in measurement system, replace with 'variable'."}, {'sql_instructions': 'Use idVariable_fk=12 in the WHERE statement when refers to static pressure.', 'table_name': 'var_variable_datos', 'standard_term': 'variable', 'definition': ' ', 'replace_instruction': ' '}]}, {'original_term': 'measurement system', 'cleaned_term': 'measurement system', 'definitions': [{'sql_instructions': "Use 'Medidor terciario' in WHERE statement when refers to secondary meter.", 'table_name': 'teq_clasificacion', 'standard_term': 'equipm

In [21]:
# Show how many LLM responses have been recorded, then the most recent one.
# Index -1 replaces the fixed position 4 so the cell still works when a run
# records a different number of responses.
print(len(llm_collector.llm_responses))
print(llm_collector.llm_responses[-1].llm_response)

5
modified_sentence: The user wants to find the max variable for a specific measurement system (measurement system).
