# Manager -> **Query pre process**

In [1]:
import os
import sys

# Make the project's `src` package importable from this notebook.
# The checkout location can be overridden with the SQL_ASSISTANT_ROOT
# environment variable; the original author's path is kept as the fallback so
# existing setups keep working without any change.
PROJECT_ROOT = os.environ.get(
    "SQL_ASSISTANT_ROOT",
    "C:\\Users\\lauth\\OneDrive\\Desktop\\sql_assistant_v3",
)

# Guard against piling up duplicate sys.path entries when the cell is re-run.
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

from src.components.collector.collector import AppDataCollector, LLMResponseCollector
from src.components.models.llms.llms import Langchain_OpenAI_LLM, HF_Llama38b_LLM
from src.components.models.embeddings.embeddings import HF_MultilingualE5_Embeddings, Openai_Embeddings
from src.components.memory.memory import Memory

# Shared objects for the rest of the notebook. Each model wrapper is
# instantiated and then initialised through its own init_model() call.
# NOTE(review): init_model() presumably loads weights / opens API clients, so
# this cell may be slow and may need credentials — confirm in the wrappers.

# OpenAI-backed LLM wrapper.
langchain_llm = Langchain_OpenAI_LLM()
langchain_llm.init_model()

# Hugging Face Llama wrapper.
hf_llm = HF_Llama38b_LLM()
hf_llm.init_model()

# Hugging Face multilingual-E5 embeddings wrapper.
mle5_embeddings = HF_MultilingualE5_Embeddings()
mle5_embeddings.init_model()

# OpenAI embeddings wrapper.
openai_embeddings = Openai_Embeddings()
openai_embeddings.init_model()

# Conversation memory that later cells add user messages to.
memory = Memory()

# Collectors passed into the pipeline functions in later cells.
collector = AppDataCollector()
llm_collector = LLMResponseCollector()


In [2]:
# Add a sample user message to the shared memory object.
# A later cell calls memory.clear_memory() before adding a different message.
memory.add_user_message("I want information from firmware EMED-2012-LR-P12")

In [2]:
# Hand-written request fixtures stored on the shared collector.
# The raw and the modified request carry the same sentence here.
collector.user_request = "The human is requesting information from the FQI-EMED-3135.01-067 flow computer."
collector.modified_user_request = "The human is requesting information from the FQI-EMED-3135.01-067 flow computer."

# The semantic-search variant keeps its embedded double quotes as part of the value.
collector.flavored_request_for_semantic_search = '"The human is requesting information from the firmware."'
# Schema metadata fixture, keyed by table name. Every row is a 5-tuple:
#   (column name, SQL type, description, integer index, owner)
# where "owner" is either a table name or a pair of table names on the rows
# that relate two tables.
# NOTE(review): the integer looks like a column position, with 999 used on the
# foreign-key rows — confirm against the code that produces semantic_info.
collector.semantic_info = {
    "fcs_firmware": [
        ("Id", "INT", "Unique identifier for each flow computer firmware", 1, "fcs_firmware"),
        # The same column appears a second time, tagged with a table pair.
        ("Id", "INT", "Unique identifier for each flow computer firmware", 1, ("fcs_computadores", "fcs_firmware")),
        ("Firmware", "VARCHAR(100)", "Name or version of the firmware", 2, "fcs_firmware"),
        ("Estado", "VARCHAR(10)", "Status of the firmware, active ('ACT') or inactive ('INA')", 3, "fcs_firmware"),
        ("IdTipo_Computador", "INT", "Foreign key linking firmwares with flow computer types", 999, ("fcs_firmware", "fcs_tipo_computador")),
    ],
    "fcs_computadores": [
        ("Id", "INT", "Unique identifier for each flow computer", 1, "fcs_computadores"),
        ("IP", "VARCHAR(100)", "Flow computer IP address of the flow computer", 2, "fcs_computadores"),
        ("Puerto", "INT", "Flow computer primary port used for communications", 3, "fcs_computadores"),
        ("Tag", "VARCHAR(100)", "Flow computer name", 4, "fcs_computadores"),
        ("Estado", "VARCHAR(100)", "Current status of the flow computer, active ('1') or inactive ('2')", 5, "fcs_computadores"),
        ("IdFirmware_fk", "INT", "Foreign key linking flow computers with respective firmware", 999, ("fcs_computadores", "fcs_firmware")),
    ],
}

# SQL fixture: the surrounding blank lines in the literal are removed by strip().
# NOTE(review): the query joins on f.IdTipo_Computador_fk, while the
# semantic_info fixture names that column "IdTipo_Computador" — confirm which
# spelling matches the real schema.
collector.sql_code = """

SELECT c.Tag, f.Firmware AS 'Revision/Version', tc.Nombre AS Marca FROM dbo_v2.fcs_computadores c JOIN dbo_v2.fcs_firmware f ON f.Id = c.IdFirmware_fk JOIN dbo_v2.fcs_tipo_computador tc ON tc.Id = f.IdTipo_Computador_fk WHERE c.Tag = 'FQI-EMED-3135.01-067'

""".strip()

# Assistant feedback fixtures for the SQL.
# NOTE(review): both texts talk about a "<Flow computer Tag>" placeholder, but
# the SQL fixture already contains a concrete tag — confirm the combination is
# intentional.
collector.assistant_sql_code_suggestion = (
    'Please replace the placeholder "<Flow computer Tag>" with the actual tag of the flow computer you are interested in.'
)
collector.assistant_sql_code_analysis = (
    'The query is almost complete and ready for execution, but it needs one more piece of information, "Flow computer Tag", to be specified.'
)

# Answer fixtures: the English draft and its Spanish counterpart.
collector.ai_pre_response = (
    "The FQI-EMED-3135.01-067 flow computer is a FB107 revision/version manufactured by FLOWBOSS."
)
collector.ai_post_response = (
    "El computador de flujo FQI-EMED-3135.01-067 es una revisión/versión FB107 fabricada por FLOWBOSS."
)



In [4]:
"""Para pre procesar, obtener el request y clasificacion del request se usa: """
# query_pre_process(langchain_llm, memory, collector)


"""Si el request es simple, se usa: """
# simple_request_process(langchain_llm, memory, collector)



"""Si el request es complejo se usa: """
# ? Para modificar el request:
# complex_request_process_modification(hf_llm, mle5_embeddings, collector)


# ? Para buscar tablas e informacion semantica de la base sql en la base vectorial
# complex_request_process_semantics(langchain_llm, openai_embeddings, collector)


# ? Para generar el sql en primera instancia
# complex_request_sql_generation(langchain_llm, collector)

# ? Para verificar la clasificacion del sql completo o incompleto
# complex_request_sql_verification(hf_llm, collector)

# ? Para generando un pre query si el sql es incompleto
# complex_request_pre_query_generation(langchain_llm, collector)


# ? Para generar la respuesta de resumen al dtaframe del sql
# primero se debe ejecutar el sql para obtener el dataframe df
# df = run_sql(collector.sql_code) o run_sql(collector.sql_pre_query)
# complex_request_sql_summary_response(langchain_llm, collector, df)

"""Post procesando la respuesta: """
# query_post_process(langchain_llm, collector, "Quiero saber informacion de FQI-EMED-3135.01-067")


'Post procesando la respuesta: '

In [4]:
from src.app.pipeline_processes.query_pre_process.generation import generate_request

# Start from an empty memory so only the message below is present.
memory.clear_memory()
memory.add_user_message("Give me the average temperature for EMED-3138.12-050 in august 2023")
# Alternative sample message, kept for manual switching:
# memory.add_user_message("I want information from firmware EMED-2012-LR-P12")

# Run request generation with the Llama model; the recorded output shows the
# result printed as a dict with an 'intention' key.
a = generate_request(hf_llm, llm_collector, memory)
print(a)

generate-request
{'text': '\nintention:  The human is seeking the average temperature for a specific location (EMED-3138.12-050) in a specific month and year (August 2023).'} 


{'intention': ' The human is seeking the average temperature for a specific location (EMED-3138.12-050) in a specific month and year (August 2023).'}


In [9]:
# Show the prompt stored on the first response recorded in llm_collector.
print(llm_collector.llm_responses[0].prompt)

<|begin_of_text|><|start_header_id|>system<|end_header_id|>
Your are a very helpfull assistant<|eot_id|><|start_header_id|>user<|end_header_id|>
I need your help, I'm trying to generate a summarize request from a user in a conversation.

Following the next conversation
Human Message: Give me the average temperature for EMED-3138.12-050 in august 2023

END OF CONVERSATION

Your task is to analyze the messages and briefly describe in one line the human intention and what he is looking for or doing.

Here are some advices for a better response:
 - Pay attention if the last message refers to previous ones to add necessary information located in previous messages.
 - You may add sensitive information to your response, like names or technical terms that are mentioned in conversation.
 - Do not include any explanations or apologies in your response.
 - Do not add your own conclusions or clarifications.
 - Do not add words nor nouns nor adjectives to complement the response if these are not me

In [21]:
# Hand-written prompt asking the model to summarise a short conversation.
# NOTE(review): the text already opens with the <|begin_of_text|>/system
# header and is then passed to apply_model_template() — confirm the template
# markers are not applied twice.
# NOTE(review): "Your are a very helpfull assistant" contains typos, but the
# same wording appears in the prompt printed by the previous cell, so it is
# left untouched here.
asd = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>
Your are a very helpfull assistant<|eot_id|><|start_header_id|>user<|end_header_id|>
Following the next conversation
Human Message: My name is Lauther
AI Message: Hello Lauther, I'm M-bot your assistant for measurement. What can I do for you?
Human Message: Give me the average temperature for EMED-3138.12-050 in august 2023

END OF CONVERSATION

Your task is to describe and summarize the conversation.

Here are some advices for a better response:
 - You may add sensitive information to your response, like names or technical terms that are mentioned in conversation.
 - Do not include any explanations or apologies in your response.
 - Do not add your own conclusions or clarifications.
 - Do not add words nor nouns nor adjectives to complement the response if these are not mentioned in the conversation.

Output format response:
The output should be formatted with the key format below. Do not add anything beyond the key format.
Start Key format:
"summary" is the key and its content is: Brief detailed summary of the conversation . . .
End of Key format
"""

# Build the final model input from the prompt and the "summary: " string.
# presumably the second argument primes the start of the reply — TODO confirm
p = hf_llm.apply_model_template(asd, "summary: ")

# Query the Llama model with the templated prompt.
res = hf_llm.query_llm(p)

# Plain print of the raw result (the recorded output is a dict with a 'text' key).
print(res)

{'text': "\nsummary:  Hello Lauther, I'm M-bot your assistant for measurement. What can I do for you? Give me the average temperature for EMED-3138.12-050 in August 2023."}


In [23]:
from src.app.pipeline_processes.sql_pre_process.manager import complex_request_process_modification


# Use the summary text printed by the previous cell as the user request.
collector.user_request="Hello Lauther, I'm M-bot your assistant for measurement. What can I do for you? Give me the average temperature for EMED-3138.12-050 in August 2023."

# Run the request-modification step; the result is printed in the next cell.
r = complex_request_process_modification(hf_llm, mle5_embeddings, collector, llm_collector)

Se esta levantando el modelo de embeddings :D, listo en: 44.79123306274414s
technical-terms
{'text': '\nterms: temperature, EMED-3138.12-050'} 


has-multi-definition
{'text': '\nclass: clear\nanalysis: The sentence is clear because the term "temperature" is unambiguously referring to a variable read in the measurement system, and the term "EMED-3138.12-050" is unambiguously referring to the name/tag of the measurement system, which is consistent with its definition.'} 


modified-request
{'text': '\nmodified_sentence: "Hello Lauther, I\'m M-bot your assistant for measurement. What can I do for you? Give me the average variable for the measurement system in August 2023."'} 




In [24]:
# Dump the object returned by complex_request_process_modification
# (the recorded output shows an AppDataCollector repr).
print(r)

AppDataCollector(user_request="Hello Lauther, I'm M-bot your assistant for measurement. What can I do for you? Give me the average temperature for EMED-3138.12-050 in August 2023.", modified_user_request="Hello Lauther, I'm M-bot your assistant for measurement. What can I do for you? Give me the average temperature for EMED-3138.12-050 in August 2023.", request_type=None, simple_response=None, technical_terms=['temperature', 'EMED-3138.12-050'], terms_dictionary=[{'original_term': 'temperature', 'cleaned_term': 'temperature', 'definitions': [{'sql_instructions': "Use 'Temperatura (°C)' in the WHERE statement when refers to temperature.", 'table_name': 'var_tipo_variable', 'standard_term': 'variable', 'definition': "When talking about the term 'temperature' or similar, this refers to a variable read in the measurement system.", 'replace_instruction': "The term 'temperature' or related refers to a variable read in measurement system, replace with 'variable'."}]}, {'original_term': 'EMED-