In [6]:
import os
import sys

# Project root that contains the `src` package imported below. The original
# hard-coded location is kept as the default so existing setups keep working;
# set the SQL_ASSISTANT_ROOT environment variable to run from another checkout.
PROJECT_ROOT = os.environ.get(
    "SQL_ASSISTANT_ROOT",
    "C:\\Users\\lauth\\OneDrive\\Desktop\\sql_assistant_v3",
)
# Guarded so that re-running this cell does not keep appending duplicates.
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

from src.components.collector.collector import AppDataCollector, LLMResponseCollector
from src.components.models.llms.llms import Langchain_OpenAI_LLM, HF_Llama38b_LLM
from src.components.models.embeddings.embeddings import HF_MultilingualE5_Embeddings, Openai_Embeddings
from src.components.memory.memory import Memory

# Each model wrapper is constructed first and then initialised explicitly
# through its own init_model() call.
# NOTE(review): only `hf_llm` and `mle5_embeddings` are referenced by the cells
# visible in this notebook; `langchain_llm` and `openai_embeddings` may be used
# elsewhere — confirm before removing them.
langchain_llm = Langchain_OpenAI_LLM()
langchain_llm.init_model()

hf_llm = HF_Llama38b_LLM()
hf_llm.init_model()

mle5_embeddings = HF_MultilingualE5_Embeddings()
mle5_embeddings.init_model()

openai_embeddings = Openai_Embeddings()
openai_embeddings.init_model()

# Conversation store shared by the cells below (messages are added to it and
# read back when the prompts are built).
memory = Memory()

# Collector objects handed to the pipeline helpers in the cells below.
collector = AppDataCollector()
llm_collector = LLMResponseCollector()

In [2]:
# Hand-seeded collector state: request text, request type, conversation summary
# and terms dictionary.
# NOTE(review): presumably a fixture for exercising later stages without
# running the earlier ones — confirm.
# `collector` must already exist (it is created in the setup cell).
# `conversation_summary` and `terms_dictionary` are reassigned by later cells in
# this notebook, so which values are live depends on the cells that were run
# and on their order.
collector.user_request = "The human is seeking detailed information, specifically the average temperature for a specific location identified as EMED-3138.12-050, in a specific month and year, August 2023."
collector.request_type = "complex"
collector.conversation_summary = "The conversation asked for the average temperature for EMED-3138.12-050 in August 2023."
# One entry per term; each entry carries the original and cleaned spelling plus
# a list of one or more candidate definitions.
collector.terms_dictionary = [
    {
        "original_term": "average temperature",
        "cleaned_term": "average temperature",
        "definitions": [
            {
                "sql_instructions": "Use 'Temperatura (\u00b0C)' in the WHERE statement when refers to temperature.",
                "table_name": "var_tipo_variable",
                "standard_term": "variable",
                "definition": "When talking about the term 'temperature' or similar, this refers to a variable read in the measurement system.",
                "replace_instruction": "The term 'temperature' or related refers to a variable read in measurement system, replace with 'variable'.",
            }
        ],
    },
    {
        # This term has three candidate definitions, each pointing at a
        # different table.
        "original_term": "EMED-3138.12-050",
        "cleaned_term": "emed",
        "definitions": [
            {
                "sql_instructions": "",
                "table_name": "med_sistema_medicion",
                "standard_term": "measurement system",
                "definition": "When talking about the term 'EMED' or similar, it refers to the name/tag of the measurement system.",
                "replace_instruction": "When talking about 'EMED' as a measurement system replace with 'measurement system' instead.",
            },
            {
                "sql_instructions": "When context is related to flow computer firmware, 'EMED-010' could be the name of the flow computer firmware.",
                "table_name": "fcs_firmware",
                "standard_term": "firmware",
                "definition": "Terms related to 'EMED-010' sometimes are names of flow computers firmwares/type",
                "replace_instruction": "When talking about 'EMED-010' as a flow computer firmware, replace with 'firmware' instead of EMED-010.",
            },
            {
                "sql_instructions": "",
                "table_name": "fcs_computadores",
                "standard_term": "flow computer",
                "definition": "When talking about the term 'EMED' or similar, it refers to the name of flow computer.",
                "replace_instruction": "When talking about 'EMED' as a flow computer replace with 'flow computer' instead.",
            },
        ],
    },
]


In [7]:
# First request and classification

from src.app.pipeline_processes.query_pre_process.manager import query_pre_process

# Reset the shared memory (clear_memory() presumably empties the history —
# confirm), then replay a short scripted conversation whose last message is the
# question under test.
memory.clear_memory()
memory.add_user_message("Hello, my name is Lauther")
memory.add_ai_message("Hi Lauther, I'm M-bot, how can I help you?")
memory.add_user_message("Which platform has id 33?")

# The returned object exposes `user_request` and `request_type`, printed below.
request = query_pre_process(hf_llm, memory, collector, llm_collector)
print(request.user_request)
print(request.request_type)

generate-request
{'text': '\nintention:  The human is requesting information about a specific platform with associated id 33.'} 


request-type
{'text': "\ntype:  complex\nanalysis: This input is related to get information from the measurement system database, but I don't have access to it, so it is classified as complex."} 


The human is requesting information about a specific platform with associated id 33.
complex


In [8]:
# Summarizing the conversation

from src.components.memory import MEMORY_TYPES
from src.app.rag.rag_utils import base_llm_generation

# Prompting
# Template for the summary prompt; `{chat_history}` is its only placeholder.
# The chat special tokens (`<|begin_of_text|>`, `<|start_header_id|>`, ...) are
# embedded literally in the text.
# NOTE(review): the rendered prompt is later passed through
# hf_llm.apply_model_template() — confirm the chat template is not applied twice.
# NOTE(review): the system line contains typos ("Your are", "helpfull"); it is
# runtime text sent to the model, so it is left untouched here.
conversation_summary_instructions = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>
Your are a very helpfull assistant<|eot_id|><|start_header_id|>user<|end_header_id|>
Following the next conversation
{chat_history}
END OF CONVERSATION

Your task is to describe and summarize the conversation.

Here are some advices for a better response:
 - You may add sensitive information to your response, like names or technical terms that are mentioned in conversation.
 - Do not include any explanations or apologies in your response.
 - Do not add your own conclusions or clarifications.
 - Do not add words nor nouns nor adjectives to complement the response if these are not mentioned in the conversation.

Output format response:
The output should be formatted with the key format below. Do not add anything beyond the key format.
Start Key format:
"summary" is the key and its content is: Brief detailed summary of the conversation . . .
End of Key format"""
# Suffix returned together with the prompt; it spells the "summary" key named in
# the output format above.
conversation_summary_suffix = "summary: "

def get_conversation_summary_prompt(memory: Memory):
    """Render the conversation-summary prompt for the messages held in `memory`.

    Every current message becomes one line of chat history, prefixed with
    "AI Message: " when its type equals MEMORY_TYPES["AI"] and with
    "Human Message: " otherwise.

    Returns:
        A `(prompt, suffix)` tuple: the summary template with the chat history
        filled in, and the answer prefix `conversation_summary_suffix`.
    """
    history_lines = []
    for entry in memory.get_current_messages():
        text = entry["content"]
        if entry["type"] == MEMORY_TYPES["AI"]:
            history_lines.append(f"AI Message: {text}\n")
        else:
            history_lines.append(f"Human Message: {text}\n")

    rendered = conversation_summary_instructions.format(
        chat_history="".join(history_lines)
    )
    return rendered, conversation_summary_suffix

# Build the summary prompt from whatever messages `memory` currently holds.
instruction, suffix = get_conversation_summary_prompt(memory)

# Generation
# The last argument is the label of this generation step; the same text shows
# up at the top of the recorded cell output.
prompt = hf_llm.apply_model_template(instruction, suffix)
res = base_llm_generation(hf_llm, llm_collector, prompt, "summary-conversation")

# Manager
# `res` is indexed with the "summary" key requested in the prompt's output
# format; the value is stringified and stripped before being stored.
collector.conversation_summary = str(res["summary"]).strip()
print(collector.conversation_summary)


summary-conversation
{'text': '\nsummary:  Lauther initiated a conversation with M-bot, asking which platform has ID 33.'} 


Lauther initiated a conversation with M-bot, asking which platform has ID 33.


In [None]:
from src.app.pipeline_processes.sql_pre_process.generation import generate_technical_terms
from src.app.pipeline_processes.sql_pre_process.retrievers import retrieve_semantic_term_definitions, retrieve_terms_examples
from src.settings.settings import Settings
import json

# Extracting the technical terms
# NOTE(review): this cell has no execution count in the saved notebook, so the
# recorded outputs of later cells were produced without it having run in that
# session — the `collector.terms_dictionary` they saw came from elsewhere.

# Retriever
conversation_summary = collector.conversation_summary
terms_examples = retrieve_terms_examples(conversation_summary, mle5_embeddings)

# Generation
output = generate_technical_terms(hf_llm, llm_collector, conversation_summary, terms_examples)
technical_terms = output["terms"]

# Part 2: Retrieving definitions for the terms from the vector DB (retrieval) to build the dictionary
terms_collection = Settings.Chroma.get_terms_collection()
# The call returns three values; only the first is used in the visible cells
# (`has_replacement_definitions` is bound but not read here, the third is discarded).
terms_dictionary, has_replacement_definitions, _ = (
    retrieve_semantic_term_definitions(
        mle5_embeddings, terms_collection, technical_terms
    )
)

collector.terms_dictionary = terms_dictionary
print(json.dumps(collector.terms_dictionary, indent=2))

In [15]:
from src.components.memory import MEMORY_TYPES
from src.app.rag.rag_utils import base_llm_generation

# Generating a new request with the dictionary
# Template with two placeholders: `{chat_history}` (the rendered conversation)
# and `{terms}` (a bullet list of term definitions). Both sections are fenced
# with the same ''' delimiter; the `{terms}` fence used to be unbalanced
# (four opening quotes, two closing ones).
request_generation_prompt = """I need your help, I'm trying to generate a summarize request from a user in a conversation. Follow carefully the next steps.

First, look up the next messages between HUMAN and AI:
'''\n{chat_history}\n'''

Second, look up the next relevant definitions from dictionary:
'''\n{terms}\n'''

Third, analyze the messages and briefly describe in one line the human intention and what he is looking for or doing. If you have to complement a term, use only definitions on previous dictionary.

Fourth, evaluation. Evaluate if your response has the necessary information retrieved from the conversation and dictionary. Also it must starts with: 'The human is ...' .
  
 - Pay attention if the last message refers to previous ones to add necessary information located in previous messages.
 - You may add sensitive information to your response, like names or technical terms that are mentioned in conversation.
 - Do not include any explanations or apologies in your response.
 - Do not add your own conclusions or clarifications.
 - Do not add your own thoughts about the request.

Output format response:
The output should be formatted with the key format below. Do not add anything beyond the key format.
Start Key format:
"response" is the key and its content is: Detailed user request. It may start with The human is . . .
End of Key format
Begin!"""

# Suffix returned together with the prompt; it spells the "response" key named
# in the output format above.
request_generation_suffix = "response: "


def get_request_generation_prompt(memory: Memory, terms_dictionary: list[dict]):
    """Build the request-generation prompt from the chat history and term definitions.

    Args:
        memory: Conversation store. Each current message becomes one history
            line, prefixed with "AI Message: " when its type equals
            MEMORY_TYPES["AI"] and with "Human Message: " otherwise.
        terms_dictionary: List of term entries, each holding a "definitions"
            list whose items carry a "definition" text. May be empty or None,
            in which case the definitions section of the prompt is left empty.

    Returns:
        A `(instructions, suffix)` tuple: the request-generation template with
        both placeholders filled in, and `request_generation_suffix`.
    """
    history_lines = []
    for message in memory.get_current_messages():
        content = message["content"]
        if message["type"] == MEMORY_TYPES["AI"]:
            history_lines.append(f"AI Message: {content}\n")
        else:
            history_lines.append(f"Human Message: {content}\n")
    chat_history = "".join(history_lines)

    definitions = [
        str(definition["definition"]).strip()
        for entry in (terms_dictionary or [])
        for definition in entry["definitions"]
    ]

    # `terms` is always bound now. It used to be assigned only when the
    # dictionary was non-empty, so an empty or None dictionary made the
    # .format() call below fail with UnboundLocalError.
    if definitions:
        terms = "    - " + "\n    - ".join(definitions) + "\n"
    else:
        terms = ""

    instructions = request_generation_prompt.format(chat_history=chat_history, terms=terms)
    return instructions, request_generation_suffix


# Generation
# Uses whatever `collector.terms_dictionary` currently holds in the kernel, so
# the result depends on which earlier cell assigned it last.
instruction, suffix = get_request_generation_prompt(memory, collector.terms_dictionary)
prompt  = hf_llm.apply_model_template(instruction, suffix)
# The last argument is the label of this generation step; the same text shows
# up at the top of the recorded cell output.
res = base_llm_generation(hf_llm, llm_collector, prompt, "enhanced-request-conversation")
print(res)

enhanced-request-conversation
{'text': '\nresponse:  The human is trying to identify the specific platform with the given unique identifier, id 33.'} 


{'response': ' The human is trying to identify the specific platform with the given unique identifier, id 33.'}
