In [1]:
# Initialize components

import os
import sys

# Root of the sql_assistant_v3 checkout that provides the `src` package.
# Set the SQL_ASSISTANT_ROOT environment variable to run this notebook on
# another machine; the original author's local path remains the default.
PROJECT_ROOT = os.environ.get(
    "SQL_ASSISTANT_ROOT",
    "C:\\Users\\lauth\\OneDrive\\Desktop\\sql_assistant_v3",
)

# Guard the append so re-running this cell does not pile up duplicate entries.
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)
from src.components.memory.memory import Memory
from src.components.collector.collector import AppDataCollector, LLMResponseCollector
from src.components.models.llms.llms import Langchain_OpenAI_LLM, HF_Llama38b_LLM
from src.components.models.embeddings.embeddings import HF_MultilingualE5_Embeddings, Openai_Embeddings
from src.components.memory.memory import Memory
from src.components.memory.memory_interfaces import AIMessage, HumanMessage
from typing import List, Union

# Initialize the LLM models.
# `hf_llm` and `langchain_llm` are read as module-level globals by the
# pipeline functions defined in the next cell.
langchain_llm = Langchain_OpenAI_LLM()
langchain_llm.init_model()
hf_llm = HF_Llama38b_LLM()
hf_llm.init_model()

# Initialize the embedding models.
# `mle5_embeddings` and `openai_embeddings` are likewise read as globals by
# `complex_request_pipeline`.
mle5_embeddings = HF_MultilingualE5_Embeddings()
mle5_embeddings.init_model()

openai_embeddings = Openai_Embeddings()
openai_embeddings.init_model()

# Initialize the conversation memory from an empty message list.
messages: List[Union[AIMessage, HumanMessage]]  = []
memory = Memory(messages)

In [2]:
# Iniciando workflow

from src.app.pipeline_processes.query_post_process.manager import query_post_process
from src.app.pipeline_processes.query_pre_process.manager import (
    query_pre_process,
    simple_request_process,
)
from src.app.pipeline_processes.sql_generation_process.manager import (
    complex_request_sql_generation,
)
from src.app.pipeline_processes.sql_post_process.manager import (
    complex_request_pre_query_generation,
    complex_request_sql_summary_response,
    complex_request_sql_verification,
)
from src.app.pipeline_processes.sql_pre_process.manager import (
    complex_request_process_modification,
    complex_request_process_semantics,
)
from src.utils.sql_utils import run_sql
import traceback
import pandas as pd


def pre_process_pipeline(
    memory: Memory,
    collector: AppDataCollector,
    llm_collector: LLMResponseCollector,
):
    """Run the query pre-processing stage on a best-effort basis.

    Calls ``query_pre_process`` with the module-level ``hf_llm`` model. Any
    exception raised by the stage is printed together with its traceback and
    is not re-raised, so the caller always regains control.

    Args:
        memory: Conversation memory handed to the stage.
        collector: Application data collector handed to the stage.
        llm_collector: Collector of LLM responses handed to the stage.
    """
    try:
        stage_args = {
            "llm": hf_llm,
            "memory": memory,
            "collector": collector,
            "llm_collector": llm_collector,
        }
        query_pre_process(**stage_args)
    except Exception as err:
        print(f"Error en: query_pre_process:\n {err}")
        traceback.print_exc()


def simple_request_pipeline(
    collector: AppDataCollector, llm_collector: LLMResponseCollector
):
    """Run the simple-request stage on a best-effort basis.

    Calls ``simple_request_process`` with the module-level ``hf_llm`` model.
    Any exception raised by the stage is printed together with its traceback
    and is not re-raised.

    Args:
        collector: Application data collector handed to the stage.
        llm_collector: Collector of LLM responses handed to the stage.
    """
    try:
        stage_args = {
            "llm": hf_llm,
            "collector": collector,
            "llm_collector": llm_collector,
        }
        simple_request_process(**stage_args)
    except Exception as err:
        print(f"Error en: simple_request_process:\n {err}")
        traceback.print_exc()


def complex_request_pipeline(
    collector: AppDataCollector, llm_collector: LLMResponseCollector, memory: Memory
):
    """Run the stages of the complex-request pipeline in order.

    The stages are, in order: request modification, request semantics, SQL
    generation and SQL verification. They all use the module-level ``hf_llm``
    model; the modification stage uses ``mle5_embeddings`` and the semantics
    stage uses ``openai_embeddings``. When the collector's SQL code class is
    ``"incomplete"`` an additional pre-query generation stage is run with
    ``langchain_llm``.

    Every stage is best-effort: its name is printed before it runs, and any
    exception is printed with its traceback and swallowed so the following
    stages still execute.

    Args:
        collector: Application data collector shared by all stages.
        llm_collector: Collector of LLM responses shared by all stages.
        memory: Conversation memory, used only by the modification stage.
    """
    try:
        print("complex_request_process_modification")
        complex_request_process_modification(
            llm=hf_llm,
            embeddings=mle5_embeddings,
            memory=memory,
            collector=collector,
            llm_collector=llm_collector,
        )
    except Exception as e:
        print(f"Error en: complex_request_process_modification:\n {e}")
        traceback.print_exc()

    try:
        print("complex_request_process_semantics")
        complex_request_process_semantics(
            llm=hf_llm,
            embeddings=openai_embeddings,
            collector=collector,
            llm_collector=llm_collector,
        )
    except Exception as e:
        print(f"Error en: complex_request_process_semantics:\n {e}")
        traceback.print_exc()

    try:
        print("complex_request_sql_generation")
        complex_request_sql_generation(
            llm=hf_llm, collector=collector, llm_collector=llm_collector
        )
    except Exception as e:
        print(f"Error en: complex_request_sql_generation:\n {e}")
        traceback.print_exc()

    try:
        print("complex_request_sql_verification")
        complex_request_sql_verification(
            llm=hf_llm, collector=collector, llm_collector=llm_collector
        )
    except Exception as e:
        print(f"Error en: complex_request_sql_verification:\n {e}")
        traceback.print_exc()

    # The stages above swallow their own failures, so the code class may never
    # have been set. Treat a missing value as "not a pre-query" instead of
    # raising AttributeError on `.strip()`; post_sql_generation_pipeline
    # applies the same guard to this attribute.
    sql_code_class = collector.assistant_sql_code_class
    is_prequery = bool(sql_code_class) and sql_code_class.strip() == "incomplete"
    if is_prequery:
        try:
            print("complex_request_pre_query_generation")
            complex_request_pre_query_generation(
                llm=langchain_llm, collector=collector, llm_collector=llm_collector
            )
        except Exception as e:
            print(f"Error en: complex_request_pre_query_generation:\n {e}")
            traceback.print_exc()
            
       
def post_sql_generation_pipeline(
    collector: AppDataCollector, llm_collector: LLMResponseCollector
):
    """Execute the generated SQL and ask the LLM to summarise the result.

    When the collector's SQL code class is ``"incomplete"`` the pre-query
    (``collector.sql_pre_query``) is executed; otherwise the main query
    (``collector.sql_code``) is. The summary stage then runs with
    ``langchain_llm`` and whatever ``run_sql`` returned, or ``None`` if the
    execution failed.

    Both steps are best-effort: exceptions are printed with their traceback
    and swallowed.

    Args:
        collector: Application data collector holding the SQL to execute.
        llm_collector: Collector of LLM responses handed to the summary stage.
    """
    result_frame = None
    code_class = collector.assistant_sql_code_class
    run_prequery = bool(code_class) and code_class.strip() == "incomplete"

    # Step 1: execute whichever statement applies to this request.
    if run_prequery:
        try:
            print("Ejecutando codigo SQL de prequery ...")
            result_frame = run_sql(collector.sql_pre_query)
        except Exception as err:
            print(f"Error al ejecutar collector.sql_pre_query \n{err}")
            traceback.print_exc()
    else:
        try:
            print("Ejecutando codigo SQL ...")
            result_frame = run_sql(collector.sql_code)
        except Exception as err:
            print(f"Error al ejecutar collector.sql_code \n{err}")
            traceback.print_exc()

    # Step 2: the summary stage is the same for both paths.
    try:
        complex_request_sql_summary_response(
            llm=langchain_llm,
            collector=collector,
            llm_collector=llm_collector,
            dataframe=result_frame,
        )
    except Exception as err:
        print(f"Error al ejecutar complex_request_sql_summary_response\n{err}")
        traceback.print_exc()

    
def post_process_pipeline(
    collector: AppDataCollector, llm_collector: LLMResponseCollector
):
    """Run the query post-processing stage on a best-effort basis.

    Calls ``query_post_process`` with the module-level ``langchain_llm``
    model. Any exception raised by the stage is printed together with its
    traceback and is not re-raised.

    Args:
        collector: Application data collector handed to the stage.
        llm_collector: Collector of LLM responses handed to the stage.
    """
    try:
        stage_args = {
            "llm": langchain_llm,
            "collector": collector,
            "llm_collector": llm_collector,
        }
        query_post_process(**stage_args)
    except Exception as err:
        print(f"Error al ejecutar query_post_process\n{err}")
        traceback.print_exc()


def generate_sql(
    memory: Memory,
    collector: AppDataCollector,
    llm_collector: LLMResponseCollector,
):
    """Drive one request through the full pipeline.

    Pre-processing runs first. The request is then routed on
    ``collector.request_type``: ``"simple"`` (case-insensitive, surrounding
    whitespace ignored) goes through the simple pipeline, anything else goes
    through the complex pipeline followed by SQL execution and summary.
    Post-processing runs last in both cases.

    Args:
        memory: Conversation memory used by the pre-processing and complex stages.
        collector: Application data collector shared by every stage.
        llm_collector: Collector of LLM responses shared by every stage.
    """
    pre_process_pipeline(
        memory=memory, collector=collector, llm_collector=llm_collector
    )

    request_kind = collector.request_type.lower().strip()
    if request_kind != "simple":
        complex_request_pipeline(
            collector=collector, llm_collector=llm_collector, memory=memory
        )
        post_sql_generation_pipeline(collector, llm_collector)
    else:
        simple_request_pipeline(collector=collector, llm_collector=llm_collector)

    post_process_pipeline(collector=collector, llm_collector=llm_collector)
     
         

In [3]:

def chat(
    user_message: str,
    memory: Memory,
    collector: AppDataCollector,
    llm_collector: LLMResponseCollector,
):
    """Process one user turn and record both sides of it in memory.

    The user message is added to ``memory`` and stored on the collector's
    current conversation data, ``generate_sql`` is run, and the resulting
    assistant response (plus its dataframe, if any) is added to ``memory``
    linked to the user message id.

    Args:
        user_message: Text typed by the user.
        memory: Conversation memory to append both messages to.
        collector: Application data collector for this turn.
        llm_collector: Collector of LLM responses for this turn.

    Returns:
        The same ``collector`` object that was passed in.
    """
    # Register the incoming message before the pipeline runs.
    user_entry = memory.add_user_message(user_message)
    collector.current_conversation_data.last_user_message = user_entry

    generate_sql(memory=memory, collector=collector, llm_collector=llm_collector)

    # Read the pipeline results back from the collector.
    reply_text = collector.ai_post_response
    reply_frame: pd.DataFrame | None = collector.dataframe_response

    assistant_entry = memory.add_ai_message(
        reply_text, user_entry.message_id, reply_frame
    )
    collector.current_conversation_data.last_ai_message = assistant_entry

    return collector
    

In [12]:
# Run a single chat turn

# Create fresh collectors for this turn. `memory` is NOT recreated here: it is
# the object built in the first cell, so earlier messages are still in it.
collector = AppDataCollector()
llm_collector = LLMResponseCollector()
# memory.clear_memory()

# `chat` returns the collector it was given, so `res` and `collector` refer to
# the same object.
res = chat(
    "now give me the max static pressure from the second one in list",
    memory,
    collector,
    llm_collector,
)

generate-request
{'text': '\nsummary:  The conversation starts with the user asking the assistant to provide the maximum static pressure from the second option in the list.\nuser_intent: The user is asking for the maximum static pressure from the second option in the list.\nslots: None'} 


enhanced-request
{'text': '\nresponse:  The user is asking for the maximum static pressure from a specific option.'} 


request-type
{'text': '\ntype:  complex\nanalysis: This input is related to getting information from a specific option, which is likely related to a measurement system database, but I do not have access to it.'} 


complex_request_process_modification
technical-terms
{'text': '\nterms: static pressure, second option, list'} 


enhanced-request
{'text': '\nresponse:  The user is asking for the maximum static pressure from a specific option.'} 


has-multi-definition
{'text': '\nclass: clear\nanalysis: The term "static pressure" has a single definition, which is a variable read in th

  df = pd.read_sql_query(sql, conn)


summary-response-sql
{'text': 'response: The maximum static pressure for the specific option is 568.865.'} 


post-process-translation
{'text': 'detected_language: English\nresponse: The maximum static pressure for the second option in the list is 568.865.'} 




In [17]:
# Number of LLM responses held by the collector.
print(len(llm_collector.llm_responses))
# Prompt of the response stored at index 1.
print(llm_collector.llm_responses[1].prompt)
# TODO: the new message generated by the assistant still has to be added to the list

12
<|begin_of_text|><|start_header_id|>system<|end_header_id|>
Your are a very helpfull assistant<|eot_id|><|start_header_id|>user<|end_header_id|>
I need your help with a very important task for my work. Follow carefully this instructions step by step.

First, read this current user intent:
The next is a conversation between Assistant and Human.
<user_intent>The user is asking for the maximum static pressure from the second option in the list.</user_intent> 

Second, the next are relevant slots from a conversation that are necessary to complete the previous user intent:
<Slots>None</Slots>

Third, complementation. Add to the user intent the necessary slots to generate a complete and better user intent.

Finally, evaluation:
- The user intent should contain the relevant slots. A correct intent should answer the question: "What does the user want?". 
- Do not add your own ideas or clarifications or recommendations.

Output format response:
The output should be formatted with the key for

In [14]:
# Show the slots stored on the collector's current conversation data.
print(collector.current_conversation_data.current_slots)

None


In [None]:
import pyperclip
# Number of LLM responses held by the collector, then the prompt at index 0.
print(len(llm_collector.llm_responses))
print(llm_collector.llm_responses[0].prompt)

# Copy the prompt at index 8 to the system clipboard.
# NOTE(review): index 8 is hard-coded; presumably this fails when fewer than
# nine responses were collected — confirm before re-running.
pyperclip.copy(llm_collector.llm_responses[8].prompt)

In [11]:
import json
# Pretty-print the collector's terms dictionary as JSON (prints `null` when it is None).
print(json.dumps(collector.terms_dictionary, indent=2))

null


In [15]:
# Print the chat messages currently held in memory.
print(memory.list_chat_messages())


<Human>: 2024-07-20 20:57:04.537012
Hola!
<Human>: 2024-07-20 20:57:27.085011
Hola!
<Human>: 2024-07-20 20:58:17.618424
Hola!
<Human>: 2024-07-20 20:58:46.889084
Hola!
<Assistant>: 2024-07-20 20:58:52.493341
¡Hola! Me alegra poder ayudarte. ¿En qué puedo asistirte hoy?
<Human>: 2024-07-20 21:00:13.559027
me puedes dar una lista de sistemas de medicion
<Assistant>: 2024-07-20 21:01:31.993822
Aquí está una lista de sistemas de medición: EMED-3138.11-128, EMED-3138.12-050, EMED-3138.12-052, EMED-3138.12-065, EMED-3138.12-105, EST-3138.00-CONS_UDM, EST-3138.12-PILOTO, EST-LIBTQ-CX, FQIT-3138.12-015, TANQ-3138.11-011.
Here is a dataframe from SQL: 
|    | Measurement_System_Name   | Measurement_System_Tag   |
|---:|:--------------------------|:-------------------------|
|  0 | EMED-3138.11-128          | EMED-3138.11-128         |
|  1 | EMED-3138.12-050          | EMED-3138.12-050         |
|  2 | EMED-3138.12-052          | EMED-3138.12-052         |
|  3 | EMED-3138.12-065          | EM

In [None]:
import sqlglot
import pyperclip

# Take the SQL stored on the collector, transpile it with sqlglot targeting
# the "tsql" dialect, then copy the first resulting statement to the clipboard
# and print it.
sql = collector.sql_code
# `transpile` is indexed with [0], i.e. only the first returned statement is kept.
code = sqlglot.transpile(sql, write="tsql", identify=True, pretty=True)[0]
pyperclip.copy(code)
print(code)