# Imports

In [29]:
from typing import TypedDict, Optional, Dict,Annotated ,List ,Literal
from langgraph.graph import StateGraph ,START,END
from langchain_openai import ChatOpenAI
from langchain_groq import  ChatGroq
from langchain.prompts import PromptTemplate
from langgraph.graph.message import  add_messages
from langgraph.checkpoint.memory import MemorySaver
from langgraph.graph import  MessagesState
from langchain_core.messages import HumanMessage,SystemMessage ,AnyMessage, AIMessage
from pydantic import  BaseModel,Field
from Prompts import  Prompts

# Enviornments

In [None]:
KRUTRIM_API_KEY="LNDn2rbGUIGZznn1NXT7U4VcADf-d"
KRUTRIM_ENDPOINT="https://cloud.olakrutrim.com/v1"
MODEL_NAME="Llama-3.3-70B-Instruct"
EVALUATION_MODEL_NAME="Llama-3.3-70B-Instruct"
GROQ_API_KEY="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"
GROQ_MODEL_NAME="llama-3.3-70b-versatile"

# Models

In [None]:
groq_llm= ChatGroq(groq_api_key=GROQ_API_KEY,model_name= GROQ_MODEL_NAME)
groq_evaluation_llm= ChatGroq(groq_api_key=GROQ_API_KEY,model =GROQ_MODEL_NAME)
groq_evaluation_llm=groq_evaluation_llm.with_structured_output(schema=Decision)

In [10]:
class StoneBreakerState(BaseModel):
    sql_context:Optional[Annotated[str,Field(description="")]]=None
    sql_prompt:Optional[Annotated[str,Field(description="")]]=None
    sql_context_from_vector_store:Optional[Annotated[str,Field(description="")]]=None
    sql_refined_once:Optional[Annotated[str,Field(description="")]]=None
    sql_refined_twice:Optional[Annotated[str,Field(description="")]]=None
    sql_converted_to_trino:Optional[Annotated[str,Field(description="")]]=None
    sql_converted_to_spark:Optional[Annotated[str,Field(description="")]]=None

In [None]:
class StoneBreakerSubState(BaseModel):
    

# Nodes

In [11]:
def sql_prompt_node(state:StoneBreakerState):
    sql_context= input("Enter the sql schema you are working with:")
    sql_prompt=input("Enter the Sql prompt you want")
    return {   
        "sql_context":sql_context,
        "sql_prompt":sql_prompt
    }

In [14]:
def revaluation_node(state:StoneBreakerState):
    sql_context= state.sql_context
    if not sql_context:
        # ask for the context
        sql_context= input("Enter the context:")
    evaluation_prompt =  Prompts.gen_fix_sql_context_analysis.format(
        sql_context=sql_context
    )
    evaluation_response = groq_evaluation_llm.invoke(evaluation_prompt)
    
    return {
        "sql_context":evaluation_response.sql_context,
    }

In [26]:
def sql_context_from_vector_store(state:StoneBreakerState):
    query=f"""
    SQL Context: {state.sql_prompt}
    SQL Prompt: {state.sql_context}
    """
    paraquetRag= ParaquetRAG()
    results=paraquetRag.retrieve(query)
    sql_context_from_vector_store= ""
    for d in results:
        sql_context_from_vector_store+=d.page_content
    return {
        "sql_context_from_vector_store": sql_context_from_vector_store
    }
        

In [27]:
def llm_refine_node_one(state:StoneBreakerState):
    
    evaluation_prompt =  Prompts.gen_sql.format(
        sql_context_from_vector_store=state.sql_context_from_vector_store,
        sql_context=state.sql_context,
        sql_prompt=state.sql_prompt
    )
    # Assume response to be a base model
    response= grok_llm.invoke(evaluation_prompt)
    return {
        "sql_refined_once":response.sql,
        "sql_refined_once_explnation":response.exaplanation
    }
    

IndentationError: unexpected indent (2531163983.py, line 4)

In [None]:
def llm_refine_node_twice(state:StoneBreakerState):
    evaluation_prompt =  Prompts.gen_sql.format(
        sql_context_from_vector_store=state.sql_context_from_vector_store,
        sql_context=state.sql_context,
        sql_prompt=state.sql_prompt
    )
    # Assume response to be a base model
    response= grok_llm.invoke(evaluation_prompt)
    return {
        "sql_refined_once":response.sql,
        "sql_refined_once_explnation":response.exaplanation
    }
    

In [25]:
paraquetRag= ParaquetRAG()
results=paraquetRag.retrieve("Retrieve all employees earning more than $5000 per month")
results

[Document(id='332c51e3-6da9-447b-b852-dd13ffb21094', metadata={'id': 79461, 'domain': 'manufacturing', 'sql_complexity': 'basic SQL', 'sql_task_type': 'analytics and reporting'}, page_content="SQL Context: CREATE TABLE employees (id INT, name VARCHAR(50), salary DECIMAL(10, 2)); INSERT INTO employees (id, name, salary) VALUES (1, 'John Doe', 60000.00), (2, 'Jane Smith', 45000.00), (3, 'Alice Johnson', 52000.00);\n        SQL Prompt: Get the names and salaries of all employees earning more than $50,000\n        SQL Query: SELECT name, salary FROM employees WHERE salary > 50000.00;\n        SQL Explanation: This SQL query retrieves the names and salaries of all employees who earn more than $50,000 by selecting the name and salary columns for all records where the salary is greater than 50000.00.\n        Domain: manufacturing\n        SQL Task Type: analytics and reporting"),
 Document(id='4769694a-4b5e-4bc6-9452-b7cb2e2b6b7e', metadata={'id': 98492, 'domain': 'human resources', 'sql_com

In [21]:
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings

class ParaquetRAG:
    def __init__(self):
        self.embeddings_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2",model_kwargs={"token":"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"})
        self.load_faiss_index()

    def load_faiss_index(self):
        """Loads the FAISS index from the local storage"""
        self.vector_store = FAISS.load_local("faiss_parquet_index", self.embeddings_model,allow_dangerous_deserialization=True)
        self.retriever = self.vector_store.as_retriever(search_kwargs={"k": 3})
    
    def retrieve(self, query):
        """Retrieves the most relevant SQL context from FAISS based on the query"""
        results = self.retriever.get_relevant_documents(query)
        return results

In [24]:
def get_sql_context_node(state:StoneBreakerState):
    evaluation_prompt =  Prompts.gen_sql_context_analysis.format(
        sql_context=sql_context
    )
    evaluation_response = groq_evaluation_llm.invoke(evaluation_prompt)
    if evaluation_response.state == 'Compelte':
        return 'sql_context_from_vector_store'
    else:
        return "revaluation_node"
    
    return "revaluation_node"

# Edges

In [13]:
def routing_function(state: StoneBreakerState):
    if state.sql_context :
        evaluation_prompt =  Prompts.gen_sql_context_analysis.format(
            sql_context=sql_context
        )
        evaluation_response = groq_evaluation_llm.invoke(evaluation_prompt)
        if evaluation_response.state == 'Compelte':
            return 'llm_revision_one_node'
        else:
            return "revaluation_node"
    return "revaluation_node"


In [9]:
from sentence_transformers import SentenceTransformer
sentences = ["This is an example sentence", "Each sentence is converted"]
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2',use_auth_token="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")
embeddings = model.encode(sentences)
print(embeddings)


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

[[ 6.76569566e-02  6.34959713e-02  4.87130880e-02  7.93049708e-02
   3.74480635e-02  2.65279715e-03  3.93749438e-02 -7.09846430e-03
   5.93614466e-02  3.15369666e-02  6.00980744e-02 -5.29051758e-02
   4.06068303e-02 -2.59308331e-02  2.98428573e-02  1.12686353e-03
   7.35148340e-02 -5.03818467e-02 -1.22386590e-01  2.37028245e-02
   2.97265239e-02  4.24768515e-02  2.56337672e-02  1.99515396e-03
  -5.69190383e-02 -2.71598566e-02 -3.29035111e-02  6.60248920e-02
   1.19007207e-01 -4.58791107e-02 -7.26214722e-02 -3.25840488e-02
   5.23412861e-02  4.50553223e-02  8.25300161e-03  3.67024243e-02
  -1.39415702e-02  6.53918982e-02 -2.64272317e-02  2.06357261e-04
  -1.36643350e-02 -3.62810642e-02 -1.95043795e-02 -2.89737936e-02
   3.94270457e-02 -8.84090886e-02  2.62424676e-03  1.36714466e-02
   4.83063087e-02 -3.11566498e-02 -1.17329188e-01 -5.11690676e-02
  -8.85287970e-02 -2.18963549e-02  1.42986048e-02  4.44167629e-02
  -1.34815751e-02  7.43392631e-02  2.66382564e-02 -1.98763050e-02
   1.79191