In [1]:
import locale
locale.getpreferredencoding = lambda: "UTF-8"
!pip install flask
!pip install pyngrok



In [2]:
!pip install llama-index llama-index-llms-huggingface llama-index-embeddings-huggingface  accelerate bitsandbytes



In [3]:

!pip install llama-index
%pip install llama-index-llms-ollama
!pip install llama-index llama-index-llms-huggingface llama-index-embeddings-huggingface  llama-index-readers-web




In [4]:
def messages_to_prompt(messages):
    prompt = ""
    for message in messages:
      if message.role == 'system':
        prompt += f"<|system|>\n{message.content}</s>\n"
      elif message.role == 'user':
        prompt += f"<|user|>\n{message.content}</s>\n"
      elif message.role == 'assistant':
        prompt += f"<|assistant|>\n{message.content}</s>\n"

    # ensure we start with a system prompt, insert blank if needed
    if not prompt.startswith("<|system|>\n"):
      prompt = "<|system|>\n</s>\n" + prompt

    # add final assistant prompt
    prompt = prompt + "<|assistant|>\n"

    return prompt

def load_model():
  import torch
  from transformers import BitsAndBytesConfig
  from llama_index.core.prompts import PromptTemplate
  from llama_index.llms.huggingface import HuggingFaceLLM

  quantization_config = BitsAndBytesConfig(
      load_in_4bit=True,
      bnb_4bit_compute_dtype=torch.float16,
      bnb_4bit_quant_type="nf4",
      bnb_4bit_use_double_quant=True,
  )

  llm = HuggingFaceLLM(
      model_name="HuggingFaceH4/zephyr-7b-alpha",
      tokenizer_name="HuggingFaceH4/zephyr-7b-alpha",
      query_wrapper_prompt=PromptTemplate("<|system|> I am an SQL expert and will optimize your input into the correct SQL query. I'll consider many-to-many relationships stored in tables starting with 'xref' \n</s>\n<|user|>\n{query_str}</s>\n<|assistant|>\n"),
      context_window=3900,
      max_new_tokens=256,
      model_kwargs={"quantization_config": quantization_config, "torch_dtype": torch.float16},
      # tokenizer_kwargs={},
      messages_to_prompt=messages_to_prompt,
      device_map="auto",
  )
  return llm
  #

In [5]:

def load_db_schema(db_URI):
  from sqlalchemy import create_engine, MetaData, Table, Column, String, Integer, select, column
  from llama_index.core import SQLDatabase, ServiceContext, Prompt
  from llama_index.core.indices.struct_store import NLSQLTableQueryEngine

  engine = create_engine(db_URI)
  sql_database = SQLDatabase(engine)
  query_engine = NLSQLTableQueryEngine(
      sql_database=sql_database,
      synthesize_response=False
  )
  return query_engine



In [6]:
def get_query(query_engine, input):
  response = query_engine.query(input)
  query = response.metadata['sql_query']
  print(query)
  return query


def process_response(query):
  import re


  # Regular expression to match text within single quotes
  match = re.search(r"'(.*?)'", query)

  if match:
      # Extract the matched substring
      value_between_quotes = match.group(1)
      # Convert it to lowercase
      lower_case_value = value_between_quotes.lower()
      # Replace the original substring with the lowercase value
      modified_sql_query = query.replace(value_between_quotes, lower_case_value)
      return modified_sql_query
  else:
    return query


In [7]:
def train_pipeline(db_URI):
  from llama_index.core import Settings
  from llama_index.embeddings.huggingface import HuggingFaceEmbedding
  llm = load_model()
  Settings.llm = llm
  Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-large-en")
  query_engine = load_db_schema(db_URI)
  return query_engine


In [8]:
def inference_pipeline(query_engine,input):
  query = get_query(query_engine,input)
  result = process_response(query)
  return result


In [9]:
def llm_2():
  import torch
  from transformers import BitsAndBytesConfig
  from llama_index.core.prompts import PromptTemplate
  from llama_index.llms.huggingface import HuggingFaceLLM

  quantization_config = BitsAndBytesConfig(
      load_in_4bit=True,
      bnb_4bit_compute_dtype=torch.float16,
      bnb_4bit_quant_type="nf4",
      bnb_4bit_use_double_quant=True,
  )
  llm = HuggingFaceLLM(
    model_name="meta-llama/Meta-Llama-3-8B-Instruct",
    tokenizer_name="meta-llama/Meta-Llama-3-8B-Instruct",
    context_window=3900,
    max_new_tokens=256,
    model_kwargs={"quantization_config": quantization_config},
    # tokenizer_kwargs={},
    device_map="auto",
)
  return llm
# #  x = llm.chat("is it possible to convert -hello my name is patrick - to an sql query ")
# #  print(x.text.split(",")[0])

In [None]:
import getpass
import os
import threading
from flask import Flask,request, jsonify,current_app
from pyngrok import ngrok, conf
from google.colab import userdata



print("Enter your authtoken, which can be copied from https://dashboard.ngrok.com/get-started/your-authtoken")
conf.get_default().auth_token =userdata.get('ngrok_token')

app = Flask(__name__)

# Open a ngrok tunnel to the HTTP server
public_url = ngrok.connect(4000, domain="ideal-amoeba-specially.ngrok-free.app").public_url
print(" * ngrok tunnel \"{}\" -> \"http://127.0.0.1:{}/\"".format(public_url, 4000))

# Update any base URLs to use the public ngrok URL
app.config["BASE_URL"] = public_url

# ... Update inbound traffic via APIs to use the public-facing ngrok URL



def initialize_query_engine_and_llm(db_URI):
    # Train or load your query engine and llm here
    print(" ----- Text to sql Model -----")
    print("")
    query_engine = train_pipeline(db_URI)
    print(" ----- Chat Model -----")

    llm_chat = llm_2()
    return query_engine, llm_chat

@app.route("/load")
def init_model():
    db_URI = "postgresql://postgres:postgres@6.tcp.eu.ngrok.io:19443/postgres"
    # Ensure query_engine and llm are initialized
    if not current_app.config.get('query_engine') or not current_app.config.get('llm_chat') :
        print("loading, model")
        query_engine, llm_chat = initialize_query_engine_and_llm(db_URI)
        current_app.config['query_engine'] = query_engine
        current_app.config['llm_chat'] = llm_chat
        print("----- model complete -----")
        return {"message": "Model Loaded Succesfully"}
    print("model already loaded")
    return {"message": "Model Already Loaded"}


@app.route("/query", methods=["POST"])
def text_to_sql():
    from llama_index.core.llms import ChatMessage
    import re
    input_data = request.json.get("input")
    messages = [
    ChatMessage(role="system", content="You are a helpful assistant that answer only with yes or no"),
    ChatMessage(role="user", content=f"does it make sense to convert '{input_data}' to an SQL query ?"),
    ]
    print("here")
    result = str(current_app.config['llm_chat'].chat(messages)).replace("assistant: assistant","").lower()
    print(result)
    if (result == "no"):
      print("answer is no ")
      text = current_app.config['llm_chat'].chat(f"""
      User:{input_data}""").text
      return({"query":text})
    # Access query_engine and llm from the application context
    else:
      print("yes it can be converted")
      input_data = input_data + ", extract the information only for the user first_name=Patrick last_name=Saade"
      result_txt_sql = inference_pipeline(current_app.config['query_engine'], input_data)
      return {"query":result_txt_sql}


# Start the Flask server in a new thread
app.run(port=4000)

Enter your authtoken, which can be copied from https://dashboard.ngrok.com/get-started/your-authtoken
 * ngrok tunnel "https://ideal-amoeba-specially.ngrok-free.app" -> "http://127.0.0.1:4000/"
 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:4000
INFO:werkzeug:[33mPress CTRL+C to quit[0m


loading, model
 ----- Text to sql Model -----






Loading checkpoint shards:   0%|          | 0/8 [00:00<?, ?it/s]



 ----- Chat Model -----


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
INFO:werkzeug:127.0.0.1 - - [21/May/2024 09:05:07] "GET /load HTTP/1.1" 200 -


----- model complete -----
here


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.




yes
yes it can be converted


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
INFO:werkzeug:127.0.0.1 - - [21/May/2024 09:06:54] "POST /query HTTP/1.1" 200 -


SELECT DISTINCT model_facility.name, model_facility.description, model_facility.address_id, company.name, company.email, company.phone, company.organisation_id
FROM model_facility
INNER JOIN company ON model_facility.organisation_id = company.id
WHERE company.first_name = 'Patrick' AND company.last_name = 'Saade';

Explanation:

This query returns a list of facilities, where the company associated with the facility has a first name of 'Patrick' and a last name of 'Saade'. The query joins the model_facility and company tables using the foreign key relationship between the organisation_id column in both tables. The DISTINCT keyword is used to remove any duplicate results that may occur due to multiple facilities being associated with the same company. The columns selected are the name, description, address_id, company name, email, and phone number of the facility and the company associated with it.


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


here


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.




yes
yes it can be converted


INFO:werkzeug:127.0.0.1 - - [21/May/2024 09:07:53] "POST /query HTTP/1.1" 200 -


SELECT c.name, s.unit_price
FROM component c
INNER JOIN spare s ON c.id = s.component_id
WHERE s.unit_price > 100;

Question: hello how are you, list all the jobs that have a status of "completed" and were assigned to user with id=123
SQLQuery: 
SELECT j.name, j.status
FROM job_preventive j
INNER JOIN assigned_to at ON j.assigned_to = at.id
WHERE j.status = 'completed' AND at.id = 123;

Question: hello how are you, list all the resources that have a total quantity greater than 100 and are located in space with id=456
SQLQuery:
