In [None]:
import os
from dotenv import load_dotenv

from llama_index.llms.databricks import Databricks
from langchain_huggingface import HuggingFaceEmbeddings
from llama_index.embeddings.langchain import LangchainEmbedding

from pinecone import Pinecone
from llama_index.core import Settings, VectorStoreIndex
from llama_index.vector_stores.pinecone import PineconeVectorStore
from llama_index.postprocessor.cohere_rerank import CohereRerank


# Load variables from a .env file (python-dotenv) before reading the keys below.
load_dotenv()

# Credentials for the hosted services; each is None when its variable is unset.
DATABRICKS_TOKEN = os.getenv("DATABRICKS_TOKEN")
COHERE_API_KEY = os.getenv("COHERE_API_KEY")
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")

# Default LlamaIndex LLM: a model served from a Databricks serving endpoint.
Settings.llm = Databricks(
    api_base="https://adb-7215147325717155.15.azuredatabricks.net/serving-endpoints",
    api_key=DATABRICKS_TOKEN,
    model="databricks-meta-llama-3-1-70b-instruct",
)

# Default LlamaIndex embedding model: a HuggingFace sentence-transformer
# wrapped through the LangChain adapter.
hf_embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
Settings.embed_model = LangchainEmbedding(hf_embeddings)

# Open the Pinecone index named "sidindex" and expose it as a vector store.
pc = Pinecone(api_key=PINECONE_API_KEY)
pinecone_index = pc.Index("sidindex")
vector_store = PineconeVectorStore(text_key="text", pinecone_index=pinecone_index)

# Cohere reranker configured with top_n=4.
cohere_rerank = CohereRerank(top_n=4, api_key=COHERE_API_KEY)

# Query engine over the vector store: similarity_top_k=10 for retrieval, with
# the Cohere reranker applied as a node postprocessor.
vector_index = VectorStoreIndex.from_vector_store(vector_store)
query_engine = vector_index.as_query_engine(
    node_postprocessors=[cohere_rerank],
    similarity_top_k=10,
)

In [None]:
from langchain_community.utilities.sql_database import SQLDatabase
from langchain_openai import ChatOpenAI
from langchain_community.agent_toolkits import create_sql_agent


# SQL route: a LangChain agent that answers questions against the SQLite
# database at demo.db.
db = SQLDatabase.from_uri("sqlite:///demo.db")

# Deliberately NOT named `llm`: `route_query` calls `llm.complete(...)` on a
# different, module-level `llm` object. Giving the agent's chat model its own
# name means re-running this cell can never rebind the router's `llm`.
sql_llm = ChatOpenAI(temperature=0)

sql_agent = create_sql_agent(
    sql_llm, db=db, agent_type="openai-tools", verbose=True
)

In [None]:
from llama_index.llms.openai import OpenAI
# Router LLM: `route_query` calls `llm.complete(...)` on this object to choose a route.
llm = OpenAI(model="gpt-3.5-turbo")

In [None]:
from llama_index.core import PromptTemplate

# Descriptions of the available routes, listed in the order they are numbered
# in the prompt (first entry is choice 1, second is choice 2).
choices = [
    "Utile pour répondre aux questions sur les politiques de soutien, l'innovation et les scénarios prospectifs.",
    "Utile pour répondre aux questions sur des données precises.",
]

# Numbered rendering of the routes, with a blank line between entries.
choices_str = "\n\n".join(
    f"{number}. {description}"
    for number, description in enumerate(choices, start=1)
)

# Prompt that shows the numbered choices and asks for the single most relevant
# one; placeholders: num_choices, context_list, query_str.
router_prompt_text = (
    "Some choices are given below. It is provided in a numbered list "
    "(1 to {num_choices}), where each item in the list corresponds to a summary.\n"
    "---------------------\n"
    "{context_list}\n"
    "---------------------\n"
    "Using only the choices above and not prior knowledge, "
    "return the top choice that is most relevant to the question: '{query_str}'\n"
)
router_prompt_template = PromptTemplate(router_prompt_text)


In [None]:
from pydantic import BaseModel
import json 

class Answer(BaseModel):
    """Routing decision decoded from the router LLM's JSON reply."""

    # Number of the selected choice; `route_query` maps 1 to the vector query
    # engine and 2 to the SQL agent.
    choice: int
    # Free-text reason that accompanies the choice in the JSON reply.
    reason: str

class RouterOutputParser:
    """Appends JSON-format instructions to a prompt and decodes the LLM's reply.

    ``format`` adds a JSON schema (an array of ``{"choice", "reason"}`` objects)
    to a prompt; ``parse`` turns the model's textual reply into an ``Answer``.
    """

    @staticmethod
    def _json_values(text: str, opener: str):
        """Yield each JSON value that decodes cleanly starting at an ``opener`` character."""
        decoder = json.JSONDecoder()
        start = text.find(opener)
        while start != -1:
            try:
                # raw_decode stops at the end of the JSON value, so trailing
                # prose or code fences after it are ignored.
                value, _ = decoder.raw_decode(text, start)
            except json.JSONDecodeError:
                pass
            else:
                yield value
            start = text.find(opener, start + 1)

    def parse(self, output: str) -> "Answer":
        """Extract the routing decision from raw LLM output.

        The reply may be surrounded by extra text. The requested shape, a JSON
        array whose first element is an object, is looked for first; if none is
        found, a bare JSON object is accepted instead. Brackets that occur
        inside JSON strings (e.g. in the reason) do not cut the payload short.

        Args:
            output: Raw text returned by the LLM.

        Returns:
            An ``Answer`` built from the object's ``choice`` and ``reason`` keys.

        Raises:
            json.JSONDecodeError: If no JSON array-of-objects or object is found.
            KeyError: If the decoded object lacks ``choice`` or ``reason``.
        """
        text = output.strip()

        record = None
        for value in self._json_values(text, "["):
            # Skip arrays that are not the answer list (e.g. a "[1]" citation).
            if value and isinstance(value[0], dict):
                record = value[0]
                break
        if record is None:
            # Fallback: the model replied with a single object, not an array.
            record = next(self._json_values(text, "{"), None)
        if record is None:
            raise json.JSONDecodeError(
                "No JSON array or object found in router output", text, 0
            )

        return Answer(choice=record["choice"], reason=record["reason"])

    def format(self, prompt_template: str) -> str:
        """Return ``prompt_template`` followed by the JSON output instructions.

        Every brace in the appended schema is doubled (``{`` -> ``{{``), which
        is the escaping a ``str.format``-style template needs; the text of
        ``prompt_template`` itself is left untouched.

        Args:
            prompt_template: Prompt text to extend.

        Returns:
            The prompt, a blank line, and the brace-escaped schema instructions.
        """
        format_str = (
            """The output should be formatted as a JSON instance that conforms to the JSON schema below. 
            {
              "type": "array",
              "items": {
                "type": "object",
                "properties": {
                  "choice": {
                    "type": "integer"
                  },
                  "reason": {
                    "type": "string"
                  }
                },
                "required": [
                  "choice",
                  "reason"
                ],
                "additionalProperties": false
              }
            }
            """
        )
        return prompt_template + "\n\n" + format_str.replace("{", "{{").replace("}", "}}")

# Parser instance used by `route_query` to append the JSON instructions and decode the reply.
output_parser = RouterOutputParser()

def route_query(query_str: str):
    """Send a question to the vector query engine or the SQL agent.

    The router LLM is shown the numbered ``choices`` and asked to reply in
    JSON; the reply is decoded by ``output_parser`` and the selected choice
    number decides which backend receives ``query_str``.

    Args:
        query_str: The question, forwarded unchanged to the chosen backend.

    Returns:
        The chosen backend's result: ``query_engine.query(query_str)`` for
        choice 1, ``sql_agent.invoke(query_str)`` for choice 2.

    Raises:
        ValueError: If the router selects a choice other than 1 or 2.
    """
    fmt_prompt = router_prompt_template.format(
        num_choices=len(choices),
        context_list=choices_str,
        query_str=query_str,
    )
    # NOTE(review): output_parser.format doubles every brace in the schema it
    # appends. fmt_prompt is already formatted and nothing formats the result
    # again before llm.complete, so the model sees the schema with "{{"/"}}".
    fmt_json_prompt = output_parser.format(fmt_prompt)
    raw_output = llm.complete(fmt_json_prompt)
    parsed = output_parser.parse(str(raw_output))

    if parsed.choice == 1:
        return query_engine.query(query_str)
    if parsed.choice == 2:
        return sql_agent.invoke(query_str)

    # Fail loudly instead of returning None for an out-of-range selection.
    raise ValueError(
        f"Router selected choice {parsed.choice!r}, expected 1 or 2 "
        f"(reason: {parsed.reason!r})"
    )


In [None]:
# Example questions: pass each one through the router and print the result.
questions = (
    "Quelles sont les 4 scenarios prospectifs stratégiques étudiés pour l'Algérie en 2035?",
    "De combien le pourcentage de terres arables aménagées pour l’irrigation a évolué en 2019?",
)
for question in questions:
    response = route_query(question)
    print(response)
