# Setup Environment

In [None]:
# To run locally, uncomment the below
# ! pip install -e ../

# To run in Onyx, keep the two lines below enabled (they are already uncommented)
import sys
sys.path.append("..")

from onyxgenai.embed import EmbeddingClient
from onyxgenai.model import ModelClient

# Name of the vector collection; passed to the EmbeddingClient here and to
# vector_search() later in the notebook.
collection_name = "test_collection"

# Client for the embedding service.
embedding_client = EmbeddingClient(
    "http://embed.onyx-services",
    model_name="all-MiniLM-L6-v2",
    model_version=1,
    num_workers=2,
    collection_name=collection_name,
)

# Model-store client for the MiniLM embedding model deployment.
embedding_model_client = ModelClient(
    "http://store.onyx-services",
    model_name="all-MiniLM-L6-v2",
    model_version=1,
    replicas=2,
    deployment_name="MiniLM",
    options={
        "num_cpus": 2,
        # 8000 * 1024 * 1024 = 8,388,608,000
        # (presumably bytes, i.e. 8000 MiB -- TODO confirm the unit).
        "memory": 8000 * 1024 * 1024,
    },
)

# Model-store client for the Mistral LLM deployment; no extra options are set.
llm_client = ModelClient(
    "http://store.onyx-services",
    model_name="Mistral-7B-Instruct-v0.3",
    model_version=1,
    replicas=1,
    deployment_name="Mistral",
    options={},
)

# Deploy Embedding Model

In [None]:
# Deploy the embedding model through the ModelClient configured in the setup
# cell (deployment_name="MiniLM").
embedding_model_client.deploy_model()

# Deploy LLM

In [None]:
# Deploy the LLM through the ModelClient configured in the setup cell
# (deployment_name="Mistral").
llm_client.deploy_model()

# Embed Prompt for Vector Search

In [None]:
# The user question: embedded here for the vector search and inserted verbatim
# into the RAG prompt later in the notebook.
query = "What is the capital of France?"
# embed_text() is called with a list, so the single query is wrapped in one.
data = [query]
embeddings = embedding_model_client.embed_text(data)

# Perform Vector Search

In [None]:
# Run the vector search with the query embeddings against the collection
# named by collection_name.
vector_data = embedding_client.vector_search(embeddings, collection_name)
# The response is indexed like a mapping; the hits are read from "results".
# NOTE(review): a response without a "results" key raises KeyError here.
vector_data_results = vector_data["results"]
print(vector_data_results)

# Execute a RAG Search

In [None]:
# Append RAG Data for Knowledge
# Each search hit is read at hit["payload"]["text"]. The passages are joined
# with a newline so adjacent chunks do not run together in the prompt, and an
# empty or missing result list yields an empty context. The generator's loop
# name stays local, so the `data` list from the embedding cell is not
# overwritten.
context = "\n".join(
    hit["payload"]["text"] for hit in (vector_data_results or [])
)

# Prompt template: retrieved context first, then the instruction and the
# original user query.
prompt = f"""
Context Information:
---------------------
{context}
---------------------
Answer the following question using only the context information. If the question cannot be answered, say so.
Query: {query}
Answer:
"""  # noqa: E501

# Send the assembled prompt to the deployed LLM and show its reply.
answer = llm_client.generate_completion(prompt)
print(answer)

# Delete Model Deployments

In [None]:
# Remove both deployments created earlier in the notebook: the embedding
# model first, then the LLM.
embedding_model_client.delete_deployment()
llm_client.delete_deployment()