# Setup Environment

In [None]:
# To run locally, uncomment the below
# ! pip install -e ../

# To run in Onyx, keep the two lines below enabled: they add the parent directory to the import path
import sys
sys.path.append("..")

from onyxgenai.embed import EmbeddingClient
from onyxgenai.model import ModelClient

# Name and version of each model used by the cells below
embedding_model_name, embedding_model_version = "all-MiniLM-L6-v2", "1"
language_model_name, language_model_version = "Mistral-7B-Instruct-v0.3", "1"

# One client per service endpoint
embedding_client = EmbeddingClient(
    "http://embed.onyx-services",
)
model_client = ModelClient(
    "http://store.onyx-services",
)

# Deploy Embedding Model

In [None]:
# Request a deployment of the embedding model and show the service's reply.
response = model_client.deploy_model(
    embedding_model_name,
    embedding_model_version,
    2,  # presumably the replica count -- confirm against ModelClient.deploy_model
    {
        "num_cpus": 2,
        "memory": 8000 * 1024**2,  # same value as 8000 * 1024 * 1024
    },
)
print(response)

# Deploy LLM

In [None]:
# Request a deployment of the language model and show the service's reply.
response = model_client.deploy_model(
    language_model_name,
    language_model_version,
    1,  # presumably the replica count -- confirm against ModelClient.deploy_model
    {},  # no extra deployment options are passed for the LLM
)
print(response)

# Embed Prompt for Vector Search

In [None]:
# The question to answer; reused later when the RAG prompt is built.
query = "What is the capital of France?"

# Embed the question with the embedding model so it can be used for vector search.
embeddings = model_client.embed_text(
    query,
    embedding_model_name,
)

# Perform Vector Search

In [None]:
# Search the "test_collection" collection using the query embeddings
# and print whatever the service returns.
vector_data = embedding_client.vector_search(
    embeddings,
    "test_collection",
)
print(vector_data)

# Execute a RAG Search

In [None]:
# Build the grounding context from the vector-search results.
# Each result is indexed as result["payload"]["text"]. `vector_data or []`
# keeps the original guard: an empty or None result yields an empty context.
# Passages are separated by a blank line so the end of one chunk does not
# run straight into the start of the next inside the prompt.
context = "\n\n".join(hit["payload"]["text"] for hit in vector_data or [])

# Prompt that restricts the model to the retrieved context.
prompt = f"""
Context Information:
---------------------
{context}
---------------------
Answer the following question using only the context information. If the question cannot be answered, say so.
Query: {query}
Answer:
"""  # noqa: E501

# Single-turn chat payload carrying the prompt as the user message.
messages = [
    {"role": "user", "content": prompt},
]

# Ask the deployed language model for an answer and display it.
answer = model_client.generate_text(messages, model_name=language_model_name)
print(answer)

# Delete Model Deployments

In [None]:
# Remove both deployments created earlier, embedding model first.
for deployed_model_name in (embedding_model_name, language_model_name):
    model_client.delete_deployment(deployed_model_name)