# Setup Environment

In [None]:
# To run locally, uncomment the below
# ! pip install -e ../

# To run in Onyx, keep the below enabled (it is already uncommented)
import sys
sys.path.append("..")

from onyxgenai.embed import Embedder
from onyxgenai.model import ModelStore

# Name of the vector collection; it is passed to the Embedder here and to
# vector_search in the vector-search cell.
collection_name = "test_collection"

# Client for the embedding service.
embedding_service = Embedder(
    "http://embed.onyx-services",
    model_name="all-MiniLM-L6-v2",
    model_version=1,
    num_workers=2,
    collection_name=collection_name,
)

# Client for the model store, used below to deploy, query and delete models.
model_service = ModelStore("http://store.onyx-services")

# Deploy Embedding Model

In [None]:
# Deploy the embedding model and keep the returned value; later cells pass it
# as `deployment_name` to embed_text and to delete_deployment.
# NOTE(review): "memory" is presumably in bytes (8000 * 1024 * 1024) - confirm
# against the ModelStore API.
embedding_deployment_name = model_service.deploy_model(
    model_name="all-MiniLM-L6-v2",
    model_version=1,
    replicas=2,
    options={"num_cpus": 2, "memory": 8000 * 1024 * 1024},
    deployment_name="MiniLM",
)

# Deploy LLM

In [None]:
# Deploy the LLM with an empty options dict and keep the returned value; later
# cells pass it as `deployment_name` to generate_completion and to
# delete_deployment.
llm_deployment_name = model_service.deploy_model(
    model_name="Mistral-7B-Instruct-v0.3",
    model_version=1,
    replicas=1,
    options={},
    deployment_name="Mistral",
)

# Embed Prompt for Vector Search

In [None]:
# The user question; it is reused verbatim in the RAG prompt further down.
query = "What is the capital of France?"

# embed_text is given a list, so the single query is wrapped in one.
data = [query]

# Embed the query with the previously deployed embedding model.
embeddings = model_service.embed_text(
    deployment_name=embedding_deployment_name,
    data=data,
)

# Perform Vector Search

In [None]:
# Search the collection with the query embeddings produced in the previous cell.
vector_data = embedding_service.vector_search(embeddings, collection_name)
# Keep only the "results" entry of the response; the RAG cell iterates over it.
vector_data_results = vector_data["results"]
print(vector_data_results)

# Execute a RAG Search

In [None]:
# Build the RAG context by concatenating the text payload of every search hit.
# The generator uses its own name (`hit`) so the notebook-level `data` list
# created in the embedding cell is no longer overwritten by this cell, and
# `str.join` replaces the repeated string concatenation. `or []` keeps the
# original behaviour of an empty context when there are no results.
context = "".join(hit["payload"]["text"] for hit in vector_data_results or [])

# Prompt template: retrieved context first, then the instruction and the query.
prompt = f"""
Context Information:
---------------------
{context}
---------------------
Answer the following question using only the context information. If the question cannot be answered, say so.
Query: {query}
Answer:
"""  # noqa: E501

# Send the assembled prompt to the deployed LLM and show its answer.
answer = model_service.generate_completion(deployment_name=llm_deployment_name, data=prompt)
print(answer)

# Delete Model Deployments

In [None]:
# Tear down both deployments created earlier, embedding model first, then LLM.
for _deployment_name in (embedding_deployment_name, llm_deployment_name):
    model_service.delete_deployment(_deployment_name)