# Setup Environment

In [None]:
# To run locally, uncomment the below
# ! pip install -e ../

# To run in Onyx, keep the two lines below active: they append the parent
# directory to the import path before the `onyxgenai` imports in the next cell.
# NOTE(review): presumably the package lives one directory up from this
# notebook -- confirm against the repository layout.
import sys
sys.path.append("..")

In [None]:
from onyxgenai.embed import Embedder
from onyxgenai.model import ModelStore

# Model identifiers reused by every later cell.
embedding_model = "all-MiniLM-L6-v2"
llm_model = "Mistral-7B-Instruct-v0.3"

# Vector collection to search and the demo question to answer.
collection_name = "test_collection"
query = "What is the capital of France?"

# Client for the embedding / vector-search service.
embedding_service = Embedder(
    "http://embed.onyx-services/",
    model=embedding_model,
    model_version=1,
    num_workers=2,
    collection_name=collection_name,
)

# Client used to serve, query and stop models.
model_service = ModelStore("http://store.onyx-services/")

# Start/Serve Embedding Model

In [None]:
# Deploy the embedding model with two replicas and print whatever
# `serve_model` hands back for the deployment.
embedding_app_name = model_service.serve_model(
    model=embedding_model,
    model_version=1,
    replicas=2,
    options={
        "num_cpus": 2,
        # 8000 * 1024 * 1024; reads as 8000 MiB if the unit is bytes -- TODO confirm.
        "memory": 8000 * 1024**2,
    },
)
print(embedding_app_name)

# Start/Serve LLM

In [None]:
# Deploy a single replica of the LLM with no extra serving options and print
# the value returned for the deployment.
llm_app_name = model_service.serve_model(
    model=llm_model,
    model_version=1,
    replicas=1,
    options={},
)
print(llm_app_name)

# Embed Prompt for Vector Search

In [None]:
# The prediction call receives a list, so wrap the single query in one.
data = [query]

# NOTE(review): the app is addressed by the model name, not by the
# `embedding_app_name` value returned when the model was served -- confirm the
# two are interchangeable.
embeddings = model_service.predict_text(
    app_name=embedding_model,
    data=data,
)
print(embeddings)

# Perform Vector Search

In [None]:

# Only one query was embedded, so take the first entry under "embeddings"
# (assumes the response keeps embeddings in input order -- TODO confirm).
embedded_prompt = embeddings["embeddings"][0]

# Search the collection with the query vector and keep the "results" entry.
vector_data = embedding_service.vector_search(
    embedded_prompt,
    collection_name,
)
vector_data_results = vector_data["results"]
print(vector_data_results)

# Execute a RAG Search

In [None]:
# Build the knowledge context from the text payload of every vector-search hit.
# - A falsy result set (None or empty) yields an empty context, as before.
# - The generator variable stays local to the expression, so the notebook-level
#   `data` list from the embedding cell is no longer overwritten by a hit dict.
# - Passages are joined with a newline so the end of one chunk does not run
#   straight into the start of the next inside the prompt.
context = "\n".join(
    hit["payload"]["text"] for hit in (vector_data_results or [])
)

# Prompt that restricts the model to the retrieved context and asks it to say
# so when the context does not contain the answer.
prompt = f"""
Context Information:
---------------------
{context}
---------------------
Answer the following question using only the context information. If the question cannot be answered, say so.
Query: {query}
Answer:
"""  # noqa: E501

# Ask the served LLM to complete the prompt; the reply is printed in the next cell.
answer = model_service.generate_text(app_name=llm_model, prompt=prompt)

In [None]:
# Show the value returned by `generate_text` in the previous cell.
print(answer)

# Stop/Shutdown Models

In [None]:
# Shut down both served models, embedding model first, then the LLM.
# NOTE(review): the models are addressed by model name rather than by the
# values returned from `serve_model` -- confirm these are equivalent.
# NOTE(review): if the first call raises, the LLM is left running.
model_service.stop_model(embedding_model)
model_service.stop_model(llm_model)
