In [None]:
%pip install mlflow>=2.15 llama-index>=0.10.44 -q


In [2]:
import os
from getpass import getpass

from llama_index.core import Document, VectorStoreIndex
from llama_index.core.llms import ChatMessage

import mlflow

In [3]:
import os
from llama_index.core import Settings
from llama_index.llms.ollama import Ollama
from llama_index.embeddings.ollama import OllamaEmbedding

# Both Ollama-backed components below point at the same local server address.
OLLAMA_BASE_URL = "http://localhost:11434"

# Chat/completion model. "llama3.2:latest" was the previously tried alternative.
ollama_llm = Ollama(model="mistral:7b", base_url=OLLAMA_BASE_URL, temperature=0.1)

# Embedding model, with its extra Ollama options kept in one named dict.
embedding_extra_options = {"mirostat": 0}
ollama_embedding = OllamaEmbedding(
    model_name="nomic-embed-text:latest",
    base_url=OLLAMA_BASE_URL,
    ollama_additional_kwargs=embedding_extra_options,
)

# Store both models on the shared LlamaIndex Settings object.
# NOTE(review): presumably this makes them the defaults for the index and
# query engine built in later cells — confirm against the LlamaIndex docs.
Settings.llm = ollama_llm
Settings.embed_model = ollama_embedding

In [4]:
# Apply the nest_asyncio patch once, before the LlamaIndex/MLflow cells run.
# NOTE(review): presumably needed because the Jupyter kernel already runs an
# asyncio event loop that later library calls must re-enter — confirm.
import nest_asyncio
nest_asyncio.apply()

In [None]:
# Build a one-document vector index from the sample document that
# Document.example() returns, printing the document first for reference.
print("------------- Example Document used to Enrich LLM Context -------------")
llama_index_example_document = Document.example()
print(llama_index_example_document)
index = VectorStoreIndex.from_documents([llama_index_example_document])

# The same question is sent through both access paths so the outputs can be
# compared side by side.
example_question = "What is llama_index?"

print("\n------------- Example Query Engine -------------")
query_engine = index.as_query_engine()
query_response = query_engine.query(example_question)
print(query_response)

print("\n------------- Example Retriever   -------------")
retriever = index.as_retriever()
retriever_response = retriever.retrieve(example_question)
print(retriever_response)

In [None]:
# Enable MLflow autologging for LlamaIndex (this is what turns tracing on).
mlflow.llama_index.autolog()

# Single source of truth for the artifact location: it is used both when
# logging the model and when building the URI to load it back.
MODEL_ARTIFACT_PATH = "llama_index"

log_model_options = {
    "artifact_path": MODEL_ARTIFACT_PATH,
    # Defines the pyfunc and spark_udf inference type.
    "engine_type": "query",
    # Used to infer the model signature.
    "input_example": "hi",
    # Stores an instance in the model registry.
    "registered_model_name": "my_llama_index_vector_store",
}

with mlflow.start_run() as run:
    mlflow.llama_index.log_model(index, **log_model_options)

    run_id = run.info.run_id
    model_uri = "runs:/{}/{}".format(run_id, MODEL_ARTIFACT_PATH)
    print("Unique identifier for the model location for loading: " + model_uri)

In [None]:
# Load the logged model back through both MLflow loaders and send each the
# same prompt. Each loaded object gets its own name so the notebook-level
# `index` built earlier is never overwritten: rebinding it to the PyFunc
# wrapper would break a re-run of the logging cell, which passes `index` to
# mlflow.llama_index.log_model.
print("\n------------- Inference via Llama Index   -------------")
loaded_index = mlflow.llama_index.load_model(model_uri)
query_response = loaded_index.as_query_engine().query("hi")
print(query_response)

print("\n------------- Inference via MLflow PyFunc -------------")
pyfunc_model = mlflow.pyfunc.load_model(model_uri)
query_response = pyfunc_model.predict("hi")
print(query_response)

In [9]:
import os
import subprocess
from IPython.display import IFrame

# Start the MLflow UI in a background process so this cell returns immediately
# while the server keeps running on port 5000.
mlflow_ui_command = ["mlflow", "ui", "--port", "5000"]

# Putting the child in its own process group is platform-specific:
# subprocess.CREATE_NEW_PROCESS_GROUP is only defined on Windows (referencing
# it on Linux/macOS raises AttributeError), while POSIX gets the equivalent
# isolation from start_new_session=True.
if os.name == "nt":
    detach_options = {"creationflags": subprocess.CREATE_NEW_PROCESS_GROUP}
else:
    detach_options = {"start_new_session": True}

# Server output is discarded rather than piped: nothing in this notebook reads
# the pipes, and an unread PIPE can fill its OS buffer and block the server.
process = subprocess.Popen(
    mlflow_ui_command,
    stdout=subprocess.DEVNULL,
    stderr=subprocess.DEVNULL,
    **detach_options,
)
