In [1]:
%pip install mlflow>=2.15 llama-index>=0.10.44 -q


Note: you may need to restart the kernel to use updated packages.


In [2]:
import os
from getpass import getpass

from llama_index.core import Document, VectorStoreIndex
from llama_index.core.llms import ChatMessage

import mlflow

In [3]:
import os
from llama_index.core import Settings
from llama_index.llms.ollama import Ollama
from llama_index.embeddings.ollama import OllamaEmbedding

# The LLM and the embedding model are both reached through the same Ollama endpoint.
OLLAMA_BASE_URL = "http://localhost:11434"

# Keyword arguments for the LLM (previously tried model: "llama3.2:latest").
llm_options = dict(
    model="mistral:7b",
    base_url=OLLAMA_BASE_URL,
    temperature=0.1,
)

# Keyword arguments for the embedding model.
embedding_options = dict(
    model_name="nomic-embed-text:latest",
    base_url=OLLAMA_BASE_URL,
    ollama_additional_kwargs={"mirostat": 0},
)

ollama_llm = Ollama(**llm_options)
ollama_embedding = OllamaEmbedding(**embedding_options)

# Install both on the shared Settings object so later cells pick them up
# without having to pass them explicitly.
Settings.llm, Settings.embed_model = ollama_llm, ollama_embedding

In [4]:
# Patch asyncio so an event loop can be re-entered while another one is already
# running, as is the case inside a notebook kernel.
# NOTE(review): presumably required by async code paths in LlamaIndex/MLflow used
# by the cells below — confirm before removing.
import nest_asyncio
nest_asyncio.apply()

In [5]:
# The same question is sent to both the query engine and the retriever.
example_question = "What is llama_index?"

# Build a vector index over the single sample document provided by Document.example().
print("------------- Example Document used to Enrich LLM Context -------------")
llama_index_example_document = Document.example()
print(llama_index_example_document)
index = VectorStoreIndex.from_documents([llama_index_example_document])

# Query engine: ask the question through the index and print the response.
print("\n------------- Example Query Engine -------------")
query_engine = index.as_query_engine()
query_response = query_engine.query(example_question)
print(query_response)

# Retriever: run the same question through the index's retriever interface.
print("\n------------- Example Retriever   -------------")
retriever = index.as_retriever()
retriever_response = retriever.retrieve(example_question)
print(retriever_response)

------------- Example Document used to Enrich LLM Context -------------
Doc ID: 940539c0-d8b0-4769-a5ba-075b6e24bfa1
Text: Context LLMs are a phenomenal piece of technology for knowledge
generation and reasoning. They are pre-trained on large amounts of
publicly available data. How do we best augment LLMs with our own
private data? We need a comprehensive toolkit to help perform this
data augmentation for LLMs.  Proposed Solution That's where LlamaIndex
comes in. Ll...

------------- Example Query Engine -------------
 LlamaIndex is a data framework designed to help build applications utilizing Language Learning Models (LLMs). It offers tools for ingesting various data sources, structuring data, providing an advanced retrieval/query interface over the data, and integrating with outer application frameworks. It caters to both beginner and advanced users, offering a high-level API for easy usage and a lower-level API for customization and extension.

------------- Example Retriever   ---

In [6]:
# Enable MLflow tracing for LlamaIndex.
mlflow.llama_index.autolog()

# Artifact location inside the run; used for logging and for building the load URI.
artifact_path = "llama_index"

with mlflow.start_run() as run:
    mlflow.llama_index.log_model(
        index,
        artifact_path=artifact_path,
        # Inference type exposed through pyfunc and spark_udf.
        engine_type="query",
        # Example input from which the model signature is inferred.
        input_example="hi",
        # Also store an instance in the model registry under this name.
        registered_model_name="my_llama_index_vector_store",
    )

    run_id = run.info.run_id
    model_uri = f"runs:/{run_id}/{artifact_path}"
    print(f"Unique identifier for the model location for loading: {model_uri}")

2024/12/23 15:39:48 INFO mlflow.llama_index.serialize_objects: API key(s) will be removed from the global Settings object during serialization to protect against key leakage. At inference time, the key(s) must be passed as environment variables.


Downloading artifacts:   0%|          | 0/12 [00:00<?, ?it/s]

Unique identifier for the model location for loading: runs:/b3d9992b0c144dd3848463a7c1b7b73f/llama_index


Registered model 'my_llama_index_vector_store' already exists. Creating a new version of this model...
Created version '2' of model 'my_llama_index_vector_store'.


In [7]:
# Load the logged model back in two ways and send the same prompt to each.
# Distinct names are used for the loaded objects so the original `index`
# (the VectorStoreIndex passed to log_model) is not overwritten; rebinding it
# would make a re-run of the logging cell log the wrong object.

print("\n------------- Inference via Llama Index   -------------")
# Native flavor: gives back a LlamaIndex object that exposes as_query_engine().
loaded_index = mlflow.llama_index.load_model(model_uri)
query_response = loaded_index.as_query_engine().query("hi")
print(query_response)

print("\n------------- Inference via MLflow PyFunc -------------")
# PyFunc flavor: generic wrapper queried through predict().
pyfunc_model = mlflow.pyfunc.load_model(model_uri)
query_response = pyfunc_model.predict("hi")
print(query_response)


------------- Inference via Llama Index   -------------




 Hello! LlamaIndex is a data framework designed to help you build applications using Language Learning Models (LLMs). It offers tools such as data connectors for various sources like APIs, PDFs, docs, SQL, and more. Additionally, it provides ways to structure your data for easy use with LLMs and an advanced retrieval/query interface. This allows you to feed in any LLM input prompt and receive retrieved context and knowledge-augmented output. It also offers easy integrations with various application frameworks like LangChain, Flask, Docker, and more. LlamaIndex caters to both beginner and advanced users, offering a high-level API for quick usage and lower-level APIs for customization and extension.

------------- Inference via MLflow PyFunc -------------




 Hello! LlamaIndex is a data framework designed to help you build applications using Language Learning Models (LLMs). It offers tools such as data connectors for various sources like APIs, PDFs, docs, SQL, etc., ways to structure your data, an advanced retrieval/query interface, and easy integrations with other application frameworks. For beginner users, it provides a high-level API that allows you to ingest and query your data in just 5 lines of code. Advanced users can customize and extend its modules for their specific needs.


In [8]:
import os
import subprocess
from IPython.display import IFrame

# Start the MLflow UI in a background process (Windows variant).
mlflow_ui_command = ["mlflow", "ui", "--port", "5000"]

# subprocess.CREATE_NEW_PROCESS_GROUP is only defined on Windows, so this cell
# is a no-op elsewhere instead of raising AttributeError during "Run All".
process = None
if os.name == "nt":
    process = subprocess.Popen(
        mlflow_ui_command,
        # The server's output is never read here; an unread PIPE can fill up and
        # block the child, so discard it (redirect to a log file when debugging).
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
        creationflags=subprocess.CREATE_NEW_PROCESS_GROUP,  # Windows equivalent for new process group
    )


In [None]:
# Run this cell on Linux or other POSIX systems (instead of the Windows cell above).
import os
import subprocess

from IPython.display import IFrame

# Start the MLflow UI in a background process (POSIX variant).
mlflow_ui_command = ["mlflow", "ui", "--port", "5000"]

# Sessions are a POSIX concept (os.setsid does not exist on Windows), so skip
# this cell there instead of raising AttributeError during "Run All".
if os.name != "nt":
    process = subprocess.Popen(
        mlflow_ui_command,
        # The server's output is never read here; an unread PIPE can fill up and
        # block the child, so discard it (redirect to a log file when debugging).
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
        # Same effect as preexec_fn=os.setsid (child runs in its own session),
        # without running Python code between fork and exec.
        start_new_session=True,
    )