In [1]:
import mlflow

from pathlib import Path
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader

from dotenv import load_dotenv
load_dotenv()

True

In [2]:
# Directory holding the source documents, relative to the notebook's working directory
DATA_DIR = Path('..') / 'data'

In [3]:
# Read the files found in DATA_DIR (4 workers, with a progress bar) ...
reader = SimpleDirectoryReader(DATA_DIR)
documents = reader.load_data(show_progress=True, num_workers=4)

# ... and build a new vector index from the loaded documents
index = VectorStoreIndex.from_documents(documents)

In [4]:
# Point the MLflow client at the tracking server running locally on port 5000
TRACKING_URI = "http://localhost:5000"
mlflow.set_tracking_uri(TRACKING_URI)

In [5]:
# Experiment that groups the runs of this notebook (created on first use)
EXPERIMENT_NAME = "LLamaIndex Tracing"
# Sample query stored alongside the logged model as its input example
INPUT_EXAMPLE = "What did the author do growing up?"

mlflow.set_experiment(EXPERIMENT_NAME)

# Log the index inside a run; it is registered to be served as a chat engine
with mlflow.start_run():
    model_info = mlflow.llama_index.log_model(
        index,
        artifact_path="index",
        engine_type="chat",
        input_example=INPUT_EXAMPLE,
    )

2024/11/08 20:55:43 INFO mlflow.tracking.fluent: Experiment with name 'LLamaIndex Tracing' does not exist. Creating a new experiment.
2024/11/08 20:55:47 INFO mlflow.llama_index.serialize_objects: API key(s) will be removed from the global Settings object during serialization to protect against key leakage. At inference time, the key(s) must be passed as environment variables.


Downloading artifacts:   0%|          | 0/12 [00:00<?, ?it/s]

2024/11/08 20:56:17 INFO mlflow.tracking._tracking_service.client: 🏃 View run victorious-snipe-405 at: http://localhost:5000/#/experiments/372917090010095068/runs/67f9b3da714243428f5f4e52476357b0.
2024/11/08 20:56:17 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:5000/#/experiments/372917090010095068.
2024/11/08 20:56:17 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:5000/#/experiments/372917090010095068.


In [6]:
# URI of the model logged above (runs:/<run_id>/<artifact_path>); used to load it back
model_info.model_uri

'runs:/67f9b3da714243428f5f4e52476357b0/index'

In [7]:
# Load the logged index back through MLflow's generic pyfunc interface
model = mlflow.pyfunc.load_model(model_info.model_uri)

# Ask the loaded model a first question and show its answer
question = "What was the first program the author wrote?"
response = model.predict(question)
print(response)

Downloading artifacts:   0%|          | 0/12 [00:00<?, ?it/s]

The first program the author wrote was on the IBM 1401 using an early version of Fortran in 9th grade, around the age of 13 or 14.


In [8]:
# Follow-up question: "it" is only resolvable because the chat engine
# keeps track of the conversation history from the previous call
follow_up = "How did the author feel about it?"
response = model.predict(follow_up)
print(response)

The author felt puzzled by the first program they wrote on the IBM 1401 in 9th grade. They couldn't figure out what to do with it and realized there wasn't much they could have done with it due to the limitations of the system. The author mentioned that the programs they wrote on the IBM 1401 couldn't have done much, and their clearest memory was when they learned that it was possible for programs not to terminate.


In [9]:
# Enable tracing: turn on MLflow autologging for LlamaIndex with trace logging,
# suppressing MLflow's own log messages for this integration
mlflow.llama_index.autolog(log_traces=True, silent=True)

# Query the index directly through a chat engine now that autologging is on
chat_engine = index.as_chat_engine()
traced_question = "What was the first program the author wrote?"
response = chat_engine.chat(traced_question)

In [10]:
# Print only the answer text (the `response` attribute) of the chat result from the previous cell
print(response.response)

The first program the author wrote was on the IBM 1401 using an early version of Fortran in 9th grade, when he was around 13 or 14 years old.
