In [1]:
from getpass import getpass
import logging
import os
import warnings
from typing import Tuple, List

import pandas as pd

import mlflow

from haystack.utils import Secret
from haystack import Pipeline

from haystack.dataclasses import Document

from haystack.document_stores.in_memory import InMemoryDocumentStore
from haystack.components.retrievers import InMemoryEmbeddingRetriever
from haystack.document_stores.types import DuplicatePolicy
from haystack.utils import ComponentDevice

from haystack.evaluation.eval_run_result import EvaluationRunResult

from haystack.components.builders import PromptBuilder, AnswerBuilder
from haystack.components.joiners import DocumentJoiner
from haystack.components.writers import DocumentWriter
from haystack.components.evaluators import (
    FaithfulnessEvaluator,
    ContextRelevanceEvaluator,
)

from haystack.components.fetchers.link_content import LinkContentFetcher
from haystack.components.converters import HTMLToDocument, PyPDFToDocument
from haystack.components.preprocessors import DocumentSplitter, DocumentCleaner
from haystack.components.generators import OpenAIGenerator
from haystack.components.embedders import (
    SentenceTransformersDocumentEmbedder,
    SentenceTransformersTextEmbedder,
)

from mlflow.metrics.genai.metric_definitions import relevance

In [2]:
# Keep the notebook output readable: hide FutureWarnings and only let
# MLflow log messages of level ERROR or higher through
warnings.simplefilter("ignore", category=FutureWarning)
logging.getLogger("mlflow").setLevel(logging.ERROR)

In [3]:
MLFLOW_TRACKING_URI = "http://127.0.0.1:8080"
# Do not use the proxy for local addresses. Entries that are already present in
# NO_PROXY are kept instead of being overwritten.
# (Comment these lines if you are not using any proxy)
_no_proxy_hosts = [
    host.strip() for host in os.environ.get("NO_PROXY", "").split(",") if host.strip()
]
if "127.0.0.1" not in _no_proxy_hosts:
    _no_proxy_hosts.append("127.0.0.1")
os.environ["NO_PROXY"] = ",".join(_no_proxy_hosts)
# Set the mlflow tracking server (assuming that the server is running locally)
mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)

In [4]:
# You need a paid OPENAI API KEY.
# A key that is already exported as OPENAI_API_KEY is reused so that
# "Restart & Run All" does not block on a prompt; otherwise the key is asked
# for interactively (the input is not echoed).
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY") or getpass("OPENAI_API_KEY: ")

In [5]:
def create_vector_database(
    urls: List[str],
    split_length: int,
    split_overlap: int = 16,
    embedding_model: str = "BAAI/bge-large-en-v1.5",
    device: str = "cuda:0",
) -> InMemoryDocumentStore:
    """
    Creates the in-memory vector database for the RAG system.
    The web pages behind ``urls`` are fetched, converted from HTML into
    documents, cleaned, split into overlapping chunks, embedded and
    written into the document store.

    Parameters
    ==========
    urls:
        Web pages to fetch and index
    split_length:
        Number of words per chunk
    split_overlap:
        Number of words shared by two consecutive chunks
    embedding_model:
        Name of the sentence-transformers model used to embed the chunks
        (check the hugging face website for more info about the model)
    device:
        Device the embedding model is loaded on, e.g. "cuda:0".
        Pass "cpu" if a GPU is not available.

    Returns
    =======
    An instance of a vector database
    """
    # In-memory document store
    document_store = InMemoryDocumentStore()
    # Pipeline components to create the vector database
    fetcher = LinkContentFetcher()
    html_converter = HTMLToDocument()
    cleaner = DocumentCleaner()
    ## Overlapping chunks help preserve contextual integrity
    splitter = DocumentSplitter(
        split_by="word", split_length=split_length, split_overlap=split_overlap
    )
    # Document Embedder (sentence transformer)
    document_embedder = SentenceTransformersDocumentEmbedder(
        model=embedding_model,
        device=ComponentDevice.from_str(device),
    )
    ## Download the model
    document_embedder.warm_up()
    ## Writes documents and their embeddings into the vector database
    writer = DocumentWriter(document_store=document_store, policy=DuplicatePolicy.SKIP)

    ## Pipeline
    indexing_pipeline = Pipeline()

    # Adding components into the pipeline
    indexing_pipeline.add_component(instance=fetcher, name="fetcher")
    indexing_pipeline.add_component(instance=html_converter, name="html_converter")
    indexing_pipeline.add_component(instance=cleaner, name="cleaner")
    indexing_pipeline.add_component(instance=splitter, name="splitter")
    indexing_pipeline.add_component(
        instance=document_embedder, name="document_embedder"
    )
    indexing_pipeline.add_component(instance=writer, name="writer")

    ## Pipeline connections
    indexing_pipeline.connect("fetcher.streams", "html_converter.sources")
    indexing_pipeline.connect("html_converter.documents", "cleaner")
    indexing_pipeline.connect("cleaner", "splitter")
    indexing_pipeline.connect("splitter", "document_embedder")
    indexing_pipeline.connect("document_embedder", "writer.documents")

    ## Write the html byte streams into the vector database
    indexing_pipeline.run(data={"fetcher": {"urls": urls}})

    return document_store


def create_rag_pipeline(
    document_store: InMemoryDocumentStore,
    embedding_model: str = "BAAI/bge-large-en-v1.5",
    device: str = "cuda:0",
    llm_model: str = "gpt-4o-mini",
) -> Pipeline:
    """
    Creates a RAG pipeline using an in-memory vector database.
    The question is embedded, the embedding is used to retrieve documents
    from the document store, a prompt is built from the question and the
    retrieved documents, and the reply of the LLM is wrapped together with
    the retrieved documents by the answer builder.

    The OpenAI key is read from the module-level ``OPENAI_API_KEY``.

    Parameters
    ==========
    document_store:
        An instance of a document store
    embedding_model:
        Name of the sentence-transformers model used to embed the question.
        It should be the model the documents in ``document_store`` were
        embedded with.
    device:
        Device the embedding model is loaded on, e.g. "cuda:0".
        Pass "cpu" if a GPU is not available.
    llm_model:
        Name of the OpenAI model that generates the answer

    Returns
    =======
    An instance of a Pipeline (RAG)
    """
    # Create the prompt for the LLM (generative model)
    prompt_template = """
    Answer the following question given the documents.
    If the answer is not contained within the documents reply with 'no_answer'. 
    Your answer should not exceed 100 words. 
    Query: {{question}}
    Documents:
    {% for document in documents %}
    {{document.content}}
    {% endfor %}
    """

    # Pipeline components for RAG
    prompt_builder = PromptBuilder(template=prompt_template)
    text_embedder = SentenceTransformersTextEmbedder(
        model=embedding_model, device=ComponentDevice.from_str(device)
    )
    retriever = InMemoryEmbeddingRetriever(document_store)
    llm = OpenAIGenerator(model=llm_model, api_key=Secret.from_token(OPENAI_API_KEY))

    ## Pipeline
    rag_pipeline = Pipeline()

    ## Adding components into the pipeline
    rag_pipeline.add_component("text_embedder", text_embedder)
    rag_pipeline.add_component("retriever", retriever)
    rag_pipeline.add_component("prompt_builder", prompt_builder)
    rag_pipeline.add_component("llm", llm)
    rag_pipeline.add_component("answer_builder", AnswerBuilder())

    ## Pipeline connections
    rag_pipeline.connect("text_embedder.embedding", "retriever.query_embedding")
    rag_pipeline.connect("retriever", "prompt_builder.documents")
    rag_pipeline.connect("prompt_builder", "llm")
    rag_pipeline.connect("llm.replies", "answer_builder.replies")
    rag_pipeline.connect("llm.meta", "answer_builder.meta")
    # The retrieved documents are also attached to the answer so that callers
    # can inspect the sources
    rag_pipeline.connect("retriever", "answer_builder.documents")

    return rag_pipeline

In [6]:
# MLflow documentation pages that are passed to `create_vector_database`
# to be fetched and indexed
urls = [
    "https://mlflow.org/docs/latest/index.html",
    "https://mlflow.org/docs/latest/tracking/autolog.html",
    "https://mlflow.org/docs/latest/getting-started/tracking-server-overview/index.html",
    "https://mlflow.org/docs/latest/python_api/mlflow.deployments.html",
]

In [7]:
# Ground truth for the retriever evaluation: every question is paired with
# the list of page URLs that are expected to be retrieved for it
eval_data = pd.DataFrame(
    [
        {
            "question": "What is MLflow?",
            "source": ["https://mlflow.org/docs/latest/index.html"],
        },
        {
            "question": "What is Databricks?",
            "source": [
                "https://mlflow.org/docs/latest/getting-started/tracking-server-overview/index.html"
            ],
        },
        {
            "question": "How to serve a model on Databricks?",
            "source": [
                "https://mlflow.org/docs/latest/python_api/mlflow.deployments.html"
            ],
        },
        {
            "question": "How to enable MLflow Autologging for my workspace by default?",
            "source": ["https://mlflow.org/docs/latest/tracking/autolog.html"],
        },
    ]
)

In [48]:
# Select the MLflow experiment for the split-length comparison
mlflow.set_experiment("Evaluate Split Length")

<Experiment: artifact_location='mlflow-artifacts:/227213818757815639', creation_time=1725017666836, experiment_id='227213818757815639', last_update_time=1725017666836, lifecycle_stage='active', name='Evaluate Split Length', tags={}>

In [9]:
def evaluate_split_length(split_length: int) -> "mlflow.models.EvaluationResult":
    """
    Evaluates the retriever of the RAG system for a given split length.
    A new vector database is built from ``urls`` with the given split length,
    every question of ``eval_data`` is run through the RAG pipeline and the
    URLs of the retrieved documents are evaluated against the ``source``
    column inside a new MLflow run.

    Parameters
    ==========
    split_length:
        Split length passed to ``create_vector_database``

    Returns
    =======
    The result returned by ``mlflow.evaluate``

    Raises
    ======
    KeyError
        Raised by the model function if a retrieved document has no 'url'
        entry in its metadata
    """

    document_store = create_vector_database(urls=urls, split_length=split_length)
    rag_pipeline = create_rag_pipeline(document_store=document_store)

    def extract_source(doc: Document) -> str:
        """Returns the URL a retrieved document was created from."""
        if "url" not in doc.meta:
            raise KeyError("'url' key does not exist in the metadata")
        return doc.meta["url"]

    def retrieve_doc_sources(question: str) -> List[str]:
        """Returns the source URLs of the documents retrieved for a question."""
        # NOTE: the complete RAG pipeline is run (including the LLM call) even
        # though only the retrieved documents are used here
        response = rag_pipeline.run(
            {
                "text_embedder": {"text": question},
                "prompt_builder": {"question": question},
                "answer_builder": {"query": question},
            }
        )
        docs = response["answer_builder"]["answers"][0].documents
        return [extract_source(doc) for doc in docs]

    def retriever_model_function(question_df: pd.DataFrame) -> pd.Series:
        """Model function for MLflow: one list of source URLs per question."""
        return question_df["question"].apply(retrieve_doc_sources)

    with mlflow.start_run():
        # Record the value under test so that the runs of this experiment can
        # be told apart when they are compared
        mlflow.log_param("split_length", split_length)
        return mlflow.evaluate(
            model=retriever_model_function,
            data=eval_data,
            model_type="retriever",
            targets="source",
            evaluators="default",
        )

In [10]:
# Evaluate the retriever for two split lengths; each call builds its own
# vector database and RAG pipeline and starts its own MLflow run
result1 = evaluate_split_length(1000)
result2 = evaluate_split_length(2000)

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

In [11]:
# Show the per-question retriever metrics of both evaluations, in the order
# in which they were run
for evaluation_result in (result1, result2):
    display(evaluation_result.tables["eval_results_table"])

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Unnamed: 0,question,source,outputs,precision_at_3/score,recall_at_3/score,ndcg_at_3/score
0,What is MLflow?,[https://mlflow.org/docs/latest/index.html],"[https://mlflow.org/docs/latest/index.html, ht...",0.333333,1,1.0
1,What is Databricks?,[https://mlflow.org/docs/latest/getting-starte...,[https://mlflow.org/docs/latest/python_api/mlf...,0.333333,1,0.63093
2,How to serve a model on Databricks?,[https://mlflow.org/docs/latest/python_api/mlf...,[https://mlflow.org/docs/latest/python_api/mlf...,0.666667,1,1.0
3,How to enable MLflow Autologging for my worksp...,[https://mlflow.org/docs/latest/tracking/autol...,[https://mlflow.org/docs/latest/tracking/autol...,0.666667,1,1.0


Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Unnamed: 0,question,source,outputs,precision_at_3/score,recall_at_3/score,ndcg_at_3/score
0,What is MLflow?,[https://mlflow.org/docs/latest/index.html],"[https://mlflow.org/docs/latest/index.html, ht...",0.333333,1,1.0
1,What is Databricks?,[https://mlflow.org/docs/latest/getting-starte...,[https://mlflow.org/docs/latest/python_api/mlf...,0.333333,1,0.63093
2,How to serve a model on Databricks?,[https://mlflow.org/docs/latest/python_api/mlf...,[https://mlflow.org/docs/latest/python_api/mlf...,0.666667,1,1.0
3,How to enable MLflow Autologging for my worksp...,[https://mlflow.org/docs/latest/tracking/autol...,[https://mlflow.org/docs/latest/tracking/autol...,0.333333,1,1.0


### Evaluate the RAG system for relevance and latency

In [53]:
# Select the MLflow experiment for the latency and relevance evaluation
mlflow.set_experiment("Evaluate Latency and relevance")

<Experiment: artifact_location='mlflow-artifacts:/464488942055089167', creation_time=1725054362749, experiment_id='464488942055089167', last_update_time=1725054362749, lifecycle_stage='active', name='Evaluate Latency and relevance', tags={}>

In [56]:
# Questions that are put to the RAG system (single column "questions")
eval_df = pd.DataFrame(
    [
        "What is MLflow?",
        "What is Databricks?",
        "How to serve a model on Databricks?",
        "How to enable MLflow Autologging for my workspace by default?",
    ],
    columns=["questions"],
)

In [57]:
# Show the evaluation questions
display(eval_df)

Unnamed: 0,questions
0,What is MLflow?
1,What is Databricks?
2,How to serve a model on Databricks?
3,How to enable MLflow Autologging for my worksp...


In [58]:
# Build the vector database and the RAG pipeline that is queried by `qa`
# NOTE(review): 253 is not one of the split lengths compared earlier
# (1000 and 2000) - confirm that this value is intended
document_store = create_vector_database(urls=urls, split_length=253)
rag_pipeline = create_rag_pipeline(document_store=document_store)

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

In [63]:
def qa(question: str) -> dict:
    """
    Answers a question with the module-level ``rag_pipeline``.

    Parameters
    ==========
    question:
        The question, used as the text to embed, as the prompt question and
        as the query of the answer builder

    Returns
    =======
    A dictionary with the answer text under "result" and the contents of the
    documents attached to the answer under "source_documents"
    """
    pipeline_inputs = {
        "text_embedder": {"text": question},
        "prompt_builder": {"question": question},
        "answer_builder": {"query": question},
    }
    # Only the first answer produced by the answer builder is used
    answer = rag_pipeline.run(pipeline_inputs)["answer_builder"]["answers"][0]
    return {
        "result": answer.data,
        "source_documents": [document.content for document in answer.documents],
    }


def model(input_df):
    """
    Model function for MLflow: answers every question of the input frame.

    Parameters
    ==========
    input_df:
        A data frame with a "questions" column

    Returns
    =======
    A list with the output of ``qa`` for every question, in row order
    """
    question_column = input_df["questions"]
    predictions = question_column.map(qa)
    return predictions.tolist()

In [60]:
# Expose the API key as an environment variable
# (presumably read by the OpenAI-backed judge model below - confirm)
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY

# Judge model used to score the relevance of the answers
judge_model_uri = "openai:/gpt-4o-mini"
relevance_metric = relevance(model=judge_model_uri)

In [62]:
# Evaluate the answers of the RAG system inside a new MLflow run
with mlflow.start_run():
    results = mlflow.evaluate(
        model=model,
        data=eval_df,
        model_type="question-answering",
        # The answer text is stored under the "result" key of every prediction
        predictions="result",
        evaluators="default",
        extra_metrics=[relevance_metric, mlflow.metrics.latency()],
        # Map the evaluator's "inputs" and "context" onto the column names
        # used in this notebook
        evaluator_config={
            "col_mapping": {"inputs": "questions", "context": "source_documents"}
        },
    )
    print(results.metrics)

display(results.tables["eval_results_table"])

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

{'latency/mean': 1.5651635527610779, 'latency/variance': 0.10542284979147709, 'latency/p90': 1.9355256319046021, 'relevance/v1/mean': 5.0, 'relevance/v1/variance': 0.0, 'relevance/v1/p90': 5.0}


Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Unnamed: 0,questions,outputs,source_documents,latency,token_count,relevance/v1/score,relevance/v1/justification
0,What is MLflow?,MLflow is an open-source platform designed to ...,[MLflow: A Tool for Managing the Machine Learn...,1.242014,66,5,The output comprehensively answers the questio...
1,What is Databricks?,Databricks is a cloud-based platform designed ...,[get_deploy_client client = get_deploy_client(...,1.293358,98,5,The output comprehensively answers the questio...
2,How to serve a model on Databricks?,"To serve a model on Databricks, use MLflow to ...",[enterprise user and willing to productionize ...,1.680429,105,5,The output comprehensively addresses the quest...
3,How to enable MLflow Autologging for my worksp...,To enable MLflow autologging by default for yo...,[Automatic Logging with MLflow Tracking\nAuto ...,2.044853,106,5,The output directly addresses the question abo...
