In [None]:
# MLFLOW

# Track Application Versions with Externally Managed Code
# This guide demonstrates MLflow's primary approach for versioning GenAI applications when your core application code resides 
# in an external version control system (VCS) like Git. In this workflow, an MLflow LoggedModel acts as a metadata hub, 
# linking a conceptual application version to its specific external code (e.g., a Git commit), configurations, 
# and associated MLflow entities like traces and evaluation results.

# The mlflow.set_active_model() function is key to this process, establishing a context so that subsequent traces and
# evaluations are automatically associated with the correct application version.


# Models from Code makes transferring models between environments reproducible.
# https://mlflow.org/docs/latest/ml/model/models-from-code/


# DATABRICKS

# In practice, it looks like we should be able to log agents:
# https://docs.databricks.com/aws/en/generative-ai/agent-framework/log-agent 

# Code-based logging
# Databricks recommends using MLflow's Models from Code functionality when logging agents.

# In this approach, the agent's code is captured as a Python file, and the Python environment is captured as a list of packages.
# When the agent is deployed, the Python environment is restored, and the agent's code is executed to load the agent into memory
# so it can be invoked when the endpoint is called.

# You can couple this approach with the use of pre-deployment validation APIs like mlflow.models.predict() to ensure 
# that the agent runs reliably when deployed for serving.

# To see an example of code-based logging, see ChatAgent authoring example notebooks.

# CODE BASED LOGGING
# The following instructions and code sample show you how to log an agent with LangChain.

# Create a notebook or Python file with your code. For this example, the notebook or file is named agent.py. The notebook or file must contain a LangChain agent, referred to here as lc_agent.

# Include mlflow.models.set_model(lc_agent) in the notebook or file.

# Create a new notebook to serve as the driver notebook (called driver.py in this example).

# In the driver notebook, use the following code to run agent.py and log the results to an MLflow model:

# Python
# mlflow.langchain.log_model(lc_model="/path/to/agent.py", resources=list_of_databricks_resources)

# The resources parameter declares Databricks-managed resources needed to serve the agent, such as a vector search index or serving endpoint that serves a foundation model. For more information, see Authentication for Databricks resources.

# Deploy the model. See Deploy an agent for generative AI applications.

# When the serving environment is loaded, agent.py is executed.

# When a serving request comes in, lc_agent.invoke(...) is called.

# import mlflow

# code_path = "/Workspace/Users/first.last/agent.py"
# config_path = "/Workspace/Users/first.last/config.yml"

# # Input example used by MLflow to infer Model Signature
# input_example = {
#   "messages": [
#     {
#       "role": "user",
#       "content": "What is Retrieval-augmented Generation?",
#     }
#   ]
# }

# # example using langchain
# with mlflow.start_run():
#   logged_agent_info = mlflow.langchain.log_model(
#     lc_model=code_path,
#     model_config=config_path, # If you specify this parameter, this configuration is used by agent code. The development_config is overwritten.
#     artifact_path="agent", # This string is used as the path inside the MLflow model where artifacts are stored
#     input_example=input_example, # Must be a valid input to the agent
#     example_no_conversion=True, # Required
#   )

# print(f"MLflow Run: {logged_agent_info.run_id}")
# print(f"Model URI: {logged_agent_info.model_uri}")

# # To verify that the model has been logged correctly, load the agent and call `invoke`:
# model = mlflow.langchain.load_model(logged_agent_info.model_uri)
# model.invoke(input_example)


# REGISTER IN UNITY CATALOG

# import mlflow

# mlflow.set_registry_uri("databricks-uc")

# catalog_name = "test_catalog"
# schema_name = "schema"
# model_name = "agent_name"

# model_name = catalog_name + "." + schema_name + "." + model_name
# uc_model_info = mlflow.register_model(model_uri=logged_agent_info.model_uri, name=model_name)


# DEPLOY TO SERVING FROM UNITY CATALOG

# from databricks.agents import list_deployments, get_deployments, delete_deployment

# # Print all current deployments
# deployments = list_deployments()
# print(deployments)

# # Get the deployment for a specific agent model name and version
# agent_model_name = ""  # Set to your Unity Catalog model name
# agent_model_version = 1  # Set to your agent model version
# deployment = get_deployments(model_name=agent_model_name, model_version=agent_model_version)

# # Delete an agent deployment
# delete_deployment(model_name=agent_model_name, model_version=agent_model_version)

# CHAT API ON DATABRICKS
# https://docs.databricks.com/aws/en/generative-ai/agent-framework/chat-app

# CHAT agent examples
# https://docs.databricks.com/aws/en/generative-ai/agent-framework/author-agent#chatagent-examples

In [2]:
import os
import openai
import mlflow
# Read the OpenAI connection settings from the environment; os.getenv yields
# None for a variable that is not set.
openai_api_key = os.getenv('OPENAI_API_KEY')
openai_base_url = os.getenv('OPENAI_BASE_URL')
client = openai.OpenAI(api_key=openai_api_key, base_url=openai_base_url)

# Send every MLflow tracking call in this notebook to the server at this address.
mlflow.set_tracking_uri("http://127.0.0.1:5001")

In [3]:
import mlflow
import openai
import subprocess


# Select the experiment that this notebook's traces are recorded under.
mlflow.set_experiment("my-genai-app")

# Ask git for the checked-out commit and keep the first 8 characters of its
# hash. check=True raises CalledProcessError when git exits with an error.
rev_parse = subprocess.run(
    ["git", "rev-parse", "HEAD"], stdout=subprocess.PIPE, check=True
)
git_commit = rev_parse.stdout.decode("ascii").strip()[:8]

# The application version is named "<app name>-<short commit hash>".
app_name = "customer_support_agent"
logged_model_name = "-".join((app_name, git_commit))

# Make that version the active model so the traces below are linked to it.
mlflow.set_active_model(name=logged_model_name)

# Turn on OpenAI autologging so the API calls below are traced automatically.
mlflow.openai.autolog()

# Exercise the agent with a handful of sample support questions; each request
# is sent as a single user message.
client = openai.OpenAI()
questions = [
    "How do I reset my password?",
    "What are your business hours?",
    "Can I get a refund for my last order?",
    "Where can I find the user manual for product model number 15869?",
    "I'm having trouble with the payment page, can you help?",
]
for question in questions:
    user_message = {"role": "user", "content": question}
    response = client.chat.completions.create(
        messages=[user_message],
        model="gpt-4o-mini",
        max_tokens=1000,
        temperature=0.7,
    )

2025/07/06 20:52:51 INFO mlflow.tracking.fluent: Experiment with name 'my-genai-app' does not exist. Creating a new experiment.
2025/07/06 20:52:51 INFO mlflow.tracking.fluent: LoggedModel with name 'customer_support_agent-48c87a7e' does not exist, creating one...
2025/07/06 20:52:51 INFO mlflow.tracking.fluent: Active model is set to the logged model with ID: m-d11b143371234fa88bbef6241591eee3


# PROMPT

In [4]:
import mlflow

# Register the chatbot prompt with MLflow. {{question}} is the template
# variable that gets filled in for each request.
prompt_registration = {
    "name": "chatbot_prompt",
    "template": "You are a chatbot that can answer questions about IT. Answer this question: {{question}}",
    "commit_message": "Initial version of chatbot",
}
system_prompt = mlflow.genai.register_prompt(**prompt_registration)

2025/07/06 20:57:50 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for prompt version to finish creation. Prompt name: chatbot_prompt, version 1


In [7]:
# Show the registered template with single-brace placeholders ({question}
# rather than {{question}}); the bare expression is displayed as the cell output.
system_prompt.to_single_brace_format()

'You are a chatbot that can answer questions about IT. Answer this question: {question}'

In [10]:
# StrOutputParser is imported from langchain_core (already a dependency of this
# cell) instead of the legacy `langchain.schema.output_parser` path, which is
# not shipped in newer `langchain` releases.
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

# Convert the MLflow prompt to LangChain format: LangChain templates use
# single-brace {question} placeholders.
prompt = ChatPromptTemplate.from_template(system_prompt.to_single_brace_format())

# Build the chain: prompt → LLM → output parser (plain-string result)
chain = prompt | ChatOpenAI(temperature=0.7) | StrOutputParser()

# Smoke-test the chain with a single question
question = "What is MLflow?"
print(chain.invoke({"question": question}))

MLflow is an open-source platform for managing the end-to-end machine learning lifecycle. It allows data scientists and machine learning engineers to track and reproduce experiments, package and share models, and deploy models into production. MLflow supports multiple programming languages and integrates with popular machine learning libraries and frameworks.


In [11]:
# Name of the logged model that the LangChain traces are attached to.
active_model_name = "langchain_model"
mlflow.set_active_model(name=active_model_name)

# Turn on LangChain autologging so traces are recorded automatically and
# linked to the active model set above.
mlflow.langchain.autolog()

2025/07/06 21:20:25 INFO mlflow.tracking.fluent: LoggedModel with name 'langchain_model' does not exist, creating one...
2025/07/06 21:20:25 INFO mlflow.tracking.fluent: Active model is set to the logged model with ID: m-70c4cd6d38504188ae1a06af880d171f


In [12]:
# Inputs for the chain: one dict per call, keyed by the prompt's `question` variable.
questions = [
    {"question": "What is MLflow Tracking and how does it work?"},
    {"question": "What is Unity Catalog?"},
    {"question": "What are user-defined functions (UDFs)?"},
]

# Invoke the chain once per input, in order. A comprehension keeps its loop
# variable local, so the `question` string set by the chain smoke-test cell is
# not silently rebound to a dict.
outputs = [chain.invoke(chain_input) for chain_input in questions]

# Verify traces are linked to the active model: look up its ID and list the
# traces recorded against it (displayed as the cell output).
active_model_id = mlflow.get_active_model_id()
mlflow.search_traces(model_id=active_model_id)

Unnamed: 0,trace_id,trace,client_request_id,state,request_time,execution_duration,request,response,trace_metadata,tags,spans,assessments
0,8fd73ef223ee4065b3ed01f504f9f1ad,Trace(trace_id=8fd73ef223ee4065b3ed01f504f9f1ad),,TraceState.OK,1751826058353,1821,{'question': 'What are user-defined functions ...,User-defined functions (UDFs) are functions cr...,"{'mlflow.trace.tokenUsage': '{""input_tokens"": ...",{'mlflow.artifactLocation': 'mlflow-artifacts:...,"[{'trace_id': 'kn84i4hifs5+Ue+H5JGl5A==', 'spa...",[]
1,948563b499104315892b22b44d1b5735,Trace(trace_id=948563b499104315892b22b44d1b5735),,TraceState.OK,1751826056547,1793,{'question': 'What is Unity Catalog?'},Unity Catalog is a centralized repository of s...,"{'mlflow.trace.tokenUsage': '{""input_tokens"": ...",{'mlflow.artifactLocation': 'mlflow-artifacts:...,"[{'trace_id': 'KwC+YCLIpMpKrkWYezdBtA==', 'spa...",[]
2,f450a3d985eb448698ed689d902d72c6,Trace(trace_id=f450a3d985eb448698ed689d902d72c6),,TraceState.OK,1751826053143,3381,{'question': 'What is MLflow Tracking and how ...,MLflow Tracking is a component of the open-sou...,"{'mlflow.trace.tokenUsage': '{""input_tokens"": ...",{'mlflow.artifactLocation': 'mlflow-artifacts:...,"[{'trace_id': 'nk02si9JxsTk9KUS1sVqpg==', 'spa...",[]


In [None]:
# import pandas as pd
# eval_df = pd.DataFrame(
#     {
#         "inputs": questions,
#         "expected_response": [
#             """MLflow Tracking is a key component of the MLflow platform designed to record and manage machine learning experiments. It enables data scientists and engineers to log parameters, code versions, metrics, and artifacts in a systematic way, facilitating experiment tracking and reproducibility.

# How It Works:

# At the heart of MLflow Tracking is the concept of a run, which is an execution of a machine learning code. Each run can log the following:

# Parameters: Input variables or hyperparameters used in the model (e.g., learning rate, number of trees). Metrics: Quantitative measures to evaluate the model's performance (e.g., accuracy, loss). Artifacts: Output files like models, datasets, or images generated during the run. Source Code: The version of the code or Git commit hash used. These logs are stored in a tracking server, which can be set up locally or on a remote server. The tracking server uses a backend storage (like a database or file system) to keep a record of all runs and their associated data.

# Users interact with MLflow Tracking through its APIs available in multiple languages (Python, R, Java, etc.). By invoking these APIs in the code, you can start and end runs, and log data as the experiment progresses. Additionally, MLflow offers autologging capabilities for popular machine learning libraries, automatically capturing relevant parameters and metrics without manual code changes.

# The logged data can be visualized using the MLflow UI, a web-based interface that displays all experiments and runs. This UI allows you to compare runs side-by-side, filter results, and analyze performance metrics over time. It aids in identifying the best models and understanding the impact of different parameters.

# By providing a structured way to record experiments, MLflow Tracking enhances collaboration among team members, ensures transparency, and makes it easier to reproduce results. It integrates seamlessly with other MLflow components like Projects and Model Registry, offering a comprehensive solution for managing the machine learning lifecycle.""",
#             """Unity Catalog is a feature in Databricks that allows you to create a centralized inventory of your data assets, such as tables, views, and functions, and share them across different teams and projects. It enables easy discovery, collaboration, and reuse of data assets within your organization.

# With Unity Catalog, you can:

# 1. Create a single source of truth for your data assets: Unity Catalog acts as a central repository of all your data assets, making it easier to find and access the data you need.
# 2. Improve collaboration: By providing a shared inventory of data assets, Unity Catalog enables data scientists, engineers, and other stakeholders to collaborate more effectively.
# 3. Foster reuse of data assets: Unity Catalog encourages the reuse of existing data assets, reducing the need to create new assets from scratch and improving overall efficiency.
# 4. Enhance data governance: Unity Catalog provides a clear view of data assets, enabling better data governance and compliance.

# Unity Catalog is particularly useful in large organizations where data is scattered across different teams, projects, and environments. It helps create a unified view of data assets, making it easier to work with data across different teams and projects.""",
#             """User-defined functions (UDFs) in the context of Databricks and Apache Spark are custom functions that you can create to perform specific tasks on your data. These functions are written in a programming language such as Python, Java, Scala, or SQL, and can be used to extend the built-in functionality of Spark.

# UDFs can be used to perform complex data transformations, data cleaning, or to apply custom business logic to your data. Once defined, UDFs can be invoked in SQL queries or in DataFrame transformations, allowing you to reuse your custom logic across multiple queries and applications.

# To use UDFs in Databricks, you first need to define them in a supported programming language, and then register them with the SparkSession. Once registered, UDFs can be used in SQL queries or DataFrame transformations like any other built-in function.""",
#         ],
#         "outputs": outputs,
#     }
# )


# from mlflow.genai.scorers import Correctness, RelevanceToQuery, Guidelines
# from mlflow.metrics.genai import answer_correctness

# # Run evaluation with GenAI metrics
# result = mlflow.genai.evaluate(
#     data=eval_df,
#     scorers=[
#         Correctness(),
#         RelevanceToQuery(),
#     ],
# )

# # View evaluation results
# result.tables["eval_results"]

In [21]:
# FIXME: this cell still has unresolved issues, so it is left commented out.

# import mlflow.langchain
# import pandas as pd
# eval_df = pd.DataFrame(
#     {
#         "question": [question['question'] for question in questions],
#         "expected_response": [
#             """MLflow Tracking is a key component of the MLflow platform designed to record and manage machine learning experiments. It enables data scientists and engineers to log parameters, code versions, metrics, and artifacts in a systematic way, facilitating experiment tracking and reproducibility.

# How It Works:

# At the heart of MLflow Tracking is the concept of a run, which is an execution of a machine learning code. Each run can log the following:

# Parameters: Input variables or hyperparameters used in the model (e.g., learning rate, number of trees). Metrics: Quantitative measures to evaluate the model's performance (e.g., accuracy, loss). Artifacts: Output files like models, datasets, or images generated during the run. Source Code: The version of the code or Git commit hash used. These logs are stored in a tracking server, which can be set up locally or on a remote server. The tracking server uses a backend storage (like a database or file system) to keep a record of all runs and their associated data.

# Users interact with MLflow Tracking through its APIs available in multiple languages (Python, R, Java, etc.). By invoking these APIs in the code, you can start and end runs, and log data as the experiment progresses. Additionally, MLflow offers autologging capabilities for popular machine learning libraries, automatically capturing relevant parameters and metrics without manual code changes.

# The logged data can be visualized using the MLflow UI, a web-based interface that displays all experiments and runs. This UI allows you to compare runs side-by-side, filter results, and analyze performance metrics over time. It aids in identifying the best models and understanding the impact of different parameters.

# By providing a structured way to record experiments, MLflow Tracking enhances collaboration among team members, ensures transparency, and makes it easier to reproduce results. It integrates seamlessly with other MLflow components like Projects and Model Registry, offering a comprehensive solution for managing the machine learning lifecycle.""",
#             """Unity Catalog is a feature in Databricks that allows you to create a centralized inventory of your data assets, such as tables, views, and functions, and share them across different teams and projects. It enables easy discovery, collaboration, and reuse of data assets within your organization.

# With Unity Catalog, you can:

# 1. Create a single source of truth for your data assets: Unity Catalog acts as a central repository of all your data assets, making it easier to find and access the data you need.
# 2. Improve collaboration: By providing a shared inventory of data assets, Unity Catalog enables data scientists, engineers, and other stakeholders to collaborate more effectively.
# 3. Foster reuse of data assets: Unity Catalog encourages the reuse of existing data assets, reducing the need to create new assets from scratch and improving overall efficiency.
# 4. Enhance data governance: Unity Catalog provides a clear view of data assets, enabling better data governance and compliance.

# Unity Catalog is particularly useful in large organizations where data is scattered across different teams, projects, and environments. It helps create a unified view of data assets, making it easier to work with data across different teams and projects.""",
#             """User-defined functions (UDFs) in the context of Databricks and Apache Spark are custom functions that you can create to perform specific tasks on your data. These functions are written in a programming language such as Python, Java, Scala, or SQL, and can be used to extend the built-in functionality of Spark.

# UDFs can be used to perform complex data transformations, data cleaning, or to apply custom business logic to your data. Once defined, UDFs can be invoked in SQL queries or in DataFrame transformations, allowing you to reuse your custom logic across multiple queries and applications.

# To use UDFs in Databricks, you first need to define them in a supported programming language, and then register them with the SparkSession. Once registered, UDFs can be used in SQL queries or DataFrame transformations like any other built-in function.""",
#         ]
#     }
# )

# from mlflow.metrics.genai import answer_correctness

# with mlflow.start_run() as run:
#     # system_prompt = "Answer the following question in two sentences"
#     # Log the LangChain chain as an MLflow model.
#     logged_model_info = mlflow.langchain.log_model(
#         lc_model=chain,
#         # task=openai.chat.completions,
#         name="langchain_model",
#         # messages=[
#         #     {"role": "system", "content": system_prompt},
#         #     {"role": "user", "content": "{question}"},
#         # ],
#     )

#     # Use predefined question-answering metrics to evaluate our model.
#     results = mlflow.evaluate(
#         logged_model_info.model_uri,
#         eval_df,
#         targets="expected_response",
#         model_type="question-answering",
#         extra_metrics=[
#             answer_correctness(),
#             # latency(),
#         ],
#     )
#     print(f"See aggregated evaluation results below: \n{results.metrics}")

#     # Evaluation result for each data record is available in `results.tables`.
#     eval_table = results.tables["eval_results_table"]
#     print(f"See evaluation table below: \n{eval_table}")