In [None]:
# Importing libraries
import mlflow as ml
import openai
import os
import pandas as pd

from getpass import getpass

In [None]:
# Setting up the OpenAI API key
# Take the key from the environment when it is already set; otherwise prompt for
# it without echoing, so the secret is never stored in the notebook source.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY") or getpass("Enter your OpenAI API key: ")

# Export the key so that libraries reading OPENAI_API_KEY from the environment
# (the openai client and the MLflow OpenAI flavor) can authenticate.
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY

In [None]:
# Pointing MLflow at the remote tracking server
# NOTE(review): the URI below is a placeholder (user, password, ip, port) and
# must be replaced with the real server address before running.
ml.set_tracking_uri(
    uri='http://user:password@ip:port',
)

In [None]:
# Selecting the experiment (created on first use)
# set_experiment creates the experiment when no experiment with this name exists
# and activates it otherwise, so the cell can be re-run safely. An explicit
# create_experiment call would raise once the name is already taken.
ml.set_experiment("test_experiment")

In [None]:
# Creating an evaluation data
# One row per question: "inputs" holds the question text, "ground_truth" the
# reference answer passed to the evaluator as the target column.
eval_data = pd.DataFrame(
    {
        "inputs": [
            "What is MLflow?",
            "What is Spark?",
        ],
        "ground_truth": [
            "MLflow is an open-source platform for managing the end-to-end machine learning (ML) "
            "lifecycle. It was developed by Databricks, a company that specializes in big data and "
            "machine learning solutions. MLflow is designed to address the challenges that data "
            "scientists and machine learning engineers face when developing, training, and deploying "
            "machine learning models.",
            "Apache Spark is an open-source, distributed computing system designed for big data "
            "processing and analytics. It was developed in response to limitations of the Hadoop "
            "MapReduce computing model, offering improvements in speed and ease of use. Spark "
            "provides libraries for various tasks such as data ingestion, processing, and analysis "
            "through its components like Spark SQL for structured data, Spark Streaming for "
            "real-time data processing, and MLlib for machine learning tasks",
        ],
    }
)

# Defining the system prompt
system_prompt = "Answer the following question in two sentences"

# Starting a run
# The context manager ends the run even when logging or evaluation raises, so a
# failed cell does not leave an active run behind that blocks the next start_run.
with ml.start_run(run_name="Test"):
    # Logging the model information
    # NOTE(review): openai.ChatCompletion is the pre-1.0 openai SDK entry point;
    # confirm the installed openai version still exposes it.
    logged_model_info = ml.openai.log_model(
        model="gpt-4",
        task=openai.ChatCompletion,
        artifact_path="model",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": "{question}"},
        ],
    )

    # Evaluating the model
    results = ml.evaluate(
        logged_model_info.model_uri,
        eval_data,
        targets="ground_truth",
        model_type="question-answering",
    )
    print(f"See aggregated evaluation results below: \n{results.metrics}")

    # Printing the per-row evaluation table
    eval_table = results.tables["eval_results_table"]
    print(f"See evaluation table below: \n{eval_table}")