# MLflow Tracing and Feedback Test

This notebook tests MLflow tracing functionality and feedback logging.


In [1]:
"""Test script to debug MLflow feedback logging."""

import mlflow
from mlflow.entities import AssessmentSource, AssessmentSourceType
from src.config import DatabricksConfig


## 1. Load Configuration


In [2]:
print("=" * 80)
print("MLflow Feedback Logging Test")
print("=" * 80)

# Load config
config = DatabricksConfig.from_config()
print(f"\n📋 Experiment name: {config.experiment_name}")
print(f"📋 Databricks host: {config.host}")


MLflow Feedback Logging Test

📋 Experiment name: /Users/jag.jasani@databricks.com/mas-1ab024e9-dev-experiment
📋 Databricks host: https://e2-demo-field-eng.cloud.databricks.com


## 2. Set Up MLflow Tracking


## 3. Get Experiment Details
mlflow

In [5]:
config.experiment_name

'/Users/jag.jasani@databricks.com/mas-1ab024e9-dev-experiment'

In [34]:
mlflow.set_tracking_uri("databricks://aws")
print(f"\n✅ Set tracking URI to: databricks")



✅ Set tracking URI to: databricks


In [40]:
import os
os.environ['MLFLOW_TRACKING_URI'] = "databricks://aws"

In [None]:
import os
if "MLFLOW_TRACKING_URI" in os.environ:
    mlflow.set_tracking_uri(os.environ['MLFLOW_TRACKING_URI'])
else:
    mlflow.set_tracking_uri("databricks")

In [44]:
mlflow.get_tracking_uri()

'databricks'

In [36]:
traces = mlflow.search_traces(experiment_ids=['2894186667355780'],max_results=1)
traces

  traces = mlflow.search_traces(experiment_ids=['2894186667355780'],max_results=1)


Unnamed: 0,trace_id,trace,client_request_id,state,request_time,execution_duration,request,response,trace_metadata,tags,spans,assessments
0,tr-2528dea4d8d93b0f726ca9765258bc8f,"{""info"": {""trace_id"": ""tr-2528dea4d8d93b0f726c...",1a4f511b-01f6-4d65-8b7e-d871f510e749,OK,1761665981297,34973,"{'request': {'tool_choice': None, 'truncation'...","[{'type': 'response.output_item.done', 'custom...",{'mlflow.databricks.modelServingEndpointName':...,{'mlflow.user': '36e54aae-60ee-4b37-b711-d7559...,"[{'trace_id': 'JSjepNjZOw9ybKl2Uli8jw==', 'spa...",[]


In [29]:
traces = mlflow.search_traces(experiment_ids=['3036037102232956'],max_results=1)
traces

  traces = mlflow.search_traces(experiment_ids=['3036037102232956'],max_results=1)


MlflowException: Experiment 3036037102232956 does not exist.

In [19]:
mlflow.search_experiments(max_results=3)

MlflowException: Detected Unity Catalog tracking URI 'databricks-uc'. Setting the tracking URI to a Unity Catalog backend is not supported in the current version of the MLflow client (3.5.1). Please specify a different tracking URI via mlflow.set_tracking_uri, with one of the supported schemes: ['', 'file', 'databricks', 'http', 'https', 'postgresql', 'mysql', 'sqlite', 'mssql']. If you're trying to access models in the Unity Catalog, please upgrade to the latest version of the MLflow Python client, then specify a Unity Catalog model registry URI via mlflow.set_registry_uri('databricks-uc') or mlflow.set_registry_uri('databricks-uc://profile_name') where 'profile_name' is the name of the Databricks CLI profile to use for authentication. A OSS Unity Catalog model registry URI can also be specified via mlflow.set_registry_uri('uc:http://localhost:8080').Be sure to leave the registry URI configured to use one of the supportedschemes listed above.

In [3]:
# Get experiment
try:
    experiment = mlflow.get_experiment_by_name(config.experiment_name)
    if experiment:
        print(f"\n✅ Found experiment:")
        print(f"   - Name: {experiment.name}")
        print(f"   - ID: {experiment.experiment_id}")
        print(f"   - Artifact Location: {experiment.artifact_location}")
    else:
        print(f"\n❌ Experiment not found: {config.experiment_name}")
        raise ValueError(f"Experiment not found: {config.experiment_name}")
except Exception as e:
    print(f"\n❌ Error getting experiment: {e}")
    import traceback
    traceback.print_exc()
    raise



❌ Experiment not found: /Users/jag.jasani@databricks.com/mas-1ab024e9-dev-experiment

❌ Error getting experiment: Experiment not found: /Users/jag.jasani@databricks.com/mas-1ab024e9-dev-experiment


Traceback (most recent call last):
  File "/var/folders/2x/pf13chqx4614qjlmjdgmndv00000gp/T/ipykernel_128/3498501235.py", line 11, in <module>
    raise ValueError(f"Experiment not found: {config.experiment_name}")
ValueError: Experiment not found: /Users/jag.jasani@databricks.com/mas-1ab024e9-dev-experiment


ValueError: Experiment not found: /Users/jag.jasani@databricks.com/mas-1ab024e9-dev-experiment

## 4. Search for Traces


In [None]:
# Search for traces
print(f"\n🔍 Searching for traces in experiment {experiment.experiment_id}...")
try:
    traces = mlflow.search_traces(
        experiment_ids=[experiment.experiment_id], 
        max_results=5, 
        order_by=["timestamp_ms DESC"]
    )

    if traces is not None and not traces.empty:
        print(f"\n✅ Found {len(traces)} traces:")
        print("\n" + "=" * 80)
        for idx, trace_row in traces.iterrows():
            trace_id = trace_row.get("request_id")
            timestamp = trace_row.get("timestamp_ms")
            status = trace_row.get("status")
            print(f"\nTrace {idx + 1}:")
            print(f"   - Request ID: {trace_id}")
            print(f"   - Timestamp: {timestamp}")
            print(f"   - Status: {status}")

            # Show all available columns
            print(f"   - Available columns: {list(trace_row.index)}")
        print("=" * 80)
    else:
        print(f"\n⚠️ No traces found in experiment")
        print(f"\nThis means the agent is not logging traces to this experiment.")
        print(f"The traces might be in a different experiment or not being logged at all.")
        traces = None

except Exception as e:
    print(f"\n❌ Error searching traces: {e}")
    import traceback
    traceback.print_exc()
    raise


## 5. Test Feedback Logging

Test logging feedback to the most recent trace.


In [None]:
if traces is not None and not traces.empty:
    # Get the most recent trace
    most_recent = traces.iloc[0]
    trace_id = most_recent.get("request_id")

    print(f"\n📋 Most recent trace ID: {trace_id}")

    # Test logging feedback to this trace
    print(f"\n🧪 Testing feedback logging to trace: {trace_id}")

    user_id = "test_user@example.com"

    # Test 1: Log satisfaction feedback
    print(f"\n1️⃣ Testing log_feedback (user_satisfaction)...")
    try:
        mlflow.log_feedback(
            trace_id=trace_id,
            name="user_satisfaction",
            value=True,
            source=AssessmentSource(
                source_type=AssessmentSourceType.HUMAN, 
                source_id=user_id
            ),
        )
        print(f"   ✅ Successfully logged satisfaction feedback")
    except Exception as e:
        print(f"   ❌ Error logging satisfaction feedback: {e}")
        import traceback
        traceback.print_exc()

    # Test 2: Log review request
    print(f"\n2️⃣ Testing log_feedback (flagged_for_review)...")
    try:
        mlflow.log_feedback(
            trace_id=trace_id,
            name="flagged_for_review",
            value=True,
            source=AssessmentSource(
                source_type=AssessmentSourceType.HUMAN, 
                source_id=user_id
            ),
        )
        print(f"   ✅ Successfully logged review request")
    except Exception as e:
        print(f"   ❌ Error logging review request: {e}")
        import traceback
        traceback.print_exc()

    # Test 3: Log correction/expectation
    print(f"\n3️⃣ Testing log_expectation (user_correction)...")
    try:
        mlflow.log_expectation(
            trace_id=trace_id,
            name="user_correction",
            value=["This is a test correction from the debug script"],
            source=AssessmentSource(
                source_type=AssessmentSourceType.HUMAN, 
                source_id=user_id
            ),
        )
        print(f"   ✅ Successfully logged correction")
    except Exception as e:
        print(f"   ❌ Error logging correction: {e}")
        import traceback
        traceback.print_exc()
else:
    print("\n⚠️ Skipping feedback tests - no traces available")


## 6. Verify Feedback Was Logged

Retrieve the trace again to check if feedback was added.
