In [1]:
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
import mlflow


# Load the iris dataset into a DataFrame and keep a CSV snapshot on disk.
iris = load_iris()
df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
df['target'] = iris.target
df.to_csv('iris_data.csv')

# Hold out 20% of the rows for evaluation; the seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    df[iris.feature_names],
    iris.target,
    test_size=0.2,
    random_state=42,
)

# Fit the classifier on the training split.
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)

# Accuracy on the held-out split, computed directly from the predictions.
y_pred = rf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

# Build the evaluation frame as a *new* DataFrame. Assigning the label column
# onto X_test itself would leave X_test with an extra non-feature column, so
# any later rf.predict(X_test) would no longer match the training features.
eval_data = X_test.assign(label=y_test)

# Log the experiment using MLflow
run_name = 'iris_experiment'
with mlflow.start_run(run_name=run_name):
    # Record the hold-out accuracy computed above so it is stored with the run
    # instead of being discarded.
    mlflow.log_metric('test_accuracy', accuracy)

    # log_model returns a ModelInfo whose model_uri points at the model that
    # was just logged; using it avoids rebuilding the URI from the artifact
    # path by hand.
    model_info = mlflow.sklearn.log_model(rf, 'iris_model')
    model_uri = model_info.model_uri

    # Run MLflow's default classifier evaluator against the logged model,
    # using the "label" column of eval_data as the ground truth.
    result = mlflow.evaluate(
        model_uri,
        eval_data,
        targets="label",
        model_type="classifier",
        evaluators=["default"],
    )

    

2023/04/09 20:31:39 INFO mlflow.models.evaluation.base: Evaluating the model with the default evaluator.
2023/04/09 20:31:39 INFO mlflow.models.evaluation.default_evaluator: The evaluation dataset is inferred as multiclass dataset, number of classes is inferred as 3
2023/04/09 20:31:41 INFO mlflow.models.evaluation.default_evaluator: Shap explainer Exact is used.
Unable to serialize underlying model using MLflow, will use SHAP serialization
No data for colormapping provided via 'c'. Parameters 'vmin', 'vmax' will be ignored
No data for colormapping provided via 'c'. Parameters 'vmin', 'vmax' will be ignored
