In [20]:
# Importing Libraries
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
import pandas as pd
import mlflow

In [21]:
# Creating MLFlow Experiment

mlflow.set_tracking_uri(uri="http://127.0.0.1:5000")
experiment_name= "classic_evaluation"
mlflow.set_experiment(experiment_name)

<Experiment: artifact_location='mlflow-artifacts:/159889069568961193', creation_time=1734243043122, experiment_id='159889069568961193', last_update_time=1734243043122, lifecycle_stage='active', name='classic_evaluation', tags={}>

In [22]:
# Creating a sample dataset

x, y= make_classification(n_samples=1000, n_features=5, n_classes=2, random_state=42)

x_df=pd.DataFrame(x, columns=[f"feature_{i+1}"for i in range(x.shape[1])])
y_df=pd.DataFrame(y, columns=['target'])
x_train, x_test,y_train,y_test = train_test_split(x_df, y_df, test_size=0.2, random_state=42)


In [23]:
# Random Forest Classifier
rfc= RandomForestClassifier(n_estimators=1)
rfc.fit(x_train, y_train)
predictions=rfc.predict(x_test)

  return fit_method(estimator, *args, **kwargs)


In [24]:
eval_df= pd.DataFrame({
    "target": y_test.to_numpy().flatten(),
    "predictions": predictions

})
print(eval_df.head())

   target  predictions
0       1            1
1       1            1
2       1            1
3       1            1
4       0            1


In [25]:
#using the eval_df of evaluating model

result = mlflow.evaluate(
    data= eval_df,
    model_type="classifier",
    targets="target",
    predictions="predictions"
)

2024/12/15 20:57:17 INFO mlflow.models.evaluation.evaluators.classifier: The evaluation dataset is inferred as binary dataset, positive label is 1, negative label is 0.
2024/12/15 20:57:17 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...


🏃 View run unique-pig-136 at: http://127.0.0.1:5000/#/experiments/159889069568961193/runs/3375410416d34d4196abf75aea66f4fd
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/159889069568961193


In [26]:
# Evaluating the model using estimator

# Using model as a function


def random_forest_clf(model_input):
    return rfc.predict(model_input)


In [27]:
eval_df_for_model= x_test.copy()
eval_df_for_model["target"]=y_test

In [28]:
eval_df_for_model.head()

Unnamed: 0,feature_1,feature_2,feature_3,feature_4,feature_5,target
521,1.250932,-1.064291,-2.238231,-0.914547,1.261286,1
737,-0.196283,0.19082,-0.243384,0.154804,-0.256094,1
740,2.659138,-0.265773,1.072978,-0.996758,-2.195564,1
660,0.087778,-0.021011,-0.66778,-0.038709,-0.042586,1
411,-0.662457,0.741043,-0.35834,0.568499,-1.101298,0


In [29]:
result= mlflow.evaluate(
    model=random_forest_clf,
    data=eval_df_for_model,
    model_type="classifier",
    targets="target",
    predictions = "predictions"

)

2024/12/15 20:57:25 INFO mlflow.models.evaluation.evaluators.classifier: The evaluation dataset is inferred as binary dataset, positive label is 1, negative label is 0.
2024/12/15 20:57:25 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...


🏃 View run illustrious-auk-883 at: http://127.0.0.1:5000/#/experiments/159889069568961193/runs/6e21099c8a3e4901b63c9fa636e3544e
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/159889069568961193


In [30]:
result.metrics

{'true_negatives': 81,
 'false_positives': 16,
 'false_negatives': 17,
 'true_positives': 86,
 'example_count': 200,
 'accuracy_score': 0.835,
 'recall_score': 0.8349514563106796,
 'precision_score': 0.8431372549019608,
 'f1_score': 0.8390243902439024}

In [31]:
# Addition of extra Metrics

from mlflow.metrics import make_metric
from sklearn.metrics import f1_score


In [32]:
def custom_accuracy(df,__builtin_metrics):
    targets= df["target"]
    predictions =df["prediction"]
    return sum(targets==predictions)/len(targets)

def custom_f1_score(df,__builtin_metrics):
    targets=df["target"]
    predictions =df["prediction"]
    return f1_score(targets, predictions, average="weighted")

In [33]:
custom_metric_accuracy=make_metric(
    eval_fn=custom_accuracy,
    name="custom_accuracy",
    greater_is_better = True
)

custom_metric_f1_score=make_metric(
    eval_fn=custom_f1_score,
    name="custom_f1_score",
    greater_is_better = True
)

In [34]:
result = mlflow.evaluate(
    model = random_forest_clf,
    data= eval_df_for_model,
    model_type="classifier",
    targets="target",
    predictions = "predictions",
    extra_metrics=[custom_metric_accuracy,custom_metric_f1_score]
)

2024/12/15 20:57:32 INFO mlflow.models.evaluation.evaluators.classifier: The evaluation dataset is inferred as binary dataset, positive label is 1, negative label is 0.
2024/12/15 20:57:32 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...


🏃 View run resilient-shrike-248 at: http://127.0.0.1:5000/#/experiments/159889069568961193/runs/0f15f20db3c945ae893e3026662627d3
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/159889069568961193


In [36]:
print(eval_df_for_model.columns)

Index(['feature_1', 'feature_2', 'feature_3', 'feature_4', 'feature_5',
       'target'],
      dtype='object')


In [37]:
result.artifacts

{'confusion_matrix': ImageEvaluationArtifact(uri='mlflow-artifacts:/159889069568961193/0f15f20db3c945ae893e3026662627d3/artifacts/confusion_matrix.png')}

In [44]:
# Adding custom artifacts

from sklearn.metrics import PrecisionRecallDisplay
import matplotlib.pyplot as plt



In [47]:
def custom_precision_recall_curve(df, __builtin_metrics, _artifacts_dir):
    targets =df["target"],
    predictions=df["prediction"],
    pr_display = PrecisionRecallDisplay.from_predictions(targets, predictions)
    return {"precision_recall_curve":pr_display.figure_}

In [48]:
result = mlflow.evaluate(
    data=eval_df,
    model_type= "classifier",
    targets ="target",
    predictions ="predictions",
    extra_metrics=[custom_metric_accuracy,custom_metric_f1_score],
    custom_artifacts = [custom_precision_recall_curve]

)

2024/12/15 21:39:51 INFO mlflow.models.evaluation.evaluators.classifier: The evaluation dataset is inferred as binary dataset, positive label is 1, negative label is 0.
2024/12/15 21:39:52 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...


🏃 View run sedate-cow-732 at: http://127.0.0.1:5000/#/experiments/159889069568961193/runs/efeddcf064074505ad005e969b2acf75
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/159889069568961193


ValueError: The target y is not binary. Got multilabel-indicator type of target.

In [49]:
print(eval_df['target'].unique())



[1 0]


In [50]:
print(eval_df['target'].head())
print(eval_df['target'].shape)


0    1
1    1
2    1
3    1
4    0
Name: target, dtype: int32
(200,)
