In [1]:
import itertools
import os

import joblib
import mlflow
import mlflow.sklearn
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler



In [2]:
# Select the MLflow experiment by name; runs started later in this notebook
# are recorded under it.
experiment_name = "breast_cancer_classification/random_forest/fine_tuning"
mlflow.set_experiment(experiment_name)

2025/10/22 19:46:49 INFO mlflow.tracking.fluent: Experiment with name 'breast_cancer_classification/random_forest/fine_tuning' does not exist. Creating a new experiment.


<Experiment: artifact_location='file:///Users/dongnd/MLOps/mlops_tutorial_02/mlruns/443590215586828930', creation_time=1761137209646, experiment_id='443590215586828930', last_update_time=1761137209646, lifecycle_stage='active', name='breast_cancer_classification/random_forest/fine_tuning', tags={}>

In [4]:
# Load the breast-cancer dataset and hold out 20% of the samples for testing.
data = load_breast_cancer()
X, y = data.data, data.target
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the scaler on the training split only, then apply that same fitted
# transform to both splits so test-set statistics never influence scaling.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [5]:
# Hyper-parameter grid for the random forest; every combination of these
# values is evaluated by the grid-search cell.
params_grid = dict(
    n_estimators=[100, 300, 500],
    max_depth=[5, 8, 10],
)

In [None]:
# Grid search over `params_grid`: one MLflow run per hyper-parameter combination.
# Every run logs its parameters, its test-set metrics, the fitted model (under
# the artifact path "model") and the fitted scaler. The run with the highest
# test accuracy is remembered in best_accuracy / best_params / best_run_id.

RANDOM_STATE = 42  # seed the forest so reruns reproduce the same metrics

best_accuracy = 0.0
best_params = None
best_run_id = None

# Cartesian product of the grid: one dict of keyword arguments per combination.
combinations = [
    dict(zip(params_grid, combo))
    for combo in itertools.product(*params_grid.values())
]

# The fitted scaler is the same for every run, so serialize it once up front.
# The target directory has to exist before joblib can write into it.
os.makedirs('models', exist_ok=True)
scaler_path = os.path.join('models', 'scaler.pkl')
joblib.dump(scaler, scaler_path)

for idx, params in enumerate(combinations):
    run_name = f'run_{idx+1}_rf_{params["n_estimators"]}_{params["max_depth"]}'
    with mlflow.start_run(run_name=run_name) as run:
        # A seed supplied through the grid takes precedence over the default.
        model_params = {'random_state': RANDOM_STATE, **params}

        mlflow.log_params(model_params)
        mlflow.log_param('scaler', 'StandardScaler')

        rf = RandomForestClassifier(**model_params)
        rf.fit(X_train, y_train)

        y_pred = rf.predict(X_test)

        accuracy = accuracy_score(y_test, y_pred)
        f1 = f1_score(y_test, y_pred)
        precision = precision_score(y_test, y_pred)
        recall = recall_score(y_test, y_pred)

        mlflow.log_metrics(
            {
                'accuracy': accuracy,
                'f1_score': f1,
                'precision': precision,
                'recall': recall
            }
        )

        # The artifact path must be exactly "model": later cells load and
        # register the best model through the URI runs:/<run_id>/model.
        mlflow.sklearn.log_model(
            sk_model=rf,
            artifact_path='model',
        )
        mlflow.log_artifact(
            scaler_path,
            artifact_path='preprocessor'
        )

        # Strict ">" keeps the earliest combination when accuracies tie.
        if accuracy > best_accuracy:
            best_accuracy = accuracy
            best_params = params
            best_run_id = run.info.run_id





In [7]:
# Display the MLflow run id recorded for the highest-accuracy run of the grid search.
best_run_id

'da28a89148d54b40bec9ed6dfd6850db'

In [8]:
# Display the hyper-parameter combination that achieved the highest test accuracy.
best_params

{'n_estimators': 100, 'max_depth': 5}

In [9]:
# Reload the model logged by the best run, addressed through its run-relative
# artifact URI.
best_model_uri = f"runs:/{best_run_id}/model"
model = mlflow.sklearn.load_model(model_uri=best_model_uri)

In [10]:
# Re-score the reloaded model on the held-out split and report its accuracy.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print('Accuracy of the best model:', accuracy)

Accuracy of the best model: 0.9649122807017544


In [11]:
# Register the best run's model in the MLflow Model Registry.
# NOTE(review): the registry name is spelled "brest" (presumably meant to be
# "breast"); it is left unchanged because the cell that loads the registered
# model looks it up under this exact name.
best_model_uri = f"runs:/{best_run_id}/model"
mlflow.register_model(model_uri=best_model_uri, name="brest_cancer_predictor")

Successfully registered model 'brest_cancer_predictor'.
Created version '1' of model 'brest_cancer_predictor'.


<ModelVersion: aliases=[], creation_timestamp=1761138005690, current_stage='None', description=None, last_updated_timestamp=1761138005690, name='brest_cancer_predictor', run_id='da28a89148d54b40bec9ed6dfd6850db', run_link=None, source='file:///Users/dongnd/MLOps/mlops_tutorial_02/mlruns/443590215586828930/da28a89148d54b40bec9ed6dfd6850db/artifacts/model', status='READY', status_message=None, tags={}, user_id=None, version=1>

In [None]:
# Load version 1 of the registered model from the Model Registry.
registered_model = mlflow.sklearn.load_model(model_uri="models:/brest_cancer_predictor/1")

In [13]:
# Score the registry-loaded model on the held-out split and report its accuracy.
y_pred = registered_model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print('Accuracy of the registered model:', accuracy)

Accuracy of the registered model: 0.9649122807017544
