In [1]:
import numpy as np
import sklearn

In [2]:
from sklearn.datasets import load_digits

X, y = load_digits(return_X_y=True)

In [3]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.6)
X_val, X_test, y_val, y_test = train_test_split(X_test, y_test , train_size=0.5)

In [31]:
import mlflow

def train(model, X_train, X_val, y_train, y_val):
    with mlflow.start_run() as run:
        model.fit(X_train, y_train)
        y_pred = model.predict(X_val)
        model_name = "classif_model"
        classif = sklearn.metrics.classification_report(y_val, y_pred, output_dict=True)
        mlflow.log_dict(classif, "classification_report.json")
        mlflow.log_metric("accuracy", classif["accuracy"])
        print(f"\n{classif['accuracy']}")
        run_id = run.info.run_id
        mlflow.sklearn.log_model(model, model_name)
    mlflow.end_run()
    uri = f"runs:/{run_id}/{model_name}"
    return model, uri

In [49]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

models_list = []

model, uri = train(RandomForestClassifier(n_estimators=200, max_depth=5),
                   X_train, X_val, y_train, y_val)
models_list.append(uri)

model, uri = train(RandomForestClassifier(n_estimators=200, max_depth=5),
                   X_train, X_val, y_train, y_val)
models_list.append(uri)

model, uri = train(LogisticRegression(),
                   X_train, X_val, y_train, y_val)
models_list.append(uri)

model, uri = train(SVC(),
                   X_train, X_val, y_train, y_val)
models_list.append(uri)


0.935933147632312

0.9331476323119777


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(



0.9498607242339833

0.9749303621169917


In [60]:
%cd ..
from mlflow_ensemble.ensemble_model import Ensemble
%cd exemple
from sklearn.preprocessing import OneHotEncoder
import numpy as np

def stack_and_onehot_encode(scores):
    new_scores_l= []
    for score in scores:
        n_score = score
        if len(score.shape) == 1:
            enc = OneHotEncoder()
            enc.fit([[i] for i in range(10)])
            n_score = enc.transform(np.expand_dims(score, -1)).toarray()

        new_scores_l.append(np.expand_dims(n_score, axis=-1))
    return np.concatenate(new_scores_l, axis=-1)

with mlflow.start_run() as run:
    model_name = "ensemble_of_models"
    ensemble = Ensemble(models_list, stack_scores=stack_and_onehot_encode)
    y_pred = ensemble.predict(None, X_val)
    classif = sklearn.metrics.classification_report(y_val, y_pred, output_dict=True)
    mlflow.log_dict(classif, "classification_report.json")
    mlflow.log_metric("accuracy", classif["accuracy"])
    print(f"\n{classif['accuracy']}")
    run_id = run.info.run_id
    mlflow.pyfunc.log_model(model_name, python_model=ensemble, code_path=["../mlflow_ensemble/"])
    uri = f"runs:/{run_id}/{model_name}"
mlflow.end_run()

/home/alex/Documents/MLflow_ensemble
/home/alex/Documents/MLflow_ensemble/exemple

0.9693593314763231


In [63]:
uri

'runs:/694016acca8d426c80b9aec4780e09a9/ensemble_of_models'

In [62]:
model = mlflow.pyfunc.load_model(uri)
y_pred = model.predict(X_val)
print(sklearn.metrics.classification_report(y_val, y_pred, output_dict=True)['accuracy'])

0.9693593314763231


In [44]:
model._predict_fn

<bound method BaseSVC.predict of SVC()>

In [17]:
models_list

['runs:/3c7be5215a9747e8b5c756dd86b78505/classif_model',
 'runs:/9171e646440a469db55546c00a1c533d/classif_model',
 'runs:/e95a8b3065cb4c34b86350a0d763b33c/classif_model',
 'runs:/3acd297f62e247688de67546503e8b3e/classif_model']