In [1]:
import numpy as np
import sklearn

In [2]:
from sklearn.datasets import load_digits

X, y = load_digits(return_X_y=True)

In [3]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.6)
X_val, X_test, y_val, y_test = train_test_split(X_test, y_test , train_size=0.5)

In [4]:
import mlflow


def log_metrics(true, pred):
    classif = sklearn.metrics.classification_report(true, pred, output_dict=True)
    mlflow.log_dict(classif, "classification_report.json")
    mlflow.log_metric("accuracy", classif["accuracy"])
    print(f"\n{classif['accuracy']}")


def train(model, X_train, X_val, y_train, y_val):
    with mlflow.start_run() as run:
        model.fit(X_train, y_train)
        y_pred = model.predict(X_val)
        model_name = "classif_model"
        log_metrics(y_val, y_pred)
        run_id = run.info.run_id
        mlflow.sklearn.log_model(model, model_name)
    mlflow.end_run()
    uri = f"runs:/{run_id}/{model_name}"
    return model, uri

## Models training

In [5]:
from sklearn.ensemble import RandomForestClassifier
models_list = []

model, uri = train(RandomForestClassifier(n_estimators=200, max_depth=5),
                   X_train, X_val, y_train, y_val)
models_list.append(uri)
model, uri = train(RandomForestClassifier(n_estimators=50, max_depth=5),
                   X_train, X_val, y_train, y_val)
models_list.append(uri)
models_list_2RF = models_list


0.947075208913649





0.9387186629526463


## From a list of models, we can create an ensemble model
By default it expects all models to give same format outputs, it makes prediction by averaging the models scores.

In [6]:
%cd ..
from mlflow_ensemble.ensemble_model import Ensemble
%cd exemple

with mlflow.start_run() as run:
    model_name = "ensemble_of_models"

    ensemble = Ensemble(models_list)
    y_pred = ensemble.predict(None, X_val)
    log_metrics(y_val, y_pred)
    mlflow.pyfunc.log_model(model_name, python_model=ensemble,
                            pip_requirements=["scikit-learn"],
                            code_path=["../mlflow_ensemble/"])
    uri = f"runs:/{run.info.run_id}/{model_name}"
mlflow.end_run()

/home/alex/Documents/MLflow_ensemble
/home/alex/Documents/MLflow_ensemble/exemple

0.9498607242339833


## Model loading and inference
After ensemble creation and logging, you can load it and use it for predictions.

In [7]:
model = mlflow.pyfunc.load_model(uri)
print(sklearn.metrics.classification_report(y_val, y_pred, output_dict=False))
y_pred = model.predict(X_test)
print(sklearn.metrics.classification_report(y_test, y_pred, output_dict=False))

              precision    recall  f1-score   support

           0       0.97      0.97      0.97        37
           1       0.94      0.97      0.96        33
           2       0.97      0.89      0.93        38
           3       0.93      0.98      0.95        41
           4       0.88      0.97      0.92        30
           5       0.95      0.97      0.96        40
           6       0.98      0.98      0.98        43
           7       0.92      1.00      0.96        33
           8       0.97      0.89      0.93        35
           9       1.00      0.86      0.93        29

    accuracy                           0.95       359
   macro avg       0.95      0.95      0.95       359
weighted avg       0.95      0.95      0.95       359

              precision    recall  f1-score   support

           0       0.95      0.97      0.96        37
           1       0.89      0.95      0.92        41
           2       1.00      1.00      1.00        41
           3       0.94 

## More models and different outputs
Here we train more models, from different natures.

In [8]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

models_list = []

model, uri = train(RandomForestClassifier(n_estimators=200, max_depth=5),
                   X_train, X_val, y_train, y_val)
models_list.append(uri)

model, uri = train(RandomForestClassifier(n_estimators=200, max_depth=5),
                   X_train, X_val, y_train, y_val)
models_list.append(uri)

model, uri = train(LogisticRegression(),
                   X_train, X_val, y_train, y_val)
models_list.append(uri)

model, uri = train(SVC(),
                   X_train, X_val, y_train, y_val)
models_list.append(uri)


0.9498607242339833

0.9498607242339833


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(



0.9665738161559888

0.9972144846796658


## Custom stacking function
Here, the SVC model can't output class probabilities (predict_proba() not available so ensemble model calls predict()).
Here is an exemple where we give a custom scores stacking function to ensure the default ensembling method (averaging then argmax) still works. If a models output is a 1d array, we one-hot encode it in a 2d array.

In [9]:
%cd ..
from mlflow_ensemble.ensemble_model import Ensemble
%cd exemple
from sklearn.preprocessing import OneHotEncoder
import numpy as np


def stack_with_onehot(scores):
    new_scores= []
    for score in scores:
        print(score.shape)
        if len(score.shape) == 1:
            print("OneHotEncoding")
            enc = OneHotEncoder()
            enc.fit([[i] for i in range(np.min(score), np.max(score) + 1)])
            score = enc.transform(np.expand_dims(score, -1)).toarray()
        new_scores.append(score)

    scores = np.array(new_scores)
    print(scores.shape)
    return scores

with mlflow.start_run() as run:
    model_name = "ensemble_of_models"

    ensemble = Ensemble(models_list, stack_scores=stack_with_onehot)

    y_pred = ensemble.predict(None, X_val)
    log_metrics(y_val, y_pred)
    mlflow.pyfunc.log_model(model_name, python_model=ensemble,
                            pip_requirements=["scikit-learn"],
                            code_path=["../mlflow_ensemble/"])
    uri = f"runs:/{run.info.run_id}/{model_name}"
mlflow.end_run()

/home/alex/Documents/MLflow_ensemble
/home/alex/Documents/MLflow_ensemble/exemple
(359, 10)
(359, 10)
(359, 10)
(359,)
OneHotEncoding
(4, 359, 10)

0.9944289693593314


In [10]:
model = mlflow.pyfunc.load_model(uri)
y_pred = model.predict(X_val)
print(sklearn.metrics.classification_report(y_val, y_pred, output_dict=False))
y_pred = model.predict(X_test)
print(sklearn.metrics.classification_report(y_test, y_pred, output_dict=False))

(359, 10)
(359, 10)
(359, 10)
(359,)
OneHotEncoding
(4, 359, 10)
              precision    recall  f1-score   support

           0       1.00      0.97      0.99        37
           1       1.00      1.00      1.00        33
           2       1.00      1.00      1.00        38
           3       1.00      1.00      1.00        41
           4       0.97      1.00      0.98        30
           5       1.00      1.00      1.00        40
           6       1.00      0.98      0.99        43
           7       1.00      1.00      1.00        33
           8       0.97      1.00      0.99        35
           9       1.00      1.00      1.00        29

    accuracy                           0.99       359
   macro avg       0.99      0.99      0.99       359
weighted avg       0.99      0.99      0.99       359

(360, 10)
(360, 10)
(360, 10)
(360,)
OneHotEncoding
(4, 360, 10)
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        37
  

## Meta model
We can train a new model on top of models scores. Be carefull, It may overfit easily.

In [11]:
%cd ..
from mlflow_ensemble.ensemble_model import Ensemble
%cd exemple
import numpy as np

def hstack_with_onehot(scores):
    new_scores_l= []
    for score in scores:
        n_score = score
        if len(score.shape) == 1:
            enc = OneHotEncoder()
            enc.fit([[i] for i in range(10)])
            n_score = enc.transform(np.expand_dims(score, -1)).toarray()
        new_scores_l.append(n_score)
    scores = np.array(new_scores_l)
    scores = np.reshape(scores, (scores.shape[1], -1))
    return scores


def hstack_class_preds(scores):
    new_scores_l= []
    for score in scores:
        if len(score.shape) > 1:
            score = np.argmax(score, -1)
        new_scores_l.append(score)
    scores = np.array(new_scores_l)
    scores = np.reshape(scores, (scores.shape[1], -1))
    return scores


ensemble = Ensemble(models_list, ensemble_method="meta_model", stack_scores=hstack_with_onehot)
ensemble.fit(RandomForestClassifier(n_estimators=200, max_depth=8), X_val, y_val, force_scores_compute=True)
y_pred = ensemble.predict(None, X_val)
print(sklearn.metrics.classification_report(y_val, y_pred, output_dict=False))
y_pred = ensemble.predict(None, X_test)
print(sklearn.metrics.classification_report(y_test, y_pred, output_dict=False))

/home/alex/Documents/MLflow_ensemble
/home/alex/Documents/MLflow_ensemble/exemple
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        37
           1       0.97      1.00      0.99        33
           2       1.00      1.00      1.00        38
           3       1.00      1.00      1.00        41
           4       1.00      1.00      1.00        30
           5       1.00      1.00      1.00        40
           6       1.00      1.00      1.00        43
           7       1.00      1.00      1.00        33
           8       1.00      1.00      1.00        35
           9       1.00      0.97      0.98        29

    accuracy                           1.00       359
   macro avg       1.00      1.00      1.00       359
weighted avg       1.00      1.00      1.00       359

              precision    recall  f1-score   support

           0       0.07      0.08      0.07        37
           1       0.12      0.07      0.09       

## Weighting average
One can create a custom weigthed average meta-model, wich we can optimize with optuna
We just have to specify fit() and predict().
Be sure to also specify optuna in requirements when you log the ensemble model.

In [12]:
import optuna
import sklearn.metrics
import numpy as np


class WeightAvg:
    def __init__(self):
        self.weights = None

    def fit(self, X, y):
        print(X.shape)
        def objective(trial):
            weights = []
            for i in range(X.shape[0]):
                weights.append(trial.suggest_float(f"x_{i}", 0, 1))
            pred = np.average(X, weights=weights, axis=0)
            pred = np.argmax(pred, -1)
            return sklearn.metrics.accuracy_score(y, pred)

        study = optuna.create_study(direction="maximize")
        study.optimize(objective, n_trials=150)
        print(study.best_params)
        self.weights = list(study.best_params.values())
        return self

    def predict(self, X):
        scores = np.average(X, weights=self.weights, axis=0)
        return np.argmax(scores, -1)

def stack_with_onehot(scores):
    new_scores_l= []
    for score in scores:
        n_score = score
        if len(score.shape) == 1:
            enc = OneHotEncoder()
            enc.fit([[i] for i in range(10)])
            n_score = enc.transform(np.expand_dims(score, -1)).toarray()
        new_scores_l.append(n_score)
    scores = np.array(new_scores_l)
    return scores

with mlflow.start_run() as run:
    model_name = "optuna_ensemble"

    ensemble = Ensemble(models_list, ensemble_method="meta_model",
                        stack_scores=stack_with_onehot)
    ensemble.fit(WeightAvg(), X_val, y_val, force_scores_compute=True)
    y_pred = ensemble.predict(None, X_val)
    print(sklearn.metrics.accuracy_score(y_val, y_pred))

    y_pred = ensemble.predict(None, X_test)
    log_metrics(y_test, y_pred)
    mlflow.pyfunc.log_model(model_name, python_model=ensemble,
                            pip_requirements=["scikit-learn", "optuna"],
                            code_path=["../mlflow_ensemble/"])
    uri = f"runs:/{run.info.run_id}/{model_name}"
mlflow.end_run()

[32m[I 2022-12-12 17:46:32,706][0m A new study created in memory with name: no-name-f1d4930c-f20f-4ab1-afbf-862b3889803f[0m
[32m[I 2022-12-12 17:46:32,710][0m Trial 0 finished with value: 0.9805013927576601 and parameters: {'x_0': 0.679614986596895, 'x_1': 0.23861832368024705, 'x_2': 0.8220517193605509, 'x_3': 0.5054519097141983}. Best is trial 0 with value: 0.9805013927576601.[0m
[32m[I 2022-12-12 17:46:32,712][0m Trial 1 finished with value: 0.9832869080779945 and parameters: {'x_0': 0.1504298538467277, 'x_1': 0.29820146349610654, 'x_2': 0.9340464969048645, 'x_3': 0.7024013167295257}. Best is trial 1 with value: 0.9832869080779945.[0m
[32m[I 2022-12-12 17:46:32,714][0m Trial 2 finished with value: 0.9972144846796658 and parameters: {'x_0': 0.9327162169547715, 'x_1': 0.785062759512272, 'x_2': 0.07663807406041412, 'x_3': 0.9299024439876066}. Best is trial 2 with value: 0.9972144846796658.[0m
[32m[I 2022-12-12 17:46:32,717][0m Trial 3 finished with value: 0.972144846796657

(4, 359, 10)


[32m[I 2022-12-12 17:46:32,903][0m Trial 15 finished with value: 0.9972144846796658 and parameters: {'x_0': 0.6005873396642156, 'x_1': 0.9895139709535713, 'x_2': 0.455701264318713, 'x_3': 0.8729114127677584}. Best is trial 2 with value: 0.9972144846796658.[0m
[32m[I 2022-12-12 17:46:32,940][0m Trial 16 finished with value: 0.9944289693593314 and parameters: {'x_0': 0.843080348600729, 'x_1': 0.35138039685105815, 'x_2': 0.19693695537439448, 'x_3': 0.3746906752025694}. Best is trial 2 with value: 0.9972144846796658.[0m
[32m[I 2022-12-12 17:46:32,960][0m Trial 17 finished with value: 0.9972144846796658 and parameters: {'x_0': 0.7819683827419739, 'x_1': 0.6240555827037012, 'x_2': 0.3996842008740444, 'x_3': 0.679690026781487}. Best is trial 2 with value: 0.9972144846796658.[0m
[32m[I 2022-12-12 17:46:32,982][0m Trial 18 finished with value: 0.9972144846796658 and parameters: {'x_0': 0.6084823609247133, 'x_1': 0.9851529462244442, 'x_2': 0.46944124999486214, 'x_3': 0.596678632835099

{'x_0': 0.9327162169547715, 'x_1': 0.785062759512272, 'x_2': 0.07663807406041412, 'x_3': 0.9299024439876066}
0.9972144846796658

0.9805555555555555
