In [None]:
import os
import datetime
import requests

import pandas as pd
import numpy as np

import sklearn

import mlflow
import mlflow.sklearn
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn import datasets

from wrapper import SklearnModelWrapper

In [None]:
# !pip install mlflow minio scikit-learn pandas numpy boto3

## Запустим клиент MLflow

In [None]:
# Connection settings for the MLflow tracking server and the S3 endpoint used
# for artifacts. Every literal below is only a fallback: os.environ.setdefault
# keeps a value that is already exported in the environment, so real endpoints
# and credentials can be injected from outside the notebook instead of being
# overwritten by the placeholders here.
tracking_uri = os.environ.setdefault("MLFLOW_TRACKING_URI", "http://95.216.168.89:5000")
os.environ.setdefault("MLFLOW_S3_ENDPOINT_URL", "http://95.216.168.89:19001")
os.environ.setdefault("AWS_ACCESS_KEY_ID", "IAM_ACCESS_KEY")
os.environ.setdefault("AWS_SECRET_ACCESS_KEY", "IAM_SECRET_KEY")

# Point the fluent API at the resolved URI and create the low-level client
# used by the following cells.
mlflow.set_tracking_uri(tracking_uri)
client = mlflow.tracking.MlflowClient()

### Получим эксперимент, в который будут записываться все данные

In [None]:
# The experiment is expected to exist already (created by hand in the MLflow UI),
# so it is only looked up here and the cell fails fast when the lookup yields None.
# For the pyfunc variant, look up "iris_pyfunc" instead of "iris_sklearn".
experiment = client.get_experiment_by_name(name="iris_sklearn")
assert experiment is not None, "Not found experiment"

In [None]:
# Load the iris dataset; the features are the columns from index 2 onward of
# the data matrix and the target is the dataset's label array.
iris = datasets.load_iris()
x, y = iris.data[:, 2:], iris.target

# Hold out 20% of the samples for evaluation; the fixed seed keeps the split
# identical across re-runs.
X_train, X_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=7,
)

In [None]:
# One parent run groups a small sweep over the forest size; every candidate
# gets its own nested child run holding its parameter, model and metric.
with mlflow.start_run(experiment_id=experiment.experiment_id, run_name='iris') as run:
    for num_estimators in [100, 120]:
        with mlflow.start_run(experiment_id=experiment.experiment_id, nested=True) as nested:
            mlflow.log_param("num_estimators", num_estimators)

            # Seed the forest (same seed as the train/test split) so that
            # re-running the cell reproduces the logged model and metric.
            rf = RandomForestRegressor(n_estimators=num_estimators, random_state=7)
            rf.fit(X_train, y_train)
            predictions = rf.predict(X_test)

            # Only consumed by the pyfunc alternative kept below for reference.
            rf_wrapper = SklearnModelWrapper(model=rf)

            # Log with the sklearn flavour (default artifact location) and
            # register the result under the "iris_sklearn" model name.
            mlflow.sklearn.log_model(rf, "random-forest-model", registered_model_name="iris_sklearn")
            # pyfunc alternative:
            # mlflow.pyfunc.log_model("model",
            #                         python_model=rf_wrapper,
            #                         code_path=["/root/9/9_practice/wrapper.py"])

            mse = mean_squared_error(y_test, predictions)
            mlflow.log_metric("mse", mse)
            